aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/keys.txt39
-rw-r--r--Documentation/kmemleak.txt31
-rw-r--r--Documentation/s390/s390dbf.txt7
-rw-r--r--Documentation/sysctl/kernel.txt16
-rw-r--r--MAINTAINERS12
-rw-r--r--arch/alpha/include/asm/thread_info.h5
-rw-r--r--arch/alpha/kernel/signal.c8
-rw-r--r--arch/arm/include/asm/thread_info.h3
-rw-r--r--arch/arm/kernel/entry-common.S2
-rw-r--r--arch/arm/kernel/signal.c8
-rw-r--r--arch/avr32/include/asm/thread_info.h6
-rw-r--r--arch/avr32/kernel/entry-avr32b.S2
-rw-r--r--arch/avr32/kernel/signal.c8
-rw-r--r--arch/cris/kernel/ptrace.c8
-rw-r--r--arch/frv/kernel/signal.c2
-rw-r--r--arch/h8300/include/asm/thread_info.h2
-rw-r--r--arch/h8300/kernel/signal.c8
-rw-r--r--arch/ia64/kernel/process.c2
-rw-r--r--arch/m32r/include/asm/thread_info.h2
-rw-r--r--arch/m32r/kernel/signal.c8
-rw-r--r--arch/mips/include/asm/thread_info.h2
-rw-r--r--arch/mips/kernel/signal.c8
-rw-r--r--arch/mn10300/kernel/signal.c2
-rw-r--r--arch/parisc/include/asm/thread_info.h4
-rw-r--r--arch/parisc/kernel/entry.S2
-rw-r--r--arch/parisc/kernel/signal.c8
-rw-r--r--arch/s390/Kconfig8
-rw-r--r--arch/s390/Makefile3
-rw-r--r--arch/s390/hypfs/inode.c6
-rw-r--r--arch/s390/include/asm/atomic.h205
-rw-r--r--arch/s390/include/asm/checksum.h25
-rw-r--r--arch/s390/include/asm/chsc.h28
-rw-r--r--arch/s390/include/asm/cio.h223
-rw-r--r--arch/s390/include/asm/cpu.h26
-rw-r--r--arch/s390/include/asm/cpuid.h25
-rw-r--r--arch/s390/include/asm/debug.h9
-rw-r--r--arch/s390/include/asm/hardirq.h7
-rw-r--r--arch/s390/include/asm/ipl.h5
-rw-r--r--arch/s390/include/asm/kvm_host.h6
-rw-r--r--arch/s390/include/asm/kvm_virtio.h10
-rw-r--r--arch/s390/include/asm/lowcore.h6
-rw-r--r--arch/s390/include/asm/mmu.h1
-rw-r--r--arch/s390/include/asm/page.h4
-rw-r--r--arch/s390/include/asm/pgalloc.h1
-rw-r--r--arch/s390/include/asm/processor.h4
-rw-r--r--arch/s390/include/asm/scatterlist.h20
-rw-r--r--arch/s390/include/asm/scsw.h (renamed from drivers/s390/cio/scsw.c)345
-rw-r--r--arch/s390/include/asm/setup.h2
-rw-r--r--arch/s390/include/asm/smp.h32
-rw-r--r--arch/s390/include/asm/system.h4
-rw-r--r--arch/s390/include/asm/timex.h14
-rw-r--r--arch/s390/kernel/Makefile4
-rw-r--r--arch/s390/kernel/early.c74
-rw-r--r--arch/s390/kernel/entry.S16
-rw-r--r--arch/s390/kernel/entry64.S4
-rw-r--r--arch/s390/kernel/head.S1
-rw-r--r--arch/s390/kernel/head31.S1
-rw-r--r--arch/s390/kernel/head64.S9
-rw-r--r--arch/s390/kernel/ipl.c166
-rw-r--r--arch/s390/kernel/mcount.S147
-rw-r--r--arch/s390/kernel/mcount64.S78
-rw-r--r--arch/s390/kernel/setup.c10
-rw-r--r--arch/s390/kernel/signal.c2
-rw-r--r--arch/s390/kernel/smp.c39
-rw-r--r--arch/s390/kernel/suspend.c (renamed from arch/s390/power/swsusp.c)35
-rw-r--r--arch/s390/kernel/swsusp_asm64.S (renamed from arch/s390/power/swsusp_asm64.S)2
-rw-r--r--arch/s390/kernel/time.c3
-rw-r--r--arch/s390/kernel/vmlinux.lds.S87
-rw-r--r--arch/s390/mm/Makefile4
-rw-r--r--arch/s390/mm/fault.c13
-rw-r--r--arch/s390/mm/page-states.c6
-rw-r--r--arch/s390/mm/pgtable.c24
-rw-r--r--arch/s390/mm/vmem.c1
-rw-r--r--arch/s390/power/Makefile8
-rw-r--r--arch/s390/power/suspend.c40
-rw-r--r--arch/s390/power/swsusp_64.c17
-rw-r--r--arch/sh/kernel/signal_32.c2
-rw-r--r--arch/sh/kernel/signal_64.c2
-rw-r--r--arch/sparc/kernel/signal_32.c2
-rw-r--r--arch/sparc/kernel/signal_64.c3
-rw-r--r--arch/x86/kernel/aperture_64.c6
-rw-r--r--arch/x86/kernel/pci-dma.c6
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c14
-rw-r--r--block/blk-core.c1
-rw-r--r--drivers/block/aoe/aoeblk.c1
-rw-r--r--drivers/char/hvc_iucv.c2
-rw-r--r--drivers/char/mem.c1
-rw-r--r--drivers/char/tpm/tpm_tis.c12
-rw-r--r--drivers/infiniband/core/iwcm.c1
-rw-r--r--drivers/infiniband/core/mad.c35
-rw-r--r--drivers/infiniband/core/mad_priv.h3
-rw-r--r--drivers/infiniband/core/multicast.c10
-rw-r--r--drivers/infiniband/core/sa_query.c7
-rw-r--r--drivers/infiniband/core/smi.c8
-rw-r--r--drivers/infiniband/core/uverbs_main.c10
-rw-r--r--drivers/infiniband/hw/amso1100/c2.c6
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.c24
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c5
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c37
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c68
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h9
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c21
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c52
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c8
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c6
-rw-r--r--drivers/infiniband/hw/ehca/ehca_sqp.c47
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c2
-rw-r--r--drivers/infiniband/hw/mlx4/main.c12
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h1
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c12
-rw-r--r--drivers/infiniband/hw/mthca/mthca_catas.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_config_reg.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c17
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c8
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c12
-rw-r--r--drivers/infiniband/hw/mthca/mthca_reset.c1
-rw-r--r--drivers/infiniband/hw/nes/nes.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c128
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c767
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h103
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c5
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c204
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h16
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c21
-rw-r--r--drivers/md/dm-log-userspace-base.c2
-rw-r--r--drivers/net/cxgb3/cxgb3_main.c6
-rw-r--r--drivers/net/cxgb3/cxgb3_offload.c6
-rw-r--r--drivers/net/cxgb3/cxgb3_offload.h8
-rw-r--r--drivers/net/mlx4/cq.c1
-rw-r--r--drivers/net/mlx4/eq.c77
-rw-r--r--drivers/net/mlx4/icm.c1
-rw-r--r--drivers/net/mlx4/main.c37
-rw-r--r--drivers/net/mlx4/mcg.c1
-rw-r--r--drivers/net/mlx4/mlx4.h7
-rw-r--r--drivers/net/mlx4/mr.c1
-rw-r--r--drivers/net/mlx4/pd.c1
-rw-r--r--drivers/net/mlx4/profile.c2
-rw-r--r--drivers/net/mlx4/qp.c2
-rw-r--r--drivers/net/mlx4/reset.c1
-rw-r--r--drivers/net/mlx4/srq.c2
-rw-r--r--drivers/net/tun.c22
-rw-r--r--drivers/s390/block/dasd.c26
-rw-r--r--drivers/s390/block/dasd_3990_erp.c2
-rw-r--r--drivers/s390/block/dasd_alias.c5
-rw-r--r--drivers/s390/block/dasd_diag.c5
-rw-r--r--drivers/s390/block/dasd_eckd.c47
-rw-r--r--drivers/s390/block/dasd_eer.c4
-rw-r--r--drivers/s390/block/dasd_erp.c4
-rw-r--r--drivers/s390/block/dasd_fba.c9
-rw-r--r--drivers/s390/block/dasd_int.h11
-rw-r--r--drivers/s390/block/dasd_ioctl.c24
-rw-r--r--drivers/s390/block/xpram.c65
-rw-r--r--drivers/s390/char/Kconfig10
-rw-r--r--drivers/s390/char/Makefile1
-rw-r--r--drivers/s390/char/monreader.c2
-rw-r--r--drivers/s390/char/sclp.h4
-rw-r--r--drivers/s390/char/sclp_async.c224
-rw-r--r--drivers/s390/char/tape_34xx.c2
-rw-r--r--drivers/s390/char/tape_3590.c4
-rw-r--r--drivers/s390/char/tape_block.c12
-rw-r--r--drivers/s390/char/tape_core.c18
-rw-r--r--drivers/s390/char/tape_std.c2
-rw-r--r--drivers/s390/char/vmlogrdr.c4
-rw-r--r--drivers/s390/char/vmur.c19
-rw-r--r--drivers/s390/char/zcore.c2
-rw-r--r--drivers/s390/cio/Makefile2
-rw-r--r--drivers/s390/cio/chp.c3
-rw-r--r--drivers/s390/cio/chsc.h24
-rw-r--r--drivers/s390/cio/cio.c56
-rw-r--r--drivers/s390/cio/cio.h4
-rw-r--r--drivers/s390/cio/css.c32
-rw-r--r--drivers/s390/cio/device.c172
-rw-r--r--drivers/s390/cio/device_fsm.c22
-rw-r--r--drivers/s390/cio/qdio.h4
-rw-r--r--drivers/s390/cio/qdio_debug.c55
-rw-r--r--drivers/s390/cio/qdio_main.c4
-rw-r--r--drivers/s390/crypto/ap_bus.c17
-rw-r--r--drivers/s390/kvm/kvm_virtio.c8
-rw-r--r--drivers/s390/net/netiucv.c9
-rw-r--r--drivers/s390/net/smsgiucv.c6
-rw-r--r--drivers/scsi/cxgb3i/cxgb3i_init.c12
-rw-r--r--drivers/staging/comedi/comedi_fops.c8
-rw-r--r--drivers/staging/pohmelfs/inode.c9
-rw-r--r--fs/binfmt_elf.c28
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/char_dev.c1
-rw-r--r--fs/configfs/inode.c1
-rw-r--r--fs/ext2/acl.c8
-rw-r--r--fs/ext2/acl.h4
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext2/namei.c4
-rw-r--r--fs/ext3/acl.c8
-rw-r--r--fs/ext3/acl.h4
-rw-r--r--fs/ext3/file.c2
-rw-r--r--fs/ext3/namei.c4
-rw-r--r--fs/ext4/acl.c8
-rw-r--r--fs/ext4/acl.h4
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/fs-writeback.c1065
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/hugetlbfs/inode.c1
-rw-r--r--fs/jffs2/acl.c7
-rw-r--r--fs/jffs2/acl.h4
-rw-r--r--fs/jffs2/dir.c2
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/symlink.c2
-rw-r--r--fs/jfs/acl.c7
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/locks.c2
-rw-r--r--fs/namei.c88
-rw-r--r--fs/nfs/client.c1
-rw-r--r--fs/nfsd/auth.c4
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/ocfs2/dlm/dlmfs.c1
-rw-r--r--fs/open.c12
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/super.c5
-rw-r--r--fs/sync.c20
-rw-r--r--fs/sysfs/dir.c1
-rw-r--r--fs/sysfs/inode.c135
-rw-r--r--fs/sysfs/symlink.c2
-rw-r--r--fs/sysfs/sysfs.h12
-rw-r--r--fs/ubifs/budget.c16
-rw-r--r--fs/ubifs/super.c9
-rw-r--r--fs/xattr.c55
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c16
-rw-r--r--include/linux/backing-dev.h55
-rw-r--r--include/linux/cred.h69
-rw-r--r--include/linux/fs.h10
-rw-r--r--include/linux/key.h8
-rw-r--r--include/linux/keyctl.h1
-rw-r--r--include/linux/kmemcheck.h7
-rw-r--r--include/linux/kmemleak.h18
-rw-r--r--include/linux/lsm_audit.h12
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/security.h154
-rw-r--r--include/linux/shmem_fs.h2
-rw-r--r--include/linux/writeback.h23
-rw-r--r--include/linux/xattr.h1
-rw-r--r--kernel/acct.c8
-rw-r--r--kernel/cgroup.c1
-rw-r--r--kernel/cred.c293
-rw-r--r--kernel/exit.c4
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/kmod.c5
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/sysctl.c1
-rw-r--r--lib/Kconfig.debug15
-rw-r--r--lib/is_single_threaded.c61
-rw-r--r--mm/Makefile2
-rw-r--r--mm/backing-dev.c381
-rw-r--r--mm/bootmem.c6
-rw-r--r--mm/kmemleak.c336
-rw-r--r--mm/page-writeback.c182
-rw-r--r--mm/pdflush.c269
-rw-r--r--mm/shmem.c6
-rw-r--r--mm/shmem_acl.c11
-rw-r--r--mm/swap_state.c1
-rw-r--r--mm/vmscan.c2
-rw-r--r--net/core/dev.c2
-rw-r--r--net/ipv4/tcp_cong.c4
-rw-r--r--security/Makefile4
-rw-r--r--security/capability.c63
-rw-r--r--security/commoncap.c4
-rw-r--r--security/keys/Makefile1
-rw-r--r--security/keys/compat.c3
-rw-r--r--security/keys/gc.c194
-rw-r--r--security/keys/internal.h10
-rw-r--r--security/keys/key.c24
-rw-r--r--security/keys/keyctl.c161
-rw-r--r--security/keys/keyring.c85
-rw-r--r--security/keys/proc.c93
-rw-r--r--security/keys/process_keys.c69
-rw-r--r--security/keys/sysctl.c28
-rw-r--r--security/lsm_audit.c2
-rw-r--r--security/security.c62
-rw-r--r--security/selinux/avc.c205
-rw-r--r--security/selinux/hooks.c318
-rw-r--r--security/selinux/include/av_inherit.h1
-rw-r--r--security/selinux/include/av_perm_to_string.h1
-rw-r--r--security/selinux/include/av_permissions.h23
-rw-r--r--security/selinux/include/avc.h55
-rw-r--r--security/selinux/include/class_to_string.h1
-rw-r--r--security/selinux/include/flask.h1
-rw-r--r--security/selinux/include/netlabel.h4
-rw-r--r--security/selinux/include/xfrm.h8
-rw-r--r--security/selinux/netlabel.c2
-rw-r--r--security/selinux/ss/services.c142
-rw-r--r--security/selinux/xfrm.c4
-rw-r--r--security/smack/smack.h2
-rw-r--r--security/smack/smack_access.c11
-rw-r--r--security/smack/smack_lsm.c65
-rw-r--r--security/tomoyo/common.c30
-rw-r--r--security/tomoyo/common.h2
-rw-r--r--security/tomoyo/domain.c42
-rw-r--r--security/tomoyo/tomoyo.c27
-rw-r--r--security/tomoyo/tomoyo.h3
313 files changed, 6646 insertions, 3529 deletions
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index b56aacc1fff8..e4dbbdb1bd96 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -26,7 +26,7 @@ This document has the following sections:
26 - Notes on accessing payload contents 26 - Notes on accessing payload contents
27 - Defining a key type 27 - Defining a key type
28 - Request-key callback service 28 - Request-key callback service
29 - Key access filesystem 29 - Garbage collection
30 30
31 31
32============ 32============
@@ -113,6 +113,9 @@ Each key has a number of attributes:
113 113
114 (*) Dead. The key's type was unregistered, and so the key is now useless. 114 (*) Dead. The key's type was unregistered, and so the key is now useless.
115 115
116Keys in the last three states are subject to garbage collection. See the
117section on "Garbage collection".
118
116 119
117==================== 120====================
118KEY SERVICE OVERVIEW 121KEY SERVICE OVERVIEW
@@ -754,6 +757,26 @@ The keyctl syscall functions are:
754 successful. 757 successful.
755 758
756 759
760 (*) Install the calling process's session keyring on its parent.
761
762 long keyctl(KEYCTL_SESSION_TO_PARENT);
763
764 This function attempts to install the calling process's session keyring
765 on to the calling process's parent, replacing the parent's current session
766 keyring.
767
768 The calling process must have the same ownership as its parent, the
769 keyring must have the same ownership as the calling process, the calling
770 process must have LINK permission on the keyring and the active LSM module
771 mustn't deny permission, otherwise error EPERM will be returned.
772
773 Error ENOMEM will be returned if there was insufficient memory to complete
774 the operation, otherwise 0 will be returned to indicate success.
775
776 The keyring will be replaced next time the parent process leaves the
777 kernel and resumes executing userspace.
778
779
757=============== 780===============
758KERNEL SERVICES 781KERNEL SERVICES
759=============== 782===============
@@ -1231,3 +1254,17 @@ by executing:
1231 1254
1232In this case, the program isn't required to actually attach the key to a ring; 1255In this case, the program isn't required to actually attach the key to a ring;
1233the rings are provided for reference. 1256the rings are provided for reference.
1257
1258
1259==================
1260GARBAGE COLLECTION
1261==================
1262
1263Dead keys (for which the type has been removed) will be automatically unlinked
1264from those keyrings that point to them and deleted as soon as possible by a
1265background garbage collector.
1266
1267Similarly, revoked and expired keys will be garbage collected, but only after a
1268certain amount of time has passed. This time is set as a number of seconds in:
1269
1270 /proc/sys/kernel/keys/gc_delay
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
index 89068030b01b..34f6638aa5ac 100644
--- a/Documentation/kmemleak.txt
+++ b/Documentation/kmemleak.txt
@@ -27,6 +27,13 @@ To trigger an intermediate memory scan:
27 27
28 # echo scan > /sys/kernel/debug/kmemleak 28 # echo scan > /sys/kernel/debug/kmemleak
29 29
30To clear the list of all current possible memory leaks:
31
32 # echo clear > /sys/kernel/debug/kmemleak
33
34New leaks will then come up upon reading /sys/kernel/debug/kmemleak
35again.
36
30Note that the orphan objects are listed in the order they were allocated 37Note that the orphan objects are listed in the order they were allocated
31and one object at the beginning of the list may cause other subsequent 38and one object at the beginning of the list may cause other subsequent
32objects to be reported as orphan. 39objects to be reported as orphan.
@@ -42,6 +49,9 @@ Memory scanning parameters can be modified at run-time by writing to the
42 scan=<secs> - set the automatic memory scanning period in seconds 49 scan=<secs> - set the automatic memory scanning period in seconds
43 (default 600, 0 to stop the automatic scanning) 50 (default 600, 0 to stop the automatic scanning)
44 scan - trigger a memory scan 51 scan - trigger a memory scan
52 clear - clear list of current memory leak suspects, done by
53 marking all current reported unreferenced objects grey
54 dump=<addr> - dump information about the object found at <addr>
45 55
46Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on 56Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
47the kernel command line. 57the kernel command line.
@@ -86,6 +96,27 @@ avoid this, kmemleak can also store the number of values pointing to an
86address inside the block address range that need to be found so that the 96address inside the block address range that need to be found so that the
87block is not considered a leak. One example is __vmalloc(). 97block is not considered a leak. One example is __vmalloc().
88 98
99Testing specific sections with kmemleak
100---------------------------------------
101
102Upon initial bootup your /sys/kernel/debug/kmemleak output page may be
103quite extensive. This can also be the case if you have very buggy code
104when doing development. To work around these situations you can use the
105'clear' command to clear all reported unreferenced objects from the
106/sys/kernel/debug/kmemleak output. By issuing a 'scan' after a 'clear'
107you can find new unreferenced objects; this should help with testing
108specific sections of code.
109
110To test a critical section on demand with a clean kmemleak do:
111
112 # echo clear > /sys/kernel/debug/kmemleak
113 ... test your kernel or modules ...
114 # echo scan > /sys/kernel/debug/kmemleak
115
116Then, as usual, get your report with:
117
118 # cat /sys/kernel/debug/kmemleak
119
89Kmemleak API 120Kmemleak API
90------------ 121------------
91 122
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
index 2d10053dd97e..ae66f9b90a25 100644
--- a/Documentation/s390/s390dbf.txt
+++ b/Documentation/s390/s390dbf.txt
@@ -495,6 +495,13 @@ and for each vararg a long value. So e.g. for a debug entry with a format
495string plus two varargs one would need to allocate a (3 * sizeof(long)) 495string plus two varargs one would need to allocate a (3 * sizeof(long))
496byte data area in the debug_register() function. 496byte data area in the debug_register() function.
497 497
498IMPORTANT: Using "%s" in sprintf event functions is dangerous. You can only
499use "%s" in the sprintf event functions, if the memory for the passed string is
500available as long as the debug feature exists. The reason behind this is that
501due to performance considerations only a pointer to the string is stored in
502the debug feature. If you log a string that is freed afterwards, you will get
503an OOPS when inspecting the debug feature, because then the debug feature will
504access the already freed memory.
498 505
499NOTE: If using the sprintf view do NOT use other event/exception functions 506NOTE: If using the sprintf view do NOT use other event/exception functions
500than the sprintf-event and -exception functions. 507than the sprintf-event and -exception functions.
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 322a00bb99d9..2dbff53369d0 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -19,6 +19,7 @@ Currently, these files might (depending on your configuration)
19show up in /proc/sys/kernel: 19show up in /proc/sys/kernel:
20- acpi_video_flags 20- acpi_video_flags
21- acct 21- acct
22- callhome [ S390 only ]
22- auto_msgmni 23- auto_msgmni
23- core_pattern 24- core_pattern
24- core_uses_pid 25- core_uses_pid
@@ -91,6 +92,21 @@ valid for 30 seconds.
91 92
92============================================================== 93==============================================================
93 94
95callhome:
96
97Controls the kernel's callhome behavior in case of a kernel panic.
98
99The s390 hardware allows an operating system to send a notification
100to a service organization (callhome) in case of an operating system panic.
101
102When the value in this file is 0 (which is the default behavior)
103nothing happens in case of a kernel panic. If this value is set to "1"
104the complete kernel oops message is sent to the IBM customer service
105organization in case the mainframe the Linux operating system is running
106on has a service contract with IBM.
107
108==============================================================
109
94core_pattern: 110core_pattern:
95 111
96core_pattern is used to specify a core dumpfile pattern name. 112core_pattern is used to specify a core dumpfile pattern name.
diff --git a/MAINTAINERS b/MAINTAINERS
index 8dca9d89c6c1..989ff1149390 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -439,7 +439,7 @@ F: drivers/hwmon/ams/
439AMSO1100 RNIC DRIVER 439AMSO1100 RNIC DRIVER
440M: Tom Tucker <tom@opengridcomputing.com> 440M: Tom Tucker <tom@opengridcomputing.com>
441M: Steve Wise <swise@opengridcomputing.com> 441M: Steve Wise <swise@opengridcomputing.com>
442L: general@lists.openfabrics.org 442L: linux-rdma@vger.kernel.org
443S: Maintained 443S: Maintained
444F: drivers/infiniband/hw/amso1100/ 444F: drivers/infiniband/hw/amso1100/
445 445
@@ -1494,7 +1494,7 @@ F: drivers/net/cxgb3/
1494 1494
1495CXGB3 IWARP RNIC DRIVER (IW_CXGB3) 1495CXGB3 IWARP RNIC DRIVER (IW_CXGB3)
1496M: Steve Wise <swise@chelsio.com> 1496M: Steve Wise <swise@chelsio.com>
1497L: general@lists.openfabrics.org 1497L: linux-rdma@vger.kernel.org
1498W: http://www.openfabrics.org 1498W: http://www.openfabrics.org
1499S: Supported 1499S: Supported
1500F: drivers/infiniband/hw/cxgb3/ 1500F: drivers/infiniband/hw/cxgb3/
@@ -1868,7 +1868,7 @@ F: fs/efs/
1868EHCA (IBM GX bus InfiniBand adapter) DRIVER 1868EHCA (IBM GX bus InfiniBand adapter) DRIVER
1869M: Hoang-Nam Nguyen <hnguyen@de.ibm.com> 1869M: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
1870M: Christoph Raisch <raisch@de.ibm.com> 1870M: Christoph Raisch <raisch@de.ibm.com>
1871L: general@lists.openfabrics.org 1871L: linux-rdma@vger.kernel.org
1872S: Supported 1872S: Supported
1873F: drivers/infiniband/hw/ehca/ 1873F: drivers/infiniband/hw/ehca/
1874 1874
@@ -2552,7 +2552,7 @@ INFINIBAND SUBSYSTEM
2552M: Roland Dreier <rolandd@cisco.com> 2552M: Roland Dreier <rolandd@cisco.com>
2553M: Sean Hefty <sean.hefty@intel.com> 2553M: Sean Hefty <sean.hefty@intel.com>
2554M: Hal Rosenstock <hal.rosenstock@gmail.com> 2554M: Hal Rosenstock <hal.rosenstock@gmail.com>
2555L: general@lists.openfabrics.org (moderated for non-subscribers) 2555L: linux-rdma@vger.kernel.org
2556W: http://www.openib.org/ 2556W: http://www.openib.org/
2557T: git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git 2557T: git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git
2558S: Supported 2558S: Supported
@@ -2729,7 +2729,7 @@ F: drivers/net/ipg.c
2729 2729
2730IPATH DRIVER 2730IPATH DRIVER
2731M: Ralph Campbell <infinipath@qlogic.com> 2731M: Ralph Campbell <infinipath@qlogic.com>
2732L: general@lists.openfabrics.org 2732L: linux-rdma@vger.kernel.org
2733T: git git://git.qlogic.com/ipath-linux-2.6 2733T: git git://git.qlogic.com/ipath-linux-2.6
2734S: Supported 2734S: Supported
2735F: drivers/infiniband/hw/ipath/ 2735F: drivers/infiniband/hw/ipath/
@@ -3485,7 +3485,7 @@ F: drivers/scsi/NCR_D700.*
3485NETEFFECT IWARP RNIC DRIVER (IW_NES) 3485NETEFFECT IWARP RNIC DRIVER (IW_NES)
3486M: Faisal Latif <faisal.latif@intel.com> 3486M: Faisal Latif <faisal.latif@intel.com>
3487M: Chien Tung <chien.tin.tung@intel.com> 3487M: Chien Tung <chien.tin.tung@intel.com>
3488L: general@lists.openfabrics.org 3488L: linux-rdma@vger.kernel.org
3489W: http://www.neteffect.com 3489W: http://www.neteffect.com
3490S: Supported 3490S: Supported
3491F: drivers/infiniband/hw/nes/ 3491F: drivers/infiniband/hw/nes/
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 60c83abfde70..5076a8860b18 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -75,6 +75,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
75#define TIF_UAC_SIGBUS 7 75#define TIF_UAC_SIGBUS 7
76#define TIF_MEMDIE 8 76#define TIF_MEMDIE 8
77#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ 77#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
78#define TIF_NOTIFY_RESUME 10 /* callback before returning to user */
78#define TIF_FREEZE 16 /* is freezing for suspend */ 79#define TIF_FREEZE 16 /* is freezing for suspend */
79 80
80#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 81#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -82,10 +83,12 @@ register struct thread_info *__current_thread_info __asm__("$8");
82#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) 83#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
83#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 84#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
84#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) 85#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
86#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
85#define _TIF_FREEZE (1<<TIF_FREEZE) 87#define _TIF_FREEZE (1<<TIF_FREEZE)
86 88
87/* Work to do on interrupt/exception return. */ 89/* Work to do on interrupt/exception return. */
88#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED) 90#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
91 _TIF_NOTIFY_RESUME)
89 92
90/* Work to do on any return to userspace. */ 93/* Work to do on any return to userspace. */
91#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \ 94#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index df65eaa84c4c..0932dbb1ef8e 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -20,6 +20,7 @@
20#include <linux/binfmts.h> 20#include <linux/binfmts.h>
21#include <linux/bitops.h> 21#include <linux/bitops.h>
22#include <linux/syscalls.h> 22#include <linux/syscalls.h>
23#include <linux/tracehook.h>
23 24
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
25#include <asm/sigcontext.h> 26#include <asm/sigcontext.h>
@@ -683,4 +684,11 @@ do_notify_resume(struct pt_regs *regs, struct switch_stack *sw,
683{ 684{
684 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) 685 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
685 do_signal(regs, sw, r0, r19); 686 do_signal(regs, sw, r0, r19);
687
688 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
689 clear_thread_flag(TIF_NOTIFY_RESUME);
690 tracehook_notify_resume(regs);
691 if (current->replacement_session_keyring)
692 key_replace_session_keyring();
693 }
686} 694}
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 73394e50cbca..d3a39b1e6c0f 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -130,11 +130,13 @@ extern void vfp_sync_state(struct thread_info *thread);
130 * TIF_SYSCALL_TRACE - syscall trace active 130 * TIF_SYSCALL_TRACE - syscall trace active
131 * TIF_SIGPENDING - signal pending 131 * TIF_SIGPENDING - signal pending
132 * TIF_NEED_RESCHED - rescheduling necessary 132 * TIF_NEED_RESCHED - rescheduling necessary
133 * TIF_NOTIFY_RESUME - callback before returning to user
133 * TIF_USEDFPU - FPU was used by this task this quantum (SMP) 134 * TIF_USEDFPU - FPU was used by this task this quantum (SMP)
134 * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED 135 * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED
135 */ 136 */
136#define TIF_SIGPENDING 0 137#define TIF_SIGPENDING 0
137#define TIF_NEED_RESCHED 1 138#define TIF_NEED_RESCHED 1
139#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
138#define TIF_SYSCALL_TRACE 8 140#define TIF_SYSCALL_TRACE 8
139#define TIF_POLLING_NRFLAG 16 141#define TIF_POLLING_NRFLAG 16
140#define TIF_USING_IWMMXT 17 142#define TIF_USING_IWMMXT 17
@@ -143,6 +145,7 @@ extern void vfp_sync_state(struct thread_info *thread);
143 145
144#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) 146#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
145#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) 147#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
148#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
146#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 149#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
147#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) 150#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
148#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) 151#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 8c3de1a350b5..7813ab782fda 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -51,7 +51,7 @@ fast_work_pending:
51work_pending: 51work_pending:
52 tst r1, #_TIF_NEED_RESCHED 52 tst r1, #_TIF_NEED_RESCHED
53 bne work_resched 53 bne work_resched
54 tst r1, #_TIF_SIGPENDING 54 tst r1, #_TIF_SIGPENDING|_TIF_NOTIFY_RESUME
55 beq no_work_pending 55 beq no_work_pending
56 mov r0, sp @ 'regs' 56 mov r0, sp @ 'regs'
57 mov r2, why @ 'syscall' 57 mov r2, why @ 'syscall'
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index f6bc5d442782..b76fe06d92e7 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -12,6 +12,7 @@
12#include <linux/personality.h> 12#include <linux/personality.h>
13#include <linux/freezer.h> 13#include <linux/freezer.h>
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15#include <linux/tracehook.h>
15 16
16#include <asm/elf.h> 17#include <asm/elf.h>
17#include <asm/cacheflush.h> 18#include <asm/cacheflush.h>
@@ -707,4 +708,11 @@ do_notify_resume(struct pt_regs *regs, unsigned int thread_flags, int syscall)
707{ 708{
708 if (thread_flags & _TIF_SIGPENDING) 709 if (thread_flags & _TIF_SIGPENDING)
709 do_signal(&current->blocked, regs, syscall); 710 do_signal(&current->blocked, regs, syscall);
711
712 if (thread_flags & _TIF_NOTIFY_RESUME) {
713 clear_thread_flag(TIF_NOTIFY_RESUME);
714 tracehook_notify_resume(regs);
715 if (current->replacement_session_keyring)
716 key_replace_session_keyring();
717 }
710} 718}
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index fc42de5ca209..fd0c5d7e9337 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -84,6 +84,7 @@ static inline struct thread_info *current_thread_info(void)
84#define TIF_MEMDIE 6 84#define TIF_MEMDIE 6
85#define TIF_RESTORE_SIGMASK 7 /* restore signal mask in do_signal */ 85#define TIF_RESTORE_SIGMASK 7 /* restore signal mask in do_signal */
86#define TIF_CPU_GOING_TO_SLEEP 8 /* CPU is entering sleep 0 mode */ 86#define TIF_CPU_GOING_TO_SLEEP 8 /* CPU is entering sleep 0 mode */
87#define TIF_NOTIFY_RESUME 9 /* callback before returning to user */
87#define TIF_FREEZE 29 88#define TIF_FREEZE 29
88#define TIF_DEBUG 30 /* debugging enabled */ 89#define TIF_DEBUG 30 /* debugging enabled */
89#define TIF_USERSPACE 31 /* true if FS sets userspace */ 90#define TIF_USERSPACE 31 /* true if FS sets userspace */
@@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void)
96#define _TIF_MEMDIE (1 << TIF_MEMDIE) 97#define _TIF_MEMDIE (1 << TIF_MEMDIE)
97#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) 98#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
98#define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP) 99#define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
100#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
99#define _TIF_FREEZE (1 << TIF_FREEZE) 101#define _TIF_FREEZE (1 << TIF_FREEZE)
100 102
101/* Note: The masks below must never span more than 16 bits! */ 103/* Note: The masks below must never span more than 16 bits! */
@@ -103,13 +105,15 @@ static inline struct thread_info *current_thread_info(void)
103/* work to do on interrupt/exception return */ 105/* work to do on interrupt/exception return */
104#define _TIF_WORK_MASK \ 106#define _TIF_WORK_MASK \
105 ((1 << TIF_SIGPENDING) \ 107 ((1 << TIF_SIGPENDING) \
108 | _TIF_NOTIFY_RESUME \
106 | (1 << TIF_NEED_RESCHED) \ 109 | (1 << TIF_NEED_RESCHED) \
107 | (1 << TIF_POLLING_NRFLAG) \ 110 | (1 << TIF_POLLING_NRFLAG) \
108 | (1 << TIF_BREAKPOINT) \ 111 | (1 << TIF_BREAKPOINT) \
109 | (1 << TIF_RESTORE_SIGMASK)) 112 | (1 << TIF_RESTORE_SIGMASK))
110 113
111/* work to do on any return to userspace */ 114/* work to do on any return to userspace */
112#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE)) 115#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE) | \
116 _TIF_NOTIFY_RESUME)
113/* work to do on return from debug mode */ 117/* work to do on return from debug mode */
114#define _TIF_DBGWORK_MASK (_TIF_WORK_MASK & ~(1 << TIF_BREAKPOINT)) 118#define _TIF_DBGWORK_MASK (_TIF_WORK_MASK & ~(1 << TIF_BREAKPOINT))
115 119
diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S
index 009a80155d67..169268c40ae2 100644
--- a/arch/avr32/kernel/entry-avr32b.S
+++ b/arch/avr32/kernel/entry-avr32b.S
@@ -281,7 +281,7 @@ syscall_exit_work:
281 ld.w r1, r0[TI_flags] 281 ld.w r1, r0[TI_flags]
282 rjmp 1b 282 rjmp 1b
283 283
2842: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK 2842: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NOTIFY_RESUME
285 tst r1, r2 285 tst r1, r2
286 breq 3f 286 breq 3f
287 unmask_interrupts 287 unmask_interrupts
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index 27227561bad6..64f886fac2ef 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -16,6 +16,7 @@
16#include <linux/ptrace.h> 16#include <linux/ptrace.h>
17#include <linux/unistd.h> 17#include <linux/unistd.h>
18#include <linux/freezer.h> 18#include <linux/freezer.h>
19#include <linux/tracehook.h>
19 20
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
21#include <asm/ucontext.h> 22#include <asm/ucontext.h>
@@ -322,4 +323,11 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
322 323
323 if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) 324 if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
324 do_signal(regs, &current->blocked, syscall); 325 do_signal(regs, &current->blocked, syscall);
326
327 if (ti->flags & _TIF_NOTIFY_RESUME) {
328 clear_thread_flag(TIF_NOTIFY_RESUME);
329 tracehook_notify_resume(regs);
330 if (current->replacement_session_keyring)
331 key_replace_session_keyring();
332 }
325} 333}
diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c
index b326023baab2..48b0f3912632 100644
--- a/arch/cris/kernel/ptrace.c
+++ b/arch/cris/kernel/ptrace.c
@@ -16,6 +16,7 @@
16#include <linux/errno.h> 16#include <linux/errno.h>
17#include <linux/ptrace.h> 17#include <linux/ptrace.h>
18#include <linux/user.h> 18#include <linux/user.h>
19#include <linux/tracehook.h>
19 20
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
21#include <asm/page.h> 22#include <asm/page.h>
@@ -36,4 +37,11 @@ void do_notify_resume(int canrestart, struct pt_regs *regs,
36 /* deal with pending signal delivery */ 37 /* deal with pending signal delivery */
37 if (thread_info_flags & _TIF_SIGPENDING) 38 if (thread_info_flags & _TIF_SIGPENDING)
38 do_signal(canrestart,regs); 39 do_signal(canrestart,regs);
40
41 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
42 clear_thread_flag(TIF_NOTIFY_RESUME);
43 tracehook_notify_resume(regs);
44 if (current->replacement_session_keyring)
45 key_replace_session_keyring();
46 }
39} 47}
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 4a7a62c6e783..6b0a2b6fed6a 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -572,6 +572,8 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags)
572 if (thread_info_flags & _TIF_NOTIFY_RESUME) { 572 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
573 clear_thread_flag(TIF_NOTIFY_RESUME); 573 clear_thread_flag(TIF_NOTIFY_RESUME);
574 tracehook_notify_resume(__frame); 574 tracehook_notify_resume(__frame);
575 if (current->replacement_session_keyring)
576 key_replace_session_keyring();
575 } 577 }
576 578
577} /* end do_notify_resume() */ 579} /* end do_notify_resume() */
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h
index 8bbc8b0ee45d..70e67e47d020 100644
--- a/arch/h8300/include/asm/thread_info.h
+++ b/arch/h8300/include/asm/thread_info.h
@@ -89,6 +89,7 @@ static inline struct thread_info *current_thread_info(void)
89 TIF_NEED_RESCHED */ 89 TIF_NEED_RESCHED */
90#define TIF_MEMDIE 4 90#define TIF_MEMDIE 4
91#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ 91#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
92#define TIF_NOTIFY_RESUME 6 /* callback before returning to user */
92#define TIF_FREEZE 16 /* is freezing for suspend */ 93#define TIF_FREEZE 16 /* is freezing for suspend */
93 94
94/* as above, but as bit values */ 95/* as above, but as bit values */
@@ -97,6 +98,7 @@ static inline struct thread_info *current_thread_info(void)
97#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) 98#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
98#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 99#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
99#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) 100#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
101#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
100#define _TIF_FREEZE (1<<TIF_FREEZE) 102#define _TIF_FREEZE (1<<TIF_FREEZE)
101 103
102#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ 104#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index cf3472f7389b..af842c369d24 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -39,6 +39,7 @@
39#include <linux/tty.h> 39#include <linux/tty.h>
40#include <linux/binfmts.h> 40#include <linux/binfmts.h>
41#include <linux/freezer.h> 41#include <linux/freezer.h>
42#include <linux/tracehook.h>
42 43
43#include <asm/setup.h> 44#include <asm/setup.h>
44#include <asm/uaccess.h> 45#include <asm/uaccess.h>
@@ -552,4 +553,11 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
552{ 553{
553 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) 554 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
554 do_signal(regs, NULL); 555 do_signal(regs, NULL);
556
557 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
558 clear_thread_flag(TIF_NOTIFY_RESUME);
559 tracehook_notify_resume(regs);
560 if (current->replacement_session_keyring)
561 key_replace_session_keyring();
562 }
555} 563}
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 5d7c0e5b9e76..89969e950045 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -192,6 +192,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
192 if (test_thread_flag(TIF_NOTIFY_RESUME)) { 192 if (test_thread_flag(TIF_NOTIFY_RESUME)) {
193 clear_thread_flag(TIF_NOTIFY_RESUME); 193 clear_thread_flag(TIF_NOTIFY_RESUME);
194 tracehook_notify_resume(&scr->pt); 194 tracehook_notify_resume(&scr->pt);
195 if (current->replacement_session_keyring)
196 key_replace_session_keyring();
195 } 197 }
196 198
197 /* copy user rbs to kernel rbs */ 199 /* copy user rbs to kernel rbs */
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index 07bb5bd00e2a..71578151a403 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -149,6 +149,7 @@ static inline unsigned int get_thread_fault_code(void)
149#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ 149#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
150#define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */ 150#define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */
151#define TIF_IRET 4 /* return with iret */ 151#define TIF_IRET 4 /* return with iret */
152#define TIF_NOTIFY_RESUME 5 /* callback before returning to user */
152#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ 153#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */
153#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ 154#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
154#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ 155#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
@@ -160,6 +161,7 @@ static inline unsigned int get_thread_fault_code(void)
160#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) 161#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
161#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) 162#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
162#define _TIF_IRET (1<<TIF_IRET) 163#define _TIF_IRET (1<<TIF_IRET)
164#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
163#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) 165#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
164#define _TIF_USEDFPU (1<<TIF_USEDFPU) 166#define _TIF_USEDFPU (1<<TIF_USEDFPU)
165#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 167#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 18124542a6eb..144b0f124fc7 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -21,6 +21,7 @@
21#include <linux/stddef.h> 21#include <linux/stddef.h>
22#include <linux/personality.h> 22#include <linux/personality.h>
23#include <linux/freezer.h> 23#include <linux/freezer.h>
24#include <linux/tracehook.h>
24#include <asm/cacheflush.h> 25#include <asm/cacheflush.h>
25#include <asm/ucontext.h> 26#include <asm/ucontext.h>
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
@@ -408,5 +409,12 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset,
408 if (thread_info_flags & _TIF_SIGPENDING) 409 if (thread_info_flags & _TIF_SIGPENDING)
409 do_signal(regs,oldset); 410 do_signal(regs,oldset);
410 411
412 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
413 clear_thread_flag(TIF_NOTIFY_RESUME);
414 tracehook_notify_resume(regs);
415 if (current->replacement_session_keyring)
416 key_replace_session_keyring();
417 }
418
411 clear_thread_flag(TIF_IRET); 419 clear_thread_flag(TIF_IRET);
412} 420}
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index f9df720d2e40..01cc1630b66c 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -115,6 +115,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
115#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ 115#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
116#define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ 116#define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */
117#define TIF_SECCOMP 4 /* secure computing */ 117#define TIF_SECCOMP 4 /* secure computing */
118#define TIF_NOTIFY_RESUME 5 /* callback before returning to user */
118#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ 119#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
119#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ 120#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
120#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ 121#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
@@ -139,6 +140,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
139#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) 140#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
140#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) 141#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
141#define _TIF_SECCOMP (1<<TIF_SECCOMP) 142#define _TIF_SECCOMP (1<<TIF_SECCOMP)
143#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
142#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) 144#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
143#define _TIF_USEDFPU (1<<TIF_USEDFPU) 145#define _TIF_USEDFPU (1<<TIF_USEDFPU)
144#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) 146#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 830c5ef9932b..6254041b942f 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -21,6 +21,7 @@
21#include <linux/compiler.h> 21#include <linux/compiler.h>
22#include <linux/syscalls.h> 22#include <linux/syscalls.h>
23#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/tracehook.h>
24 25
25#include <asm/abi.h> 26#include <asm/abi.h>
26#include <asm/asm.h> 27#include <asm/asm.h>
@@ -700,4 +701,11 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
700 /* deal with pending signal delivery */ 701 /* deal with pending signal delivery */
701 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) 702 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
702 do_signal(regs); 703 do_signal(regs);
704
705 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
706 clear_thread_flag(TIF_NOTIFY_RESUME);
707 tracehook_notify_resume(regs);
708 if (current->replacement_session_keyring)
709 key_replace_session_keyring();
710 }
703} 711}
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index feb2f2e810db..a21f43bc68e2 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -568,5 +568,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
568 if (thread_info_flags & _TIF_NOTIFY_RESUME) { 568 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
569 clear_thread_flag(TIF_NOTIFY_RESUME); 569 clear_thread_flag(TIF_NOTIFY_RESUME);
570 tracehook_notify_resume(__frame); 570 tracehook_notify_resume(__frame);
571 if (current->replacement_session_keyring)
572 key_replace_session_keyring();
571 } 573 }
572} 574}
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index 4ce0edfbe969..ac775a76bff7 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -59,6 +59,7 @@ struct thread_info {
59#define TIF_MEMDIE 5 59#define TIF_MEMDIE 5
60#define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */ 60#define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */
61#define TIF_FREEZE 7 /* is freezing for suspend */ 61#define TIF_FREEZE 7 /* is freezing for suspend */
62#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */
62 63
63#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 64#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
64#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) 65#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
@@ -67,8 +68,9 @@ struct thread_info {
67#define _TIF_32BIT (1 << TIF_32BIT) 68#define _TIF_32BIT (1 << TIF_32BIT)
68#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) 69#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
69#define _TIF_FREEZE (1 << TIF_FREEZE) 70#define _TIF_FREEZE (1 << TIF_FREEZE)
71#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
70 72
71#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | \ 73#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
72 _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK) 74 _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
73 75
74#endif /* __KERNEL__ */ 76#endif /* __KERNEL__ */
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index e552e547cb93..8c4712b74dc1 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -948,7 +948,7 @@ intr_check_sig:
948 /* As above */ 948 /* As above */
949 mfctl %cr30,%r1 949 mfctl %cr30,%r1
950 LDREG TI_FLAGS(%r1),%r19 950 LDREG TI_FLAGS(%r1),%r19
951 ldi (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK), %r20 951 ldi (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME), %r20
952 and,COND(<>) %r19, %r20, %r0 952 and,COND(<>) %r19, %r20, %r0
953 b,n intr_restore /* skip past if we've nothing to do */ 953 b,n intr_restore /* skip past if we've nothing to do */
954 954
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index f82544225e8e..8eb3c63c407a 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -25,6 +25,7 @@
25#include <linux/stddef.h> 25#include <linux/stddef.h>
26#include <linux/compat.h> 26#include <linux/compat.h>
27#include <linux/elf.h> 27#include <linux/elf.h>
28#include <linux/tracehook.h>
28#include <asm/ucontext.h> 29#include <asm/ucontext.h>
29#include <asm/rt_sigframe.h> 30#include <asm/rt_sigframe.h>
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
@@ -645,4 +646,11 @@ void do_notify_resume(struct pt_regs *regs, long in_syscall)
645 if (test_thread_flag(TIF_SIGPENDING) || 646 if (test_thread_flag(TIF_SIGPENDING) ||
646 test_thread_flag(TIF_RESTORE_SIGMASK)) 647 test_thread_flag(TIF_RESTORE_SIGMASK))
647 do_signal(regs, in_syscall); 648 do_signal(regs, in_syscall);
649
650 if (test_thread_flag(TIF_NOTIFY_RESUME)) {
651 clear_thread_flag(TIF_NOTIFY_RESUME);
652 tracehook_notify_resume(regs);
653 if (current->replacement_session_keyring)
654 key_replace_session_keyring();
655 }
648} 656}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2ae5d72f47ed..e030e86ff6a3 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -95,7 +95,6 @@ config S390
95 select HAVE_ARCH_TRACEHOOK 95 select HAVE_ARCH_TRACEHOOK
96 select INIT_ALL_POSSIBLE 96 select INIT_ALL_POSSIBLE
97 select HAVE_PERF_COUNTERS 97 select HAVE_PERF_COUNTERS
98 select GENERIC_ATOMIC64 if !64BIT
99 98
100config SCHED_OMIT_FRAME_POINTER 99config SCHED_OMIT_FRAME_POINTER
101 bool 100 bool
@@ -481,13 +480,6 @@ config CMM_IUCV
481 Select this option to enable the special message interface to 480 Select this option to enable the special message interface to
482 the cooperative memory management. 481 the cooperative memory management.
483 482
484config PAGE_STATES
485 bool "Unused page notification"
486 help
487 This enables the notification of unused pages to the
488 hypervisor. The ESSA instruction is used to do the states
489 changes between a page that has content and the unused state.
490
491config APPLDATA_BASE 483config APPLDATA_BASE
492 bool "Linux - VM Monitor Stream, base infrastructure" 484 bool "Linux - VM Monitor Stream, base infrastructure"
493 depends on PROC_FS 485 depends on PROC_FS
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 0ff387cebf88..fc8fb20e7fc0 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -88,8 +88,7 @@ LDFLAGS_vmlinux := -e start
88head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o 88head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o
89 89
90core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ 90core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \
91 arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ \ 91 arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/
92 arch/s390/power/
93 92
94libs-y += arch/s390/lib/ 93libs-y += arch/s390/lib/
95drivers-y += drivers/s390/ 94drivers-y += drivers/s390/
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 5a805df216bb..bd9914b89488 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -355,11 +355,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb,
355{ 355{
356 struct dentry *dentry; 356 struct dentry *dentry;
357 struct inode *inode; 357 struct inode *inode;
358 struct qstr qname;
359 358
360 qname.name = name;
361 qname.len = strlen(name);
362 qname.hash = full_name_hash(name, qname.len);
363 mutex_lock(&parent->d_inode->i_mutex); 359 mutex_lock(&parent->d_inode->i_mutex);
364 dentry = lookup_one_len(name, parent, strlen(name)); 360 dentry = lookup_one_len(name, parent, strlen(name));
365 if (IS_ERR(dentry)) { 361 if (IS_ERR(dentry)) {
@@ -426,7 +422,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
426 char tmp[TMP_SIZE]; 422 char tmp[TMP_SIZE];
427 struct dentry *dentry; 423 struct dentry *dentry;
428 424
429 snprintf(tmp, TMP_SIZE, "%lld\n", (unsigned long long int)value); 425 snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value);
430 buffer = kstrdup(tmp, GFP_KERNEL); 426 buffer = kstrdup(tmp, GFP_KERNEL);
431 if (!buffer) 427 if (!buffer)
432 return ERR_PTR(-ENOMEM); 428 return ERR_PTR(-ENOMEM);
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index c7d0abfb0f00..ae7c8f9f94a5 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -1,33 +1,23 @@
1#ifndef __ARCH_S390_ATOMIC__ 1#ifndef __ARCH_S390_ATOMIC__
2#define __ARCH_S390_ATOMIC__ 2#define __ARCH_S390_ATOMIC__
3 3
4#include <linux/compiler.h>
5#include <linux/types.h>
6
7/* 4/*
8 * include/asm-s390/atomic.h 5 * Copyright 1999,2009 IBM Corp.
6 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
7 * Denis Joseph Barrow,
8 * Arnd Bergmann <arndb@de.ibm.com>,
9 * 9 *
10 * S390 version 10 * Atomic operations that C can't guarantee us.
11 * Copyright (C) 1999-2005 IBM Deutschland Entwicklung GmbH, IBM Corporation 11 * Useful for resource counting etc.
12 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), 12 * s390 uses 'Compare And Swap' for atomicity in SMP enviroment.
13 * Denis Joseph Barrow,
14 * Arnd Bergmann (arndb@de.ibm.com)
15 *
16 * Derived from "include/asm-i386/bitops.h"
17 * Copyright (C) 1992, Linus Torvalds
18 * 13 *
19 */ 14 */
20 15
21/* 16#include <linux/compiler.h>
22 * Atomic operations that C can't guarantee us. Useful for 17#include <linux/types.h>
23 * resource counting etc..
24 * S390 uses 'Compare And Swap' for atomicity in SMP enviroment
25 */
26 18
27#define ATOMIC_INIT(i) { (i) } 19#define ATOMIC_INIT(i) { (i) }
28 20
29#ifdef __KERNEL__
30
31#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) 21#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
32 22
33#define __CS_LOOP(ptr, op_val, op_string) ({ \ 23#define __CS_LOOP(ptr, op_val, op_string) ({ \
@@ -77,7 +67,7 @@ static inline void atomic_set(atomic_t *v, int i)
77 barrier(); 67 barrier();
78} 68}
79 69
80static __inline__ int atomic_add_return(int i, atomic_t * v) 70static inline int atomic_add_return(int i, atomic_t *v)
81{ 71{
82 return __CS_LOOP(v, i, "ar"); 72 return __CS_LOOP(v, i, "ar");
83} 73}
@@ -87,7 +77,7 @@ static __inline__ int atomic_add_return(int i, atomic_t * v)
87#define atomic_inc_return(_v) atomic_add_return(1, _v) 77#define atomic_inc_return(_v) atomic_add_return(1, _v)
88#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) 78#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0)
89 79
90static __inline__ int atomic_sub_return(int i, atomic_t * v) 80static inline int atomic_sub_return(int i, atomic_t *v)
91{ 81{
92 return __CS_LOOP(v, i, "sr"); 82 return __CS_LOOP(v, i, "sr");
93} 83}
@@ -97,19 +87,19 @@ static __inline__ int atomic_sub_return(int i, atomic_t * v)
97#define atomic_dec_return(_v) atomic_sub_return(1, _v) 87#define atomic_dec_return(_v) atomic_sub_return(1, _v)
98#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) 88#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0)
99 89
100static __inline__ void atomic_clear_mask(unsigned long mask, atomic_t * v) 90static inline void atomic_clear_mask(unsigned long mask, atomic_t *v)
101{ 91{
102 __CS_LOOP(v, ~mask, "nr"); 92 __CS_LOOP(v, ~mask, "nr");
103} 93}
104 94
105static __inline__ void atomic_set_mask(unsigned long mask, atomic_t * v) 95static inline void atomic_set_mask(unsigned long mask, atomic_t *v)
106{ 96{
107 __CS_LOOP(v, mask, "or"); 97 __CS_LOOP(v, mask, "or");
108} 98}
109 99
110#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) 100#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
111 101
112static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new) 102static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
113{ 103{
114#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) 104#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
115 asm volatile( 105 asm volatile(
@@ -127,7 +117,7 @@ static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new)
127 return old; 117 return old;
128} 118}
129 119
130static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) 120static inline int atomic_add_unless(atomic_t *v, int a, int u)
131{ 121{
132 int c, old; 122 int c, old;
133 c = atomic_read(v); 123 c = atomic_read(v);
@@ -146,9 +136,10 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
146 136
147#undef __CS_LOOP 137#undef __CS_LOOP
148 138
149#ifdef __s390x__
150#define ATOMIC64_INIT(i) { (i) } 139#define ATOMIC64_INIT(i) { (i) }
151 140
141#ifdef CONFIG_64BIT
142
152#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) 143#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
153 144
154#define __CSG_LOOP(ptr, op_val, op_string) ({ \ 145#define __CSG_LOOP(ptr, op_val, op_string) ({ \
@@ -162,7 +153,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
162 : "=&d" (old_val), "=&d" (new_val), \ 153 : "=&d" (old_val), "=&d" (new_val), \
163 "=Q" (((atomic_t *)(ptr))->counter) \ 154 "=Q" (((atomic_t *)(ptr))->counter) \
164 : "d" (op_val), "Q" (((atomic_t *)(ptr))->counter) \ 155 : "d" (op_val), "Q" (((atomic_t *)(ptr))->counter) \
165 : "cc", "memory" ); \ 156 : "cc", "memory"); \
166 new_val; \ 157 new_val; \
167}) 158})
168 159
@@ -180,7 +171,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
180 "=m" (((atomic_t *)(ptr))->counter) \ 171 "=m" (((atomic_t *)(ptr))->counter) \
181 : "a" (ptr), "d" (op_val), \ 172 : "a" (ptr), "d" (op_val), \
182 "m" (((atomic_t *)(ptr))->counter) \ 173 "m" (((atomic_t *)(ptr))->counter) \
183 : "cc", "memory" ); \ 174 : "cc", "memory"); \
184 new_val; \ 175 new_val; \
185}) 176})
186 177
@@ -198,39 +189,29 @@ static inline void atomic64_set(atomic64_t *v, long long i)
198 barrier(); 189 barrier();
199} 190}
200 191
201static __inline__ long long atomic64_add_return(long long i, atomic64_t * v) 192static inline long long atomic64_add_return(long long i, atomic64_t *v)
202{ 193{
203 return __CSG_LOOP(v, i, "agr"); 194 return __CSG_LOOP(v, i, "agr");
204} 195}
205#define atomic64_add(_i, _v) atomic64_add_return(_i, _v)
206#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0)
207#define atomic64_inc(_v) atomic64_add_return(1, _v)
208#define atomic64_inc_return(_v) atomic64_add_return(1, _v)
209#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0)
210 196
211static __inline__ long long atomic64_sub_return(long long i, atomic64_t * v) 197static inline long long atomic64_sub_return(long long i, atomic64_t *v)
212{ 198{
213 return __CSG_LOOP(v, i, "sgr"); 199 return __CSG_LOOP(v, i, "sgr");
214} 200}
215#define atomic64_sub(_i, _v) atomic64_sub_return(_i, _v)
216#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0)
217#define atomic64_dec(_v) atomic64_sub_return(1, _v)
218#define atomic64_dec_return(_v) atomic64_sub_return(1, _v)
219#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0)
220 201
221static __inline__ void atomic64_clear_mask(unsigned long mask, atomic64_t * v) 202static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v)
222{ 203{
223 __CSG_LOOP(v, ~mask, "ngr"); 204 __CSG_LOOP(v, ~mask, "ngr");
224} 205}
225 206
226static __inline__ void atomic64_set_mask(unsigned long mask, atomic64_t * v) 207static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v)
227{ 208{
228 __CSG_LOOP(v, mask, "ogr"); 209 __CSG_LOOP(v, mask, "ogr");
229} 210}
230 211
231#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) 212#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
232 213
233static __inline__ long long atomic64_cmpxchg(atomic64_t *v, 214static inline long long atomic64_cmpxchg(atomic64_t *v,
234 long long old, long long new) 215 long long old, long long new)
235{ 216{
236#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) 217#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
@@ -249,8 +230,112 @@ static __inline__ long long atomic64_cmpxchg(atomic64_t *v,
249 return old; 230 return old;
250} 231}
251 232
252static __inline__ int atomic64_add_unless(atomic64_t *v, 233#undef __CSG_LOOP
253 long long a, long long u) 234
235#else /* CONFIG_64BIT */
236
237typedef struct {
238 long long counter;
239} atomic64_t;
240
241static inline long long atomic64_read(const atomic64_t *v)
242{
243 register_pair rp;
244
245 asm volatile(
246 " lm %0,%N0,0(%1)"
247 : "=&d" (rp)
248 : "a" (&v->counter), "m" (v->counter)
249 );
250 return rp.pair;
251}
252
253static inline void atomic64_set(atomic64_t *v, long long i)
254{
255 register_pair rp = {.pair = i};
256
257 asm volatile(
258 " stm %1,%N1,0(%2)"
259 : "=m" (v->counter)
260 : "d" (rp), "a" (&v->counter)
261 );
262}
263
264static inline long long atomic64_xchg(atomic64_t *v, long long new)
265{
266 register_pair rp_new = {.pair = new};
267 register_pair rp_old;
268
269 asm volatile(
270 " lm %0,%N0,0(%2)\n"
271 "0: cds %0,%3,0(%2)\n"
272 " jl 0b\n"
273 : "=&d" (rp_old), "+m" (v->counter)
274 : "a" (&v->counter), "d" (rp_new)
275 : "cc");
276 return rp_old.pair;
277}
278
279static inline long long atomic64_cmpxchg(atomic64_t *v,
280 long long old, long long new)
281{
282 register_pair rp_old = {.pair = old};
283 register_pair rp_new = {.pair = new};
284
285 asm volatile(
286 " cds %0,%3,0(%2)"
287 : "+&d" (rp_old), "+m" (v->counter)
288 : "a" (&v->counter), "d" (rp_new)
289 : "cc");
290 return rp_old.pair;
291}
292
293
294static inline long long atomic64_add_return(long long i, atomic64_t *v)
295{
296 long long old, new;
297
298 do {
299 old = atomic64_read(v);
300 new = old + i;
301 } while (atomic64_cmpxchg(v, old, new) != old);
302 return new;
303}
304
305static inline long long atomic64_sub_return(long long i, atomic64_t *v)
306{
307 long long old, new;
308
309 do {
310 old = atomic64_read(v);
311 new = old - i;
312 } while (atomic64_cmpxchg(v, old, new) != old);
313 return new;
314}
315
316static inline void atomic64_set_mask(unsigned long long mask, atomic64_t *v)
317{
318 long long old, new;
319
320 do {
321 old = atomic64_read(v);
322 new = old | mask;
323 } while (atomic64_cmpxchg(v, old, new) != old);
324}
325
326static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v)
327{
328 long long old, new;
329
330 do {
331 old = atomic64_read(v);
332 new = old & mask;
333 } while (atomic64_cmpxchg(v, old, new) != old);
334}
335
336#endif /* CONFIG_64BIT */
337
338static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
254{ 339{
255 long long c, old; 340 long long c, old;
256 c = atomic64_read(v); 341 c = atomic64_read(v);
@@ -265,15 +350,17 @@ static __inline__ int atomic64_add_unless(atomic64_t *v,
265 return c != u; 350 return c != u;
266} 351}
267 352
268#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) 353#define atomic64_add(_i, _v) atomic64_add_return(_i, _v)
269 354#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0)
270#undef __CSG_LOOP 355#define atomic64_inc(_v) atomic64_add_return(1, _v)
271 356#define atomic64_inc_return(_v) atomic64_add_return(1, _v)
272#else /* __s390x__ */ 357#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0)
273 358#define atomic64_sub(_i, _v) atomic64_sub_return(_i, _v)
274#include <asm-generic/atomic64.h> 359#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0)
275 360#define atomic64_dec(_v) atomic64_sub_return(1, _v)
276#endif /* __s390x__ */ 361#define atomic64_dec_return(_v) atomic64_sub_return(1, _v)
362#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0)
363#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
277 364
278#define smp_mb__before_atomic_dec() smp_mb() 365#define smp_mb__before_atomic_dec() smp_mb()
279#define smp_mb__after_atomic_dec() smp_mb() 366#define smp_mb__after_atomic_dec() smp_mb()
@@ -281,5 +368,5 @@ static __inline__ int atomic64_add_unless(atomic64_t *v,
281#define smp_mb__after_atomic_inc() smp_mb() 368#define smp_mb__after_atomic_inc() smp_mb()
282 369
283#include <asm-generic/atomic-long.h> 370#include <asm-generic/atomic-long.h>
284#endif /* __KERNEL__ */ 371
285#endif /* __ARCH_S390_ATOMIC__ */ 372#endif /* __ARCH_S390_ATOMIC__ */
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index d5a8e7c1477c..6c00f6800a34 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -78,28 +78,11 @@ csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum)
78 */ 78 */
79static inline __sum16 csum_fold(__wsum sum) 79static inline __sum16 csum_fold(__wsum sum)
80{ 80{
81#ifndef __s390x__ 81 u32 csum = (__force u32) sum;
82 register_pair rp;
83 82
84 asm volatile( 83 csum += (csum >> 16) + (csum << 16);
85 " slr %N1,%N1\n" /* %0 = H L */ 84 csum >>= 16;
86 " lr %1,%0\n" /* %0 = H L, %1 = H L 0 0 */ 85 return (__force __sum16) ~csum;
87 " srdl %1,16\n" /* %0 = H L, %1 = 0 H L 0 */
88 " alr %1,%N1\n" /* %0 = H L, %1 = L H L 0 */
89 " alr %0,%1\n" /* %0 = H+L+C L+H */
90 " srl %0,16\n" /* %0 = H+L+C */
91 : "+&d" (sum), "=d" (rp) : : "cc");
92#else /* __s390x__ */
93 asm volatile(
94 " sr 3,3\n" /* %0 = H*65536 + L */
95 " lr 2,%0\n" /* %0 = H L, 2/3 = H L / 0 0 */
96 " srdl 2,16\n" /* %0 = H L, 2/3 = 0 H / L 0 */
97 " alr 2,3\n" /* %0 = H L, 2/3 = L H / L 0 */
98 " alr %0,2\n" /* %0 = H+L+C L+H */
99 " srl %0,16\n" /* %0 = H+L+C */
100 : "+&d" (sum) : : "cc", "2", "3");
101#endif /* __s390x__ */
102 return (__force __sum16) ~sum;
103} 86}
104 87
105/* 88/*
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
index 807997f7414b..4943654ed7fd 100644
--- a/arch/s390/include/asm/chsc.h
+++ b/arch/s390/include/asm/chsc.h
@@ -125,4 +125,32 @@ struct chsc_cpd_info {
125#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) 125#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info)
126#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) 126#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal)
127 127
128#ifdef __KERNEL__
129
130struct css_general_char {
131 u64 : 12;
132 u32 dynio : 1; /* bit 12 */
133 u32 : 28;
134 u32 aif : 1; /* bit 41 */
135 u32 : 3;
136 u32 mcss : 1; /* bit 45 */
137 u32 fcs : 1; /* bit 46 */
138 u32 : 1;
139 u32 ext_mb : 1; /* bit 48 */
140 u32 : 7;
141 u32 aif_tdd : 1; /* bit 56 */
142 u32 : 1;
143 u32 qebsm : 1; /* bit 58 */
144 u32 : 8;
145 u32 aif_osa : 1; /* bit 67 */
146 u32 : 14;
147 u32 cib : 1; /* bit 82 */
148 u32 : 5;
149 u32 fcx : 1; /* bit 88 */
150 u32 : 7;
151}__attribute__((packed));
152
153extern struct css_general_char css_general_characteristics;
154
155#endif /* __KERNEL__ */
128#endif 156#endif
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 619bf94b11f1..e85679af54dd 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -15,228 +15,7 @@
15#define LPM_ANYPATH 0xff 15#define LPM_ANYPATH 0xff
16#define __MAX_CSSID 0 16#define __MAX_CSSID 0
17 17
18/** 18#include <asm/scsw.h>
19 * struct cmd_scsw - command-mode subchannel status word
20 * @key: subchannel key
21 * @sctl: suspend control
22 * @eswf: esw format
23 * @cc: deferred condition code
24 * @fmt: format
25 * @pfch: prefetch
26 * @isic: initial-status interruption control
27 * @alcc: address-limit checking control
28 * @ssi: suppress-suspended interruption
29 * @zcc: zero condition code
30 * @ectl: extended control
31 * @pno: path not operational
32 * @res: reserved
33 * @fctl: function control
34 * @actl: activity control
35 * @stctl: status control
36 * @cpa: channel program address
37 * @dstat: device status
38 * @cstat: subchannel status
39 * @count: residual count
40 */
41struct cmd_scsw {
42 __u32 key : 4;
43 __u32 sctl : 1;
44 __u32 eswf : 1;
45 __u32 cc : 2;
46 __u32 fmt : 1;
47 __u32 pfch : 1;
48 __u32 isic : 1;
49 __u32 alcc : 1;
50 __u32 ssi : 1;
51 __u32 zcc : 1;
52 __u32 ectl : 1;
53 __u32 pno : 1;
54 __u32 res : 1;
55 __u32 fctl : 3;
56 __u32 actl : 7;
57 __u32 stctl : 5;
58 __u32 cpa;
59 __u32 dstat : 8;
60 __u32 cstat : 8;
61 __u32 count : 16;
62} __attribute__ ((packed));
63
64/**
65 * struct tm_scsw - transport-mode subchannel status word
66 * @key: subchannel key
67 * @eswf: esw format
68 * @cc: deferred condition code
69 * @fmt: format
70 * @x: IRB-format control
71 * @q: interrogate-complete
72 * @ectl: extended control
73 * @pno: path not operational
74 * @fctl: function control
75 * @actl: activity control
76 * @stctl: status control
77 * @tcw: TCW address
78 * @dstat: device status
79 * @cstat: subchannel status
80 * @fcxs: FCX status
81 * @schxs: subchannel-extended status
82 */
83struct tm_scsw {
84 u32 key:4;
85 u32 :1;
86 u32 eswf:1;
87 u32 cc:2;
88 u32 fmt:3;
89 u32 x:1;
90 u32 q:1;
91 u32 :1;
92 u32 ectl:1;
93 u32 pno:1;
94 u32 :1;
95 u32 fctl:3;
96 u32 actl:7;
97 u32 stctl:5;
98 u32 tcw;
99 u32 dstat:8;
100 u32 cstat:8;
101 u32 fcxs:8;
102 u32 schxs:8;
103} __attribute__ ((packed));
104
105/**
106 * union scsw - subchannel status word
107 * @cmd: command-mode SCSW
108 * @tm: transport-mode SCSW
109 */
110union scsw {
111 struct cmd_scsw cmd;
112 struct tm_scsw tm;
113} __attribute__ ((packed));
114
115int scsw_is_tm(union scsw *scsw);
116u32 scsw_key(union scsw *scsw);
117u32 scsw_eswf(union scsw *scsw);
118u32 scsw_cc(union scsw *scsw);
119u32 scsw_ectl(union scsw *scsw);
120u32 scsw_pno(union scsw *scsw);
121u32 scsw_fctl(union scsw *scsw);
122u32 scsw_actl(union scsw *scsw);
123u32 scsw_stctl(union scsw *scsw);
124u32 scsw_dstat(union scsw *scsw);
125u32 scsw_cstat(union scsw *scsw);
126int scsw_is_solicited(union scsw *scsw);
127int scsw_is_valid_key(union scsw *scsw);
128int scsw_is_valid_eswf(union scsw *scsw);
129int scsw_is_valid_cc(union scsw *scsw);
130int scsw_is_valid_ectl(union scsw *scsw);
131int scsw_is_valid_pno(union scsw *scsw);
132int scsw_is_valid_fctl(union scsw *scsw);
133int scsw_is_valid_actl(union scsw *scsw);
134int scsw_is_valid_stctl(union scsw *scsw);
135int scsw_is_valid_dstat(union scsw *scsw);
136int scsw_is_valid_cstat(union scsw *scsw);
137int scsw_cmd_is_valid_key(union scsw *scsw);
138int scsw_cmd_is_valid_sctl(union scsw *scsw);
139int scsw_cmd_is_valid_eswf(union scsw *scsw);
140int scsw_cmd_is_valid_cc(union scsw *scsw);
141int scsw_cmd_is_valid_fmt(union scsw *scsw);
142int scsw_cmd_is_valid_pfch(union scsw *scsw);
143int scsw_cmd_is_valid_isic(union scsw *scsw);
144int scsw_cmd_is_valid_alcc(union scsw *scsw);
145int scsw_cmd_is_valid_ssi(union scsw *scsw);
146int scsw_cmd_is_valid_zcc(union scsw *scsw);
147int scsw_cmd_is_valid_ectl(union scsw *scsw);
148int scsw_cmd_is_valid_pno(union scsw *scsw);
149int scsw_cmd_is_valid_fctl(union scsw *scsw);
150int scsw_cmd_is_valid_actl(union scsw *scsw);
151int scsw_cmd_is_valid_stctl(union scsw *scsw);
152int scsw_cmd_is_valid_dstat(union scsw *scsw);
153int scsw_cmd_is_valid_cstat(union scsw *scsw);
154int scsw_cmd_is_solicited(union scsw *scsw);
155int scsw_tm_is_valid_key(union scsw *scsw);
156int scsw_tm_is_valid_eswf(union scsw *scsw);
157int scsw_tm_is_valid_cc(union scsw *scsw);
158int scsw_tm_is_valid_fmt(union scsw *scsw);
159int scsw_tm_is_valid_x(union scsw *scsw);
160int scsw_tm_is_valid_q(union scsw *scsw);
161int scsw_tm_is_valid_ectl(union scsw *scsw);
162int scsw_tm_is_valid_pno(union scsw *scsw);
163int scsw_tm_is_valid_fctl(union scsw *scsw);
164int scsw_tm_is_valid_actl(union scsw *scsw);
165int scsw_tm_is_valid_stctl(union scsw *scsw);
166int scsw_tm_is_valid_dstat(union scsw *scsw);
167int scsw_tm_is_valid_cstat(union scsw *scsw);
168int scsw_tm_is_valid_fcxs(union scsw *scsw);
169int scsw_tm_is_valid_schxs(union scsw *scsw);
170int scsw_tm_is_solicited(union scsw *scsw);
171
172#define SCSW_FCTL_CLEAR_FUNC 0x1
173#define SCSW_FCTL_HALT_FUNC 0x2
174#define SCSW_FCTL_START_FUNC 0x4
175
176#define SCSW_ACTL_SUSPENDED 0x1
177#define SCSW_ACTL_DEVACT 0x2
178#define SCSW_ACTL_SCHACT 0x4
179#define SCSW_ACTL_CLEAR_PEND 0x8
180#define SCSW_ACTL_HALT_PEND 0x10
181#define SCSW_ACTL_START_PEND 0x20
182#define SCSW_ACTL_RESUME_PEND 0x40
183
184#define SCSW_STCTL_STATUS_PEND 0x1
185#define SCSW_STCTL_SEC_STATUS 0x2
186#define SCSW_STCTL_PRIM_STATUS 0x4
187#define SCSW_STCTL_INTER_STATUS 0x8
188#define SCSW_STCTL_ALERT_STATUS 0x10
189
190#define DEV_STAT_ATTENTION 0x80
191#define DEV_STAT_STAT_MOD 0x40
192#define DEV_STAT_CU_END 0x20
193#define DEV_STAT_BUSY 0x10
194#define DEV_STAT_CHN_END 0x08
195#define DEV_STAT_DEV_END 0x04
196#define DEV_STAT_UNIT_CHECK 0x02
197#define DEV_STAT_UNIT_EXCEP 0x01
198
199#define SCHN_STAT_PCI 0x80
200#define SCHN_STAT_INCORR_LEN 0x40
201#define SCHN_STAT_PROG_CHECK 0x20
202#define SCHN_STAT_PROT_CHECK 0x10
203#define SCHN_STAT_CHN_DATA_CHK 0x08
204#define SCHN_STAT_CHN_CTRL_CHK 0x04
205#define SCHN_STAT_INTF_CTRL_CHK 0x02
206#define SCHN_STAT_CHAIN_CHECK 0x01
207
208/*
209 * architectured values for first sense byte
210 */
211#define SNS0_CMD_REJECT 0x80
212#define SNS_CMD_REJECT SNS0_CMD_REJECT
213#define SNS0_INTERVENTION_REQ 0x40
214#define SNS0_BUS_OUT_CHECK 0x20
215#define SNS0_EQUIPMENT_CHECK 0x10
216#define SNS0_DATA_CHECK 0x08
217#define SNS0_OVERRUN 0x04
218#define SNS0_INCOMPL_DOMAIN 0x01
219
220/*
221 * architectured values for second sense byte
222 */
223#define SNS1_PERM_ERR 0x80
224#define SNS1_INV_TRACK_FORMAT 0x40
225#define SNS1_EOC 0x20
226#define SNS1_MESSAGE_TO_OPER 0x10
227#define SNS1_NO_REC_FOUND 0x08
228#define SNS1_FILE_PROTECTED 0x04
229#define SNS1_WRITE_INHIBITED 0x02
230#define SNS1_INPRECISE_END 0x01
231
232/*
233 * architectured values for third sense byte
234 */
235#define SNS2_REQ_INH_WRITE 0x80
236#define SNS2_CORRECTABLE 0x40
237#define SNS2_FIRST_LOG_ERR 0x20
238#define SNS2_ENV_DATA_PRESENT 0x10
239#define SNS2_INPRECISE_END 0x04
240 19
241/** 20/**
242 * struct ccw1 - channel command word 21 * struct ccw1 - channel command word
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
new file mode 100644
index 000000000000..471234b90574
--- /dev/null
+++ b/arch/s390/include/asm/cpu.h
@@ -0,0 +1,26 @@
1/*
2 * Copyright IBM Corp. 2000,2009
3 * Author(s): Hartmut Penner <hp@de.ibm.com>,
4 * Martin Schwidefsky <schwidefsky@de.ibm.com>,
5 * Christian Ehrhardt <ehrhardt@de.ibm.com>,
6 */
7
8#ifndef _ASM_S390_CPU_H
9#define _ASM_S390_CPU_H
10
11#define MAX_CPU_ADDRESS 255
12
13#ifndef __ASSEMBLY__
14
15#include <linux/types.h>
16
17struct cpuid
18{
19 unsigned int version : 8;
20 unsigned int ident : 24;
21 unsigned int machine : 16;
22 unsigned int unused : 16;
23} __packed;
24
25#endif /* __ASSEMBLY__ */
26#endif /* _ASM_S390_CPU_H */
diff --git a/arch/s390/include/asm/cpuid.h b/arch/s390/include/asm/cpuid.h
deleted file mode 100644
index 07836a2e5222..000000000000
--- a/arch/s390/include/asm/cpuid.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/*
2 * Copyright IBM Corp. 2000,2009
3 * Author(s): Hartmut Penner <hp@de.ibm.com>,
4 * Martin Schwidefsky <schwidefsky@de.ibm.com>
5 * Christian Ehrhardt <ehrhardt@de.ibm.com>
6 */
7
8#ifndef _ASM_S390_CPUID_H_
9#define _ASM_S390_CPUID_H_
10
11/*
12 * CPU type and hardware bug flags. Kept separately for each CPU.
13 * Members of this structure are referenced in head.S, so think twice
14 * before touching them. [mj]
15 */
16
17typedef struct
18{
19 unsigned int version : 8;
20 unsigned int ident : 24;
21 unsigned int machine : 16;
22 unsigned int unused : 16;
23} __attribute__ ((packed)) cpuid_t;
24
25#endif /* _ASM_S390_CPUID_H_ */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 31ed5686a968..18124b75a7ab 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -167,6 +167,10 @@ debug_text_event(debug_info_t* id, int level, const char* txt)
167 return debug_event_common(id,level,txt,strlen(txt)); 167 return debug_event_common(id,level,txt,strlen(txt));
168} 168}
169 169
170/*
171 * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
172 * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
173 */
170extern debug_entry_t * 174extern debug_entry_t *
171debug_sprintf_event(debug_info_t* id,int level,char *string,...) 175debug_sprintf_event(debug_info_t* id,int level,char *string,...)
172 __attribute__ ((format(printf, 3, 4))); 176 __attribute__ ((format(printf, 3, 4)));
@@ -206,7 +210,10 @@ debug_text_exception(debug_info_t* id, int level, const char* txt)
206 return debug_exception_common(id,level,txt,strlen(txt)); 210 return debug_exception_common(id,level,txt,strlen(txt));
207} 211}
208 212
209 213/*
214 * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
215 * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
216 */
210extern debug_entry_t * 217extern debug_entry_t *
211debug_sprintf_exception(debug_info_t* id,int level,char *string,...) 218debug_sprintf_exception(debug_info_t* id,int level,char *string,...)
212 __attribute__ ((format(printf, 3, 4))); 219 __attribute__ ((format(printf, 3, 4)));
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 89ec7056da28..498bc3892385 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -18,13 +18,6 @@
18#include <linux/interrupt.h> 18#include <linux/interrupt.h>
19#include <asm/lowcore.h> 19#include <asm/lowcore.h>
20 20
21/* irq_cpustat_t is unused currently, but could be converted
22 * into a percpu variable instead of storing softirq_pending
23 * on the lowcore */
24typedef struct {
25 unsigned int __softirq_pending;
26} irq_cpustat_t;
27
28#define local_softirq_pending() (S390_lowcore.softirq_pending) 21#define local_softirq_pending() (S390_lowcore.softirq_pending)
29 22
30#define __ARCH_IRQ_STAT 23#define __ARCH_IRQ_STAT
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 1171e6d144a3..5e95d95450b3 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -57,6 +57,8 @@ struct ipl_block_fcp {
57} __attribute__((packed)); 57} __attribute__((packed));
58 58
59#define DIAG308_VMPARM_SIZE 64 59#define DIAG308_VMPARM_SIZE 64
60#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \
61 offsetof(struct ipl_block_fcp, scp_data)))
60 62
61struct ipl_block_ccw { 63struct ipl_block_ccw {
62 u8 load_parm[8]; 64 u8 load_parm[8];
@@ -91,7 +93,8 @@ extern void do_halt(void);
91extern void do_poff(void); 93extern void do_poff(void);
92extern void ipl_save_parameters(void); 94extern void ipl_save_parameters(void);
93extern void ipl_update_parameters(void); 95extern void ipl_update_parameters(void);
94extern void get_ipl_vmparm(char *); 96extern size_t append_ipl_vmparm(char *, size_t);
97extern size_t append_ipl_scpdata(char *, size_t);
95 98
96enum { 99enum {
97 IPL_DEVNO_VALID = 1, 100 IPL_DEVNO_VALID = 1,
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 1cd02f6073a0..698988f69403 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -17,7 +17,7 @@
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/kvm_host.h> 18#include <linux/kvm_host.h>
19#include <asm/debug.h> 19#include <asm/debug.h>
20#include <asm/cpuid.h> 20#include <asm/cpu.h>
21 21
22#define KVM_MAX_VCPUS 64 22#define KVM_MAX_VCPUS 64
23#define KVM_MEMORY_SLOTS 32 23#define KVM_MEMORY_SLOTS 32
@@ -217,8 +217,8 @@ struct kvm_vcpu_arch {
217 struct hrtimer ckc_timer; 217 struct hrtimer ckc_timer;
218 struct tasklet_struct tasklet; 218 struct tasklet_struct tasklet;
219 union { 219 union {
220 cpuid_t cpu_id; 220 struct cpuid cpu_id;
221 u64 stidp_data; 221 u64 stidp_data;
222 }; 222 };
223}; 223};
224 224
diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h
index 0503936f101f..acdfdff26611 100644
--- a/arch/s390/include/asm/kvm_virtio.h
+++ b/arch/s390/include/asm/kvm_virtio.h
@@ -54,14 +54,4 @@ struct kvm_vqconfig {
54 * This is pagesize for historical reasons. */ 54 * This is pagesize for historical reasons. */
55#define KVM_S390_VIRTIO_RING_ALIGN 4096 55#define KVM_S390_VIRTIO_RING_ALIGN 4096
56 56
57#ifdef __KERNEL__
58/* early virtio console setup */
59#ifdef CONFIG_S390_GUEST
60extern void s390_virtio_console_init(void);
61#else
62static inline void s390_virtio_console_init(void)
63{
64}
65#endif /* CONFIG_S390_GUEST */
66#endif /* __KERNEL__ */
67#endif 57#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 5046ad6b7a63..6bc9426a6fbf 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -132,7 +132,7 @@
132 132
133#ifndef __ASSEMBLY__ 133#ifndef __ASSEMBLY__
134 134
135#include <asm/cpuid.h> 135#include <asm/cpu.h>
136#include <asm/ptrace.h> 136#include <asm/ptrace.h>
137#include <linux/types.h> 137#include <linux/types.h>
138 138
@@ -275,7 +275,7 @@ struct _lowcore
275 __u32 user_exec_asce; /* 0x02ac */ 275 __u32 user_exec_asce; /* 0x02ac */
276 276
277 /* SMP info area */ 277 /* SMP info area */
278 cpuid_t cpu_id; /* 0x02b0 */ 278 struct cpuid cpu_id; /* 0x02b0 */
279 __u32 cpu_nr; /* 0x02b8 */ 279 __u32 cpu_nr; /* 0x02b8 */
280 __u32 softirq_pending; /* 0x02bc */ 280 __u32 softirq_pending; /* 0x02bc */
281 __u32 percpu_offset; /* 0x02c0 */ 281 __u32 percpu_offset; /* 0x02c0 */
@@ -380,7 +380,7 @@ struct _lowcore
380 __u64 user_exec_asce; /* 0x0318 */ 380 __u64 user_exec_asce; /* 0x0318 */
381 381
382 /* SMP info area */ 382 /* SMP info area */
383 cpuid_t cpu_id; /* 0x0320 */ 383 struct cpuid cpu_id; /* 0x0320 */
384 __u32 cpu_nr; /* 0x0328 */ 384 __u32 cpu_nr; /* 0x0328 */
385 __u32 softirq_pending; /* 0x032c */ 385 __u32 softirq_pending; /* 0x032c */
386 __u64 percpu_offset; /* 0x0330 */ 386 __u64 percpu_offset; /* 0x0330 */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 3b59216e6284..03be99919d62 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -2,6 +2,7 @@
2#define __MMU_H 2#define __MMU_H
3 3
4typedef struct { 4typedef struct {
5 spinlock_t list_lock;
5 struct list_head crst_list; 6 struct list_head crst_list;
6 struct list_head pgtable_list; 7 struct list_head pgtable_list;
7 unsigned long asce_bits; 8 unsigned long asce_bits;
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 3e3594d01f83..5e9daf5d7f22 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -125,8 +125,6 @@ page_get_storage_key(unsigned long addr)
125 return skey; 125 return skey;
126} 126}
127 127
128#ifdef CONFIG_PAGE_STATES
129
130struct page; 128struct page;
131void arch_free_page(struct page *page, int order); 129void arch_free_page(struct page *page, int order);
132void arch_alloc_page(struct page *page, int order); 130void arch_alloc_page(struct page *page, int order);
@@ -134,8 +132,6 @@ void arch_alloc_page(struct page *page, int order);
134#define HAVE_ARCH_FREE_PAGE 132#define HAVE_ARCH_FREE_PAGE
135#define HAVE_ARCH_ALLOC_PAGE 133#define HAVE_ARCH_ALLOC_PAGE
136 134
137#endif
138
139#endif /* !__ASSEMBLY__ */ 135#endif /* !__ASSEMBLY__ */
140 136
141#define __PAGE_OFFSET 0x0UL 137#define __PAGE_OFFSET 0x0UL
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index b2658b9220fe..ddad5903341c 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -140,6 +140,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
140 140
141static inline pgd_t *pgd_alloc(struct mm_struct *mm) 141static inline pgd_t *pgd_alloc(struct mm_struct *mm)
142{ 142{
143 spin_lock_init(&mm->context.list_lock);
143 INIT_LIST_HEAD(&mm->context.crst_list); 144 INIT_LIST_HEAD(&mm->context.crst_list);
144 INIT_LIST_HEAD(&mm->context.pgtable_list); 145 INIT_LIST_HEAD(&mm->context.pgtable_list);
145 return (pgd_t *) crst_table_alloc(mm, s390_noexec); 146 return (pgd_t *) crst_table_alloc(mm, s390_noexec);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index c139fa7b8e89..cf8eed3fa779 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,7 +14,7 @@
14#define __ASM_S390_PROCESSOR_H 14#define __ASM_S390_PROCESSOR_H
15 15
16#include <linux/linkage.h> 16#include <linux/linkage.h>
17#include <asm/cpuid.h> 17#include <asm/cpu.h>
18#include <asm/page.h> 18#include <asm/page.h>
19#include <asm/ptrace.h> 19#include <asm/ptrace.h>
20#include <asm/setup.h> 20#include <asm/setup.h>
@@ -26,7 +26,7 @@
26 */ 26 */
27#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; }) 27#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; })
28 28
29static inline void get_cpu_id(cpuid_t *ptr) 29static inline void get_cpu_id(struct cpuid *ptr)
30{ 30{
31 asm volatile("stidp 0(%1)" : "=m" (*ptr) : "a" (ptr)); 31 asm volatile("stidp 0(%1)" : "=m" (*ptr) : "a" (ptr));
32} 32}
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
index 29ec8e28c8df..35d786fe93ae 100644
--- a/arch/s390/include/asm/scatterlist.h
+++ b/arch/s390/include/asm/scatterlist.h
@@ -1,19 +1 @@
1#ifndef _ASMS390_SCATTERLIST_H #include <asm-generic/scatterlist.h>
2#define _ASMS390_SCATTERLIST_H
3
4struct scatterlist {
5#ifdef CONFIG_DEBUG_SG
6 unsigned long sg_magic;
7#endif
8 unsigned long page_link;
9 unsigned int offset;
10 unsigned int length;
11};
12
13#ifdef __s390x__
14#define ISA_DMA_THRESHOLD (0xffffffffffffffffUL)
15#else
16#define ISA_DMA_THRESHOLD (0xffffffffUL)
17#endif
18
19#endif /* _ASMS390_SCATTERLIST_H */
diff --git a/drivers/s390/cio/scsw.c b/arch/s390/include/asm/scsw.h
index f8da25ab576d..de389cb54d28 100644
--- a/drivers/s390/cio/scsw.c
+++ b/arch/s390/include/asm/scsw.h
@@ -1,15 +1,182 @@
1/* 1/*
2 * Helper functions for scsw access. 2 * Helper functions for scsw access.
3 * 3 *
4 * Copyright IBM Corp. 2008 4 * Copyright IBM Corp. 2008,2009
5 * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> 5 * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
6 */ 6 */
7 7
8#ifndef _ASM_S390_SCSW_H_
9#define _ASM_S390_SCSW_H_
10
8#include <linux/types.h> 11#include <linux/types.h>
9#include <linux/module.h> 12#include <asm/chsc.h>
10#include <asm/cio.h> 13#include <asm/cio.h>
11#include "css.h" 14
12#include "chsc.h" 15/**
16 * struct cmd_scsw - command-mode subchannel status word
17 * @key: subchannel key
18 * @sctl: suspend control
19 * @eswf: esw format
20 * @cc: deferred condition code
21 * @fmt: format
22 * @pfch: prefetch
23 * @isic: initial-status interruption control
24 * @alcc: address-limit checking control
25 * @ssi: suppress-suspended interruption
26 * @zcc: zero condition code
27 * @ectl: extended control
28 * @pno: path not operational
29 * @res: reserved
30 * @fctl: function control
31 * @actl: activity control
32 * @stctl: status control
33 * @cpa: channel program address
34 * @dstat: device status
35 * @cstat: subchannel status
36 * @count: residual count
37 */
38struct cmd_scsw {
39 __u32 key : 4;
40 __u32 sctl : 1;
41 __u32 eswf : 1;
42 __u32 cc : 2;
43 __u32 fmt : 1;
44 __u32 pfch : 1;
45 __u32 isic : 1;
46 __u32 alcc : 1;
47 __u32 ssi : 1;
48 __u32 zcc : 1;
49 __u32 ectl : 1;
50 __u32 pno : 1;
51 __u32 res : 1;
52 __u32 fctl : 3;
53 __u32 actl : 7;
54 __u32 stctl : 5;
55 __u32 cpa;
56 __u32 dstat : 8;
57 __u32 cstat : 8;
58 __u32 count : 16;
59} __attribute__ ((packed));
60
61/**
62 * struct tm_scsw - transport-mode subchannel status word
63 * @key: subchannel key
64 * @eswf: esw format
65 * @cc: deferred condition code
66 * @fmt: format
67 * @x: IRB-format control
68 * @q: interrogate-complete
69 * @ectl: extended control
70 * @pno: path not operational
71 * @fctl: function control
72 * @actl: activity control
73 * @stctl: status control
74 * @tcw: TCW address
75 * @dstat: device status
76 * @cstat: subchannel status
77 * @fcxs: FCX status
78 * @schxs: subchannel-extended status
79 */
80struct tm_scsw {
81 u32 key:4;
82 u32 :1;
83 u32 eswf:1;
84 u32 cc:2;
85 u32 fmt:3;
86 u32 x:1;
87 u32 q:1;
88 u32 :1;
89 u32 ectl:1;
90 u32 pno:1;
91 u32 :1;
92 u32 fctl:3;
93 u32 actl:7;
94 u32 stctl:5;
95 u32 tcw;
96 u32 dstat:8;
97 u32 cstat:8;
98 u32 fcxs:8;
99 u32 schxs:8;
100} __attribute__ ((packed));
101
102/**
103 * union scsw - subchannel status word
104 * @cmd: command-mode SCSW
105 * @tm: transport-mode SCSW
106 */
107union scsw {
108 struct cmd_scsw cmd;
109 struct tm_scsw tm;
110} __attribute__ ((packed));
111
112#define SCSW_FCTL_CLEAR_FUNC 0x1
113#define SCSW_FCTL_HALT_FUNC 0x2
114#define SCSW_FCTL_START_FUNC 0x4
115
116#define SCSW_ACTL_SUSPENDED 0x1
117#define SCSW_ACTL_DEVACT 0x2
118#define SCSW_ACTL_SCHACT 0x4
119#define SCSW_ACTL_CLEAR_PEND 0x8
120#define SCSW_ACTL_HALT_PEND 0x10
121#define SCSW_ACTL_START_PEND 0x20
122#define SCSW_ACTL_RESUME_PEND 0x40
123
124#define SCSW_STCTL_STATUS_PEND 0x1
125#define SCSW_STCTL_SEC_STATUS 0x2
126#define SCSW_STCTL_PRIM_STATUS 0x4
127#define SCSW_STCTL_INTER_STATUS 0x8
128#define SCSW_STCTL_ALERT_STATUS 0x10
129
130#define DEV_STAT_ATTENTION 0x80
131#define DEV_STAT_STAT_MOD 0x40
132#define DEV_STAT_CU_END 0x20
133#define DEV_STAT_BUSY 0x10
134#define DEV_STAT_CHN_END 0x08
135#define DEV_STAT_DEV_END 0x04
136#define DEV_STAT_UNIT_CHECK 0x02
137#define DEV_STAT_UNIT_EXCEP 0x01
138
139#define SCHN_STAT_PCI 0x80
140#define SCHN_STAT_INCORR_LEN 0x40
141#define SCHN_STAT_PROG_CHECK 0x20
142#define SCHN_STAT_PROT_CHECK 0x10
143#define SCHN_STAT_CHN_DATA_CHK 0x08
144#define SCHN_STAT_CHN_CTRL_CHK 0x04
145#define SCHN_STAT_INTF_CTRL_CHK 0x02
146#define SCHN_STAT_CHAIN_CHECK 0x01
147
148/*
149 * architectured values for first sense byte
150 */
151#define SNS0_CMD_REJECT 0x80
152#define SNS_CMD_REJECT SNS0_CMD_REJECT
153#define SNS0_INTERVENTION_REQ 0x40
154#define SNS0_BUS_OUT_CHECK 0x20
155#define SNS0_EQUIPMENT_CHECK 0x10
156#define SNS0_DATA_CHECK 0x08
157#define SNS0_OVERRUN 0x04
158#define SNS0_INCOMPL_DOMAIN 0x01
159
160/*
161 * architectured values for second sense byte
162 */
163#define SNS1_PERM_ERR 0x80
164#define SNS1_INV_TRACK_FORMAT 0x40
165#define SNS1_EOC 0x20
166#define SNS1_MESSAGE_TO_OPER 0x10
167#define SNS1_NO_REC_FOUND 0x08
168#define SNS1_FILE_PROTECTED 0x04
169#define SNS1_WRITE_INHIBITED 0x02
170#define SNS1_INPRECISE_END 0x01
171
172/*
173 * architectured values for third sense byte
174 */
175#define SNS2_REQ_INH_WRITE 0x80
176#define SNS2_CORRECTABLE 0x40
177#define SNS2_FIRST_LOG_ERR 0x20
178#define SNS2_ENV_DATA_PRESENT 0x10
179#define SNS2_INPRECISE_END 0x04
13 180
14/** 181/**
15 * scsw_is_tm - check for transport mode scsw 182 * scsw_is_tm - check for transport mode scsw
@@ -18,11 +185,10 @@
18 * Return non-zero if the specified scsw is a transport mode scsw, zero 185 * Return non-zero if the specified scsw is a transport mode scsw, zero
19 * otherwise. 186 * otherwise.
20 */ 187 */
21int scsw_is_tm(union scsw *scsw) 188static inline int scsw_is_tm(union scsw *scsw)
22{ 189{
23 return css_general_characteristics.fcx && (scsw->tm.x == 1); 190 return css_general_characteristics.fcx && (scsw->tm.x == 1);
24} 191}
25EXPORT_SYMBOL(scsw_is_tm);
26 192
27/** 193/**
28 * scsw_key - return scsw key field 194 * scsw_key - return scsw key field
@@ -31,14 +197,13 @@ EXPORT_SYMBOL(scsw_is_tm);
31 * Return the value of the key field of the specified scsw, regardless of 197 * Return the value of the key field of the specified scsw, regardless of
32 * whether it is a transport mode or command mode scsw. 198 * whether it is a transport mode or command mode scsw.
33 */ 199 */
34u32 scsw_key(union scsw *scsw) 200static inline u32 scsw_key(union scsw *scsw)
35{ 201{
36 if (scsw_is_tm(scsw)) 202 if (scsw_is_tm(scsw))
37 return scsw->tm.key; 203 return scsw->tm.key;
38 else 204 else
39 return scsw->cmd.key; 205 return scsw->cmd.key;
40} 206}
41EXPORT_SYMBOL(scsw_key);
42 207
43/** 208/**
44 * scsw_eswf - return scsw eswf field 209 * scsw_eswf - return scsw eswf field
@@ -47,14 +212,13 @@ EXPORT_SYMBOL(scsw_key);
47 * Return the value of the eswf field of the specified scsw, regardless of 212 * Return the value of the eswf field of the specified scsw, regardless of
48 * whether it is a transport mode or command mode scsw. 213 * whether it is a transport mode or command mode scsw.
49 */ 214 */
50u32 scsw_eswf(union scsw *scsw) 215static inline u32 scsw_eswf(union scsw *scsw)
51{ 216{
52 if (scsw_is_tm(scsw)) 217 if (scsw_is_tm(scsw))
53 return scsw->tm.eswf; 218 return scsw->tm.eswf;
54 else 219 else
55 return scsw->cmd.eswf; 220 return scsw->cmd.eswf;
56} 221}
57EXPORT_SYMBOL(scsw_eswf);
58 222
59/** 223/**
60 * scsw_cc - return scsw cc field 224 * scsw_cc - return scsw cc field
@@ -63,14 +227,13 @@ EXPORT_SYMBOL(scsw_eswf);
63 * Return the value of the cc field of the specified scsw, regardless of 227 * Return the value of the cc field of the specified scsw, regardless of
64 * whether it is a transport mode or command mode scsw. 228 * whether it is a transport mode or command mode scsw.
65 */ 229 */
66u32 scsw_cc(union scsw *scsw) 230static inline u32 scsw_cc(union scsw *scsw)
67{ 231{
68 if (scsw_is_tm(scsw)) 232 if (scsw_is_tm(scsw))
69 return scsw->tm.cc; 233 return scsw->tm.cc;
70 else 234 else
71 return scsw->cmd.cc; 235 return scsw->cmd.cc;
72} 236}
73EXPORT_SYMBOL(scsw_cc);
74 237
75/** 238/**
76 * scsw_ectl - return scsw ectl field 239 * scsw_ectl - return scsw ectl field
@@ -79,14 +242,13 @@ EXPORT_SYMBOL(scsw_cc);
79 * Return the value of the ectl field of the specified scsw, regardless of 242 * Return the value of the ectl field of the specified scsw, regardless of
80 * whether it is a transport mode or command mode scsw. 243 * whether it is a transport mode or command mode scsw.
81 */ 244 */
82u32 scsw_ectl(union scsw *scsw) 245static inline u32 scsw_ectl(union scsw *scsw)
83{ 246{
84 if (scsw_is_tm(scsw)) 247 if (scsw_is_tm(scsw))
85 return scsw->tm.ectl; 248 return scsw->tm.ectl;
86 else 249 else
87 return scsw->cmd.ectl; 250 return scsw->cmd.ectl;
88} 251}
89EXPORT_SYMBOL(scsw_ectl);
90 252
91/** 253/**
92 * scsw_pno - return scsw pno field 254 * scsw_pno - return scsw pno field
@@ -95,14 +257,13 @@ EXPORT_SYMBOL(scsw_ectl);
95 * Return the value of the pno field of the specified scsw, regardless of 257 * Return the value of the pno field of the specified scsw, regardless of
96 * whether it is a transport mode or command mode scsw. 258 * whether it is a transport mode or command mode scsw.
97 */ 259 */
98u32 scsw_pno(union scsw *scsw) 260static inline u32 scsw_pno(union scsw *scsw)
99{ 261{
100 if (scsw_is_tm(scsw)) 262 if (scsw_is_tm(scsw))
101 return scsw->tm.pno; 263 return scsw->tm.pno;
102 else 264 else
103 return scsw->cmd.pno; 265 return scsw->cmd.pno;
104} 266}
105EXPORT_SYMBOL(scsw_pno);
106 267
107/** 268/**
108 * scsw_fctl - return scsw fctl field 269 * scsw_fctl - return scsw fctl field
@@ -111,14 +272,13 @@ EXPORT_SYMBOL(scsw_pno);
111 * Return the value of the fctl field of the specified scsw, regardless of 272 * Return the value of the fctl field of the specified scsw, regardless of
112 * whether it is a transport mode or command mode scsw. 273 * whether it is a transport mode or command mode scsw.
113 */ 274 */
114u32 scsw_fctl(union scsw *scsw) 275static inline u32 scsw_fctl(union scsw *scsw)
115{ 276{
116 if (scsw_is_tm(scsw)) 277 if (scsw_is_tm(scsw))
117 return scsw->tm.fctl; 278 return scsw->tm.fctl;
118 else 279 else
119 return scsw->cmd.fctl; 280 return scsw->cmd.fctl;
120} 281}
121EXPORT_SYMBOL(scsw_fctl);
122 282
123/** 283/**
124 * scsw_actl - return scsw actl field 284 * scsw_actl - return scsw actl field
@@ -127,14 +287,13 @@ EXPORT_SYMBOL(scsw_fctl);
127 * Return the value of the actl field of the specified scsw, regardless of 287 * Return the value of the actl field of the specified scsw, regardless of
128 * whether it is a transport mode or command mode scsw. 288 * whether it is a transport mode or command mode scsw.
129 */ 289 */
130u32 scsw_actl(union scsw *scsw) 290static inline u32 scsw_actl(union scsw *scsw)
131{ 291{
132 if (scsw_is_tm(scsw)) 292 if (scsw_is_tm(scsw))
133 return scsw->tm.actl; 293 return scsw->tm.actl;
134 else 294 else
135 return scsw->cmd.actl; 295 return scsw->cmd.actl;
136} 296}
137EXPORT_SYMBOL(scsw_actl);
138 297
139/** 298/**
140 * scsw_stctl - return scsw stctl field 299 * scsw_stctl - return scsw stctl field
@@ -143,14 +302,13 @@ EXPORT_SYMBOL(scsw_actl);
143 * Return the value of the stctl field of the specified scsw, regardless of 302 * Return the value of the stctl field of the specified scsw, regardless of
144 * whether it is a transport mode or command mode scsw. 303 * whether it is a transport mode or command mode scsw.
145 */ 304 */
146u32 scsw_stctl(union scsw *scsw) 305static inline u32 scsw_stctl(union scsw *scsw)
147{ 306{
148 if (scsw_is_tm(scsw)) 307 if (scsw_is_tm(scsw))
149 return scsw->tm.stctl; 308 return scsw->tm.stctl;
150 else 309 else
151 return scsw->cmd.stctl; 310 return scsw->cmd.stctl;
152} 311}
153EXPORT_SYMBOL(scsw_stctl);
154 312
155/** 313/**
156 * scsw_dstat - return scsw dstat field 314 * scsw_dstat - return scsw dstat field
@@ -159,14 +317,13 @@ EXPORT_SYMBOL(scsw_stctl);
159 * Return the value of the dstat field of the specified scsw, regardless of 317 * Return the value of the dstat field of the specified scsw, regardless of
160 * whether it is a transport mode or command mode scsw. 318 * whether it is a transport mode or command mode scsw.
161 */ 319 */
162u32 scsw_dstat(union scsw *scsw) 320static inline u32 scsw_dstat(union scsw *scsw)
163{ 321{
164 if (scsw_is_tm(scsw)) 322 if (scsw_is_tm(scsw))
165 return scsw->tm.dstat; 323 return scsw->tm.dstat;
166 else 324 else
167 return scsw->cmd.dstat; 325 return scsw->cmd.dstat;
168} 326}
169EXPORT_SYMBOL(scsw_dstat);
170 327
171/** 328/**
172 * scsw_cstat - return scsw cstat field 329 * scsw_cstat - return scsw cstat field
@@ -175,14 +332,13 @@ EXPORT_SYMBOL(scsw_dstat);
175 * Return the value of the cstat field of the specified scsw, regardless of 332 * Return the value of the cstat field of the specified scsw, regardless of
176 * whether it is a transport mode or command mode scsw. 333 * whether it is a transport mode or command mode scsw.
177 */ 334 */
178u32 scsw_cstat(union scsw *scsw) 335static inline u32 scsw_cstat(union scsw *scsw)
179{ 336{
180 if (scsw_is_tm(scsw)) 337 if (scsw_is_tm(scsw))
181 return scsw->tm.cstat; 338 return scsw->tm.cstat;
182 else 339 else
183 return scsw->cmd.cstat; 340 return scsw->cmd.cstat;
184} 341}
185EXPORT_SYMBOL(scsw_cstat);
186 342
187/** 343/**
188 * scsw_cmd_is_valid_key - check key field validity 344 * scsw_cmd_is_valid_key - check key field validity
@@ -191,11 +347,10 @@ EXPORT_SYMBOL(scsw_cstat);
191 * Return non-zero if the key field of the specified command mode scsw is 347 * Return non-zero if the key field of the specified command mode scsw is
192 * valid, zero otherwise. 348 * valid, zero otherwise.
193 */ 349 */
194int scsw_cmd_is_valid_key(union scsw *scsw) 350static inline int scsw_cmd_is_valid_key(union scsw *scsw)
195{ 351{
196 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); 352 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
197} 353}
198EXPORT_SYMBOL(scsw_cmd_is_valid_key);
199 354
200/** 355/**
201 * scsw_cmd_is_valid_sctl - check fctl field validity 356 * scsw_cmd_is_valid_sctl - check fctl field validity
@@ -204,11 +359,10 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_key);
204 * Return non-zero if the fctl field of the specified command mode scsw is 359 * Return non-zero if the fctl field of the specified command mode scsw is
205 * valid, zero otherwise. 360 * valid, zero otherwise.
206 */ 361 */
207int scsw_cmd_is_valid_sctl(union scsw *scsw) 362static inline int scsw_cmd_is_valid_sctl(union scsw *scsw)
208{ 363{
209 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); 364 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
210} 365}
211EXPORT_SYMBOL(scsw_cmd_is_valid_sctl);
212 366
213/** 367/**
214 * scsw_cmd_is_valid_eswf - check eswf field validity 368 * scsw_cmd_is_valid_eswf - check eswf field validity
@@ -217,11 +371,10 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_sctl);
217 * Return non-zero if the eswf field of the specified command mode scsw is 371 * Return non-zero if the eswf field of the specified command mode scsw is
218 * valid, zero otherwise. 372 * valid, zero otherwise.
219 */ 373 */
220int scsw_cmd_is_valid_eswf(union scsw *scsw) 374static inline int scsw_cmd_is_valid_eswf(union scsw *scsw)
221{ 375{
222 return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND); 376 return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
223} 377}
224EXPORT_SYMBOL(scsw_cmd_is_valid_eswf);
225 378
226/** 379/**
227 * scsw_cmd_is_valid_cc - check cc field validity 380 * scsw_cmd_is_valid_cc - check cc field validity
@@ -230,12 +383,11 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_eswf);
230 * Return non-zero if the cc field of the specified command mode scsw is 383 * Return non-zero if the cc field of the specified command mode scsw is
231 * valid, zero otherwise. 384 * valid, zero otherwise.
232 */ 385 */
233int scsw_cmd_is_valid_cc(union scsw *scsw) 386static inline int scsw_cmd_is_valid_cc(union scsw *scsw)
234{ 387{
235 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) && 388 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
236 (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND); 389 (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
237} 390}
238EXPORT_SYMBOL(scsw_cmd_is_valid_cc);
239 391
240/** 392/**
241 * scsw_cmd_is_valid_fmt - check fmt field validity 393 * scsw_cmd_is_valid_fmt - check fmt field validity
@@ -244,11 +396,10 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_cc);
244 * Return non-zero if the fmt field of the specified command mode scsw is 396 * Return non-zero if the fmt field of the specified command mode scsw is
245 * valid, zero otherwise. 397 * valid, zero otherwise.
246 */ 398 */
247int scsw_cmd_is_valid_fmt(union scsw *scsw) 399static inline int scsw_cmd_is_valid_fmt(union scsw *scsw)
248{ 400{
249 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); 401 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
250} 402}
251EXPORT_SYMBOL(scsw_cmd_is_valid_fmt);
252 403
253/** 404/**
254 * scsw_cmd_is_valid_pfch - check pfch field validity 405 * scsw_cmd_is_valid_pfch - check pfch field validity
@@ -257,11 +408,10 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_fmt);
257 * Return non-zero if the pfch field of the specified command mode scsw is 408 * Return non-zero if the pfch field of the specified command mode scsw is
258 * valid, zero otherwise. 409 * valid, zero otherwise.
259 */ 410 */
260int scsw_cmd_is_valid_pfch(union scsw *scsw) 411static inline int scsw_cmd_is_valid_pfch(union scsw *scsw)
261{ 412{
262 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); 413 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
263} 414}
264EXPORT_SYMBOL(scsw_cmd_is_valid_pfch);
265 415
266/** 416/**
267 * scsw_cmd_is_valid_isic - check isic field validity 417 * scsw_cmd_is_valid_isic - check isic field validity
@@ -270,11 +420,10 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_pfch);
270 * Return non-zero if the isic field of the specified command mode scsw is 420 * Return non-zero if the isic field of the specified command mode scsw is
271 * valid, zero otherwise. 421 * valid, zero otherwise.
272 */ 422 */
273int scsw_cmd_is_valid_isic(union scsw *scsw) 423static inline int scsw_cmd_is_valid_isic(union scsw *scsw)
274{ 424{
275 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); 425 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
276} 426}
277EXPORT_SYMBOL(scsw_cmd_is_valid_isic);
278 427
279/** 428/**
280 * scsw_cmd_is_valid_alcc - check alcc field validity 429 * scsw_cmd_is_valid_alcc - check alcc field validity
@@ -283,11 +432,10 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_isic);
283 * Return non-zero if the alcc field of the specified command mode scsw is 432 * Return non-zero if the alcc field of the specified command mode scsw is
284 * valid, zero otherwise. 433 * valid, zero otherwise.
285 */ 434 */
286int scsw_cmd_is_valid_alcc(union scsw *scsw) 435static inline int scsw_cmd_is_valid_alcc(union scsw *scsw)
287{ 436{
288 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); 437 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
289} 438}
290EXPORT_SYMBOL(scsw_cmd_is_valid_alcc);
291 439
292/** 440/**
293 * scsw_cmd_is_valid_ssi - check ssi field validity 441 * scsw_cmd_is_valid_ssi - check ssi field validity
@@ -296,11 +444,10 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_alcc);
296 * Return non-zero if the ssi field of the specified command mode scsw is 444 * Return non-zero if the ssi field of the specified command mode scsw is
297 * valid, zero otherwise. 445 * valid, zero otherwise.
298 */ 446 */
299int scsw_cmd_is_valid_ssi(union scsw *scsw) 447static inline int scsw_cmd_is_valid_ssi(union scsw *scsw)
300{ 448{
301 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); 449 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
302} 450}
303EXPORT_SYMBOL(scsw_cmd_is_valid_ssi);
304 451
305/** 452/**
306 * scsw_cmd_is_valid_zcc - check zcc field validity 453 * scsw_cmd_is_valid_zcc - check zcc field validity
@@ -309,12 +456,11 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_ssi);
309 * Return non-zero if the zcc field of the specified command mode scsw is 456 * Return non-zero if the zcc field of the specified command mode scsw is
310 * valid, zero otherwise. 457 * valid, zero otherwise.
311 */ 458 */
312int scsw_cmd_is_valid_zcc(union scsw *scsw) 459static inline int scsw_cmd_is_valid_zcc(union scsw *scsw)
313{ 460{
314 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) && 461 return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
315 (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS); 462 (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS);
316} 463}
317EXPORT_SYMBOL(scsw_cmd_is_valid_zcc);
318 464
319/** 465/**
320 * scsw_cmd_is_valid_ectl - check ectl field validity 466 * scsw_cmd_is_valid_ectl - check ectl field validity
@@ -323,13 +469,12 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_zcc);
323 * Return non-zero if the ectl field of the specified command mode scsw is 469 * Return non-zero if the ectl field of the specified command mode scsw is
324 * valid, zero otherwise. 470 * valid, zero otherwise.
325 */ 471 */
326int scsw_cmd_is_valid_ectl(union scsw *scsw) 472static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
327{ 473{
328 return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && 474 return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
329 !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && 475 !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
330 (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS); 476 (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS);
331} 477}
332EXPORT_SYMBOL(scsw_cmd_is_valid_ectl);
333 478
334/** 479/**
335 * scsw_cmd_is_valid_pno - check pno field validity 480 * scsw_cmd_is_valid_pno - check pno field validity
@@ -338,7 +483,7 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_ectl);
338 * Return non-zero if the pno field of the specified command mode scsw is 483 * Return non-zero if the pno field of the specified command mode scsw is
339 * valid, zero otherwise. 484 * valid, zero otherwise.
340 */ 485 */
341int scsw_cmd_is_valid_pno(union scsw *scsw) 486static inline int scsw_cmd_is_valid_pno(union scsw *scsw)
342{ 487{
343 return (scsw->cmd.fctl != 0) && 488 return (scsw->cmd.fctl != 0) &&
344 (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && 489 (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
@@ -346,7 +491,6 @@ int scsw_cmd_is_valid_pno(union scsw *scsw)
346 ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && 491 ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
347 (scsw->cmd.actl & SCSW_ACTL_SUSPENDED))); 492 (scsw->cmd.actl & SCSW_ACTL_SUSPENDED)));
348} 493}
349EXPORT_SYMBOL(scsw_cmd_is_valid_pno);
350 494
351/** 495/**
352 * scsw_cmd_is_valid_fctl - check fctl field validity 496 * scsw_cmd_is_valid_fctl - check fctl field validity
@@ -355,12 +499,11 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_pno);
355 * Return non-zero if the fctl field of the specified command mode scsw is 499 * Return non-zero if the fctl field of the specified command mode scsw is
356 * valid, zero otherwise. 500 * valid, zero otherwise.
357 */ 501 */
358int scsw_cmd_is_valid_fctl(union scsw *scsw) 502static inline int scsw_cmd_is_valid_fctl(union scsw *scsw)
359{ 503{
360 /* Only valid if pmcw.dnv == 1*/ 504 /* Only valid if pmcw.dnv == 1*/
361 return 1; 505 return 1;
362} 506}
363EXPORT_SYMBOL(scsw_cmd_is_valid_fctl);
364 507
365/** 508/**
366 * scsw_cmd_is_valid_actl - check actl field validity 509 * scsw_cmd_is_valid_actl - check actl field validity
@@ -369,12 +512,11 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_fctl);
369 * Return non-zero if the actl field of the specified command mode scsw is 512 * Return non-zero if the actl field of the specified command mode scsw is
370 * valid, zero otherwise. 513 * valid, zero otherwise.
371 */ 514 */
372int scsw_cmd_is_valid_actl(union scsw *scsw) 515static inline int scsw_cmd_is_valid_actl(union scsw *scsw)
373{ 516{
374 /* Only valid if pmcw.dnv == 1*/ 517 /* Only valid if pmcw.dnv == 1*/
375 return 1; 518 return 1;
376} 519}
377EXPORT_SYMBOL(scsw_cmd_is_valid_actl);
378 520
379/** 521/**
380 * scsw_cmd_is_valid_stctl - check stctl field validity 522 * scsw_cmd_is_valid_stctl - check stctl field validity
@@ -383,12 +525,11 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_actl);
383 * Return non-zero if the stctl field of the specified command mode scsw is 525 * Return non-zero if the stctl field of the specified command mode scsw is
384 * valid, zero otherwise. 526 * valid, zero otherwise.
385 */ 527 */
386int scsw_cmd_is_valid_stctl(union scsw *scsw) 528static inline int scsw_cmd_is_valid_stctl(union scsw *scsw)
387{ 529{
388 /* Only valid if pmcw.dnv == 1*/ 530 /* Only valid if pmcw.dnv == 1*/
389 return 1; 531 return 1;
390} 532}
391EXPORT_SYMBOL(scsw_cmd_is_valid_stctl);
392 533
393/** 534/**
394 * scsw_cmd_is_valid_dstat - check dstat field validity 535 * scsw_cmd_is_valid_dstat - check dstat field validity
@@ -397,12 +538,11 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_stctl);
397 * Return non-zero if the dstat field of the specified command mode scsw is 538 * Return non-zero if the dstat field of the specified command mode scsw is
398 * valid, zero otherwise. 539 * valid, zero otherwise.
399 */ 540 */
400int scsw_cmd_is_valid_dstat(union scsw *scsw) 541static inline int scsw_cmd_is_valid_dstat(union scsw *scsw)
401{ 542{
402 return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && 543 return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
403 (scsw->cmd.cc != 3); 544 (scsw->cmd.cc != 3);
404} 545}
405EXPORT_SYMBOL(scsw_cmd_is_valid_dstat);
406 546
407/** 547/**
408 * scsw_cmd_is_valid_cstat - check cstat field validity 548 * scsw_cmd_is_valid_cstat - check cstat field validity
@@ -411,12 +551,11 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_dstat);
411 * Return non-zero if the cstat field of the specified command mode scsw is 551 * Return non-zero if the cstat field of the specified command mode scsw is
412 * valid, zero otherwise. 552 * valid, zero otherwise.
413 */ 553 */
414int scsw_cmd_is_valid_cstat(union scsw *scsw) 554static inline int scsw_cmd_is_valid_cstat(union scsw *scsw)
415{ 555{
416 return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && 556 return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
417 (scsw->cmd.cc != 3); 557 (scsw->cmd.cc != 3);
418} 558}
419EXPORT_SYMBOL(scsw_cmd_is_valid_cstat);
420 559
421/** 560/**
422 * scsw_tm_is_valid_key - check key field validity 561 * scsw_tm_is_valid_key - check key field validity
@@ -425,11 +564,10 @@ EXPORT_SYMBOL(scsw_cmd_is_valid_cstat);
425 * Return non-zero if the key field of the specified transport mode scsw is 564 * Return non-zero if the key field of the specified transport mode scsw is
426 * valid, zero otherwise. 565 * valid, zero otherwise.
427 */ 566 */
428int scsw_tm_is_valid_key(union scsw *scsw) 567static inline int scsw_tm_is_valid_key(union scsw *scsw)
429{ 568{
430 return (scsw->tm.fctl & SCSW_FCTL_START_FUNC); 569 return (scsw->tm.fctl & SCSW_FCTL_START_FUNC);
431} 570}
432EXPORT_SYMBOL(scsw_tm_is_valid_key);
433 571
434/** 572/**
435 * scsw_tm_is_valid_eswf - check eswf field validity 573 * scsw_tm_is_valid_eswf - check eswf field validity
@@ -438,11 +576,10 @@ EXPORT_SYMBOL(scsw_tm_is_valid_key);
438 * Return non-zero if the eswf field of the specified transport mode scsw is 576 * Return non-zero if the eswf field of the specified transport mode scsw is
439 * valid, zero otherwise. 577 * valid, zero otherwise.
440 */ 578 */
441int scsw_tm_is_valid_eswf(union scsw *scsw) 579static inline int scsw_tm_is_valid_eswf(union scsw *scsw)
442{ 580{
443 return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND); 581 return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
444} 582}
445EXPORT_SYMBOL(scsw_tm_is_valid_eswf);
446 583
447/** 584/**
448 * scsw_tm_is_valid_cc - check cc field validity 585 * scsw_tm_is_valid_cc - check cc field validity
@@ -451,12 +588,11 @@ EXPORT_SYMBOL(scsw_tm_is_valid_eswf);
451 * Return non-zero if the cc field of the specified transport mode scsw is 588 * Return non-zero if the cc field of the specified transport mode scsw is
452 * valid, zero otherwise. 589 * valid, zero otherwise.
453 */ 590 */
454int scsw_tm_is_valid_cc(union scsw *scsw) 591static inline int scsw_tm_is_valid_cc(union scsw *scsw)
455{ 592{
456 return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) && 593 return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) &&
457 (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND); 594 (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
458} 595}
459EXPORT_SYMBOL(scsw_tm_is_valid_cc);
460 596
461/** 597/**
462 * scsw_tm_is_valid_fmt - check fmt field validity 598 * scsw_tm_is_valid_fmt - check fmt field validity
@@ -465,11 +601,10 @@ EXPORT_SYMBOL(scsw_tm_is_valid_cc);
465 * Return non-zero if the fmt field of the specified transport mode scsw is 601 * Return non-zero if the fmt field of the specified transport mode scsw is
466 * valid, zero otherwise. 602 * valid, zero otherwise.
467 */ 603 */
468int scsw_tm_is_valid_fmt(union scsw *scsw) 604static inline int scsw_tm_is_valid_fmt(union scsw *scsw)
469{ 605{
470 return 1; 606 return 1;
471} 607}
472EXPORT_SYMBOL(scsw_tm_is_valid_fmt);
473 608
474/** 609/**
475 * scsw_tm_is_valid_x - check x field validity 610 * scsw_tm_is_valid_x - check x field validity
@@ -478,11 +613,10 @@ EXPORT_SYMBOL(scsw_tm_is_valid_fmt);
478 * Return non-zero if the x field of the specified transport mode scsw is 613 * Return non-zero if the x field of the specified transport mode scsw is
479 * valid, zero otherwise. 614 * valid, zero otherwise.
480 */ 615 */
481int scsw_tm_is_valid_x(union scsw *scsw) 616static inline int scsw_tm_is_valid_x(union scsw *scsw)
482{ 617{
483 return 1; 618 return 1;
484} 619}
485EXPORT_SYMBOL(scsw_tm_is_valid_x);
486 620
487/** 621/**
488 * scsw_tm_is_valid_q - check q field validity 622 * scsw_tm_is_valid_q - check q field validity
@@ -491,11 +625,10 @@ EXPORT_SYMBOL(scsw_tm_is_valid_x);
491 * Return non-zero if the q field of the specified transport mode scsw is 625 * Return non-zero if the q field of the specified transport mode scsw is
492 * valid, zero otherwise. 626 * valid, zero otherwise.
493 */ 627 */
494int scsw_tm_is_valid_q(union scsw *scsw) 628static inline int scsw_tm_is_valid_q(union scsw *scsw)
495{ 629{
496 return 1; 630 return 1;
497} 631}
498EXPORT_SYMBOL(scsw_tm_is_valid_q);
499 632
500/** 633/**
501 * scsw_tm_is_valid_ectl - check ectl field validity 634 * scsw_tm_is_valid_ectl - check ectl field validity
@@ -504,13 +637,12 @@ EXPORT_SYMBOL(scsw_tm_is_valid_q);
504 * Return non-zero if the ectl field of the specified transport mode scsw is 637 * Return non-zero if the ectl field of the specified transport mode scsw is
505 * valid, zero otherwise. 638 * valid, zero otherwise.
506 */ 639 */
507int scsw_tm_is_valid_ectl(union scsw *scsw) 640static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
508{ 641{
509 return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && 642 return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
510 !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) && 643 !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
511 (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS); 644 (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS);
512} 645}
513EXPORT_SYMBOL(scsw_tm_is_valid_ectl);
514 646
515/** 647/**
516 * scsw_tm_is_valid_pno - check pno field validity 648 * scsw_tm_is_valid_pno - check pno field validity
@@ -519,7 +651,7 @@ EXPORT_SYMBOL(scsw_tm_is_valid_ectl);
519 * Return non-zero if the pno field of the specified transport mode scsw is 651 * Return non-zero if the pno field of the specified transport mode scsw is
520 * valid, zero otherwise. 652 * valid, zero otherwise.
521 */ 653 */
522int scsw_tm_is_valid_pno(union scsw *scsw) 654static inline int scsw_tm_is_valid_pno(union scsw *scsw)
523{ 655{
524 return (scsw->tm.fctl != 0) && 656 return (scsw->tm.fctl != 0) &&
525 (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && 657 (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
@@ -527,7 +659,6 @@ int scsw_tm_is_valid_pno(union scsw *scsw)
527 ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) && 659 ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
528 (scsw->tm.actl & SCSW_ACTL_SUSPENDED))); 660 (scsw->tm.actl & SCSW_ACTL_SUSPENDED)));
529} 661}
530EXPORT_SYMBOL(scsw_tm_is_valid_pno);
531 662
532/** 663/**
533 * scsw_tm_is_valid_fctl - check fctl field validity 664 * scsw_tm_is_valid_fctl - check fctl field validity
@@ -536,12 +667,11 @@ EXPORT_SYMBOL(scsw_tm_is_valid_pno);
536 * Return non-zero if the fctl field of the specified transport mode scsw is 667 * Return non-zero if the fctl field of the specified transport mode scsw is
537 * valid, zero otherwise. 668 * valid, zero otherwise.
538 */ 669 */
539int scsw_tm_is_valid_fctl(union scsw *scsw) 670static inline int scsw_tm_is_valid_fctl(union scsw *scsw)
540{ 671{
541 /* Only valid if pmcw.dnv == 1*/ 672 /* Only valid if pmcw.dnv == 1*/
542 return 1; 673 return 1;
543} 674}
544EXPORT_SYMBOL(scsw_tm_is_valid_fctl);
545 675
546/** 676/**
547 * scsw_tm_is_valid_actl - check actl field validity 677 * scsw_tm_is_valid_actl - check actl field validity
@@ -550,12 +680,11 @@ EXPORT_SYMBOL(scsw_tm_is_valid_fctl);
550 * Return non-zero if the actl field of the specified transport mode scsw is 680 * Return non-zero if the actl field of the specified transport mode scsw is
551 * valid, zero otherwise. 681 * valid, zero otherwise.
552 */ 682 */
553int scsw_tm_is_valid_actl(union scsw *scsw) 683static inline int scsw_tm_is_valid_actl(union scsw *scsw)
554{ 684{
555 /* Only valid if pmcw.dnv == 1*/ 685 /* Only valid if pmcw.dnv == 1*/
556 return 1; 686 return 1;
557} 687}
558EXPORT_SYMBOL(scsw_tm_is_valid_actl);
559 688
560/** 689/**
561 * scsw_tm_is_valid_stctl - check stctl field validity 690 * scsw_tm_is_valid_stctl - check stctl field validity
@@ -564,12 +693,11 @@ EXPORT_SYMBOL(scsw_tm_is_valid_actl);
564 * Return non-zero if the stctl field of the specified transport mode scsw is 693 * Return non-zero if the stctl field of the specified transport mode scsw is
565 * valid, zero otherwise. 694 * valid, zero otherwise.
566 */ 695 */
567int scsw_tm_is_valid_stctl(union scsw *scsw) 696static inline int scsw_tm_is_valid_stctl(union scsw *scsw)
568{ 697{
569 /* Only valid if pmcw.dnv == 1*/ 698 /* Only valid if pmcw.dnv == 1*/
570 return 1; 699 return 1;
571} 700}
572EXPORT_SYMBOL(scsw_tm_is_valid_stctl);
573 701
574/** 702/**
575 * scsw_tm_is_valid_dstat - check dstat field validity 703 * scsw_tm_is_valid_dstat - check dstat field validity
@@ -578,12 +706,11 @@ EXPORT_SYMBOL(scsw_tm_is_valid_stctl);
578 * Return non-zero if the dstat field of the specified transport mode scsw is 706 * Return non-zero if the dstat field of the specified transport mode scsw is
579 * valid, zero otherwise. 707 * valid, zero otherwise.
580 */ 708 */
581int scsw_tm_is_valid_dstat(union scsw *scsw) 709static inline int scsw_tm_is_valid_dstat(union scsw *scsw)
582{ 710{
583 return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && 711 return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
584 (scsw->tm.cc != 3); 712 (scsw->tm.cc != 3);
585} 713}
586EXPORT_SYMBOL(scsw_tm_is_valid_dstat);
587 714
588/** 715/**
589 * scsw_tm_is_valid_cstat - check cstat field validity 716 * scsw_tm_is_valid_cstat - check cstat field validity
@@ -592,12 +719,11 @@ EXPORT_SYMBOL(scsw_tm_is_valid_dstat);
592 * Return non-zero if the cstat field of the specified transport mode scsw is 719 * Return non-zero if the cstat field of the specified transport mode scsw is
593 * valid, zero otherwise. 720 * valid, zero otherwise.
594 */ 721 */
595int scsw_tm_is_valid_cstat(union scsw *scsw) 722static inline int scsw_tm_is_valid_cstat(union scsw *scsw)
596{ 723{
597 return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && 724 return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
598 (scsw->tm.cc != 3); 725 (scsw->tm.cc != 3);
599} 726}
600EXPORT_SYMBOL(scsw_tm_is_valid_cstat);
601 727
602/** 728/**
603 * scsw_tm_is_valid_fcxs - check fcxs field validity 729 * scsw_tm_is_valid_fcxs - check fcxs field validity
@@ -606,11 +732,10 @@ EXPORT_SYMBOL(scsw_tm_is_valid_cstat);
606 * Return non-zero if the fcxs field of the specified transport mode scsw is 732 * Return non-zero if the fcxs field of the specified transport mode scsw is
607 * valid, zero otherwise. 733 * valid, zero otherwise.
608 */ 734 */
609int scsw_tm_is_valid_fcxs(union scsw *scsw) 735static inline int scsw_tm_is_valid_fcxs(union scsw *scsw)
610{ 736{
611 return 1; 737 return 1;
612} 738}
613EXPORT_SYMBOL(scsw_tm_is_valid_fcxs);
614 739
615/** 740/**
616 * scsw_tm_is_valid_schxs - check schxs field validity 741 * scsw_tm_is_valid_schxs - check schxs field validity
@@ -619,14 +744,13 @@ EXPORT_SYMBOL(scsw_tm_is_valid_fcxs);
619 * Return non-zero if the schxs field of the specified transport mode scsw is 744 * Return non-zero if the schxs field of the specified transport mode scsw is
620 * valid, zero otherwise. 745 * valid, zero otherwise.
621 */ 746 */
622int scsw_tm_is_valid_schxs(union scsw *scsw) 747static inline int scsw_tm_is_valid_schxs(union scsw *scsw)
623{ 748{
624 return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK | 749 return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK |
625 SCHN_STAT_INTF_CTRL_CHK | 750 SCHN_STAT_INTF_CTRL_CHK |
626 SCHN_STAT_PROT_CHECK | 751 SCHN_STAT_PROT_CHECK |
627 SCHN_STAT_CHN_DATA_CHK)); 752 SCHN_STAT_CHN_DATA_CHK));
628} 753}
629EXPORT_SYMBOL(scsw_tm_is_valid_schxs);
630 754
631/** 755/**
632 * scsw_is_valid_actl - check actl field validity 756 * scsw_is_valid_actl - check actl field validity
@@ -636,14 +760,13 @@ EXPORT_SYMBOL(scsw_tm_is_valid_schxs);
636 * regardless of whether it is a transport mode or command mode scsw. 760 * regardless of whether it is a transport mode or command mode scsw.
637 * Return zero if the field does not contain a valid value. 761 * Return zero if the field does not contain a valid value.
638 */ 762 */
639int scsw_is_valid_actl(union scsw *scsw) 763static inline int scsw_is_valid_actl(union scsw *scsw)
640{ 764{
641 if (scsw_is_tm(scsw)) 765 if (scsw_is_tm(scsw))
642 return scsw_tm_is_valid_actl(scsw); 766 return scsw_tm_is_valid_actl(scsw);
643 else 767 else
644 return scsw_cmd_is_valid_actl(scsw); 768 return scsw_cmd_is_valid_actl(scsw);
645} 769}
646EXPORT_SYMBOL(scsw_is_valid_actl);
647 770
648/** 771/**
649 * scsw_is_valid_cc - check cc field validity 772 * scsw_is_valid_cc - check cc field validity
@@ -653,14 +776,13 @@ EXPORT_SYMBOL(scsw_is_valid_actl);
653 * regardless of whether it is a transport mode or command mode scsw. 776 * regardless of whether it is a transport mode or command mode scsw.
654 * Return zero if the field does not contain a valid value. 777 * Return zero if the field does not contain a valid value.
655 */ 778 */
656int scsw_is_valid_cc(union scsw *scsw) 779static inline int scsw_is_valid_cc(union scsw *scsw)
657{ 780{
658 if (scsw_is_tm(scsw)) 781 if (scsw_is_tm(scsw))
659 return scsw_tm_is_valid_cc(scsw); 782 return scsw_tm_is_valid_cc(scsw);
660 else 783 else
661 return scsw_cmd_is_valid_cc(scsw); 784 return scsw_cmd_is_valid_cc(scsw);
662} 785}
663EXPORT_SYMBOL(scsw_is_valid_cc);
664 786
665/** 787/**
666 * scsw_is_valid_cstat - check cstat field validity 788 * scsw_is_valid_cstat - check cstat field validity
@@ -670,14 +792,13 @@ EXPORT_SYMBOL(scsw_is_valid_cc);
670 * regardless of whether it is a transport mode or command mode scsw. 792 * regardless of whether it is a transport mode or command mode scsw.
671 * Return zero if the field does not contain a valid value. 793 * Return zero if the field does not contain a valid value.
672 */ 794 */
673int scsw_is_valid_cstat(union scsw *scsw) 795static inline int scsw_is_valid_cstat(union scsw *scsw)
674{ 796{
675 if (scsw_is_tm(scsw)) 797 if (scsw_is_tm(scsw))
676 return scsw_tm_is_valid_cstat(scsw); 798 return scsw_tm_is_valid_cstat(scsw);
677 else 799 else
678 return scsw_cmd_is_valid_cstat(scsw); 800 return scsw_cmd_is_valid_cstat(scsw);
679} 801}
680EXPORT_SYMBOL(scsw_is_valid_cstat);
681 802
682/** 803/**
683 * scsw_is_valid_dstat - check dstat field validity 804 * scsw_is_valid_dstat - check dstat field validity
@@ -687,14 +808,13 @@ EXPORT_SYMBOL(scsw_is_valid_cstat);
687 * regardless of whether it is a transport mode or command mode scsw. 808 * regardless of whether it is a transport mode or command mode scsw.
688 * Return zero if the field does not contain a valid value. 809 * Return zero if the field does not contain a valid value.
689 */ 810 */
690int scsw_is_valid_dstat(union scsw *scsw) 811static inline int scsw_is_valid_dstat(union scsw *scsw)
691{ 812{
692 if (scsw_is_tm(scsw)) 813 if (scsw_is_tm(scsw))
693 return scsw_tm_is_valid_dstat(scsw); 814 return scsw_tm_is_valid_dstat(scsw);
694 else 815 else
695 return scsw_cmd_is_valid_dstat(scsw); 816 return scsw_cmd_is_valid_dstat(scsw);
696} 817}
697EXPORT_SYMBOL(scsw_is_valid_dstat);
698 818
699/** 819/**
700 * scsw_is_valid_ectl - check ectl field validity 820 * scsw_is_valid_ectl - check ectl field validity
@@ -704,14 +824,13 @@ EXPORT_SYMBOL(scsw_is_valid_dstat);
704 * regardless of whether it is a transport mode or command mode scsw. 824 * regardless of whether it is a transport mode or command mode scsw.
705 * Return zero if the field does not contain a valid value. 825 * Return zero if the field does not contain a valid value.
706 */ 826 */
707int scsw_is_valid_ectl(union scsw *scsw) 827static inline int scsw_is_valid_ectl(union scsw *scsw)
708{ 828{
709 if (scsw_is_tm(scsw)) 829 if (scsw_is_tm(scsw))
710 return scsw_tm_is_valid_ectl(scsw); 830 return scsw_tm_is_valid_ectl(scsw);
711 else 831 else
712 return scsw_cmd_is_valid_ectl(scsw); 832 return scsw_cmd_is_valid_ectl(scsw);
713} 833}
714EXPORT_SYMBOL(scsw_is_valid_ectl);
715 834
716/** 835/**
717 * scsw_is_valid_eswf - check eswf field validity 836 * scsw_is_valid_eswf - check eswf field validity
@@ -721,14 +840,13 @@ EXPORT_SYMBOL(scsw_is_valid_ectl);
721 * regardless of whether it is a transport mode or command mode scsw. 840 * regardless of whether it is a transport mode or command mode scsw.
722 * Return zero if the field does not contain a valid value. 841 * Return zero if the field does not contain a valid value.
723 */ 842 */
724int scsw_is_valid_eswf(union scsw *scsw) 843static inline int scsw_is_valid_eswf(union scsw *scsw)
725{ 844{
726 if (scsw_is_tm(scsw)) 845 if (scsw_is_tm(scsw))
727 return scsw_tm_is_valid_eswf(scsw); 846 return scsw_tm_is_valid_eswf(scsw);
728 else 847 else
729 return scsw_cmd_is_valid_eswf(scsw); 848 return scsw_cmd_is_valid_eswf(scsw);
730} 849}
731EXPORT_SYMBOL(scsw_is_valid_eswf);
732 850
733/** 851/**
734 * scsw_is_valid_fctl - check fctl field validity 852 * scsw_is_valid_fctl - check fctl field validity
@@ -738,14 +856,13 @@ EXPORT_SYMBOL(scsw_is_valid_eswf);
738 * regardless of whether it is a transport mode or command mode scsw. 856 * regardless of whether it is a transport mode or command mode scsw.
739 * Return zero if the field does not contain a valid value. 857 * Return zero if the field does not contain a valid value.
740 */ 858 */
741int scsw_is_valid_fctl(union scsw *scsw) 859static inline int scsw_is_valid_fctl(union scsw *scsw)
742{ 860{
743 if (scsw_is_tm(scsw)) 861 if (scsw_is_tm(scsw))
744 return scsw_tm_is_valid_fctl(scsw); 862 return scsw_tm_is_valid_fctl(scsw);
745 else 863 else
746 return scsw_cmd_is_valid_fctl(scsw); 864 return scsw_cmd_is_valid_fctl(scsw);
747} 865}
748EXPORT_SYMBOL(scsw_is_valid_fctl);
749 866
750/** 867/**
751 * scsw_is_valid_key - check key field validity 868 * scsw_is_valid_key - check key field validity
@@ -755,14 +872,13 @@ EXPORT_SYMBOL(scsw_is_valid_fctl);
755 * regardless of whether it is a transport mode or command mode scsw. 872 * regardless of whether it is a transport mode or command mode scsw.
756 * Return zero if the field does not contain a valid value. 873 * Return zero if the field does not contain a valid value.
757 */ 874 */
758int scsw_is_valid_key(union scsw *scsw) 875static inline int scsw_is_valid_key(union scsw *scsw)
759{ 876{
760 if (scsw_is_tm(scsw)) 877 if (scsw_is_tm(scsw))
761 return scsw_tm_is_valid_key(scsw); 878 return scsw_tm_is_valid_key(scsw);
762 else 879 else
763 return scsw_cmd_is_valid_key(scsw); 880 return scsw_cmd_is_valid_key(scsw);
764} 881}
765EXPORT_SYMBOL(scsw_is_valid_key);
766 882
767/** 883/**
768 * scsw_is_valid_pno - check pno field validity 884 * scsw_is_valid_pno - check pno field validity
@@ -772,14 +888,13 @@ EXPORT_SYMBOL(scsw_is_valid_key);
772 * regardless of whether it is a transport mode or command mode scsw. 888 * regardless of whether it is a transport mode or command mode scsw.
773 * Return zero if the field does not contain a valid value. 889 * Return zero if the field does not contain a valid value.
774 */ 890 */
775int scsw_is_valid_pno(union scsw *scsw) 891static inline int scsw_is_valid_pno(union scsw *scsw)
776{ 892{
777 if (scsw_is_tm(scsw)) 893 if (scsw_is_tm(scsw))
778 return scsw_tm_is_valid_pno(scsw); 894 return scsw_tm_is_valid_pno(scsw);
779 else 895 else
780 return scsw_cmd_is_valid_pno(scsw); 896 return scsw_cmd_is_valid_pno(scsw);
781} 897}
782EXPORT_SYMBOL(scsw_is_valid_pno);
783 898
784/** 899/**
785 * scsw_is_valid_stctl - check stctl field validity 900 * scsw_is_valid_stctl - check stctl field validity
@@ -789,14 +904,13 @@ EXPORT_SYMBOL(scsw_is_valid_pno);
789 * regardless of whether it is a transport mode or command mode scsw. 904 * regardless of whether it is a transport mode or command mode scsw.
790 * Return zero if the field does not contain a valid value. 905 * Return zero if the field does not contain a valid value.
791 */ 906 */
792int scsw_is_valid_stctl(union scsw *scsw) 907static inline int scsw_is_valid_stctl(union scsw *scsw)
793{ 908{
794 if (scsw_is_tm(scsw)) 909 if (scsw_is_tm(scsw))
795 return scsw_tm_is_valid_stctl(scsw); 910 return scsw_tm_is_valid_stctl(scsw);
796 else 911 else
797 return scsw_cmd_is_valid_stctl(scsw); 912 return scsw_cmd_is_valid_stctl(scsw);
798} 913}
799EXPORT_SYMBOL(scsw_is_valid_stctl);
800 914
801/** 915/**
802 * scsw_cmd_is_solicited - check for solicited scsw 916 * scsw_cmd_is_solicited - check for solicited scsw
@@ -805,12 +919,11 @@ EXPORT_SYMBOL(scsw_is_valid_stctl);
805 * Return non-zero if the command mode scsw indicates that the associated 919 * Return non-zero if the command mode scsw indicates that the associated
806 * status condition is solicited, zero if it is unsolicited. 920 * status condition is solicited, zero if it is unsolicited.
807 */ 921 */
808int scsw_cmd_is_solicited(union scsw *scsw) 922static inline int scsw_cmd_is_solicited(union scsw *scsw)
809{ 923{
810 return (scsw->cmd.cc != 0) || (scsw->cmd.stctl != 924 return (scsw->cmd.cc != 0) || (scsw->cmd.stctl !=
811 (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)); 925 (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS));
812} 926}
813EXPORT_SYMBOL(scsw_cmd_is_solicited);
814 927
815/** 928/**
816 * scsw_tm_is_solicited - check for solicited scsw 929 * scsw_tm_is_solicited - check for solicited scsw
@@ -819,12 +932,11 @@ EXPORT_SYMBOL(scsw_cmd_is_solicited);
819 * Return non-zero if the transport mode scsw indicates that the associated 932 * Return non-zero if the transport mode scsw indicates that the associated
820 * status condition is solicited, zero if it is unsolicited. 933 * status condition is solicited, zero if it is unsolicited.
821 */ 934 */
822int scsw_tm_is_solicited(union scsw *scsw) 935static inline int scsw_tm_is_solicited(union scsw *scsw)
823{ 936{
824 return (scsw->tm.cc != 0) || (scsw->tm.stctl != 937 return (scsw->tm.cc != 0) || (scsw->tm.stctl !=
825 (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)); 938 (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS));
826} 939}
827EXPORT_SYMBOL(scsw_tm_is_solicited);
828 940
829/** 941/**
830 * scsw_is_solicited - check for solicited scsw 942 * scsw_is_solicited - check for solicited scsw
@@ -833,11 +945,12 @@ EXPORT_SYMBOL(scsw_tm_is_solicited);
833 * Return non-zero if the transport or command mode scsw indicates that the 945 * Return non-zero if the transport or command mode scsw indicates that the
834 * associated status condition is solicited, zero if it is unsolicited. 946 * associated status condition is solicited, zero if it is unsolicited.
835 */ 947 */
836int scsw_is_solicited(union scsw *scsw) 948static inline int scsw_is_solicited(union scsw *scsw)
837{ 949{
838 if (scsw_is_tm(scsw)) 950 if (scsw_is_tm(scsw))
839 return scsw_tm_is_solicited(scsw); 951 return scsw_tm_is_solicited(scsw);
840 else 952 else
841 return scsw_cmd_is_solicited(scsw); 953 return scsw_cmd_is_solicited(scsw);
842} 954}
843EXPORT_SYMBOL(scsw_is_solicited); 955
956#endif /* _ASM_S390_SCSW_H_ */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 38b0fc221ed7..e37478e87286 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -8,7 +8,7 @@
8#ifndef _ASM_S390_SETUP_H 8#ifndef _ASM_S390_SETUP_H
9#define _ASM_S390_SETUP_H 9#define _ASM_S390_SETUP_H
10 10
11#define COMMAND_LINE_SIZE 1024 11#define COMMAND_LINE_SIZE 4096
12 12
13#define ARCH_COMMAND_LINE_SIZE 896 13#define ARCH_COMMAND_LINE_SIZE 896
14 14
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 72137bc907ac..c991fe6473c9 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -51,32 +51,7 @@ extern void machine_power_off_smp(void);
51#define PROC_CHANGE_PENALTY 20 /* Schedule penalty */ 51#define PROC_CHANGE_PENALTY 20 /* Schedule penalty */
52 52
53#define raw_smp_processor_id() (S390_lowcore.cpu_nr) 53#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
54 54#define cpu_logical_map(cpu) (cpu)
55/*
56 * returns 1 if cpu is in stopped/check stopped state or not operational
57 * returns 0 otherwise
58 */
59static inline int
60smp_cpu_not_running(int cpu)
61{
62 __u32 status;
63
64 switch (signal_processor_ps(&status, 0, cpu, sigp_sense)) {
65 case sigp_order_code_accepted:
66 case sigp_status_stored:
67 /* Check for stopped and check stop state */
68 if (status & 0x50)
69 return 1;
70 break;
71 case sigp_not_operational:
72 return 1;
73 default:
74 break;
75 }
76 return 0;
77}
78
79#define cpu_logical_map(cpu) (cpu)
80 55
81extern int __cpu_disable (void); 56extern int __cpu_disable (void);
82extern void __cpu_die (unsigned int cpu); 57extern void __cpu_die (unsigned int cpu);
@@ -91,11 +66,6 @@ extern void arch_send_call_function_ipi(cpumask_t mask);
91 66
92#endif 67#endif
93 68
94#ifndef CONFIG_SMP
95#define hard_smp_processor_id() 0
96#define smp_cpu_not_running(cpu) 1
97#endif
98
99#ifdef CONFIG_HOTPLUG_CPU 69#ifdef CONFIG_HOTPLUG_CPU
100extern int smp_rescan_cpus(void); 70extern int smp_rescan_cpus(void);
101#else 71#else
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 4fb83c1cdb77..379661d2f81a 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -109,11 +109,7 @@ extern void pfault_fini(void);
109#define pfault_fini() do { } while (0) 109#define pfault_fini() do { } while (0)
110#endif /* CONFIG_PFAULT */ 110#endif /* CONFIG_PFAULT */
111 111
112#ifdef CONFIG_PAGE_STATES
113extern void cmma_init(void); 112extern void cmma_init(void);
114#else
115static inline void cmma_init(void) { }
116#endif
117 113
118#define finish_arch_switch(prev) do { \ 114#define finish_arch_switch(prev) do { \
119 set_fs(current->thread.mm_segment); \ 115 set_fs(current->thread.mm_segment); \
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index cc21e3e20fd7..24aa1cda20ad 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -90,4 +90,18 @@ unsigned long long monotonic_clock(void);
90 90
91extern u64 sched_clock_base_cc; 91extern u64 sched_clock_base_cc;
92 92
93/**
94 * get_clock_monotonic - returns current time in clock rate units
95 *
96 * The caller must ensure that preemption is disabled.
97 * The clock and sched_clock_base get changed via stop_machine.
98 * Therefore preemption must be disabled when calling this
99 * function, otherwise the returned value is not guaranteed to
100 * be monotonic.
101 */
102static inline unsigned long long get_clock_monotonic(void)
103{
104 return get_clock_xt() - sched_clock_base_cc;
105}
106
93#endif 107#endif
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index c75ed43b1a18..c7be8e10b87e 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -32,7 +32,7 @@ extra-y += head.o init_task.o vmlinux.lds
32 32
33obj-$(CONFIG_MODULES) += s390_ksyms.o module.o 33obj-$(CONFIG_MODULES) += s390_ksyms.o module.o
34obj-$(CONFIG_SMP) += smp.o topology.o 34obj-$(CONFIG_SMP) += smp.o topology.o
35 35obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o
36obj-$(CONFIG_AUDIT) += audit.o 36obj-$(CONFIG_AUDIT) += audit.o
37compat-obj-$(CONFIG_AUDIT) += compat_audit.o 37compat-obj-$(CONFIG_AUDIT) += compat_audit.o
38obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ 38obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \
@@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \
41 41
42obj-$(CONFIG_STACKTRACE) += stacktrace.o 42obj-$(CONFIG_STACKTRACE) += stacktrace.o
43obj-$(CONFIG_KPROBES) += kprobes.o 43obj-$(CONFIG_KPROBES) += kprobes.o
44obj-$(CONFIG_FUNCTION_TRACER) += mcount.o 44obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o)
45obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 45obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
46obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o 46obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
47 47
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index cae14c499511..bf8b4ae7ff2d 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -6,6 +6,9 @@
6 * Heiko Carstens <heiko.carstens@de.ibm.com> 6 * Heiko Carstens <heiko.carstens@de.ibm.com>
7 */ 7 */
8 8
9#define KMSG_COMPONENT "setup"
10#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
11
9#include <linux/compiler.h> 12#include <linux/compiler.h>
10#include <linux/init.h> 13#include <linux/init.h>
11#include <linux/errno.h> 14#include <linux/errno.h>
@@ -16,6 +19,7 @@
16#include <linux/module.h> 19#include <linux/module.h>
17#include <linux/pfn.h> 20#include <linux/pfn.h>
18#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/kernel.h>
19#include <asm/ebcdic.h> 23#include <asm/ebcdic.h>
20#include <asm/ipl.h> 24#include <asm/ipl.h>
21#include <asm/lowcore.h> 25#include <asm/lowcore.h>
@@ -35,8 +39,6 @@
35 39
36char kernel_nss_name[NSS_NAME_SIZE + 1]; 40char kernel_nss_name[NSS_NAME_SIZE + 1];
37 41
38static unsigned long machine_flags;
39
40static void __init setup_boot_command_line(void); 42static void __init setup_boot_command_line(void);
41 43
42/* 44/*
@@ -81,6 +83,8 @@ asm(
81 " br 14\n" 83 " br 14\n"
82 " .size savesys_ipl_nss, .-savesys_ipl_nss\n"); 84 " .size savesys_ipl_nss, .-savesys_ipl_nss\n");
83 85
86static __initdata char upper_command_line[COMMAND_LINE_SIZE];
87
84static noinline __init void create_kernel_nss(void) 88static noinline __init void create_kernel_nss(void)
85{ 89{
86 unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; 90 unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size;
@@ -90,7 +94,6 @@ static noinline __init void create_kernel_nss(void)
90 int response; 94 int response;
91 size_t len; 95 size_t len;
92 char *savesys_ptr; 96 char *savesys_ptr;
93 char upper_command_line[COMMAND_LINE_SIZE];
94 char defsys_cmd[DEFSYS_CMD_SIZE]; 97 char defsys_cmd[DEFSYS_CMD_SIZE];
95 char savesys_cmd[SAVESYS_CMD_SIZE]; 98 char savesys_cmd[SAVESYS_CMD_SIZE];
96 99
@@ -141,6 +144,8 @@ static noinline __init void create_kernel_nss(void)
141 __cpcmd(defsys_cmd, NULL, 0, &response); 144 __cpcmd(defsys_cmd, NULL, 0, &response);
142 145
143 if (response != 0) { 146 if (response != 0) {
147 pr_err("Defining the Linux kernel NSS failed with rc=%d\n",
148 response);
144 kernel_nss_name[0] = '\0'; 149 kernel_nss_name[0] = '\0';
145 return; 150 return;
146 } 151 }
@@ -153,8 +158,11 @@ static noinline __init void create_kernel_nss(void)
153 * max SAVESYS_CMD_SIZE 158 * max SAVESYS_CMD_SIZE
154 * On error: response contains the numeric portion of cp error message. 159 * On error: response contains the numeric portion of cp error message.
155 * for SAVESYS it will be >= 263 160 * for SAVESYS it will be >= 263
161 * for missing privilege class, it will be 1
156 */ 162 */
157 if (response > SAVESYS_CMD_SIZE) { 163 if (response > SAVESYS_CMD_SIZE || response == 1) {
164 pr_err("Saving the Linux kernel NSS failed with rc=%d\n",
165 response);
158 kernel_nss_name[0] = '\0'; 166 kernel_nss_name[0] = '\0';
159 return; 167 return;
160 } 168 }
@@ -205,12 +213,9 @@ static noinline __init void detect_machine_type(void)
205 213
206 /* Running under KVM? If not we assume z/VM */ 214 /* Running under KVM? If not we assume z/VM */
207 if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3)) 215 if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3))
208 machine_flags |= MACHINE_FLAG_KVM; 216 S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
209 else 217 else
210 machine_flags |= MACHINE_FLAG_VM; 218 S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
211
212 /* Store machine flags for setting up lowcore early */
213 S390_lowcore.machine_flags = machine_flags;
214} 219}
215 220
216static __init void early_pgm_check_handler(void) 221static __init void early_pgm_check_handler(void)
@@ -245,7 +250,7 @@ static noinline __init void setup_hpage(void)
245 facilities = stfl(); 250 facilities = stfl();
246 if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29))) 251 if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29)))
247 return; 252 return;
248 machine_flags |= MACHINE_FLAG_HPAGE; 253 S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE;
249 __ctl_set_bit(0, 23); 254 __ctl_set_bit(0, 23);
250#endif 255#endif
251} 256}
@@ -263,7 +268,7 @@ static __init void detect_mvpg(void)
263 EX_TABLE(0b,1b) 268 EX_TABLE(0b,1b)
264 : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0"); 269 : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0");
265 if (!rc) 270 if (!rc)
266 machine_flags |= MACHINE_FLAG_MVPG; 271 S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG;
267#endif 272#endif
268} 273}
269 274
@@ -279,7 +284,7 @@ static __init void detect_ieee(void)
279 EX_TABLE(0b,1b) 284 EX_TABLE(0b,1b)
280 : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc"); 285 : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc");
281 if (!rc) 286 if (!rc)
282 machine_flags |= MACHINE_FLAG_IEEE; 287 S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE;
283#endif 288#endif
284} 289}
285 290
@@ -298,7 +303,7 @@ static __init void detect_csp(void)
298 EX_TABLE(0b,1b) 303 EX_TABLE(0b,1b)
299 : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2"); 304 : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2");
300 if (!rc) 305 if (!rc)
301 machine_flags |= MACHINE_FLAG_CSP; 306 S390_lowcore.machine_flags |= MACHINE_FLAG_CSP;
302#endif 307#endif
303} 308}
304 309
@@ -315,7 +320,7 @@ static __init void detect_diag9c(void)
315 EX_TABLE(0b,1b) 320 EX_TABLE(0b,1b)
316 : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); 321 : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
317 if (!rc) 322 if (!rc)
318 machine_flags |= MACHINE_FLAG_DIAG9C; 323 S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
319} 324}
320 325
321static __init void detect_diag44(void) 326static __init void detect_diag44(void)
@@ -330,7 +335,7 @@ static __init void detect_diag44(void)
330 EX_TABLE(0b,1b) 335 EX_TABLE(0b,1b)
331 : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); 336 : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
332 if (!rc) 337 if (!rc)
333 machine_flags |= MACHINE_FLAG_DIAG44; 338 S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
334#endif 339#endif
335} 340}
336 341
@@ -341,11 +346,11 @@ static __init void detect_machine_facilities(void)
341 346
342 facilities = stfl(); 347 facilities = stfl();
343 if (facilities & (1 << 28)) 348 if (facilities & (1 << 28))
344 machine_flags |= MACHINE_FLAG_IDTE; 349 S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
345 if (facilities & (1 << 23)) 350 if (facilities & (1 << 23))
346 machine_flags |= MACHINE_FLAG_PFMF; 351 S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF;
347 if (facilities & (1 << 4)) 352 if (facilities & (1 << 4))
348 machine_flags |= MACHINE_FLAG_MVCOS; 353 S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS;
349#endif 354#endif
350} 355}
351 356
@@ -367,21 +372,35 @@ static __init void rescue_initrd(void)
367} 372}
368 373
369/* Set up boot command line */ 374/* Set up boot command line */
370static void __init setup_boot_command_line(void) 375static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t))
371{ 376{
372 char *parm = NULL; 377 char *parm, *delim;
378 size_t rc, len;
379
380 len = strlen(boot_command_line);
381
382 delim = boot_command_line + len; /* '\0' character position */
383 parm = boot_command_line + len + 1; /* append right after '\0' */
373 384
385 rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1);
386 if (rc) {
387 if (*parm == '=')
388 memmove(boot_command_line, parm + 1, rc);
389 else
390 *delim = ' '; /* replace '\0' with space */
391 }
392}
393
394static void __init setup_boot_command_line(void)
395{
374 /* copy arch command line */ 396 /* copy arch command line */
375 strlcpy(boot_command_line, COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); 397 strlcpy(boot_command_line, COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
376 398
377 /* append IPL PARM data to the boot command line */ 399 /* append IPL PARM data to the boot command line */
378 if (MACHINE_IS_VM) { 400 if (MACHINE_IS_VM)
379 parm = boot_command_line + strlen(boot_command_line); 401 append_to_cmdline(append_ipl_vmparm);
380 *parm++ = ' '; 402
381 get_ipl_vmparm(parm); 403 append_to_cmdline(append_ipl_scpdata);
382 if (parm[0] == '=')
383 memmove(boot_command_line, parm + 1, strlen(parm));
384 }
385} 404}
386 405
387 406
@@ -413,7 +432,6 @@ void __init startup_init(void)
413 setup_hpage(); 432 setup_hpage();
414 sclp_facilities_detect(); 433 sclp_facilities_detect();
415 detect_memory_layout(memory_chunk); 434 detect_memory_layout(memory_chunk);
416 S390_lowcore.machine_flags = machine_flags;
417#ifdef CONFIG_DYNAMIC_FTRACE 435#ifdef CONFIG_DYNAMIC_FTRACE
418 S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; 436 S390_lowcore.ftrace_func = (unsigned long)ftrace_caller;
419#endif 437#endif
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index c4c80a22bc1f..f78580a74039 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -278,7 +278,8 @@ sysc_return:
278 bnz BASED(sysc_work) # there is work to do (signals etc.) 278 bnz BASED(sysc_work) # there is work to do (signals etc.)
279sysc_restore: 279sysc_restore:
280#ifdef CONFIG_TRACE_IRQFLAGS 280#ifdef CONFIG_TRACE_IRQFLAGS
281 la %r1,BASED(sysc_restore_trace_psw) 281 la %r1,BASED(sysc_restore_trace_psw_addr)
282 l %r1,0(%r1)
282 lpsw 0(%r1) 283 lpsw 0(%r1)
283sysc_restore_trace: 284sysc_restore_trace:
284 TRACE_IRQS_CHECK 285 TRACE_IRQS_CHECK
@@ -289,10 +290,15 @@ sysc_leave:
289sysc_done: 290sysc_done:
290 291
291#ifdef CONFIG_TRACE_IRQFLAGS 292#ifdef CONFIG_TRACE_IRQFLAGS
293sysc_restore_trace_psw_addr:
294 .long sysc_restore_trace_psw
295
296 .section .data,"aw",@progbits
292 .align 8 297 .align 8
293 .globl sysc_restore_trace_psw 298 .globl sysc_restore_trace_psw
294sysc_restore_trace_psw: 299sysc_restore_trace_psw:
295 .long 0, sysc_restore_trace + 0x80000000 300 .long 0, sysc_restore_trace + 0x80000000
301 .previous
296#endif 302#endif
297 303
298# 304#
@@ -606,7 +612,8 @@ io_return:
606 bnz BASED(io_work) # there is work to do (signals etc.) 612 bnz BASED(io_work) # there is work to do (signals etc.)
607io_restore: 613io_restore:
608#ifdef CONFIG_TRACE_IRQFLAGS 614#ifdef CONFIG_TRACE_IRQFLAGS
609 la %r1,BASED(io_restore_trace_psw) 615 la %r1,BASED(io_restore_trace_psw_addr)
616 l %r1,0(%r1)
610 lpsw 0(%r1) 617 lpsw 0(%r1)
611io_restore_trace: 618io_restore_trace:
612 TRACE_IRQS_CHECK 619 TRACE_IRQS_CHECK
@@ -617,10 +624,15 @@ io_leave:
617io_done: 624io_done:
618 625
619#ifdef CONFIG_TRACE_IRQFLAGS 626#ifdef CONFIG_TRACE_IRQFLAGS
627io_restore_trace_psw_addr:
628 .long io_restore_trace_psw
629
630 .section .data,"aw",@progbits
620 .align 8 631 .align 8
621 .globl io_restore_trace_psw 632 .globl io_restore_trace_psw
622io_restore_trace_psw: 633io_restore_trace_psw:
623 .long 0, io_restore_trace + 0x80000000 634 .long 0, io_restore_trace + 0x80000000
635 .previous
624#endif 636#endif
625 637
626# 638#
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index f6618e9e15ef..009ca6175db9 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -284,10 +284,12 @@ sysc_leave:
284sysc_done: 284sysc_done:
285 285
286#ifdef CONFIG_TRACE_IRQFLAGS 286#ifdef CONFIG_TRACE_IRQFLAGS
287 .section .data,"aw",@progbits
287 .align 8 288 .align 8
288 .globl sysc_restore_trace_psw 289 .globl sysc_restore_trace_psw
289sysc_restore_trace_psw: 290sysc_restore_trace_psw:
290 .quad 0, sysc_restore_trace 291 .quad 0, sysc_restore_trace
292 .previous
291#endif 293#endif
292 294
293# 295#
@@ -595,10 +597,12 @@ io_leave:
595io_done: 597io_done:
596 598
597#ifdef CONFIG_TRACE_IRQFLAGS 599#ifdef CONFIG_TRACE_IRQFLAGS
600 .section .data,"aw",@progbits
598 .align 8 601 .align 8
599 .globl io_restore_trace_psw 602 .globl io_restore_trace_psw
600io_restore_trace_psw: 603io_restore_trace_psw:
601 .quad 0, io_restore_trace 604 .quad 0, io_restore_trace
605 .previous
602#endif 606#endif
603 607
604# 608#
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index ec6882348520..c52b4f7742fa 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -27,6 +27,7 @@
27#include <asm/asm-offsets.h> 27#include <asm/asm-offsets.h>
28#include <asm/thread_info.h> 28#include <asm/thread_info.h>
29#include <asm/page.h> 29#include <asm/page.h>
30#include <asm/cpu.h>
30 31
31#ifdef CONFIG_64BIT 32#ifdef CONFIG_64BIT
32#define ARCH_OFFSET 4 33#define ARCH_OFFSET 4
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index 2ced846065b7..602b508cd4c4 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -24,6 +24,7 @@ startup_continue:
24# Setup stack 24# Setup stack
25# 25#
26 l %r15,.Linittu-.LPG1(%r13) 26 l %r15,.Linittu-.LPG1(%r13)
27 st %r15,__LC_THREAD_INFO # cache thread info in lowcore
27 mvc __LC_CURRENT(4),__TI_task(%r15) 28 mvc __LC_CURRENT(4),__TI_task(%r15)
28 ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE 29 ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE
29 st %r15,__LC_KERNEL_STACK # set end of kernel stack 30 st %r15,__LC_KERNEL_STACK # set end of kernel stack
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 65667b2e65ce..6a250808092b 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -62,9 +62,9 @@ startup_continue:
62 clr %r11,%r12 62 clr %r11,%r12
63 je 5f # no more space in prefix array 63 je 5f # no more space in prefix array
644: 644:
65 ahi %r8,1 # next cpu (r8 += 1) 65 ahi %r8,1 # next cpu (r8 += 1)
66 cl %r8,.Llast_cpu-.LPG1(%r13) # is last possible cpu ? 66 chi %r8,MAX_CPU_ADDRESS # is last possible cpu ?
67 jl 1b # jump if not last cpu 67 jle 1b # jump if not last cpu
685: 685:
69 lhi %r1,2 # mode 2 = esame (dump) 69 lhi %r1,2 # mode 2 = esame (dump)
70 j 6f 70 j 6f
@@ -92,6 +92,7 @@ startup_continue:
92# Setup stack 92# Setup stack
93# 93#
94 larl %r15,init_thread_union 94 larl %r15,init_thread_union
95 stg %r15,__LC_THREAD_INFO # cache thread info in lowcore
95 lg %r14,__TI_task(%r15) # cache current in lowcore 96 lg %r14,__TI_task(%r15) # cache current in lowcore
96 stg %r14,__LC_CURRENT 97 stg %r14,__LC_CURRENT
97 aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE 98 aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE
@@ -129,8 +130,6 @@ startup_continue:
129#ifdef CONFIG_ZFCPDUMP 130#ifdef CONFIG_ZFCPDUMP
130.Lcurrent_cpu: 131.Lcurrent_cpu:
131 .long 0x0 132 .long 0x0
132.Llast_cpu:
133 .long 0x0000ffff
134.Lpref_arr_ptr: 133.Lpref_arr_ptr:
135 .long zfcpdump_prefix_array 134 .long zfcpdump_prefix_array
136#endif /* CONFIG_ZFCPDUMP */ 135#endif /* CONFIG_ZFCPDUMP */
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 371a2d88f4ac..ee57a42e6e93 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -272,17 +272,18 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
272static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); 272static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
273 273
274/* VM IPL PARM routines */ 274/* VM IPL PARM routines */
275static void reipl_get_ascii_vmparm(char *dest, 275size_t reipl_get_ascii_vmparm(char *dest, size_t size,
276 const struct ipl_parameter_block *ipb) 276 const struct ipl_parameter_block *ipb)
277{ 277{
278 int i; 278 int i;
279 int len = 0; 279 size_t len;
280 char has_lowercase = 0; 280 char has_lowercase = 0;
281 281
282 len = 0;
282 if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && 283 if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
283 (ipb->ipl_info.ccw.vm_parm_len > 0)) { 284 (ipb->ipl_info.ccw.vm_parm_len > 0)) {
284 285
285 len = ipb->ipl_info.ccw.vm_parm_len; 286 len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
286 memcpy(dest, ipb->ipl_info.ccw.vm_parm, len); 287 memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
287 /* If at least one character is lowercase, we assume mixed 288 /* If at least one character is lowercase, we assume mixed
288 * case; otherwise we convert everything to lowercase. 289 * case; otherwise we convert everything to lowercase.
@@ -299,14 +300,20 @@ static void reipl_get_ascii_vmparm(char *dest,
299 EBCASC(dest, len); 300 EBCASC(dest, len);
300 } 301 }
301 dest[len] = 0; 302 dest[len] = 0;
303
304 return len;
302} 305}
303 306
304void get_ipl_vmparm(char *dest) 307size_t append_ipl_vmparm(char *dest, size_t size)
305{ 308{
309 size_t rc;
310
311 rc = 0;
306 if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)) 312 if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
307 reipl_get_ascii_vmparm(dest, &ipl_block); 313 rc = reipl_get_ascii_vmparm(dest, size, &ipl_block);
308 else 314 else
309 dest[0] = 0; 315 dest[0] = 0;
316 return rc;
310} 317}
311 318
312static ssize_t ipl_vm_parm_show(struct kobject *kobj, 319static ssize_t ipl_vm_parm_show(struct kobject *kobj,
@@ -314,10 +321,65 @@ static ssize_t ipl_vm_parm_show(struct kobject *kobj,
314{ 321{
315 char parm[DIAG308_VMPARM_SIZE + 1] = {}; 322 char parm[DIAG308_VMPARM_SIZE + 1] = {};
316 323
317 get_ipl_vmparm(parm); 324 append_ipl_vmparm(parm, sizeof(parm));
318 return sprintf(page, "%s\n", parm); 325 return sprintf(page, "%s\n", parm);
319} 326}
320 327
328static size_t scpdata_length(const char* buf, size_t count)
329{
330 while (count) {
331 if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
332 break;
333 count--;
334 }
335 return count;
336}
337
338size_t reipl_append_ascii_scpdata(char *dest, size_t size,
339 const struct ipl_parameter_block *ipb)
340{
341 size_t count;
342 size_t i;
343 int has_lowercase;
344
345 count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
346 ipb->ipl_info.fcp.scp_data_len));
347 if (!count)
348 goto out;
349
350 has_lowercase = 0;
351 for (i = 0; i < count; i++) {
352 if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
353 count = 0;
354 goto out;
355 }
356 if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i]))
357 has_lowercase = 1;
358 }
359
360 if (has_lowercase)
361 memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
362 else
363 for (i = 0; i < count; i++)
364 dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]);
365out:
366 dest[count] = '\0';
367 return count;
368}
369
370size_t append_ipl_scpdata(char *dest, size_t len)
371{
372 size_t rc;
373
374 rc = 0;
375 if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
376 rc = reipl_append_ascii_scpdata(dest, len, &ipl_block);
377 else
378 dest[0] = 0;
379 return rc;
380}
381
382
321static struct kobj_attribute sys_ipl_vm_parm_attr = 383static struct kobj_attribute sys_ipl_vm_parm_attr =
322 __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL); 384 __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
323 385
@@ -553,7 +615,7 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
553{ 615{
554 char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; 616 char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
555 617
556 reipl_get_ascii_vmparm(vmparm, ipb); 618 reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
557 return sprintf(page, "%s\n", vmparm); 619 return sprintf(page, "%s\n", vmparm);
558} 620}
559 621
@@ -626,6 +688,59 @@ static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
626 688
627/* FCP reipl device attributes */ 689/* FCP reipl device attributes */
628 690
691static ssize_t reipl_fcp_scpdata_read(struct kobject *kobj,
692 struct bin_attribute *attr,
693 char *buf, loff_t off, size_t count)
694{
695 size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len;
696 void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data;
697
698 return memory_read_from_buffer(buf, count, &off, scp_data, size);
699}
700
701static ssize_t reipl_fcp_scpdata_write(struct kobject *kobj,
702 struct bin_attribute *attr,
703 char *buf, loff_t off, size_t count)
704{
705 size_t padding;
706 size_t scpdata_len;
707
708 if (off < 0)
709 return -EINVAL;
710
711 if (off >= DIAG308_SCPDATA_SIZE)
712 return -ENOSPC;
713
714 if (count > DIAG308_SCPDATA_SIZE - off)
715 count = DIAG308_SCPDATA_SIZE - off;
716
717 memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count);
718 scpdata_len = off + count;
719
720 if (scpdata_len % 8) {
721 padding = 8 - (scpdata_len % 8);
722 memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
723 0, padding);
724 scpdata_len += padding;
725 }
726
727 reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len;
728 reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len;
729 reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len;
730
731 return count;
732}
733
734static struct bin_attribute sys_reipl_fcp_scp_data_attr = {
735 .attr = {
736 .name = "scp_data",
737 .mode = S_IRUGO | S_IWUSR,
738 },
739 .size = PAGE_SIZE,
740 .read = reipl_fcp_scpdata_read,
741 .write = reipl_fcp_scpdata_write,
742};
743
629DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n", 744DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n",
630 reipl_block_fcp->ipl_info.fcp.wwpn); 745 reipl_block_fcp->ipl_info.fcp.wwpn);
631DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n", 746DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n",
@@ -647,7 +762,6 @@ static struct attribute *reipl_fcp_attrs[] = {
647}; 762};
648 763
649static struct attribute_group reipl_fcp_attr_group = { 764static struct attribute_group reipl_fcp_attr_group = {
650 .name = IPL_FCP_STR,
651 .attrs = reipl_fcp_attrs, 765 .attrs = reipl_fcp_attrs,
652}; 766};
653 767
@@ -895,6 +1009,7 @@ static struct kobj_attribute reipl_type_attr =
895 __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store); 1009 __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
896 1010
897static struct kset *reipl_kset; 1011static struct kset *reipl_kset;
1012static struct kset *reipl_fcp_kset;
898 1013
899static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb, 1014static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
900 const enum ipl_method m) 1015 const enum ipl_method m)
@@ -906,7 +1021,7 @@ static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
906 1021
907 reipl_get_ascii_loadparm(loadparm, ipb); 1022 reipl_get_ascii_loadparm(loadparm, ipb);
908 reipl_get_ascii_nss_name(nss_name, ipb); 1023 reipl_get_ascii_nss_name(nss_name, ipb);
909 reipl_get_ascii_vmparm(vmparm, ipb); 1024 reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
910 1025
911 switch (m) { 1026 switch (m) {
912 case REIPL_METHOD_CCW_VM: 1027 case REIPL_METHOD_CCW_VM:
@@ -1076,23 +1191,44 @@ static int __init reipl_fcp_init(void)
1076 int rc; 1191 int rc;
1077 1192
1078 if (!diag308_set_works) { 1193 if (!diag308_set_works) {
1079 if (ipl_info.type == IPL_TYPE_FCP) 1194 if (ipl_info.type == IPL_TYPE_FCP) {
1080 make_attrs_ro(reipl_fcp_attrs); 1195 make_attrs_ro(reipl_fcp_attrs);
1081 else 1196 sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO;
1197 } else
1082 return 0; 1198 return 0;
1083 } 1199 }
1084 1200
1085 reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); 1201 reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
1086 if (!reipl_block_fcp) 1202 if (!reipl_block_fcp)
1087 return -ENOMEM; 1203 return -ENOMEM;
1088 rc = sysfs_create_group(&reipl_kset->kobj, &reipl_fcp_attr_group); 1204
1205 /* sysfs: create fcp kset for mixing attr group and bin attrs */
1206 reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
1207 &reipl_kset->kobj);
1208 if (!reipl_fcp_kset) {
1209 free_page((unsigned long) reipl_block_fcp);
1210 return -ENOMEM;
1211 }
1212
1213 rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
1214 if (rc) {
1215 kset_unregister(reipl_fcp_kset);
1216 free_page((unsigned long) reipl_block_fcp);
1217 return rc;
1218 }
1219
1220 rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj,
1221 &sys_reipl_fcp_scp_data_attr);
1089 if (rc) { 1222 if (rc) {
1090 free_page((unsigned long)reipl_block_fcp); 1223 sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
1224 kset_unregister(reipl_fcp_kset);
1225 free_page((unsigned long) reipl_block_fcp);
1091 return rc; 1226 return rc;
1092 } 1227 }
1093 if (ipl_info.type == IPL_TYPE_FCP) { 1228
1229 if (ipl_info.type == IPL_TYPE_FCP)
1094 memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE); 1230 memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
1095 } else { 1231 else {
1096 reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; 1232 reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
1097 reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; 1233 reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
1098 reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; 1234 reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 2a0a5e97ba8c..dfe015d7398c 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -11,111 +11,27 @@
11ftrace_stub: 11ftrace_stub:
12 br %r14 12 br %r14
13 13
14#ifdef CONFIG_64BIT
15
16#ifdef CONFIG_DYNAMIC_FTRACE
17
18 .globl _mcount 14 .globl _mcount
19_mcount: 15_mcount:
20 br %r14 16#ifdef CONFIG_DYNAMIC_FTRACE
21
22 .globl ftrace_caller
23ftrace_caller:
24 larl %r1,function_trace_stop
25 icm %r1,0xf,0(%r1)
26 bnzr %r14
27 stmg %r2,%r5,32(%r15)
28 stg %r14,112(%r15)
29 lgr %r1,%r15
30 aghi %r15,-160
31 stg %r1,__SF_BACKCHAIN(%r15)
32 lgr %r2,%r14
33 lg %r3,168(%r15)
34 larl %r14,ftrace_dyn_func
35 lg %r14,0(%r14)
36 basr %r14,%r14
37#ifdef CONFIG_FUNCTION_GRAPH_TRACER
38 .globl ftrace_graph_caller
39ftrace_graph_caller:
40 # This unconditional branch gets runtime patched. Change only if
41 # you know what you are doing. See ftrace_enable_graph_caller().
42 j 0f
43 lg %r2,272(%r15)
44 lg %r3,168(%r15)
45 brasl %r14,prepare_ftrace_return
46 stg %r2,168(%r15)
470:
48#endif
49 aghi %r15,160
50 lmg %r2,%r5,32(%r15)
51 lg %r14,112(%r15)
52 br %r14 17 br %r14
53 18
54 .data 19 .data
55 .globl ftrace_dyn_func 20 .globl ftrace_dyn_func
56ftrace_dyn_func: 21ftrace_dyn_func:
57 .quad ftrace_stub 22 .long ftrace_stub
58 .previous 23 .previous
59 24
60#else /* CONFIG_DYNAMIC_FTRACE */
61
62 .globl _mcount
63_mcount:
64 larl %r1,function_trace_stop
65 icm %r1,0xf,0(%r1)
66 bnzr %r14
67 stmg %r2,%r5,32(%r15)
68 stg %r14,112(%r15)
69 lgr %r1,%r15
70 aghi %r15,-160
71 stg %r1,__SF_BACKCHAIN(%r15)
72 lgr %r2,%r14
73 lg %r3,168(%r15)
74 larl %r14,ftrace_trace_function
75 lg %r14,0(%r14)
76 basr %r14,%r14
77#ifdef CONFIG_FUNCTION_GRAPH_TRACER
78 lg %r2,272(%r15)
79 lg %r3,168(%r15)
80 brasl %r14,prepare_ftrace_return
81 stg %r2,168(%r15)
82#endif
83 aghi %r15,160
84 lmg %r2,%r5,32(%r15)
85 lg %r14,112(%r15)
86 br %r14
87
88#endif /* CONFIG_DYNAMIC_FTRACE */
89
90#ifdef CONFIG_FUNCTION_GRAPH_TRACER
91
92 .globl return_to_handler
93return_to_handler:
94 stmg %r2,%r5,32(%r15)
95 lgr %r1,%r15
96 aghi %r15,-160
97 stg %r1,__SF_BACKCHAIN(%r15)
98 brasl %r14,ftrace_return_to_handler
99 aghi %r15,160
100 lgr %r14,%r2
101 lmg %r2,%r5,32(%r15)
102 br %r14
103
104#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
105
106#else /* CONFIG_64BIT */
107
108#ifdef CONFIG_DYNAMIC_FTRACE
109
110 .globl _mcount
111_mcount:
112 br %r14
113
114 .globl ftrace_caller 25 .globl ftrace_caller
115ftrace_caller: 26ftrace_caller:
27#endif
116 stm %r2,%r5,16(%r15) 28 stm %r2,%r5,16(%r15)
117 bras %r1,2f 29 bras %r1,2f
30#ifdef CONFIG_DYNAMIC_FTRACE
310: .long ftrace_dyn_func
32#else
1180: .long ftrace_trace_function 330: .long ftrace_trace_function
34#endif
1191: .long function_trace_stop 351: .long function_trace_stop
1202: l %r2,1b-0b(%r1) 362: l %r2,1b-0b(%r1)
121 icm %r2,0xf,0(%r2) 37 icm %r2,0xf,0(%r2)
@@ -131,53 +47,13 @@ ftrace_caller:
131 l %r14,0(%r14) 47 l %r14,0(%r14)
132 basr %r14,%r14 48 basr %r14,%r14
133#ifdef CONFIG_FUNCTION_GRAPH_TRACER 49#ifdef CONFIG_FUNCTION_GRAPH_TRACER
50#ifdef CONFIG_DYNAMIC_FTRACE
134 .globl ftrace_graph_caller 51 .globl ftrace_graph_caller
135ftrace_graph_caller: 52ftrace_graph_caller:
136 # This unconditional branch gets runtime patched. Change only if 53 # This unconditional branch gets runtime patched. Change only if
137 # you know what you are doing. See ftrace_enable_graph_caller(). 54 # you know what you are doing. See ftrace_enable_graph_caller().
138 j 1f 55 j 1f
139 bras %r1,0f
140 .long prepare_ftrace_return
1410: l %r2,152(%r15)
142 l %r4,0(%r1)
143 l %r3,100(%r15)
144 basr %r14,%r4
145 st %r2,100(%r15)
1461:
147#endif 56#endif
148 ahi %r15,96
149 l %r14,56(%r15)
1503: lm %r2,%r5,16(%r15)
151 br %r14
152
153 .data
154 .globl ftrace_dyn_func
155ftrace_dyn_func:
156 .long ftrace_stub
157 .previous
158
159#else /* CONFIG_DYNAMIC_FTRACE */
160
161 .globl _mcount
162_mcount:
163 stm %r2,%r5,16(%r15)
164 bras %r1,2f
1650: .long ftrace_trace_function
1661: .long function_trace_stop
1672: l %r2,1b-0b(%r1)
168 icm %r2,0xf,0(%r2)
169 jnz 3f
170 st %r14,56(%r15)
171 lr %r0,%r15
172 ahi %r15,-96
173 l %r3,100(%r15)
174 la %r2,0(%r14)
175 st %r0,__SF_BACKCHAIN(%r15)
176 la %r3,0(%r3)
177 l %r14,0b-0b(%r1)
178 l %r14,0(%r14)
179 basr %r14,%r14
180#ifdef CONFIG_FUNCTION_GRAPH_TRACER
181 bras %r1,0f 57 bras %r1,0f
182 .long prepare_ftrace_return 58 .long prepare_ftrace_return
1830: l %r2,152(%r15) 590: l %r2,152(%r15)
@@ -185,14 +61,13 @@ _mcount:
185 l %r3,100(%r15) 61 l %r3,100(%r15)
186 basr %r14,%r4 62 basr %r14,%r4
187 st %r2,100(%r15) 63 st %r2,100(%r15)
641:
188#endif 65#endif
189 ahi %r15,96 66 ahi %r15,96
190 l %r14,56(%r15) 67 l %r14,56(%r15)
1913: lm %r2,%r5,16(%r15) 683: lm %r2,%r5,16(%r15)
192 br %r14 69 br %r14
193 70
194#endif /* CONFIG_DYNAMIC_FTRACE */
195
196#ifdef CONFIG_FUNCTION_GRAPH_TRACER 71#ifdef CONFIG_FUNCTION_GRAPH_TRACER
197 72
198 .globl return_to_handler 73 .globl return_to_handler
@@ -211,6 +86,4 @@ return_to_handler:
211 lm %r2,%r5,16(%r15) 86 lm %r2,%r5,16(%r15)
212 br %r14 87 br %r14
213 88
214#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 89#endif
215
216#endif /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S
new file mode 100644
index 000000000000..c37211c6092b
--- /dev/null
+++ b/arch/s390/kernel/mcount64.S
@@ -0,0 +1,78 @@
1/*
2 * Copyright IBM Corp. 2008,2009
3 *
4 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
5 *
6 */
7
8#include <asm/asm-offsets.h>
9
/*
 * ftrace_stub: global symbol whose whole body is a return through %r14.
 * Its address is the initial value of ftrace_dyn_func further down in
 * this file.
 */
10 .globl ftrace_stub
11ftrace_stub:
12 br %r14
13
/*
 * _mcount / ftrace_caller / ftrace_graph_caller (64 bit).
 *
 * With CONFIG_DYNAMIC_FTRACE, _mcount itself is only a "br %r14" and the
 * tracing body below is reachable through the separate global label
 * ftrace_caller.  Without it, _mcount falls straight into the tracing body.
 */
14 .globl _mcount
15_mcount:
16#ifdef CONFIG_DYNAMIC_FTRACE
17 br %r14
18
/*
 * ftrace_dyn_func: 8-byte slot in .data, initialised with the address of
 * ftrace_stub.  The tracing body loads the function to call from here.
 */
19 .data
20 .globl ftrace_dyn_func
21ftrace_dyn_func:
22 .quad ftrace_stub
23 .previous
24
25 .globl ftrace_caller
26ftrace_caller:
27#endif
/* Return at once if the 4 bytes at function_trace_stop are non-zero. */
28 larl %r1,function_trace_stop
29 icm %r1,0xf,0(%r1)
30 bnzr %r14
/* Save %r2-%r5 and %r14 in the frame %r15 points to on entry. */
31 stmg %r2,%r5,32(%r15)
32 stg %r14,112(%r15)
/* Allocate a 160 byte frame and store the old %r15 as its back chain. */
33 lgr %r1,%r15
34 aghi %r15,-160
35 stg %r1,__SF_BACKCHAIN(%r15)
/*
 * Arguments for the tracer: %r2 = the %r14 this routine was entered with,
 * %r3 = the doubleword at offset 8 of the entry frame (168 - 160).
 * NOTE(review): presumably the traced function's own return address -
 * confirm against the s390 mcount calling convention.
 */
36 lgr %r2,%r14
37 lg %r3,168(%r15)
/* Fetch the tracer function pointer and call it indirectly. */
38#ifdef CONFIG_DYNAMIC_FTRACE
39 larl %r14,ftrace_dyn_func
40#else
41 larl %r14,ftrace_trace_function
42#endif
43 lg %r14,0(%r14)
44 basr %r14,%r14
45#ifdef CONFIG_FUNCTION_GRAPH_TRACER
46#ifdef CONFIG_DYNAMIC_FTRACE
47 .globl ftrace_graph_caller
48ftrace_graph_caller:
49 # This unconditional branch gets runtime patched. Change only if
50 # you know what you are doing. See ftrace_enable_graph_caller().
51 j 0f
52#endif
/*
 * Graph tracer hook: %r2 = the %r14 saved at offset 112 of the entry frame
 * (272 - 160), %r3 = the same doubleword passed to the tracer above.  The
 * value prepare_ftrace_return leaves in %r2 is written back to that
 * doubleword.
 */
53 lg %r2,272(%r15)
54 lg %r3,168(%r15)
55 brasl %r14,prepare_ftrace_return
56 stg %r2,168(%r15)
570:
58#endif
/* Drop the frame, restore %r2-%r5 and %r14, and return. */
59 aghi %r15,160
60 lmg %r2,%r5,32(%r15)
61 lg %r14,112(%r15)
62 br %r14
63
64#ifdef CONFIG_FUNCTION_GRAPH_TRACER
65
/*
 * return_to_handler: saves %r2-%r5, sets up a 160 byte frame with back
 * chain, calls ftrace_return_to_handler, and then branches to the address
 * that call left in %r2, after restoring %r2-%r5.
 */
66 .globl return_to_handler
67return_to_handler:
68 stmg %r2,%r5,32(%r15)
69 lgr %r1,%r15
70 aghi %r15,-160
71 stg %r1,__SF_BACKCHAIN(%r15)
72 brasl %r14,ftrace_return_to_handler
73 aghi %r15,160
/* %r2 from the call above becomes the branch target. */
74 lgr %r14,%r2
75 lmg %r2,%r5,32(%r15)
76 br %r14
77
78#endif
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index cbb897bc50bd..9ed13a1ed376 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -156,15 +156,11 @@ __setup("condev=", condev_setup);
156 156
157static void __init set_preferred_console(void) 157static void __init set_preferred_console(void)
158{ 158{
159 if (MACHINE_IS_KVM) { 159 if (MACHINE_IS_KVM)
160 add_preferred_console("hvc", 0, NULL); 160 add_preferred_console("hvc", 0, NULL);
161 s390_virtio_console_init(); 161 else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
162 return;
163 }
164
165 if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
166 add_preferred_console("ttyS", 0, NULL); 162 add_preferred_console("ttyS", 0, NULL);
167 if (CONSOLE_IS_3270) 163 else if (CONSOLE_IS_3270)
168 add_preferred_console("tty3270", 0, NULL); 164 add_preferred_console("tty3270", 0, NULL);
169} 165}
170 166
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 062bd64e65fa..6b4fef877f9d 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -536,4 +536,6 @@ void do_notify_resume(struct pt_regs *regs)
536{ 536{
537 clear_thread_flag(TIF_NOTIFY_RESUME); 537 clear_thread_flag(TIF_NOTIFY_RESUME);
538 tracehook_notify_resume(regs); 538 tracehook_notify_resume(regs);
539 if (current->replacement_session_keyring)
540 key_replace_session_keyring();
539} 541}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index be2cae083406..56c16876b919 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -49,6 +49,7 @@
49#include <asm/sclp.h> 49#include <asm/sclp.h>
50#include <asm/cputime.h> 50#include <asm/cputime.h>
51#include <asm/vdso.h> 51#include <asm/vdso.h>
52#include <asm/cpu.h>
52#include "entry.h" 53#include "entry.h"
53 54
54static struct task_struct *current_set[NR_CPUS]; 55static struct task_struct *current_set[NR_CPUS];
@@ -70,6 +71,23 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices);
70 71
71static void smp_ext_bitcall(int, ec_bit_sig); 72static void smp_ext_bitcall(int, ec_bit_sig);
72 73
74static int cpu_stopped(int cpu)
75{
76 __u32 status;
77
78 switch (signal_processor_ps(&status, 0, cpu, sigp_sense)) {
79 case sigp_order_code_accepted:
80 case sigp_status_stored:
81 /* Check for stopped and check stop state */
82 if (status & 0x50)
83 return 1;
84 break;
85 default:
86 break;
87 }
88 return 0;
89}
90
73void smp_send_stop(void) 91void smp_send_stop(void)
74{ 92{
75 int cpu, rc; 93 int cpu, rc;
@@ -86,7 +104,7 @@ void smp_send_stop(void)
86 rc = signal_processor(cpu, sigp_stop); 104 rc = signal_processor(cpu, sigp_stop);
87 } while (rc == sigp_busy); 105 } while (rc == sigp_busy);
88 106
89 while (!smp_cpu_not_running(cpu)) 107 while (!cpu_stopped(cpu))
90 cpu_relax(); 108 cpu_relax();
91 } 109 }
92} 110}
@@ -269,19 +287,6 @@ static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { }
269 287
270#endif /* CONFIG_ZFCPDUMP */ 288#endif /* CONFIG_ZFCPDUMP */
271 289
272static int cpu_stopped(int cpu)
273{
274 __u32 status;
275
276 /* Check for stopped state */
277 if (signal_processor_ps(&status, 0, cpu, sigp_sense) ==
278 sigp_status_stored) {
279 if (status & 0x40)
280 return 1;
281 }
282 return 0;
283}
284
285static int cpu_known(int cpu_id) 290static int cpu_known(int cpu_id)
286{ 291{
287 int cpu; 292 int cpu;
@@ -300,7 +305,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail)
300 logical_cpu = cpumask_first(&avail); 305 logical_cpu = cpumask_first(&avail);
301 if (logical_cpu >= nr_cpu_ids) 306 if (logical_cpu >= nr_cpu_ids)
302 return 0; 307 return 0;
303 for (cpu_id = 0; cpu_id <= 65535; cpu_id++) { 308 for (cpu_id = 0; cpu_id <= MAX_CPU_ADDRESS; cpu_id++) {
304 if (cpu_known(cpu_id)) 309 if (cpu_known(cpu_id))
305 continue; 310 continue;
306 __cpu_logical_map[logical_cpu] = cpu_id; 311 __cpu_logical_map[logical_cpu] = cpu_id;
@@ -379,7 +384,7 @@ static void __init smp_detect_cpus(void)
379 /* Use sigp detection algorithm if sclp doesn't work. */ 384 /* Use sigp detection algorithm if sclp doesn't work. */
380 if (sclp_get_cpu_info(info)) { 385 if (sclp_get_cpu_info(info)) {
381 smp_use_sigp_detection = 1; 386 smp_use_sigp_detection = 1;
382 for (cpu = 0; cpu <= 65535; cpu++) { 387 for (cpu = 0; cpu <= MAX_CPU_ADDRESS; cpu++) {
383 if (cpu == boot_cpu_addr) 388 if (cpu == boot_cpu_addr)
384 continue; 389 continue;
385 __cpu_logical_map[CPU_INIT_NO] = cpu; 390 __cpu_logical_map[CPU_INIT_NO] = cpu;
@@ -635,7 +640,7 @@ int __cpu_disable(void)
635void __cpu_die(unsigned int cpu) 640void __cpu_die(unsigned int cpu)
636{ 641{
637 /* Wait until target cpu is down */ 642 /* Wait until target cpu is down */
638 while (!smp_cpu_not_running(cpu)) 643 while (!cpu_stopped(cpu))
639 cpu_relax(); 644 cpu_relax();
640 smp_free_lowcore(cpu); 645 smp_free_lowcore(cpu);
641 pr_info("Processor %d stopped\n", cpu); 646 pr_info("Processor %d stopped\n", cpu);
diff --git a/arch/s390/power/swsusp.c b/arch/s390/kernel/suspend.c
index bd1f5c6b0b8c..086bee970cae 100644
--- a/arch/s390/power/swsusp.c
+++ b/arch/s390/kernel/suspend.c
@@ -1,13 +1,44 @@
1/* 1/*
2 * Support for suspend and resume on s390 2 * Suspend support specific for s390.
3 * 3 *
4 * Copyright IBM Corp. 2009 4 * Copyright IBM Corp. 2009
5 * 5 *
6 * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> 6 * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
7 *
8 */ 7 */
9 8
9#include <linux/suspend.h>
10#include <linux/reboot.h>
11#include <linux/pfn.h>
12#include <linux/mm.h>
13#include <asm/sections.h>
10#include <asm/system.h> 14#include <asm/system.h>
15#include <asm/ipl.h>
16
17/*
18 * References to section boundaries
19 */
20extern const void __nosave_begin, __nosave_end;
21
22/*
23 * check if given pfn is in the 'nosave' or in the read only NSS section
24 */
25int pfn_is_nosave(unsigned long pfn)
26{
27 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
28 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end))
29 >> PAGE_SHIFT;
30 unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
31 unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
32
33 if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
34 return 1;
35 if (pfn >= stext_pfn && pfn <= eshared_pfn) {
36 if (ipl_info.type == IPL_TYPE_NSS)
37 return 1;
38 } else if ((tprot(pfn * PAGE_SIZE) && pfn > 0))
39 return 1;
40 return 0;
41}
11 42
12void save_processor_state(void) 43void save_processor_state(void)
13{ 44{
diff --git a/arch/s390/power/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index b26df5c5933e..7cd6b096f0d1 100644
--- a/arch/s390/power/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -21,7 +21,7 @@
21 * This function runs with disabled interrupts. 21 * This function runs with disabled interrupts.
22 */ 22 */
23 .section .text 23 .section .text
24 .align 2 24 .align 4
25 .globl swsusp_arch_suspend 25 .globl swsusp_arch_suspend
26swsusp_arch_suspend: 26swsusp_arch_suspend:
27 stmg %r6,%r15,__SF_GPRS(%r15) 27 stmg %r6,%r15,__SF_GPRS(%r15)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index d4c8e9c47c81..54e327e9af04 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -60,6 +60,7 @@
60#define TICK_SIZE tick 60#define TICK_SIZE tick
61 61
62u64 sched_clock_base_cc = -1; /* Force to data section. */ 62u64 sched_clock_base_cc = -1; /* Force to data section. */
63EXPORT_SYMBOL_GPL(sched_clock_base_cc);
63 64
64static DEFINE_PER_CPU(struct clock_event_device, comparators); 65static DEFINE_PER_CPU(struct clock_event_device, comparators);
65 66
@@ -68,7 +69,7 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators);
68 */ 69 */
69unsigned long long notrace sched_clock(void) 70unsigned long long notrace sched_clock(void)
70{ 71{
71 return ((get_clock_xt() - sched_clock_base_cc) * 125) >> 9; 72 return (get_clock_monotonic() * 125) >> 9;
72} 73}
73 74
74/* 75/*
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index a53db23ee092..7315f9e67e1d 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -52,55 +52,18 @@ SECTIONS
52 . = ALIGN(PAGE_SIZE); 52 . = ALIGN(PAGE_SIZE);
53 _eshared = .; /* End of shareable data */ 53 _eshared = .; /* End of shareable data */
54 54
55 . = ALIGN(16); /* Exception table */ 55 EXCEPTION_TABLE(16) :data
56 __ex_table : {
57 __start___ex_table = .;
58 *(__ex_table)
59 __stop___ex_table = .;
60 } :data
61
62 .data : { /* Data */
63 DATA_DATA
64 CONSTRUCTORS
65 }
66
67 . = ALIGN(PAGE_SIZE);
68 .data_nosave : {
69 __nosave_begin = .;
70 *(.data.nosave)
71 }
72 . = ALIGN(PAGE_SIZE);
73 __nosave_end = .;
74
75 . = ALIGN(PAGE_SIZE);
76 .data.page_aligned : {
77 *(.data.idt)
78 }
79 56
80 . = ALIGN(0x100); 57 RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
81 .data.cacheline_aligned : {
82 *(.data.cacheline_aligned)
83 }
84 58
85 . = ALIGN(0x100);
86 .data.read_mostly : {
87 *(.data.read_mostly)
88 }
89 _edata = .; /* End of data section */ 59 _edata = .; /* End of data section */
90 60
91 . = ALIGN(THREAD_SIZE); /* init_task */
92 .data.init_task : {
93 *(.data.init_task)
94 }
95
96 /* will be freed after init */ 61 /* will be freed after init */
97 . = ALIGN(PAGE_SIZE); /* Init code and data */ 62 . = ALIGN(PAGE_SIZE); /* Init code and data */
98 __init_begin = .; 63 __init_begin = .;
99 .init.text : { 64
100 _sinittext = .; 65 INIT_TEXT_SECTION(PAGE_SIZE)
101 INIT_TEXT 66
102 _einittext = .;
103 }
104 /* 67 /*
105 * .exit.text is discarded at runtime, not link time, 68 * .exit.text is discarded at runtime, not link time,
106 * to deal with references from __bug_table 69 * to deal with references from __bug_table
@@ -111,49 +74,13 @@ SECTIONS
111 74
112 /* early.c uses stsi, which requires page aligned data. */ 75 /* early.c uses stsi, which requires page aligned data. */
113 . = ALIGN(PAGE_SIZE); 76 . = ALIGN(PAGE_SIZE);
114 .init.data : { 77 INIT_DATA_SECTION(0x100)
115 INIT_DATA
116 }
117 . = ALIGN(0x100);
118 .init.setup : {
119 __setup_start = .;
120 *(.init.setup)
121 __setup_end = .;
122 }
123 .initcall.init : {
124 __initcall_start = .;
125 INITCALLS
126 __initcall_end = .;
127 }
128
129 .con_initcall.init : {
130 __con_initcall_start = .;
131 *(.con_initcall.init)
132 __con_initcall_end = .;
133 }
134 SECURITY_INIT
135
136#ifdef CONFIG_BLK_DEV_INITRD
137 . = ALIGN(0x100);
138 .init.ramfs : {
139 __initramfs_start = .;
140 *(.init.ramfs)
141 . = ALIGN(2);
142 __initramfs_end = .;
143 }
144#endif
145 78
146 PERCPU(PAGE_SIZE) 79 PERCPU(PAGE_SIZE)
147 . = ALIGN(PAGE_SIZE); 80 . = ALIGN(PAGE_SIZE);
148 __init_end = .; /* freed after init ends here */ 81 __init_end = .; /* freed after init ends here */
149 82
150 /* BSS */ 83 BSS_SECTION(0, 2, 0)
151 .bss : {
152 __bss_start = .;
153 *(.bss)
154 . = ALIGN(2);
155 __bss_stop = .;
156 }
157 84
158 _end = . ; 85 _end = . ;
159 86
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index db05661ac895..eec054484419 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the linux s390-specific parts of the memory manager. 2# Makefile for the linux s390-specific parts of the memory manager.
3# 3#
4 4
5obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o 5obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
6 page-states.o
6obj-$(CONFIG_CMM) += cmm.o 7obj-$(CONFIG_CMM) += cmm.o
7obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 8obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
8obj-$(CONFIG_PAGE_STATES) += page-states.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index e5e119fe03b2..1abbadd497e1 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -10,6 +10,7 @@
10 * Copyright (C) 1995 Linus Torvalds 10 * Copyright (C) 1995 Linus Torvalds
11 */ 11 */
12 12
13#include <linux/perf_counter.h>
13#include <linux/signal.h> 14#include <linux/signal.h>
14#include <linux/sched.h> 15#include <linux/sched.h>
15#include <linux/kernel.h> 16#include <linux/kernel.h>
@@ -305,7 +306,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write)
305 * interrupts again and then search the VMAs 306 * interrupts again and then search the VMAs
306 */ 307 */
307 local_irq_enable(); 308 local_irq_enable();
308 309 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
309 down_read(&mm->mmap_sem); 310 down_read(&mm->mmap_sem);
310 311
311 si_code = SEGV_MAPERR; 312 si_code = SEGV_MAPERR;
@@ -363,11 +364,15 @@ good_area:
363 } 364 }
364 BUG(); 365 BUG();
365 } 366 }
366 if (fault & VM_FAULT_MAJOR) 367 if (fault & VM_FAULT_MAJOR) {
367 tsk->maj_flt++; 368 tsk->maj_flt++;
368 else 369 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
370 regs, address);
371 } else {
369 tsk->min_flt++; 372 tsk->min_flt++;
370 373 perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
374 regs, address);
375 }
371 up_read(&mm->mmap_sem); 376 up_read(&mm->mmap_sem);
372 /* 377 /*
373 * The instruction that caused the program check will 378 * The instruction that caused the program check will
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index fc0ad73ffd90..f92ec203ad92 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * arch/s390/mm/page-states.c
3 *
4 * Copyright IBM Corp. 2008 2 * Copyright IBM Corp. 2008
5 * 3 *
6 * Guest page hinting for unused pages. 4 * Guest page hinting for unused pages.
@@ -17,11 +15,12 @@
17#define ESSA_SET_STABLE 1 15#define ESSA_SET_STABLE 1
18#define ESSA_SET_UNUSED 2 16#define ESSA_SET_UNUSED 2
19 17
20static int cmma_flag; 18static int cmma_flag = 1;
21 19
22static int __init cmma(char *str) 20static int __init cmma(char *str)
23{ 21{
24 char *parm; 22 char *parm;
23
25 parm = strstrip(str); 24 parm = strstrip(str);
26 if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) { 25 if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) {
27 cmma_flag = 1; 26 cmma_flag = 1;
@@ -32,7 +31,6 @@ static int __init cmma(char *str)
32 return 1; 31 return 1;
33 return 0; 32 return 0;
34} 33}
35
36__setup("cmma=", cmma); 34__setup("cmma=", cmma);
37 35
38void __init cmma_init(void) 36void __init cmma_init(void)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 565667207985..c70215247071 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -78,9 +78,9 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
78 } 78 }
79 page->index = page_to_phys(shadow); 79 page->index = page_to_phys(shadow);
80 } 80 }
81 spin_lock(&mm->page_table_lock); 81 spin_lock(&mm->context.list_lock);
82 list_add(&page->lru, &mm->context.crst_list); 82 list_add(&page->lru, &mm->context.crst_list);
83 spin_unlock(&mm->page_table_lock); 83 spin_unlock(&mm->context.list_lock);
84 return (unsigned long *) page_to_phys(page); 84 return (unsigned long *) page_to_phys(page);
85} 85}
86 86
@@ -89,9 +89,9 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
89 unsigned long *shadow = get_shadow_table(table); 89 unsigned long *shadow = get_shadow_table(table);
90 struct page *page = virt_to_page(table); 90 struct page *page = virt_to_page(table);
91 91
92 spin_lock(&mm->page_table_lock); 92 spin_lock(&mm->context.list_lock);
93 list_del(&page->lru); 93 list_del(&page->lru);
94 spin_unlock(&mm->page_table_lock); 94 spin_unlock(&mm->context.list_lock);
95 if (shadow) 95 if (shadow)
96 free_pages((unsigned long) shadow, ALLOC_ORDER); 96 free_pages((unsigned long) shadow, ALLOC_ORDER);
97 free_pages((unsigned long) table, ALLOC_ORDER); 97 free_pages((unsigned long) table, ALLOC_ORDER);
@@ -182,7 +182,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
182 unsigned long bits; 182 unsigned long bits;
183 183
184 bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; 184 bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
185 spin_lock(&mm->page_table_lock); 185 spin_lock(&mm->context.list_lock);
186 page = NULL; 186 page = NULL;
187 if (!list_empty(&mm->context.pgtable_list)) { 187 if (!list_empty(&mm->context.pgtable_list)) {
188 page = list_first_entry(&mm->context.pgtable_list, 188 page = list_first_entry(&mm->context.pgtable_list,
@@ -191,7 +191,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
191 page = NULL; 191 page = NULL;
192 } 192 }
193 if (!page) { 193 if (!page) {
194 spin_unlock(&mm->page_table_lock); 194 spin_unlock(&mm->context.list_lock);
195 page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 195 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
196 if (!page) 196 if (!page)
197 return NULL; 197 return NULL;
@@ -202,7 +202,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
202 clear_table_pgstes(table); 202 clear_table_pgstes(table);
203 else 203 else
204 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); 204 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
205 spin_lock(&mm->page_table_lock); 205 spin_lock(&mm->context.list_lock);
206 list_add(&page->lru, &mm->context.pgtable_list); 206 list_add(&page->lru, &mm->context.pgtable_list);
207 } 207 }
208 table = (unsigned long *) page_to_phys(page); 208 table = (unsigned long *) page_to_phys(page);
@@ -213,7 +213,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
213 page->flags |= bits; 213 page->flags |= bits;
214 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) 214 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
215 list_move_tail(&page->lru, &mm->context.pgtable_list); 215 list_move_tail(&page->lru, &mm->context.pgtable_list);
216 spin_unlock(&mm->page_table_lock); 216 spin_unlock(&mm->context.list_lock);
217 return table; 217 return table;
218} 218}
219 219
@@ -225,7 +225,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
225 bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; 225 bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
226 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); 226 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
227 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 227 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
228 spin_lock(&mm->page_table_lock); 228 spin_lock(&mm->context.list_lock);
229 page->flags ^= bits; 229 page->flags ^= bits;
230 if (page->flags & FRAG_MASK) { 230 if (page->flags & FRAG_MASK) {
231 /* Page now has some free pgtable fragments. */ 231 /* Page now has some free pgtable fragments. */
@@ -234,7 +234,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
234 } else 234 } else
235 /* All fragments of the 4K page have been freed. */ 235 /* All fragments of the 4K page have been freed. */
236 list_del(&page->lru); 236 list_del(&page->lru);
237 spin_unlock(&mm->page_table_lock); 237 spin_unlock(&mm->context.list_lock);
238 if (page) { 238 if (page) {
239 pgtable_page_dtor(page); 239 pgtable_page_dtor(page);
240 __free_page(page); 240 __free_page(page);
@@ -245,7 +245,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
245{ 245{
246 struct page *page; 246 struct page *page;
247 247
248 spin_lock(&mm->page_table_lock); 248 spin_lock(&mm->context.list_lock);
249 /* Free shadow region and segment tables. */ 249 /* Free shadow region and segment tables. */
250 list_for_each_entry(page, &mm->context.crst_list, lru) 250 list_for_each_entry(page, &mm->context.crst_list, lru)
251 if (page->index) { 251 if (page->index) {
@@ -255,7 +255,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
255 /* "Free" second halves of page tables. */ 255 /* "Free" second halves of page tables. */
256 list_for_each_entry(page, &mm->context.pgtable_list, lru) 256 list_for_each_entry(page, &mm->context.pgtable_list, lru)
257 page->flags &= ~SECOND_HALVES; 257 page->flags &= ~SECOND_HALVES;
258 spin_unlock(&mm->page_table_lock); 258 spin_unlock(&mm->context.list_lock);
259 mm->context.noexec = 0; 259 mm->context.noexec = 0;
260 update_mm(mm, tsk); 260 update_mm(mm, tsk);
261} 261}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index e4868bfc672f..5f91a38d7592 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -331,6 +331,7 @@ void __init vmem_map_init(void)
331 unsigned long start, end; 331 unsigned long start, end;
332 int i; 332 int i;
333 333
334 spin_lock_init(&init_mm.context.list_lock);
334 INIT_LIST_HEAD(&init_mm.context.crst_list); 335 INIT_LIST_HEAD(&init_mm.context.crst_list);
335 INIT_LIST_HEAD(&init_mm.context.pgtable_list); 336 INIT_LIST_HEAD(&init_mm.context.pgtable_list);
336 init_mm.context.noexec = 0; 337 init_mm.context.noexec = 0;
diff --git a/arch/s390/power/Makefile b/arch/s390/power/Makefile
deleted file mode 100644
index 973bb45a8fec..000000000000
--- a/arch/s390/power/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
1#
2# Makefile for s390 PM support
3#
4
5obj-$(CONFIG_HIBERNATION) += suspend.o
6obj-$(CONFIG_HIBERNATION) += swsusp.o
7obj-$(CONFIG_HIBERNATION) += swsusp_64.o
8obj-$(CONFIG_HIBERNATION) += swsusp_asm64.o
diff --git a/arch/s390/power/suspend.c b/arch/s390/power/suspend.c
deleted file mode 100644
index b3351eceebbe..000000000000
--- a/arch/s390/power/suspend.c
+++ /dev/null
@@ -1,40 +0,0 @@
1/*
2 * Suspend support specific for s390.
3 *
4 * Copyright IBM Corp. 2009
5 *
6 * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
7 */
8
9#include <linux/mm.h>
10#include <linux/suspend.h>
11#include <linux/reboot.h>
12#include <linux/pfn.h>
13#include <asm/sections.h>
14#include <asm/ipl.h>
15
16/*
17 * References to section boundaries
18 */
19extern const void __nosave_begin, __nosave_end;
20
21/*
22 * check if given pfn is in the 'nosave' or in the read only NSS section
23 */
24int pfn_is_nosave(unsigned long pfn)
25{
26 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
27 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end))
28 >> PAGE_SHIFT;
29 unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
30 unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
31
32 if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
33 return 1;
34 if (pfn >= stext_pfn && pfn <= eshared_pfn) {
35 if (ipl_info.type == IPL_TYPE_NSS)
36 return 1;
37 } else if ((tprot(pfn * PAGE_SIZE) && pfn > 0))
38 return 1;
39 return 0;
40}
diff --git a/arch/s390/power/swsusp_64.c b/arch/s390/power/swsusp_64.c
deleted file mode 100644
index 9516a517d72f..000000000000
--- a/arch/s390/power/swsusp_64.c
+++ /dev/null
@@ -1,17 +0,0 @@
1/*
2 * Support for suspend and resume on s390
3 *
4 * Copyright IBM Corp. 2009
5 *
6 * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
7 *
8 */
9
10#include <asm/system.h>
11#include <linux/interrupt.h>
12
13void do_after_copyback(void)
14{
15 mb();
16}
17
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index b5afbec1db59..04a21883f327 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -640,5 +640,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0,
640 if (thread_info_flags & _TIF_NOTIFY_RESUME) { 640 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
641 clear_thread_flag(TIF_NOTIFY_RESUME); 641 clear_thread_flag(TIF_NOTIFY_RESUME);
642 tracehook_notify_resume(regs); 642 tracehook_notify_resume(regs);
643 if (current->replacement_session_keyring)
644 key_replace_session_keyring();
643 } 645 }
644} 646}
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 0663a0ee6021..9e5c9b1d7e98 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -772,5 +772,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_info
772 if (thread_info_flags & _TIF_NOTIFY_RESUME) { 772 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
773 clear_thread_flag(TIF_NOTIFY_RESUME); 773 clear_thread_flag(TIF_NOTIFY_RESUME);
774 tracehook_notify_resume(regs); 774 tracehook_notify_resume(regs);
775 if (current->replacement_session_keyring)
776 key_replace_session_keyring();
775 } 777 }
776} 778}
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 181d069a2d44..7ce1a1005b1d 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -590,6 +590,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0,
590 if (thread_info_flags & _TIF_NOTIFY_RESUME) { 590 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
591 clear_thread_flag(TIF_NOTIFY_RESUME); 591 clear_thread_flag(TIF_NOTIFY_RESUME);
592 tracehook_notify_resume(regs); 592 tracehook_notify_resume(regs);
593 if (current->replacement_session_keyring)
594 key_replace_session_keyring();
593 } 595 }
594} 596}
595 597
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index ec82d76dc6f2..647afbda7ae1 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -613,5 +613,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long
613 if (thread_info_flags & _TIF_NOTIFY_RESUME) { 613 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
614 clear_thread_flag(TIF_NOTIFY_RESUME); 614 clear_thread_flag(TIF_NOTIFY_RESUME);
615 tracehook_notify_resume(regs); 615 tracehook_notify_resume(regs);
616 if (current->replacement_session_keyring)
617 key_replace_session_keyring();
616 } 618 }
617} 619}
620
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 676debfc1702..128111d8ffe0 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -20,6 +20,7 @@
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/ioport.h> 21#include <linux/ioport.h>
22#include <linux/suspend.h> 22#include <linux/suspend.h>
23#include <linux/kmemleak.h>
23#include <asm/e820.h> 24#include <asm/e820.h>
24#include <asm/io.h> 25#include <asm/io.h>
25#include <asm/iommu.h> 26#include <asm/iommu.h>
@@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void)
94 * code for safe 95 * code for safe
95 */ 96 */
96 p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); 97 p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
98 /*
99 * Kmemleak should not scan this block as it may not be mapped via the
100 * kernel direct mapping.
101 */
102 kmemleak_ignore(p);
97 if (!p || __pa(p)+aper_size > 0xffffffff) { 103 if (!p || __pa(p)+aper_size > 0xffffffff) {
98 printk(KERN_ERR 104 printk(KERN_ERR
99 "Cannot allocate aperture memory hole (%p,%uK)\n", 105 "Cannot allocate aperture memory hole (%p,%uK)\n",
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1a041bcf506b..fa80f60e9607 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -3,6 +3,7 @@
3#include <linux/dmar.h> 3#include <linux/dmar.h>
4#include <linux/bootmem.h> 4#include <linux/bootmem.h>
5#include <linux/pci.h> 5#include <linux/pci.h>
6#include <linux/kmemleak.h>
6 7
7#include <asm/proto.h> 8#include <asm/proto.h>
8#include <asm/dma.h> 9#include <asm/dma.h>
@@ -88,6 +89,11 @@ void __init dma32_reserve_bootmem(void)
88 size = roundup(dma32_bootmem_size, align); 89 size = roundup(dma32_bootmem_size, align);
89 dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 90 dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
90 512ULL<<20); 91 512ULL<<20);
92 /*
93 * Kmemleak should not scan this block as it may not be mapped via the
94 * kernel direct mapping.
95 */
96 kmemleak_ignore(dma32_bootmem_ptr);
91 if (dma32_bootmem_ptr) 97 if (dma32_bootmem_ptr)
92 dma32_bootmem_size = size; 98 dma32_bootmem_size = size;
93 else 99 else
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4c578751e94e..81e58238c4ce 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -869,6 +869,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
869 if (thread_info_flags & _TIF_NOTIFY_RESUME) { 869 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
870 clear_thread_flag(TIF_NOTIFY_RESUME); 870 clear_thread_flag(TIF_NOTIFY_RESUME);
871 tracehook_notify_resume(regs); 871 tracehook_notify_resume(regs);
872 if (current->replacement_session_keyring)
873 key_replace_session_keyring();
872 } 874 }
873 875
874#ifdef CONFIG_X86_32 876#ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 2c55ed098654..528bf954eb74 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -331,6 +331,20 @@ static void kmemcheck_read_strict(struct pt_regs *regs,
331 kmemcheck_shadow_set(shadow, size); 331 kmemcheck_shadow_set(shadow, size);
332} 332}
333 333
334bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
335{
336 enum kmemcheck_shadow status;
337 void *shadow;
338
339 shadow = kmemcheck_shadow_lookup(addr);
340 if (!shadow)
341 return true;
342
343 status = kmemcheck_shadow_test(shadow, size);
344
345 return status == KMEMCHECK_SHADOW_INITIALIZED;
346}
347
334/* Access may cross page boundary */ 348/* Access may cross page boundary */
335static void kmemcheck_read(struct pt_regs *regs, 349static void kmemcheck_read(struct pt_regs *regs,
336 unsigned long addr, unsigned int size) 350 unsigned long addr, unsigned int size)
diff --git a/block/blk-core.c b/block/blk-core.c
index e3299a77a0d8..e695634882a6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -501,6 +501,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
501 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 501 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
502 q->backing_dev_info.state = 0; 502 q->backing_dev_info.state = 0;
503 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 503 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
504 q->backing_dev_info.name = "block";
504 505
505 err = bdi_init(&q->backing_dev_info); 506 err = bdi_init(&q->backing_dev_info);
506 if (err) { 507 if (err) {
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 1e15889c4b98..95d344971eda 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -268,6 +268,7 @@ aoeblk_gdalloc(void *vp)
268 if (!d->blkq) 268 if (!d->blkq)
269 goto err_mempool; 269 goto err_mempool;
270 blk_queue_make_request(d->blkq, aoeblk_make_request); 270 blk_queue_make_request(d->blkq, aoeblk_make_request);
271 d->blkq->backing_dev_info.name = "aoe";
271 if (bdi_init(&d->blkq->backing_dev_info)) 272 if (bdi_init(&d->blkq->backing_dev_info))
272 goto err_blkq; 273 goto err_blkq;
273 spin_lock_irqsave(&d->lock, flags); 274 spin_lock_irqsave(&d->lock, flags);
diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c
index 86105efb4eb6..0ecac7e532f6 100644
--- a/drivers/char/hvc_iucv.c
+++ b/drivers/char/hvc_iucv.c
@@ -1006,7 +1006,7 @@ static int __init hvc_iucv_alloc(int id, unsigned int is_console)
1006 priv->dev->release = (void (*)(struct device *)) kfree; 1006 priv->dev->release = (void (*)(struct device *)) kfree;
1007 rc = device_register(priv->dev); 1007 rc = device_register(priv->dev);
1008 if (rc) { 1008 if (rc) {
1009 kfree(priv->dev); 1009 put_device(priv->dev);
1010 goto out_error_dev; 1010 goto out_error_dev;
1011 } 1011 }
1012 1012
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index afa8813e737a..645237bda682 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -822,6 +822,7 @@ static const struct file_operations zero_fops = {
822 * - permits private mappings, "copies" are taken of the source of zeros 822 * - permits private mappings, "copies" are taken of the source of zeros
823 */ 823 */
824static struct backing_dev_info zero_bdi = { 824static struct backing_dev_info zero_bdi = {
825 .name = "char/mem",
825 .capabilities = BDI_CAP_MAP_COPY, 826 .capabilities = BDI_CAP_MAP_COPY,
826}; 827};
827 828
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index aec1931608aa..0b73e4ec1add 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -450,6 +450,12 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
450 goto out_err; 450 goto out_err;
451 } 451 }
452 452
453 /* Default timeouts */
454 chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
455 chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
456 chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
457 chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
458
453 if (request_locality(chip, 0) != 0) { 459 if (request_locality(chip, 0) != 0) {
454 rc = -ENODEV; 460 rc = -ENODEV;
455 goto out_err; 461 goto out_err;
@@ -457,12 +463,6 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
457 463
458 vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0)); 464 vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0));
459 465
460 /* Default timeouts */
461 chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
462 chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
463 chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
464 chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
465
466 dev_info(dev, 466 dev_info(dev,
467 "1.2 TPM (device-id 0x%X, rev-id %d)\n", 467 "1.2 TPM (device-id 0x%X, rev-id %d)\n",
468 vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0))); 468 vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 8f9509e1ebf7..55d093a36ae4 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -362,6 +362,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
362 * In either case, must tell the provider to reject. 362 * In either case, must tell the provider to reject.
363 */ 363 */
364 cm_id_priv->state = IW_CM_STATE_DESTROYING; 364 cm_id_priv->state = IW_CM_STATE_DESTROYING;
365 cm_id->device->iwcm->reject(cm_id, NULL, 0);
365 break; 366 break;
366 case IW_CM_STATE_CONN_SENT: 367 case IW_CM_STATE_CONN_SENT:
367 case IW_CM_STATE_DESTROYING: 368 case IW_CM_STATE_DESTROYING:
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index de922a04ca2d..7522008fda86 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2,6 +2,7 @@
2 * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. 2 * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 4 * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
5 * 6 *
6 * This software is available to you under a choice of one of two 7 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU 8 * licenses. You may choose to be licensed under the terms of the GNU
@@ -45,14 +46,21 @@ MODULE_DESCRIPTION("kernel IB MAD API");
45MODULE_AUTHOR("Hal Rosenstock"); 46MODULE_AUTHOR("Hal Rosenstock");
46MODULE_AUTHOR("Sean Hefty"); 47MODULE_AUTHOR("Sean Hefty");
47 48
49int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
50int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
51
52module_param_named(send_queue_size, mad_sendq_size, int, 0444);
53MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
54module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
55MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
56
48static struct kmem_cache *ib_mad_cache; 57static struct kmem_cache *ib_mad_cache;
49 58
50static struct list_head ib_mad_port_list; 59static struct list_head ib_mad_port_list;
51static u32 ib_mad_client_id = 0; 60static u32 ib_mad_client_id = 0;
52 61
53/* Port list lock */ 62/* Port list lock */
54static spinlock_t ib_mad_port_list_lock; 63static DEFINE_SPINLOCK(ib_mad_port_list_lock);
55
56 64
57/* Forward declarations */ 65/* Forward declarations */
58static int method_in_use(struct ib_mad_mgmt_method_table **method, 66static int method_in_use(struct ib_mad_mgmt_method_table **method,
@@ -1974,7 +1982,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
1974 unsigned long delay; 1982 unsigned long delay;
1975 1983
1976 if (list_empty(&mad_agent_priv->wait_list)) { 1984 if (list_empty(&mad_agent_priv->wait_list)) {
1977 cancel_delayed_work(&mad_agent_priv->timed_work); 1985 __cancel_delayed_work(&mad_agent_priv->timed_work);
1978 } else { 1986 } else {
1979 mad_send_wr = list_entry(mad_agent_priv->wait_list.next, 1987 mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
1980 struct ib_mad_send_wr_private, 1988 struct ib_mad_send_wr_private,
@@ -1983,7 +1991,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
1983 if (time_after(mad_agent_priv->timeout, 1991 if (time_after(mad_agent_priv->timeout,
1984 mad_send_wr->timeout)) { 1992 mad_send_wr->timeout)) {
1985 mad_agent_priv->timeout = mad_send_wr->timeout; 1993 mad_agent_priv->timeout = mad_send_wr->timeout;
1986 cancel_delayed_work(&mad_agent_priv->timed_work); 1994 __cancel_delayed_work(&mad_agent_priv->timed_work);
1987 delay = mad_send_wr->timeout - jiffies; 1995 delay = mad_send_wr->timeout - jiffies;
1988 if ((long)delay <= 0) 1996 if ((long)delay <= 0)
1989 delay = 1; 1997 delay = 1;
@@ -2023,7 +2031,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
2023 2031
2024 /* Reschedule a work item if we have a shorter timeout */ 2032 /* Reschedule a work item if we have a shorter timeout */
2025 if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) { 2033 if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
2026 cancel_delayed_work(&mad_agent_priv->timed_work); 2034 __cancel_delayed_work(&mad_agent_priv->timed_work);
2027 queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq, 2035 queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2028 &mad_agent_priv->timed_work, delay); 2036 &mad_agent_priv->timed_work, delay);
2029 } 2037 }
@@ -2736,8 +2744,8 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2736 qp_init_attr.send_cq = qp_info->port_priv->cq; 2744 qp_init_attr.send_cq = qp_info->port_priv->cq;
2737 qp_init_attr.recv_cq = qp_info->port_priv->cq; 2745 qp_init_attr.recv_cq = qp_info->port_priv->cq;
2738 qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; 2746 qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
2739 qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE; 2747 qp_init_attr.cap.max_send_wr = mad_sendq_size;
2740 qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE; 2748 qp_init_attr.cap.max_recv_wr = mad_recvq_size;
2741 qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG; 2749 qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
2742 qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG; 2750 qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
2743 qp_init_attr.qp_type = qp_type; 2751 qp_init_attr.qp_type = qp_type;
@@ -2752,8 +2760,8 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2752 goto error; 2760 goto error;
2753 } 2761 }
2754 /* Use minimum queue sizes unless the CQ is resized */ 2762 /* Use minimum queue sizes unless the CQ is resized */
2755 qp_info->send_queue.max_active = IB_MAD_QP_SEND_SIZE; 2763 qp_info->send_queue.max_active = mad_sendq_size;
2756 qp_info->recv_queue.max_active = IB_MAD_QP_RECV_SIZE; 2764 qp_info->recv_queue.max_active = mad_recvq_size;
2757 return 0; 2765 return 0;
2758 2766
2759error: 2767error:
@@ -2792,7 +2800,7 @@ static int ib_mad_port_open(struct ib_device *device,
2792 init_mad_qp(port_priv, &port_priv->qp_info[0]); 2800 init_mad_qp(port_priv, &port_priv->qp_info[0]);
2793 init_mad_qp(port_priv, &port_priv->qp_info[1]); 2801 init_mad_qp(port_priv, &port_priv->qp_info[1]);
2794 2802
2795 cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2; 2803 cq_size = (mad_sendq_size + mad_recvq_size) * 2;
2796 port_priv->cq = ib_create_cq(port_priv->device, 2804 port_priv->cq = ib_create_cq(port_priv->device,
2797 ib_mad_thread_completion_handler, 2805 ib_mad_thread_completion_handler,
2798 NULL, port_priv, cq_size, 0); 2806 NULL, port_priv, cq_size, 0);
@@ -2984,7 +2992,11 @@ static int __init ib_mad_init_module(void)
2984{ 2992{
2985 int ret; 2993 int ret;
2986 2994
2987 spin_lock_init(&ib_mad_port_list_lock); 2995 mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
2996 mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
2997
2998 mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
2999 mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
2988 3000
2989 ib_mad_cache = kmem_cache_create("ib_mad", 3001 ib_mad_cache = kmem_cache_create("ib_mad",
2990 sizeof(struct ib_mad_private), 3002 sizeof(struct ib_mad_private),
@@ -3021,4 +3033,3 @@ static void __exit ib_mad_cleanup_module(void)
3021 3033
3022module_init(ib_mad_init_module); 3034module_init(ib_mad_init_module);
3023module_exit(ib_mad_cleanup_module); 3035module_exit(ib_mad_cleanup_module);
3024
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 05ce331733b0..9430ab4969c5 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -2,6 +2,7 @@
2 * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved. 2 * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
5 * 6 *
6 * This software is available to you under a choice of one of two 7 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU 8 * licenses. You may choose to be licensed under the terms of the GNU
@@ -49,6 +50,8 @@
49/* QP and CQ parameters */ 50/* QP and CQ parameters */
50#define IB_MAD_QP_SEND_SIZE 128 51#define IB_MAD_QP_SEND_SIZE 128
51#define IB_MAD_QP_RECV_SIZE 512 52#define IB_MAD_QP_RECV_SIZE 512
53#define IB_MAD_QP_MIN_SIZE 64
54#define IB_MAD_QP_MAX_SIZE 8192
52#define IB_MAD_SEND_REQ_MAX_SG 2 55#define IB_MAD_SEND_REQ_MAX_SG 2
53#define IB_MAD_RECV_REQ_MAX_SG 1 56#define IB_MAD_RECV_REQ_MAX_SG 1
54 57
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 107f170c57cd..8d82ba171353 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -106,6 +106,8 @@ struct mcast_group {
106 struct ib_sa_query *query; 106 struct ib_sa_query *query;
107 int query_id; 107 int query_id;
108 u16 pkey_index; 108 u16 pkey_index;
109 u8 leave_state;
110 int retries;
109}; 111};
110 112
111struct mcast_member { 113struct mcast_member {
@@ -350,6 +352,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state)
350 352
351 rec = group->rec; 353 rec = group->rec;
352 rec.join_state = leave_state; 354 rec.join_state = leave_state;
355 group->leave_state = leave_state;
353 356
354 ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, 357 ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
355 port->port_num, IB_SA_METHOD_DELETE, &rec, 358 port->port_num, IB_SA_METHOD_DELETE, &rec,
@@ -542,7 +545,11 @@ static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
542{ 545{
543 struct mcast_group *group = context; 546 struct mcast_group *group = context;
544 547
545 mcast_work_handler(&group->work); 548 if (status && group->retries > 0 &&
549 !send_leave(group, group->leave_state))
550 group->retries--;
551 else
552 mcast_work_handler(&group->work);
546} 553}
547 554
548static struct mcast_group *acquire_group(struct mcast_port *port, 555static struct mcast_group *acquire_group(struct mcast_port *port,
@@ -565,6 +572,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port,
565 if (!group) 572 if (!group)
566 return NULL; 573 return NULL;
567 574
575 group->retries = 3;
568 group->port = port; 576 group->port = port;
569 group->rec.mgid = *mgid; 577 group->rec.mgid = *mgid;
570 group->pkey_index = MCAST_INVALID_PKEY_INDEX; 578 group->pkey_index = MCAST_INVALID_PKEY_INDEX;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 1865049e80f7..82543716d59e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -109,10 +109,10 @@ static struct ib_client sa_client = {
109 .remove = ib_sa_remove_one 109 .remove = ib_sa_remove_one
110}; 110};
111 111
112static spinlock_t idr_lock; 112static DEFINE_SPINLOCK(idr_lock);
113static DEFINE_IDR(query_idr); 113static DEFINE_IDR(query_idr);
114 114
115static spinlock_t tid_lock; 115static DEFINE_SPINLOCK(tid_lock);
116static u32 tid; 116static u32 tid;
117 117
118#define PATH_REC_FIELD(field) \ 118#define PATH_REC_FIELD(field) \
@@ -1077,9 +1077,6 @@ static int __init ib_sa_init(void)
1077{ 1077{
1078 int ret; 1078 int ret;
1079 1079
1080 spin_lock_init(&idr_lock);
1081 spin_lock_init(&tid_lock);
1082
1083 get_random_bytes(&tid, sizeof tid); 1080 get_random_bytes(&tid, sizeof tid);
1084 1081
1085 ret = ib_register_client(&sa_client); 1082 ret = ib_register_client(&sa_client);
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index 87236753bce9..5855e4405d9b 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -52,6 +52,10 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
52 hop_cnt = smp->hop_cnt; 52 hop_cnt = smp->hop_cnt;
53 53
54 /* See section 14.2.2.2, Vol 1 IB spec */ 54 /* See section 14.2.2.2, Vol 1 IB spec */
55 /* C14-6 -- valid hop_cnt values are from 0 to 63 */
56 if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
57 return IB_SMI_DISCARD;
58
55 if (!ib_get_smp_direction(smp)) { 59 if (!ib_get_smp_direction(smp)) {
56 /* C14-9:1 */ 60 /* C14-9:1 */
57 if (hop_cnt && hop_ptr == 0) { 61 if (hop_cnt && hop_ptr == 0) {
@@ -133,6 +137,10 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
133 hop_cnt = smp->hop_cnt; 137 hop_cnt = smp->hop_cnt;
134 138
135 /* See section 14.2.2.2, Vol 1 IB spec */ 139 /* See section 14.2.2.2, Vol 1 IB spec */
140 /* C14-6 -- valid hop_cnt values are from 0 to 63 */
141 if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
142 return IB_SMI_DISCARD;
143
136 if (!ib_get_smp_direction(smp)) { 144 if (!ib_get_smp_direction(smp)) {
137 /* C14-9:1 -- sender should have incremented hop_ptr */ 145 /* C14-9:1 -- sender should have incremented hop_ptr */
138 if (hop_cnt && hop_ptr == 0) 146 if (hop_cnt && hop_ptr == 0)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index eb36a81dd09b..d3fff9e008a3 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -73,7 +73,7 @@ DEFINE_IDR(ib_uverbs_cq_idr);
73DEFINE_IDR(ib_uverbs_qp_idr); 73DEFINE_IDR(ib_uverbs_qp_idr);
74DEFINE_IDR(ib_uverbs_srq_idr); 74DEFINE_IDR(ib_uverbs_srq_idr);
75 75
76static spinlock_t map_lock; 76static DEFINE_SPINLOCK(map_lock);
77static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; 77static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
78static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); 78static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
79 79
@@ -584,14 +584,16 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
584 584
585 if (hdr.command < 0 || 585 if (hdr.command < 0 ||
586 hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || 586 hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
587 !uverbs_cmd_table[hdr.command] || 587 !uverbs_cmd_table[hdr.command])
588 !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
589 return -EINVAL; 588 return -EINVAL;
590 589
591 if (!file->ucontext && 590 if (!file->ucontext &&
592 hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) 591 hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
593 return -EINVAL; 592 return -EINVAL;
594 593
594 if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
595 return -ENOSYS;
596
595 return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, 597 return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
596 hdr.in_words * 4, hdr.out_words * 4); 598 hdr.in_words * 4, hdr.out_words * 4);
597} 599}
@@ -836,8 +838,6 @@ static int __init ib_uverbs_init(void)
836{ 838{
837 int ret; 839 int ret;
838 840
839 spin_lock_init(&map_lock);
840
841 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, 841 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
842 "infiniband_verbs"); 842 "infiniband_verbs");
843 if (ret) { 843 if (ret) {
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 0cfbb6d2f762..8250740c94b0 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -86,11 +86,7 @@ MODULE_DEVICE_TABLE(pci, c2_pci_table);
86 86
87static void c2_print_macaddr(struct net_device *netdev) 87static void c2_print_macaddr(struct net_device *netdev)
88{ 88{
89 pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, " 89 pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr, netdev->irq);
90 "IRQ %u\n", netdev->name,
91 netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
92 netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
93 netdev->irq);
94} 90}
95 91
96static void c2_set_rxbufsize(struct c2_port *c2_port) 92static void c2_set_rxbufsize(struct c2_port *c2_port)
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index f1948fad85d7..ad723bd8bf49 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -780,11 +780,11 @@ int c2_register_device(struct c2_dev *dev)
780 /* Register pseudo network device */ 780 /* Register pseudo network device */
781 dev->pseudo_netdev = c2_pseudo_netdev_init(dev); 781 dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
782 if (!dev->pseudo_netdev) 782 if (!dev->pseudo_netdev)
783 goto out3; 783 goto out;
784 784
785 ret = register_netdev(dev->pseudo_netdev); 785 ret = register_netdev(dev->pseudo_netdev);
786 if (ret) 786 if (ret)
787 goto out2; 787 goto out_free_netdev;
788 788
789 pr_debug("%s:%u\n", __func__, __LINE__); 789 pr_debug("%s:%u\n", __func__, __LINE__);
790 strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX); 790 strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
@@ -851,6 +851,10 @@ int c2_register_device(struct c2_dev *dev)
851 dev->ibdev.post_recv = c2_post_receive; 851 dev->ibdev.post_recv = c2_post_receive;
852 852
853 dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); 853 dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
854 if (dev->ibdev.iwcm == NULL) {
855 ret = -ENOMEM;
856 goto out_unregister_netdev;
857 }
854 dev->ibdev.iwcm->add_ref = c2_add_ref; 858 dev->ibdev.iwcm->add_ref = c2_add_ref;
855 dev->ibdev.iwcm->rem_ref = c2_rem_ref; 859 dev->ibdev.iwcm->rem_ref = c2_rem_ref;
856 dev->ibdev.iwcm->get_qp = c2_get_qp; 860 dev->ibdev.iwcm->get_qp = c2_get_qp;
@@ -862,23 +866,25 @@ int c2_register_device(struct c2_dev *dev)
862 866
863 ret = ib_register_device(&dev->ibdev); 867 ret = ib_register_device(&dev->ibdev);
864 if (ret) 868 if (ret)
865 goto out1; 869 goto out_free_iwcm;
866 870
867 for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) { 871 for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
868 ret = device_create_file(&dev->ibdev.dev, 872 ret = device_create_file(&dev->ibdev.dev,
869 c2_dev_attributes[i]); 873 c2_dev_attributes[i]);
870 if (ret) 874 if (ret)
871 goto out0; 875 goto out_unregister_ibdev;
872 } 876 }
873 goto out3; 877 goto out;
874 878
875out0: 879out_unregister_ibdev:
876 ib_unregister_device(&dev->ibdev); 880 ib_unregister_device(&dev->ibdev);
877out1: 881out_free_iwcm:
882 kfree(dev->ibdev.iwcm);
883out_unregister_netdev:
878 unregister_netdev(dev->pseudo_netdev); 884 unregister_netdev(dev->pseudo_netdev);
879out2: 885out_free_netdev:
880 free_netdev(dev->pseudo_netdev); 886 free_netdev(dev->pseudo_netdev);
881out3: 887out:
882 pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret); 888 pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
883 return ret; 889 return ret;
884} 890}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 62f9cf2f94ec..72ed3396b721 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -852,7 +852,9 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
852 wqe->qpcaps = attr->qpcaps; 852 wqe->qpcaps = attr->qpcaps;
853 wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss); 853 wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
854 wqe->rqe_count = cpu_to_be16(attr->rqe_count); 854 wqe->rqe_count = cpu_to_be16(attr->rqe_count);
855 wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type)); 855 wqe->flags_rtr_type = cpu_to_be16(attr->flags |
856 V_RTR_TYPE(attr->rtr_type) |
857 V_CHAN(attr->chan));
856 wqe->ord = cpu_to_be32(attr->ord); 858 wqe->ord = cpu_to_be32(attr->ord);
857 wqe->ird = cpu_to_be32(attr->ird); 859 wqe->ird = cpu_to_be32(attr->ird);
858 wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); 860 wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
@@ -1032,6 +1034,7 @@ err3:
1032err2: 1034err2:
1033 cxio_hal_destroy_ctrl_qp(rdev_p); 1035 cxio_hal_destroy_ctrl_qp(rdev_p);
1034err1: 1036err1:
1037 rdev_p->t3cdev_p->ulp = NULL;
1035 list_del(&rdev_p->entry); 1038 list_del(&rdev_p->entry);
1036 return err; 1039 return err;
1037} 1040}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 32e3b1461d81..a197a5b7ac7f 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -327,6 +327,11 @@ enum rdma_init_rtr_types {
327#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE) 327#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
328#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE) 328#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
329 329
330#define S_CHAN 4
331#define M_CHAN 0x3
332#define V_CHAN(x) ((x) << S_CHAN)
333#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN)
334
330struct t3_rdma_init_attr { 335struct t3_rdma_init_attr {
331 u32 tid; 336 u32 tid;
332 u32 qpid; 337 u32 qpid;
@@ -346,6 +351,7 @@ struct t3_rdma_init_attr {
346 u16 flags; 351 u16 flags;
347 u16 rqe_count; 352 u16 rqe_count;
348 u32 irs; 353 u32 irs;
354 u32 chan;
349}; 355};
350 356
351struct t3_rdma_init_wr { 357struct t3_rdma_init_wr {
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 26fc0a4eaa74..b0ea0105ddf6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -51,7 +51,7 @@ cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
51 51
52static void open_rnic_dev(struct t3cdev *); 52static void open_rnic_dev(struct t3cdev *);
53static void close_rnic_dev(struct t3cdev *); 53static void close_rnic_dev(struct t3cdev *);
54static void iwch_err_handler(struct t3cdev *, u32, u32); 54static void iwch_event_handler(struct t3cdev *, u32, u32);
55 55
56struct cxgb3_client t3c_client = { 56struct cxgb3_client t3c_client = {
57 .name = "iw_cxgb3", 57 .name = "iw_cxgb3",
@@ -59,7 +59,7 @@ struct cxgb3_client t3c_client = {
59 .remove = close_rnic_dev, 59 .remove = close_rnic_dev,
60 .handlers = t3c_handlers, 60 .handlers = t3c_handlers,
61 .redirect = iwch_ep_redirect, 61 .redirect = iwch_ep_redirect,
62 .err_handler = iwch_err_handler 62 .event_handler = iwch_event_handler
63}; 63};
64 64
65static LIST_HEAD(dev_list); 65static LIST_HEAD(dev_list);
@@ -105,11 +105,9 @@ static void rnic_init(struct iwch_dev *rnicp)
105static void open_rnic_dev(struct t3cdev *tdev) 105static void open_rnic_dev(struct t3cdev *tdev)
106{ 106{
107 struct iwch_dev *rnicp; 107 struct iwch_dev *rnicp;
108 static int vers_printed;
109 108
110 PDBG("%s t3cdev %p\n", __func__, tdev); 109 PDBG("%s t3cdev %p\n", __func__, tdev);
111 if (!vers_printed++) 110 printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
112 printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
113 DRV_VERSION); 111 DRV_VERSION);
114 rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp)); 112 rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
115 if (!rnicp) { 113 if (!rnicp) {
@@ -162,21 +160,36 @@ static void close_rnic_dev(struct t3cdev *tdev)
162 mutex_unlock(&dev_mutex); 160 mutex_unlock(&dev_mutex);
163} 161}
164 162
165static void iwch_err_handler(struct t3cdev *tdev, u32 status, u32 error) 163static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
166{ 164{
167 struct cxio_rdev *rdev = tdev->ulp; 165 struct cxio_rdev *rdev = tdev->ulp;
168 struct iwch_dev *rnicp = rdev_to_iwch_dev(rdev); 166 struct iwch_dev *rnicp;
169 struct ib_event event; 167 struct ib_event event;
168 u32 portnum = port_id + 1;
170 169
171 if (status == OFFLOAD_STATUS_DOWN) { 170 if (!rdev)
171 return;
172 rnicp = rdev_to_iwch_dev(rdev);
173 switch (evt) {
174 case OFFLOAD_STATUS_DOWN: {
172 rdev->flags = CXIO_ERROR_FATAL; 175 rdev->flags = CXIO_ERROR_FATAL;
173
174 event.device = &rnicp->ibdev;
175 event.event = IB_EVENT_DEVICE_FATAL; 176 event.event = IB_EVENT_DEVICE_FATAL;
176 event.element.port_num = 0; 177 break;
177 ib_dispatch_event(&event); 178 }
179 case OFFLOAD_PORT_DOWN: {
180 event.event = IB_EVENT_PORT_ERR;
181 break;
182 }
183 case OFFLOAD_PORT_UP: {
184 event.event = IB_EVENT_PORT_ACTIVE;
185 break;
186 }
178 } 187 }
179 188
189 event.device = &rnicp->ibdev;
190 event.element.port_num = portnum;
191 ib_dispatch_event(&event);
192
180 return; 193 return;
181} 194}
182 195
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 52d7bb0c2a12..66b41351910a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -286,7 +286,7 @@ void __free_ep(struct kref *kref)
286 ep = container_of(container_of(kref, struct iwch_ep_common, kref), 286 ep = container_of(container_of(kref, struct iwch_ep_common, kref),
287 struct iwch_ep, com); 287 struct iwch_ep, com);
288 PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); 288 PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
289 if (ep->com.flags & RELEASE_RESOURCES) { 289 if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
290 cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); 290 cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
291 dst_release(ep->dst); 291 dst_release(ep->dst);
292 l2t_release(L2DATA(ep->com.tdev), ep->l2t); 292 l2t_release(L2DATA(ep->com.tdev), ep->l2t);
@@ -297,7 +297,7 @@ void __free_ep(struct kref *kref)
297static void release_ep_resources(struct iwch_ep *ep) 297static void release_ep_resources(struct iwch_ep *ep)
298{ 298{
299 PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); 299 PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
300 ep->com.flags |= RELEASE_RESOURCES; 300 set_bit(RELEASE_RESOURCES, &ep->com.flags);
301 put_ep(&ep->com); 301 put_ep(&ep->com);
302} 302}
303 303
@@ -786,10 +786,12 @@ static void connect_request_upcall(struct iwch_ep *ep)
786 event.private_data_len = ep->plen; 786 event.private_data_len = ep->plen;
787 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 787 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
788 event.provider_data = ep; 788 event.provider_data = ep;
789 if (state_read(&ep->parent_ep->com) != DEAD) 789 if (state_read(&ep->parent_ep->com) != DEAD) {
790 get_ep(&ep->com);
790 ep->parent_ep->com.cm_id->event_handler( 791 ep->parent_ep->com.cm_id->event_handler(
791 ep->parent_ep->com.cm_id, 792 ep->parent_ep->com.cm_id,
792 &event); 793 &event);
794 }
793 put_ep(&ep->parent_ep->com); 795 put_ep(&ep->parent_ep->com);
794 ep->parent_ep = NULL; 796 ep->parent_ep = NULL;
795} 797}
@@ -1156,8 +1158,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1156 * We get 2 abort replies from the HW. The first one must 1158 * We get 2 abort replies from the HW. The first one must
1157 * be ignored except for scribbling that we need one more. 1159 * be ignored except for scribbling that we need one more.
1158 */ 1160 */
1159 if (!(ep->com.flags & ABORT_REQ_IN_PROGRESS)) { 1161 if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
1160 ep->com.flags |= ABORT_REQ_IN_PROGRESS;
1161 return CPL_RET_BUF_DONE; 1162 return CPL_RET_BUF_DONE;
1162 } 1163 }
1163 1164
@@ -1477,10 +1478,14 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1477 /* 1478 /*
1478 * We're gonna mark this puppy DEAD, but keep 1479 * We're gonna mark this puppy DEAD, but keep
1479 * the reference on it until the ULP accepts or 1480 * the reference on it until the ULP accepts or
1480 * rejects the CR. 1481 * rejects the CR. Also wake up anyone waiting
1482 * in rdma connection migration (see iwch_accept_cr()).
1481 */ 1483 */
1482 __state_set(&ep->com, CLOSING); 1484 __state_set(&ep->com, CLOSING);
1483 get_ep(&ep->com); 1485 ep->com.rpl_done = 1;
1486 ep->com.rpl_err = -ECONNRESET;
1487 PDBG("waking up ep %p\n", ep);
1488 wake_up(&ep->com.waitq);
1484 break; 1489 break;
1485 case MPA_REP_SENT: 1490 case MPA_REP_SENT:
1486 __state_set(&ep->com, CLOSING); 1491 __state_set(&ep->com, CLOSING);
@@ -1561,8 +1566,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1561 * We get 2 peer aborts from the HW. The first one must 1566 * We get 2 peer aborts from the HW. The first one must
1562 * be ignored except for scribbling that we need one more. 1567 * be ignored except for scribbling that we need one more.
1563 */ 1568 */
1564 if (!(ep->com.flags & PEER_ABORT_IN_PROGRESS)) { 1569 if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
1565 ep->com.flags |= PEER_ABORT_IN_PROGRESS;
1566 return CPL_RET_BUF_DONE; 1570 return CPL_RET_BUF_DONE;
1567 } 1571 }
1568 1572
@@ -1589,9 +1593,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1589 /* 1593 /*
1590 * We're gonna mark this puppy DEAD, but keep 1594 * We're gonna mark this puppy DEAD, but keep
1591 * the reference on it until the ULP accepts or 1595 * the reference on it until the ULP accepts or
1592 * rejects the CR. 1596 * rejects the CR. Also wake up anyone waiting
1597 * in rdma connection migration (see iwch_accept_cr()).
1593 */ 1598 */
1594 get_ep(&ep->com); 1599 ep->com.rpl_done = 1;
1600 ep->com.rpl_err = -ECONNRESET;
1601 PDBG("waking up ep %p\n", ep);
1602 wake_up(&ep->com.waitq);
1595 break; 1603 break;
1596 case MORIBUND: 1604 case MORIBUND:
1597 case CLOSING: 1605 case CLOSING:
@@ -1797,6 +1805,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
1797 err = send_mpa_reject(ep, pdata, pdata_len); 1805 err = send_mpa_reject(ep, pdata, pdata_len);
1798 err = iwch_ep_disconnect(ep, 0, GFP_KERNEL); 1806 err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
1799 } 1807 }
1808 put_ep(&ep->com);
1800 return 0; 1809 return 0;
1801} 1810}
1802 1811
@@ -1810,8 +1819,10 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1810 struct iwch_qp *qp = get_qhp(h, conn_param->qpn); 1819 struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
1811 1820
1812 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 1821 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1813 if (state_read(&ep->com) == DEAD) 1822 if (state_read(&ep->com) == DEAD) {
1814 return -ECONNRESET; 1823 err = -ECONNRESET;
1824 goto err;
1825 }
1815 1826
1816 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); 1827 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1817 BUG_ON(!qp); 1828 BUG_ON(!qp);
@@ -1819,15 +1830,14 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1819 if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || 1830 if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
1820 (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { 1831 (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
1821 abort_connection(ep, NULL, GFP_KERNEL); 1832 abort_connection(ep, NULL, GFP_KERNEL);
1822 return -EINVAL; 1833 err = -EINVAL;
1834 goto err;
1823 } 1835 }
1824 1836
1825 cm_id->add_ref(cm_id); 1837 cm_id->add_ref(cm_id);
1826 ep->com.cm_id = cm_id; 1838 ep->com.cm_id = cm_id;
1827 ep->com.qp = qp; 1839 ep->com.qp = qp;
1828 1840
1829 ep->com.rpl_done = 0;
1830 ep->com.rpl_err = 0;
1831 ep->ird = conn_param->ird; 1841 ep->ird = conn_param->ird;
1832 ep->ord = conn_param->ord; 1842 ep->ord = conn_param->ord;
1833 1843
@@ -1836,8 +1846,6 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1836 1846
1837 PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); 1847 PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
1838 1848
1839 get_ep(&ep->com);
1840
1841 /* bind QP to EP and move to RTS */ 1849 /* bind QP to EP and move to RTS */
1842 attrs.mpa_attr = ep->mpa_attr; 1850 attrs.mpa_attr = ep->mpa_attr;
1843 attrs.max_ird = ep->ird; 1851 attrs.max_ird = ep->ird;
@@ -1855,30 +1863,31 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1855 err = iwch_modify_qp(ep->com.qp->rhp, 1863 err = iwch_modify_qp(ep->com.qp->rhp,
1856 ep->com.qp, mask, &attrs, 1); 1864 ep->com.qp, mask, &attrs, 1);
1857 if (err) 1865 if (err)
1858 goto err; 1866 goto err1;
1859 1867
1860 /* if needed, wait for wr_ack */ 1868 /* if needed, wait for wr_ack */
1861 if (iwch_rqes_posted(qp)) { 1869 if (iwch_rqes_posted(qp)) {
1862 wait_event(ep->com.waitq, ep->com.rpl_done); 1870 wait_event(ep->com.waitq, ep->com.rpl_done);
1863 err = ep->com.rpl_err; 1871 err = ep->com.rpl_err;
1864 if (err) 1872 if (err)
1865 goto err; 1873 goto err1;
1866 } 1874 }
1867 1875
1868 err = send_mpa_reply(ep, conn_param->private_data, 1876 err = send_mpa_reply(ep, conn_param->private_data,
1869 conn_param->private_data_len); 1877 conn_param->private_data_len);
1870 if (err) 1878 if (err)
1871 goto err; 1879 goto err1;
1872 1880
1873 1881
1874 state_set(&ep->com, FPDU_MODE); 1882 state_set(&ep->com, FPDU_MODE);
1875 established_upcall(ep); 1883 established_upcall(ep);
1876 put_ep(&ep->com); 1884 put_ep(&ep->com);
1877 return 0; 1885 return 0;
1878err: 1886err1:
1879 ep->com.cm_id = NULL; 1887 ep->com.cm_id = NULL;
1880 ep->com.qp = NULL; 1888 ep->com.qp = NULL;
1881 cm_id->rem_ref(cm_id); 1889 cm_id->rem_ref(cm_id);
1890err:
1882 put_ep(&ep->com); 1891 put_ep(&ep->com);
1883 return err; 1892 return err;
1884} 1893}
@@ -2097,14 +2106,17 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
2097 ep->com.state = CLOSING; 2106 ep->com.state = CLOSING;
2098 start_ep_timer(ep); 2107 start_ep_timer(ep);
2099 } 2108 }
2109 set_bit(CLOSE_SENT, &ep->com.flags);
2100 break; 2110 break;
2101 case CLOSING: 2111 case CLOSING:
2102 close = 1; 2112 if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2103 if (abrupt) { 2113 close = 1;
2104 stop_ep_timer(ep); 2114 if (abrupt) {
2105 ep->com.state = ABORTING; 2115 stop_ep_timer(ep);
2106 } else 2116 ep->com.state = ABORTING;
2107 ep->com.state = MORIBUND; 2117 } else
2118 ep->com.state = MORIBUND;
2119 }
2108 break; 2120 break;
2109 case MORIBUND: 2121 case MORIBUND:
2110 case ABORTING: 2122 case ABORTING:
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 43c0aea7eadc..b9efadfffb4f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -145,9 +145,10 @@ enum iwch_ep_state {
145}; 145};
146 146
147enum iwch_ep_flags { 147enum iwch_ep_flags {
148 PEER_ABORT_IN_PROGRESS = (1 << 0), 148 PEER_ABORT_IN_PROGRESS = 0,
149 ABORT_REQ_IN_PROGRESS = (1 << 1), 149 ABORT_REQ_IN_PROGRESS = 1,
150 RELEASE_RESOURCES = (1 << 2), 150 RELEASE_RESOURCES = 2,
151 CLOSE_SENT = 3,
151}; 152};
152 153
153struct iwch_ep_common { 154struct iwch_ep_common {
@@ -162,7 +163,7 @@ struct iwch_ep_common {
162 wait_queue_head_t waitq; 163 wait_queue_head_t waitq;
163 int rpl_done; 164 int rpl_done;
164 int rpl_err; 165 int rpl_err;
165 u32 flags; 166 unsigned long flags;
166}; 167};
167 168
168struct iwch_listen_ep { 169struct iwch_listen_ep {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index ec49a5cbdebb..e1ec65ebb016 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -39,7 +39,7 @@
39#include "iwch.h" 39#include "iwch.h"
40#include "iwch_provider.h" 40#include "iwch_provider.h"
41 41
42static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) 42static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
43{ 43{
44 u32 mmid; 44 u32 mmid;
45 45
@@ -47,14 +47,15 @@ static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
47 mhp->attr.stag = stag; 47 mhp->attr.stag = stag;
48 mmid = stag >> 8; 48 mmid = stag >> 8;
49 mhp->ibmr.rkey = mhp->ibmr.lkey = stag; 49 mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
50 insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
51 PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); 50 PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
51 return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
52} 52}
53 53
54int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, 54int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
55 struct iwch_mr *mhp, int shift) 55 struct iwch_mr *mhp, int shift)
56{ 56{
57 u32 stag; 57 u32 stag;
58 int ret;
58 59
59 if (cxio_register_phys_mem(&rhp->rdev, 60 if (cxio_register_phys_mem(&rhp->rdev,
60 &stag, mhp->attr.pdid, 61 &stag, mhp->attr.pdid,
@@ -66,9 +67,11 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
66 mhp->attr.pbl_size, mhp->attr.pbl_addr)) 67 mhp->attr.pbl_size, mhp->attr.pbl_addr))
67 return -ENOMEM; 68 return -ENOMEM;
68 69
69 iwch_finish_mem_reg(mhp, stag); 70 ret = iwch_finish_mem_reg(mhp, stag);
70 71 if (ret)
71 return 0; 72 cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
73 mhp->attr.pbl_addr);
74 return ret;
72} 75}
73 76
74int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, 77int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
@@ -77,6 +80,7 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
77 int npages) 80 int npages)
78{ 81{
79 u32 stag; 82 u32 stag;
83 int ret;
80 84
81 /* We could support this... */ 85 /* We could support this... */
82 if (npages > mhp->attr.pbl_size) 86 if (npages > mhp->attr.pbl_size)
@@ -93,9 +97,12 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
93 mhp->attr.pbl_size, mhp->attr.pbl_addr)) 97 mhp->attr.pbl_size, mhp->attr.pbl_addr))
94 return -ENOMEM; 98 return -ENOMEM;
95 99
96 iwch_finish_mem_reg(mhp, stag); 100 ret = iwch_finish_mem_reg(mhp, stag);
101 if (ret)
102 cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
103 mhp->attr.pbl_addr);
97 104
98 return 0; 105 return ret;
99} 106}
100 107
101int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) 108int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index e2a63214008a..6895523779d0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -195,7 +195,11 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve
195 spin_lock_init(&chp->lock); 195 spin_lock_init(&chp->lock);
196 atomic_set(&chp->refcnt, 1); 196 atomic_set(&chp->refcnt, 1);
197 init_waitqueue_head(&chp->wait); 197 init_waitqueue_head(&chp->wait);
198 insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); 198 if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
199 cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
200 kfree(chp);
201 return ERR_PTR(-ENOMEM);
202 }
199 203
200 if (ucontext) { 204 if (ucontext) {
201 struct iwch_mm_entry *mm; 205 struct iwch_mm_entry *mm;
@@ -750,7 +754,11 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
750 mhp->attr.stag = stag; 754 mhp->attr.stag = stag;
751 mmid = (stag) >> 8; 755 mmid = (stag) >> 8;
752 mhp->ibmw.rkey = stag; 756 mhp->ibmw.rkey = stag;
753 insert_handle(rhp, &rhp->mmidr, mhp, mmid); 757 if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
758 cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
759 kfree(mhp);
760 return ERR_PTR(-ENOMEM);
761 }
754 PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); 762 PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
755 return &(mhp->ibmw); 763 return &(mhp->ibmw);
756} 764}
@@ -778,37 +786,43 @@ static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
778 struct iwch_mr *mhp; 786 struct iwch_mr *mhp;
779 u32 mmid; 787 u32 mmid;
780 u32 stag = 0; 788 u32 stag = 0;
781 int ret; 789 int ret = 0;
782 790
783 php = to_iwch_pd(pd); 791 php = to_iwch_pd(pd);
784 rhp = php->rhp; 792 rhp = php->rhp;
785 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); 793 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
786 if (!mhp) 794 if (!mhp)
787 return ERR_PTR(-ENOMEM); 795 goto err;
788 796
789 mhp->rhp = rhp; 797 mhp->rhp = rhp;
790 ret = iwch_alloc_pbl(mhp, pbl_depth); 798 ret = iwch_alloc_pbl(mhp, pbl_depth);
791 if (ret) { 799 if (ret)
792 kfree(mhp); 800 goto err1;
793 return ERR_PTR(ret);
794 }
795 mhp->attr.pbl_size = pbl_depth; 801 mhp->attr.pbl_size = pbl_depth;
796 ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, 802 ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
797 mhp->attr.pbl_size, mhp->attr.pbl_addr); 803 mhp->attr.pbl_size, mhp->attr.pbl_addr);
798 if (ret) { 804 if (ret)
799 iwch_free_pbl(mhp); 805 goto err2;
800 kfree(mhp);
801 return ERR_PTR(ret);
802 }
803 mhp->attr.pdid = php->pdid; 806 mhp->attr.pdid = php->pdid;
804 mhp->attr.type = TPT_NON_SHARED_MR; 807 mhp->attr.type = TPT_NON_SHARED_MR;
805 mhp->attr.stag = stag; 808 mhp->attr.stag = stag;
806 mhp->attr.state = 1; 809 mhp->attr.state = 1;
807 mmid = (stag) >> 8; 810 mmid = (stag) >> 8;
808 mhp->ibmr.rkey = mhp->ibmr.lkey = stag; 811 mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
809 insert_handle(rhp, &rhp->mmidr, mhp, mmid); 812 if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
813 goto err3;
814
810 PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); 815 PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
811 return &(mhp->ibmr); 816 return &(mhp->ibmr);
817err3:
818 cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
819 mhp->attr.pbl_addr);
820err2:
821 iwch_free_pbl(mhp);
822err1:
823 kfree(mhp);
824err:
825 return ERR_PTR(ret);
812} 826}
813 827
814static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( 828static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
@@ -961,7 +975,13 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
961 spin_lock_init(&qhp->lock); 975 spin_lock_init(&qhp->lock);
962 init_waitqueue_head(&qhp->wait); 976 init_waitqueue_head(&qhp->wait);
963 atomic_set(&qhp->refcnt, 1); 977 atomic_set(&qhp->refcnt, 1);
964 insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid); 978
979 if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) {
980 cxio_destroy_qp(&rhp->rdev, &qhp->wq,
981 ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
982 kfree(qhp);
983 return ERR_PTR(-ENOMEM);
984 }
965 985
966 if (udata) { 986 if (udata) {
967 987
@@ -1418,6 +1438,7 @@ int iwch_register_device(struct iwch_dev *dev)
1418bail2: 1438bail2:
1419 ib_unregister_device(&dev->ibdev); 1439 ib_unregister_device(&dev->ibdev);
1420bail1: 1440bail1:
1441 kfree(dev->ibdev.iwcm);
1421 return ret; 1442 return ret;
1422} 1443}
1423 1444
@@ -1430,5 +1451,6 @@ void iwch_unregister_device(struct iwch_dev *dev)
1430 device_remove_file(&dev->ibdev.dev, 1451 device_remove_file(&dev->ibdev.dev,
1431 iwch_class_attributes[i]); 1452 iwch_class_attributes[i]);
1432 ib_unregister_device(&dev->ibdev); 1453 ib_unregister_device(&dev->ibdev);
1454 kfree(dev->ibdev.iwcm);
1433 return; 1455 return;
1434} 1456}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 27bbdc8e773a..6e8653471941 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -889,6 +889,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
889 init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); 889 init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
890 init_attr.rqe_count = iwch_rqes_posted(qhp); 890 init_attr.rqe_count = iwch_rqes_posted(qhp);
891 init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; 891 init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
892 init_attr.chan = qhp->ep->l2t->smt_idx;
892 if (peer2peer) { 893 if (peer2peer) {
893 init_attr.rtr_type = RTR_READ; 894 init_attr.rtr_type = RTR_READ;
894 if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) 895 if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index fab18a2c74a8..5b635aa5947e 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
52#include "ehca_tools.h" 52#include "ehca_tools.h"
53#include "hcp_if.h" 53#include "hcp_if.h"
54 54
55#define HCAD_VERSION "0028" 55#define HCAD_VERSION "0029"
56 56
57MODULE_LICENSE("Dual BSD/GPL"); 57MODULE_LICENSE("Dual BSD/GPL");
58MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); 58MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
@@ -64,7 +64,7 @@ static int ehca_hw_level = 0;
64static int ehca_poll_all_eqs = 1; 64static int ehca_poll_all_eqs = 1;
65 65
66int ehca_debug_level = 0; 66int ehca_debug_level = 0;
67int ehca_nr_ports = 2; 67int ehca_nr_ports = -1;
68int ehca_use_hp_mr = 0; 68int ehca_use_hp_mr = 0;
69int ehca_port_act_time = 30; 69int ehca_port_act_time = 30;
70int ehca_static_rate = -1; 70int ehca_static_rate = -1;
@@ -95,8 +95,8 @@ MODULE_PARM_DESC(hw_level,
95 "Hardware level (0: autosensing (default), " 95 "Hardware level (0: autosensing (default), "
96 "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); 96 "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
97MODULE_PARM_DESC(nr_ports, 97MODULE_PARM_DESC(nr_ports,
98 "number of connected ports (-1: autodetect, 1: port one only, " 98 "number of connected ports (-1: autodetect (default), "
99 "2: two ports (default)"); 99 "1: port one only, 2: two ports)");
100MODULE_PARM_DESC(use_hp_mr, 100MODULE_PARM_DESC(use_hp_mr,
101 "Use high performance MRs (default: no)"); 101 "Use high performance MRs (default: no)");
102MODULE_PARM_DESC(port_act_time, 102MODULE_PARM_DESC(port_act_time,
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 5a3d96f84c79..8fd88cd828fd 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -786,7 +786,11 @@ repoll:
786 wc->slid = cqe->rlid; 786 wc->slid = cqe->rlid;
787 wc->dlid_path_bits = cqe->dlid; 787 wc->dlid_path_bits = cqe->dlid;
788 wc->src_qp = cqe->remote_qp_number; 788 wc->src_qp = cqe->remote_qp_number;
789 wc->wc_flags = cqe->w_completion_flags; 789 /*
790 * HW has "Immed data present" and "GRH present" in bits 6 and 5.
791 * SW defines those in bits 1 and 0, so we can just shift and mask.
792 */
793 wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
790 wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); 794 wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
791 wc->sl = cqe->service_level; 795 wc->sl = cqe->service_level;
792 796
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
index c568b28f4e20..8c1213f8916a 100644
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -125,14 +125,30 @@ struct ib_perf {
125 u8 data[192]; 125 u8 data[192];
126} __attribute__ ((packed)); 126} __attribute__ ((packed));
127 127
128/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
129struct tcslfl {
130 u32 tc:8;
131 u32 sl:4;
132 u32 fl:20;
133} __attribute__ ((packed));
134
135/* IP Version/TC/FL packed into 32 bits, as in GRH */
136struct vertcfl {
137 u32 ver:4;
138 u32 tc:8;
139 u32 fl:20;
140} __attribute__ ((packed));
128 141
129static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, 142static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
143 struct ib_wc *in_wc, struct ib_grh *in_grh,
130 struct ib_mad *in_mad, struct ib_mad *out_mad) 144 struct ib_mad *in_mad, struct ib_mad *out_mad)
131{ 145{
132 struct ib_perf *in_perf = (struct ib_perf *)in_mad; 146 struct ib_perf *in_perf = (struct ib_perf *)in_mad;
133 struct ib_perf *out_perf = (struct ib_perf *)out_mad; 147 struct ib_perf *out_perf = (struct ib_perf *)out_mad;
134 struct ib_class_port_info *poi = 148 struct ib_class_port_info *poi =
135 (struct ib_class_port_info *)out_perf->data; 149 (struct ib_class_port_info *)out_perf->data;
150 struct tcslfl *tcslfl =
151 (struct tcslfl *)&poi->redirect_tcslfl;
136 struct ehca_shca *shca = 152 struct ehca_shca *shca =
137 container_of(ibdev, struct ehca_shca, ib_device); 153 container_of(ibdev, struct ehca_shca, ib_device);
138 struct ehca_sport *sport = &shca->sport[port_num - 1]; 154 struct ehca_sport *sport = &shca->sport[port_num - 1];
@@ -158,10 +174,29 @@ static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
158 poi->base_version = 1; 174 poi->base_version = 1;
159 poi->class_version = 1; 175 poi->class_version = 1;
160 poi->resp_time_value = 18; 176 poi->resp_time_value = 18;
161 poi->redirect_lid = sport->saved_attr.lid; 177
162 poi->redirect_qp = sport->pma_qp_nr; 178 /* copy local routing information from WC where applicable */
179 tcslfl->sl = in_wc->sl;
180 poi->redirect_lid =
181 sport->saved_attr.lid | in_wc->dlid_path_bits;
182 poi->redirect_qp = sport->pma_qp_nr;
163 poi->redirect_qkey = IB_QP1_QKEY; 183 poi->redirect_qkey = IB_QP1_QKEY;
164 poi->redirect_pkey = IB_DEFAULT_PKEY_FULL; 184
185 ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
186 &poi->redirect_pkey);
187
188 /* if request was globally routed, copy route info */
189 if (in_grh) {
190 struct vertcfl *vertcfl =
191 (struct vertcfl *)&in_grh->version_tclass_flow;
192 memcpy(poi->redirect_gid, in_grh->dgid.raw,
193 sizeof(poi->redirect_gid));
194 tcslfl->tc = vertcfl->tc;
195 tcslfl->fl = vertcfl->fl;
196 } else
197 /* else only fill in default GID */
198 ehca_query_gid(ibdev, port_num, 0,
199 (union ib_gid *)&poi->redirect_gid);
165 200
166 ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", 201 ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
167 sport->saved_attr.lid, sport->pma_qp_nr); 202 sport->saved_attr.lid, sport->pma_qp_nr);
@@ -183,8 +218,7 @@ perf_reply:
183 218
184int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 219int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
185 struct ib_wc *in_wc, struct ib_grh *in_grh, 220 struct ib_wc *in_wc, struct ib_grh *in_grh,
186 struct ib_mad *in_mad, 221 struct ib_mad *in_mad, struct ib_mad *out_mad)
187 struct ib_mad *out_mad)
188{ 222{
189 int ret; 223 int ret;
190 224
@@ -196,7 +230,8 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
196 return IB_MAD_RESULT_SUCCESS; 230 return IB_MAD_RESULT_SUCCESS;
197 231
198 ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); 232 ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
199 ret = ehca_process_perf(ibdev, port_num, in_mad, out_mad); 233 ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
234 in_mad, out_mad);
200 235
201 return ret; 236 return ret;
202} 237}
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 23173982b32c..38a287006612 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1616,7 +1616,7 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
1616 pd->port_cnt = 1; 1616 pd->port_cnt = 1;
1617 port_fp(fp) = pd; 1617 port_fp(fp) = pd;
1618 pd->port_pid = get_pid(task_pid(current)); 1618 pd->port_pid = get_pid(task_pid(current));
1619 strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); 1619 strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
1620 ipath_stats.sps_ports++; 1620 ipath_stats.sps_ports++;
1621 ret = 0; 1621 ret = 0;
1622 } else 1622 } else
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 16a702d46018..ceb98ee78666 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -60,7 +60,7 @@ static int recv_subn_get_nodedescription(struct ib_smp *smp,
60 if (smp->attr_mod) 60 if (smp->attr_mod)
61 smp->status |= IB_SMP_INVALID_FIELD; 61 smp->status |= IB_SMP_INVALID_FIELD;
62 62
63 strncpy(smp->data, ibdev->node_desc, sizeof(smp->data)); 63 memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
64 64
65 return reply(smp); 65 return reply(smp);
66} 66}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index ae3d7590346e..3cb3f47a10b8 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -342,6 +342,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
342 struct mlx4_ib_alloc_ucontext_resp resp; 342 struct mlx4_ib_alloc_ucontext_resp resp;
343 int err; 343 int err;
344 344
345 if (!dev->ib_active)
346 return ERR_PTR(-EAGAIN);
347
345 resp.qp_tab_size = dev->dev->caps.num_qps; 348 resp.qp_tab_size = dev->dev->caps.num_qps;
346 resp.bf_reg_size = dev->dev->caps.bf_reg_size; 349 resp.bf_reg_size = dev->dev->caps.bf_reg_size;
347 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; 350 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
@@ -540,15 +543,11 @@ static struct device_attribute *mlx4_class_attributes[] = {
540 543
541static void *mlx4_ib_add(struct mlx4_dev *dev) 544static void *mlx4_ib_add(struct mlx4_dev *dev)
542{ 545{
543 static int mlx4_ib_version_printed;
544 struct mlx4_ib_dev *ibdev; 546 struct mlx4_ib_dev *ibdev;
545 int num_ports = 0; 547 int num_ports = 0;
546 int i; 548 int i;
547 549
548 if (!mlx4_ib_version_printed) { 550 printk_once(KERN_INFO "%s", mlx4_ib_version);
549 printk(KERN_INFO "%s", mlx4_ib_version);
550 ++mlx4_ib_version_printed;
551 }
552 551
553 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) 552 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
554 num_ports++; 553 num_ports++;
@@ -673,6 +672,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
673 goto err_reg; 672 goto err_reg;
674 } 673 }
675 674
675 ibdev->ib_active = true;
676
676 return ibdev; 677 return ibdev;
677 678
678err_reg: 679err_reg:
@@ -729,6 +730,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
729 break; 730 break;
730 731
731 case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: 732 case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
733 ibdev->ib_active = false;
732 ibev.event = IB_EVENT_DEVICE_FATAL; 734 ibev.event = IB_EVENT_DEVICE_FATAL;
733 break; 735 break;
734 736
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 8a7dd6795fa0..3486d7675e56 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -175,6 +175,7 @@ struct mlx4_ib_dev {
175 spinlock_t sm_lock; 175 spinlock_t sm_lock;
176 176
177 struct mutex cap_mask_mutex; 177 struct mutex cap_mask_mutex;
178 bool ib_active;
178}; 179};
179 180
180static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) 181static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index c4a02648c8af..219b10397b4d 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -615,10 +615,12 @@ static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
615} 615}
616 616
617static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) 617static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
618 __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
618{ 619{
619 if (send_cq == recv_cq) 620 if (send_cq == recv_cq) {
620 spin_lock_irq(&send_cq->lock); 621 spin_lock_irq(&send_cq->lock);
621 else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { 622 __acquire(&recv_cq->lock);
623 } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
622 spin_lock_irq(&send_cq->lock); 624 spin_lock_irq(&send_cq->lock);
623 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); 625 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
624 } else { 626 } else {
@@ -628,10 +630,12 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv
628} 630}
629 631
630static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) 632static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
633 __releases(&send_cq->lock) __releases(&recv_cq->lock)
631{ 634{
632 if (send_cq == recv_cq) 635 if (send_cq == recv_cq) {
636 __release(&recv_cq->lock);
633 spin_unlock_irq(&send_cq->lock); 637 spin_unlock_irq(&send_cq->lock);
634 else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { 638 } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
635 spin_unlock(&recv_cq->lock); 639 spin_unlock(&recv_cq->lock);
636 spin_unlock_irq(&send_cq->lock); 640 spin_unlock_irq(&send_cq->lock);
637 } else { 641 } else {
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index 65ad359fdf16..056b2a4c6970 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -88,6 +88,7 @@ static void handle_catas(struct mthca_dev *dev)
88 event.device = &dev->ib_dev; 88 event.device = &dev->ib_dev;
89 event.event = IB_EVENT_DEVICE_FATAL; 89 event.event = IB_EVENT_DEVICE_FATAL;
90 event.element.port_num = 0; 90 event.element.port_num = 0;
91 dev->active = false;
91 92
92 ib_dispatch_event(&event); 93 ib_dispatch_event(&event);
93 94
diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h
index 75671f75cac4..155bc66395be 100644
--- a/drivers/infiniband/hw/mthca/mthca_config_reg.h
+++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h
@@ -34,8 +34,6 @@
34#ifndef MTHCA_CONFIG_REG_H 34#ifndef MTHCA_CONFIG_REG_H
35#define MTHCA_CONFIG_REG_H 35#define MTHCA_CONFIG_REG_H
36 36
37#include <asm/page.h>
38
39#define MTHCA_HCR_BASE 0x80680 37#define MTHCA_HCR_BASE 0x80680
40#define MTHCA_HCR_SIZE 0x0001c 38#define MTHCA_HCR_SIZE 0x0001c
41#define MTHCA_ECR_BASE 0x80700 39#define MTHCA_ECR_BASE 0x80700
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 9ef611f6dd36..7e6a6d64ad4e 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -357,6 +357,7 @@ struct mthca_dev {
357 struct ib_ah *sm_ah[MTHCA_MAX_PORTS]; 357 struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
358 spinlock_t sm_lock; 358 spinlock_t sm_lock;
359 u8 rate[MTHCA_MAX_PORTS]; 359 u8 rate[MTHCA_MAX_PORTS];
360 bool active;
360}; 361};
361 362
362#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG 363#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 90e4e450a120..8c31fa36e95e 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -829,27 +829,34 @@ int mthca_init_eq_table(struct mthca_dev *dev)
829 829
830 if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { 830 if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
831 static const char *eq_name[] = { 831 static const char *eq_name[] = {
832 [MTHCA_EQ_COMP] = DRV_NAME " (comp)", 832 [MTHCA_EQ_COMP] = DRV_NAME "-comp",
833 [MTHCA_EQ_ASYNC] = DRV_NAME " (async)", 833 [MTHCA_EQ_ASYNC] = DRV_NAME "-async",
834 [MTHCA_EQ_CMD] = DRV_NAME " (cmd)" 834 [MTHCA_EQ_CMD] = DRV_NAME "-cmd"
835 }; 835 };
836 836
837 for (i = 0; i < MTHCA_NUM_EQ; ++i) { 837 for (i = 0; i < MTHCA_NUM_EQ; ++i) {
838 snprintf(dev->eq_table.eq[i].irq_name,
839 IB_DEVICE_NAME_MAX,
840 "%s@pci:%s", eq_name[i],
841 pci_name(dev->pdev));
838 err = request_irq(dev->eq_table.eq[i].msi_x_vector, 842 err = request_irq(dev->eq_table.eq[i].msi_x_vector,
839 mthca_is_memfree(dev) ? 843 mthca_is_memfree(dev) ?
840 mthca_arbel_msi_x_interrupt : 844 mthca_arbel_msi_x_interrupt :
841 mthca_tavor_msi_x_interrupt, 845 mthca_tavor_msi_x_interrupt,
842 0, eq_name[i], dev->eq_table.eq + i); 846 0, dev->eq_table.eq[i].irq_name,
847 dev->eq_table.eq + i);
843 if (err) 848 if (err)
844 goto err_out_cmd; 849 goto err_out_cmd;
845 dev->eq_table.eq[i].have_irq = 1; 850 dev->eq_table.eq[i].have_irq = 1;
846 } 851 }
847 } else { 852 } else {
853 snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX,
854 DRV_NAME "@pci:%s", pci_name(dev->pdev));
848 err = request_irq(dev->pdev->irq, 855 err = request_irq(dev->pdev->irq,
849 mthca_is_memfree(dev) ? 856 mthca_is_memfree(dev) ?
850 mthca_arbel_interrupt : 857 mthca_arbel_interrupt :
851 mthca_tavor_interrupt, 858 mthca_tavor_interrupt,
852 IRQF_SHARED, DRV_NAME, dev); 859 IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev);
853 if (err) 860 if (err)
854 goto err_out_cmd; 861 goto err_out_cmd;
855 dev->eq_table.have_irq = 1; 862 dev->eq_table.have_irq = 1;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 13da9f1d24c0..b01b28987874 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1116,6 +1116,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
1116 pci_set_drvdata(pdev, mdev); 1116 pci_set_drvdata(pdev, mdev);
1117 mdev->hca_type = hca_type; 1117 mdev->hca_type = hca_type;
1118 1118
1119 mdev->active = true;
1120
1119 return 0; 1121 return 0;
1120 1122
1121err_unregister: 1123err_unregister:
@@ -1215,15 +1217,11 @@ int __mthca_restart_one(struct pci_dev *pdev)
1215static int __devinit mthca_init_one(struct pci_dev *pdev, 1217static int __devinit mthca_init_one(struct pci_dev *pdev,
1216 const struct pci_device_id *id) 1218 const struct pci_device_id *id)
1217{ 1219{
1218 static int mthca_version_printed = 0;
1219 int ret; 1220 int ret;
1220 1221
1221 mutex_lock(&mthca_device_mutex); 1222 mutex_lock(&mthca_device_mutex);
1222 1223
1223 if (!mthca_version_printed) { 1224 printk_once(KERN_INFO "%s", mthca_version);
1224 printk(KERN_INFO "%s", mthca_version);
1225 ++mthca_version_printed;
1226 }
1227 1225
1228 if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) { 1226 if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
1229 printk(KERN_ERR PFX "%s has invalid driver data %lx\n", 1227 printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 87ad889e367b..bcf7a4014820 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -334,6 +334,9 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
334 struct mthca_ucontext *context; 334 struct mthca_ucontext *context;
335 int err; 335 int err;
336 336
337 if (!(to_mdev(ibdev)->active))
338 return ERR_PTR(-EAGAIN);
339
337 memset(&uresp, 0, sizeof uresp); 340 memset(&uresp, 0, sizeof uresp);
338 341
339 uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; 342 uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index c621f8794b88..90f4c4d2e983 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -113,6 +113,7 @@ struct mthca_eq {
113 int nent; 113 int nent;
114 struct mthca_buf_list *page_list; 114 struct mthca_buf_list *page_list;
115 struct mthca_mr mr; 115 struct mthca_mr mr;
116 char irq_name[IB_DEVICE_NAME_MAX];
116}; 117};
117 118
118struct mthca_av; 119struct mthca_av;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index f5081bfde6db..c10576fa60c1 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1319,10 +1319,12 @@ int mthca_alloc_qp(struct mthca_dev *dev,
1319} 1319}
1320 1320
1321static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) 1321static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
1322 __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
1322{ 1323{
1323 if (send_cq == recv_cq) 1324 if (send_cq == recv_cq) {
1324 spin_lock_irq(&send_cq->lock); 1325 spin_lock_irq(&send_cq->lock);
1325 else if (send_cq->cqn < recv_cq->cqn) { 1326 __acquire(&recv_cq->lock);
1327 } else if (send_cq->cqn < recv_cq->cqn) {
1326 spin_lock_irq(&send_cq->lock); 1328 spin_lock_irq(&send_cq->lock);
1327 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); 1329 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
1328 } else { 1330 } else {
@@ -1332,10 +1334,12 @@ static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
1332} 1334}
1333 1335
1334static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) 1336static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
1337 __releases(&send_cq->lock) __releases(&recv_cq->lock)
1335{ 1338{
1336 if (send_cq == recv_cq) 1339 if (send_cq == recv_cq) {
1340 __release(&recv_cq->lock);
1337 spin_unlock_irq(&send_cq->lock); 1341 spin_unlock_irq(&send_cq->lock);
1338 else if (send_cq->cqn < recv_cq->cqn) { 1342 } else if (send_cq->cqn < recv_cq->cqn) {
1339 spin_unlock(&recv_cq->lock); 1343 spin_unlock(&recv_cq->lock);
1340 spin_unlock_irq(&send_cq->lock); 1344 spin_unlock_irq(&send_cq->lock);
1341 } else { 1345 } else {
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index acb6817f6060..2a13a163d337 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -30,7 +30,6 @@
30 * SOFTWARE. 30 * SOFTWARE.
31 */ 31 */
32 32
33#include <linux/init.h>
34#include <linux/errno.h> 33#include <linux/errno.h>
35#include <linux/pci.h> 34#include <linux/pci.h>
36#include <linux/delay.h> 35#include <linux/delay.h>
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index bf1720f7f35f..bcc6abc4faff 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -523,7 +523,7 @@ int nes_cm_disconn(struct nes_qp *);
523void nes_cm_disconn_worker(void *); 523void nes_cm_disconn_worker(void *);
524 524
525/* nes_verbs.c */ 525/* nes_verbs.c */
526int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32); 526int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32);
527int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *); 527int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
528struct nes_ib_device *nes_init_ofa_device(struct net_device *); 528struct nes_ib_device *nes_init_ofa_device(struct net_device *);
529void nes_destroy_ofa_device(struct nes_ib_device *); 529void nes_destroy_ofa_device(struct nes_ib_device *);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 114b802771ad..73473db19863 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -2450,19 +2450,16 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
2450 */ 2450 */
2451int nes_cm_disconn(struct nes_qp *nesqp) 2451int nes_cm_disconn(struct nes_qp *nesqp)
2452{ 2452{
2453 unsigned long flags; 2453 struct disconn_work *work;
2454
2455 spin_lock_irqsave(&nesqp->lock, flags);
2456 if (nesqp->disconn_pending == 0) {
2457 nesqp->disconn_pending++;
2458 spin_unlock_irqrestore(&nesqp->lock, flags);
2459 /* init our disconnect work element, to */
2460 INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker);
2461 2454
2462 queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work); 2455 work = kzalloc(sizeof *work, GFP_ATOMIC);
2463 } else 2456 if (!work)
2464 spin_unlock_irqrestore(&nesqp->lock, flags); 2457 return -ENOMEM; /* Timer will clean up */
2465 2458
2459 nes_add_ref(&nesqp->ibqp);
2460 work->nesqp = nesqp;
2461 INIT_WORK(&work->work, nes_disconnect_worker);
2462 queue_work(g_cm_core->disconn_wq, &work->work);
2466 return 0; 2463 return 0;
2467} 2464}
2468 2465
@@ -2472,11 +2469,14 @@ int nes_cm_disconn(struct nes_qp *nesqp)
2472 */ 2469 */
2473static void nes_disconnect_worker(struct work_struct *work) 2470static void nes_disconnect_worker(struct work_struct *work)
2474{ 2471{
2475 struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work); 2472 struct disconn_work *dwork = container_of(work, struct disconn_work, work);
2473 struct nes_qp *nesqp = dwork->nesqp;
2476 2474
2475 kfree(dwork);
2477 nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n", 2476 nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
2478 nesqp->last_aeq, nesqp->hwqp.qp_id); 2477 nesqp->last_aeq, nesqp->hwqp.qp_id);
2479 nes_cm_disconn_true(nesqp); 2478 nes_cm_disconn_true(nesqp);
2479 nes_rem_ref(&nesqp->ibqp);
2480} 2480}
2481 2481
2482 2482
@@ -2493,7 +2493,12 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
2493 u16 last_ae; 2493 u16 last_ae;
2494 u8 original_hw_tcp_state; 2494 u8 original_hw_tcp_state;
2495 u8 original_ibqp_state; 2495 u8 original_ibqp_state;
2496 u8 issued_disconnect_reset = 0; 2496 enum iw_cm_event_type disconn_status = IW_CM_EVENT_STATUS_OK;
2497 int issue_disconn = 0;
2498 int issue_close = 0;
2499 int issue_flush = 0;
2500 u32 flush_q = NES_CQP_FLUSH_RQ;
2501 struct ib_event ibevent;
2497 2502
2498 if (!nesqp) { 2503 if (!nesqp) {
2499 nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n"); 2504 nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n");
@@ -2517,24 +2522,55 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
2517 original_ibqp_state = nesqp->ibqp_state; 2522 original_ibqp_state = nesqp->ibqp_state;
2518 last_ae = nesqp->last_aeq; 2523 last_ae = nesqp->last_aeq;
2519 2524
2525 if (nesqp->term_flags) {
2526 issue_disconn = 1;
2527 issue_close = 1;
2528 nesqp->cm_id = NULL;
2529 if (nesqp->flush_issued == 0) {
2530 nesqp->flush_issued = 1;
2531 issue_flush = 1;
2532 }
2533 } else if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
2534 ((original_ibqp_state == IB_QPS_RTS) &&
2535 (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
2536 issue_disconn = 1;
2537 if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET)
2538 disconn_status = IW_CM_EVENT_STATUS_RESET;
2539 }
2540
2541 if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
2542 (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
2543 (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
2544 (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
2545 issue_close = 1;
2546 nesqp->cm_id = NULL;
2547 if (nesqp->flush_issued == 0) {
2548 nesqp->flush_issued = 1;
2549 issue_flush = 1;
2550 }
2551 }
2552
2553 spin_unlock_irqrestore(&nesqp->lock, flags);
2520 2554
2521 nes_debug(NES_DBG_CM, "set ibqp_state=%u\n", nesqp->ibqp_state); 2555 if ((issue_flush) && (nesqp->destroyed == 0)) {
2556 /* Flush the queue(s) */
2557 if (nesqp->hw_iwarp_state >= NES_AEQE_IWARP_STATE_TERMINATE)
2558 flush_q |= NES_CQP_FLUSH_SQ;
2559 flush_wqes(nesvnic->nesdev, nesqp, flush_q, 1);
2522 2560
2523 if ((nesqp->cm_id) && (cm_id->event_handler)) { 2561 if (nesqp->term_flags) {
2524 if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || 2562 ibevent.device = nesqp->ibqp.device;
2525 ((original_ibqp_state == IB_QPS_RTS) && 2563 ibevent.event = nesqp->terminate_eventtype;
2526 (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { 2564 ibevent.element.qp = &nesqp->ibqp;
2565 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
2566 }
2567 }
2568
2569 if ((cm_id) && (cm_id->event_handler)) {
2570 if (issue_disconn) {
2527 atomic_inc(&cm_disconnects); 2571 atomic_inc(&cm_disconnects);
2528 cm_event.event = IW_CM_EVENT_DISCONNECT; 2572 cm_event.event = IW_CM_EVENT_DISCONNECT;
2529 if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) { 2573 cm_event.status = disconn_status;
2530 cm_event.status = IW_CM_EVENT_STATUS_RESET;
2531 nes_debug(NES_DBG_CM, "Generating a CM "
2532 "Disconnect Event (status reset) for "
2533 "QP%u, cm_id = %p. \n",
2534 nesqp->hwqp.qp_id, cm_id);
2535 } else
2536 cm_event.status = IW_CM_EVENT_STATUS_OK;
2537
2538 cm_event.local_addr = cm_id->local_addr; 2574 cm_event.local_addr = cm_id->local_addr;
2539 cm_event.remote_addr = cm_id->remote_addr; 2575 cm_event.remote_addr = cm_id->remote_addr;
2540 cm_event.private_data = NULL; 2576 cm_event.private_data = NULL;
@@ -2547,29 +2583,14 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
2547 nesqp->hwqp.sq_tail, cm_id, 2583 nesqp->hwqp.sq_tail, cm_id,
2548 atomic_read(&nesqp->refcount)); 2584 atomic_read(&nesqp->refcount));
2549 2585
2550 spin_unlock_irqrestore(&nesqp->lock, flags);
2551 ret = cm_id->event_handler(cm_id, &cm_event); 2586 ret = cm_id->event_handler(cm_id, &cm_event);
2552 if (ret) 2587 if (ret)
2553 nes_debug(NES_DBG_CM, "OFA CM event_handler " 2588 nes_debug(NES_DBG_CM, "OFA CM event_handler "
2554 "returned, ret=%d\n", ret); 2589 "returned, ret=%d\n", ret);
2555 spin_lock_irqsave(&nesqp->lock, flags);
2556 } 2590 }
2557 2591
2558 nesqp->disconn_pending = 0; 2592 if (issue_close) {
2559 /* There might have been another AE while the lock was released */
2560 original_hw_tcp_state = nesqp->hw_tcp_state;
2561 original_ibqp_state = nesqp->ibqp_state;
2562 last_ae = nesqp->last_aeq;
2563
2564 if ((issued_disconnect_reset == 0) && (nesqp->cm_id) &&
2565 ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
2566 (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
2567 (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
2568 (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
2569 atomic_inc(&cm_closes); 2593 atomic_inc(&cm_closes);
2570 nesqp->cm_id = NULL;
2571 nesqp->in_disconnect = 0;
2572 spin_unlock_irqrestore(&nesqp->lock, flags);
2573 nes_disconnect(nesqp, 1); 2594 nes_disconnect(nesqp, 1);
2574 2595
2575 cm_id->provider_data = nesqp; 2596 cm_id->provider_data = nesqp;
@@ -2588,28 +2609,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
2588 } 2609 }
2589 2610
2590 cm_id->rem_ref(cm_id); 2611 cm_id->rem_ref(cm_id);
2591
2592 spin_lock_irqsave(&nesqp->lock, flags);
2593 if (nesqp->flush_issued == 0) {
2594 nesqp->flush_issued = 1;
2595 spin_unlock_irqrestore(&nesqp->lock, flags);
2596 flush_wqes(nesvnic->nesdev, nesqp,
2597 NES_CQP_FLUSH_RQ, 1);
2598 } else
2599 spin_unlock_irqrestore(&nesqp->lock, flags);
2600 } else {
2601 cm_id = nesqp->cm_id;
2602 spin_unlock_irqrestore(&nesqp->lock, flags);
2603 /* check to see if the inbound reset beat the outbound reset */
2604 if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) {
2605 nes_debug(NES_DBG_CM, "QP%u: Decing refcount "
2606 "due to inbound reset beating the "
2607 "outbound reset.\n", nesqp->hwqp.qp_id);
2608 }
2609 } 2612 }
2610 } else {
2611 nesqp->disconn_pending = 0;
2612 spin_unlock_irqrestore(&nesqp->lock, flags);
2613 } 2613 }
2614 2614
2615 return 0; 2615 return 0;
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 8b7e7c0e496e..90e8e4d8a5ce 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -410,8 +410,6 @@ struct nes_cm_ops {
410int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *, 410int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *,
411 enum nes_timer_type, int, int); 411 enum nes_timer_type, int, int);
412 412
413int nes_cm_disconn(struct nes_qp *);
414
415int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *); 413int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
416int nes_reject(struct iw_cm_id *, const void *, u8); 414int nes_reject(struct iw_cm_id *, const void *, u8);
417int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *); 415int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 4a84d02ece06..63a1a8e1e8a3 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -74,6 +74,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
74static void process_critical_error(struct nes_device *nesdev); 74static void process_critical_error(struct nes_device *nesdev);
75static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); 75static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
76static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); 76static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
77static void nes_terminate_timeout(unsigned long context);
78static void nes_terminate_start_timer(struct nes_qp *nesqp);
77 79
78#ifdef CONFIG_INFINIBAND_NES_DEBUG 80#ifdef CONFIG_INFINIBAND_NES_DEBUG
79static unsigned char *nes_iwarp_state_str[] = { 81static unsigned char *nes_iwarp_state_str[] = {
@@ -2903,6 +2905,417 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
2903} 2905}
2904 2906
2905 2907
2908static u8 *locate_mpa(u8 *pkt, u32 aeq_info)
2909{
2910 u16 pkt_len;
2911
2912 if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) {
2913 /* skip over ethernet header */
2914 pkt_len = be16_to_cpu(*(u16 *)(pkt + ETH_HLEN - 2));
2915 pkt += ETH_HLEN;
2916
2917 /* Skip over IP and TCP headers */
2918 pkt += 4 * (pkt[0] & 0x0f);
2919 pkt += 4 * ((pkt[12] >> 4) & 0x0f);
2920 }
2921 return pkt;
2922}
2923
2924/* Determine if incoming error pkt is rdma layer */
2925static u32 iwarp_opcode(struct nes_qp *nesqp, u32 aeq_info)
2926{
2927 u8 *pkt;
2928 u16 *mpa;
2929 u32 opcode = 0xffffffff;
2930
2931 if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
2932 pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
2933 mpa = (u16 *)locate_mpa(pkt, aeq_info);
2934 opcode = be16_to_cpu(mpa[1]) & 0xf;
2935 }
2936
2937 return opcode;
2938}
2939
2940/* Build iWARP terminate header */
2941static int nes_bld_terminate_hdr(struct nes_qp *nesqp, u16 async_event_id, u32 aeq_info)
2942{
2943 u8 *pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
2944 u16 ddp_seg_len;
2945 int copy_len = 0;
2946 u8 is_tagged = 0;
2947 u8 flush_code = 0;
2948 struct nes_terminate_hdr *termhdr;
2949
2950 termhdr = (struct nes_terminate_hdr *)nesqp->hwqp.q2_vbase;
2951 memset(termhdr, 0, 64);
2952
2953 if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
2954
2955 /* Use data from offending packet to fill in ddp & rdma hdrs */
2956 pkt = locate_mpa(pkt, aeq_info);
2957 ddp_seg_len = be16_to_cpu(*(u16 *)pkt);
2958 if (ddp_seg_len) {
2959 copy_len = 2;
2960 termhdr->hdrct = DDP_LEN_FLAG;
2961 if (pkt[2] & 0x80) {
2962 is_tagged = 1;
2963 if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
2964 copy_len += TERM_DDP_LEN_TAGGED;
2965 termhdr->hdrct |= DDP_HDR_FLAG;
2966 }
2967 } else {
2968 if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
2969 copy_len += TERM_DDP_LEN_UNTAGGED;
2970 termhdr->hdrct |= DDP_HDR_FLAG;
2971 }
2972
2973 if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
2974 if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
2975 copy_len += TERM_RDMA_LEN;
2976 termhdr->hdrct |= RDMA_HDR_FLAG;
2977 }
2978 }
2979 }
2980 }
2981 }
2982
2983 switch (async_event_id) {
2984 case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
2985 switch (iwarp_opcode(nesqp, aeq_info)) {
2986 case IWARP_OPCODE_WRITE:
2987 flush_code = IB_WC_LOC_PROT_ERR;
2988 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
2989 termhdr->error_code = DDP_TAGGED_INV_STAG;
2990 break;
2991 default:
2992 flush_code = IB_WC_REM_ACCESS_ERR;
2993 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
2994 termhdr->error_code = RDMAP_INV_STAG;
2995 }
2996 break;
2997 case NES_AEQE_AEID_AMP_INVALID_STAG:
2998 flush_code = IB_WC_REM_ACCESS_ERR;
2999 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
3000 termhdr->error_code = RDMAP_INV_STAG;
3001 break;
3002 case NES_AEQE_AEID_AMP_BAD_QP:
3003 flush_code = IB_WC_LOC_QP_OP_ERR;
3004 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
3005 termhdr->error_code = DDP_UNTAGGED_INV_QN;
3006 break;
3007 case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
3008 case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
3009 switch (iwarp_opcode(nesqp, aeq_info)) {
3010 case IWARP_OPCODE_SEND_INV:
3011 case IWARP_OPCODE_SEND_SE_INV:
3012 flush_code = IB_WC_REM_OP_ERR;
3013 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
3014 termhdr->error_code = RDMAP_CANT_INV_STAG;
3015 break;
3016 default:
3017 flush_code = IB_WC_REM_ACCESS_ERR;
3018 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
3019 termhdr->error_code = RDMAP_INV_STAG;
3020 }
3021 break;
3022 case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
3023 if (aeq_info & (NES_AEQE_Q2_DATA_ETHERNET | NES_AEQE_Q2_DATA_MPA)) {
3024 flush_code = IB_WC_LOC_PROT_ERR;
3025 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
3026 termhdr->error_code = DDP_TAGGED_BOUNDS;
3027 } else {
3028 flush_code = IB_WC_REM_ACCESS_ERR;
3029 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
3030 termhdr->error_code = RDMAP_INV_BOUNDS;
3031 }
3032 break;
3033 case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
3034 case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
3035 case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
3036 flush_code = IB_WC_REM_ACCESS_ERR;
3037 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
3038 termhdr->error_code = RDMAP_ACCESS;
3039 break;
3040 case NES_AEQE_AEID_AMP_TO_WRAP:
3041 flush_code = IB_WC_REM_ACCESS_ERR;
3042 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
3043 termhdr->error_code = RDMAP_TO_WRAP;
3044 break;
3045 case NES_AEQE_AEID_AMP_BAD_PD:
3046 switch (iwarp_opcode(nesqp, aeq_info)) {
3047 case IWARP_OPCODE_WRITE:
3048 flush_code = IB_WC_LOC_PROT_ERR;
3049 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
3050 termhdr->error_code = DDP_TAGGED_UNASSOC_STAG;
3051 break;
3052 case IWARP_OPCODE_SEND_INV:
3053 case IWARP_OPCODE_SEND_SE_INV:
3054 flush_code = IB_WC_REM_ACCESS_ERR;
3055 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
3056 termhdr->error_code = RDMAP_CANT_INV_STAG;
3057 break;
3058 default:
3059 flush_code = IB_WC_REM_ACCESS_ERR;
3060 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
3061 termhdr->error_code = RDMAP_UNASSOC_STAG;
3062 }
3063 break;
3064 case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
3065 flush_code = IB_WC_LOC_LEN_ERR;
3066 termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
3067 termhdr->error_code = MPA_MARKER;
3068 break;
3069 case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
3070 flush_code = IB_WC_GENERAL_ERR;
3071 termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
3072 termhdr->error_code = MPA_CRC;
3073 break;
3074 case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
3075 case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
3076 flush_code = IB_WC_LOC_LEN_ERR;
3077 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
3078 termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
3079 break;
3080 case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
3081 case NES_AEQE_AEID_DDP_NO_L_BIT:
3082 flush_code = IB_WC_FATAL_ERR;
3083 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
3084 termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
3085 break;
3086 case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
3087 case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
3088 flush_code = IB_WC_GENERAL_ERR;
3089 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
3090 termhdr->error_code = DDP_UNTAGGED_INV_MSN_RANGE;
3091 break;
3092 case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
3093 flush_code = IB_WC_LOC_LEN_ERR;
3094 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
3095 termhdr->error_code = DDP_UNTAGGED_INV_TOO_LONG;
3096 break;
3097 case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
3098 flush_code = IB_WC_GENERAL_ERR;
3099 if (is_tagged) {
3100 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
3101 termhdr->error_code = DDP_TAGGED_INV_DDP_VER;
3102 } else {
3103 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
3104 termhdr->error_code = DDP_UNTAGGED_INV_DDP_VER;
3105 }
3106 break;
3107 case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
3108 flush_code = IB_WC_GENERAL_ERR;
3109 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
3110 termhdr->error_code = DDP_UNTAGGED_INV_MO;
3111 break;
3112 case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
3113 flush_code = IB_WC_REM_OP_ERR;
3114 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
3115 termhdr->error_code = DDP_UNTAGGED_INV_MSN_NO_BUF;
3116 break;
3117 case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
3118 flush_code = IB_WC_GENERAL_ERR;
3119 termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
3120 termhdr->error_code = DDP_UNTAGGED_INV_QN;
3121 break;
3122 case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
3123 flush_code = IB_WC_GENERAL_ERR;
3124 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
3125 termhdr->error_code = RDMAP_INV_RDMAP_VER;
3126 break;
3127 case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
3128 flush_code = IB_WC_LOC_QP_OP_ERR;
3129 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
3130 termhdr->error_code = RDMAP_UNEXPECTED_OP;
3131 break;
3132 default:
3133 flush_code = IB_WC_FATAL_ERR;
3134 termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
3135 termhdr->error_code = RDMAP_UNSPECIFIED;
3136 break;
3137 }
3138
3139 if (copy_len)
3140 memcpy(termhdr + 1, pkt, copy_len);
3141
3142 if ((flush_code) && ((NES_AEQE_INBOUND_RDMA & aeq_info) == 0)) {
3143 if (aeq_info & NES_AEQE_SQ)
3144 nesqp->term_sq_flush_code = flush_code;
3145 else
3146 nesqp->term_rq_flush_code = flush_code;
3147 }
3148
3149 return sizeof(struct nes_terminate_hdr) + copy_len;
3150}
3151
3152static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *nesqp,
3153 struct nes_hw_aeqe *aeqe, enum ib_event_type eventtype)
3154{
3155 u64 context;
3156 unsigned long flags;
3157 u32 aeq_info;
3158 u16 async_event_id;
3159 u8 tcp_state;
3160 u8 iwarp_state;
3161 u32 termlen = 0;
3162 u32 mod_qp_flags = NES_CQP_QP_IWARP_STATE_TERMINATE |
3163 NES_CQP_QP_TERM_DONT_SEND_FIN;
3164 struct nes_adapter *nesadapter = nesdev->nesadapter;
3165
3166 if (nesqp->term_flags & NES_TERM_SENT)
3167 return; /* Sanity check */
3168
3169 aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
3170 tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
3171 iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
3172 async_event_id = (u16)aeq_info;
3173
3174 context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
3175 aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
3176 if (!context) {
3177 WARN_ON(!context);
3178 return;
3179 }
3180
3181 nesqp = (struct nes_qp *)(unsigned long)context;
3182 spin_lock_irqsave(&nesqp->lock, flags);
3183 nesqp->hw_iwarp_state = iwarp_state;
3184 nesqp->hw_tcp_state = tcp_state;
3185 nesqp->last_aeq = async_event_id;
3186 nesqp->terminate_eventtype = eventtype;
3187 spin_unlock_irqrestore(&nesqp->lock, flags);
3188
3189 if (nesadapter->send_term_ok)
3190 termlen = nes_bld_terminate_hdr(nesqp, async_event_id, aeq_info);
3191 else
3192 mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG;
3193
3194 nes_terminate_start_timer(nesqp);
3195 nesqp->term_flags |= NES_TERM_SENT;
3196 nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0);
3197}
3198
3199static void nes_terminate_send_fin(struct nes_device *nesdev,
3200 struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
3201{
3202 u32 aeq_info;
3203 u16 async_event_id;
3204 u8 tcp_state;
3205 u8 iwarp_state;
3206 unsigned long flags;
3207
3208 aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
3209 tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
3210 iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
3211 async_event_id = (u16)aeq_info;
3212
3213 spin_lock_irqsave(&nesqp->lock, flags);
3214 nesqp->hw_iwarp_state = iwarp_state;
3215 nesqp->hw_tcp_state = tcp_state;
3216 nesqp->last_aeq = async_event_id;
3217 spin_unlock_irqrestore(&nesqp->lock, flags);
3218
3219 /* Send the fin only */
3220 nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_TERMINATE |
3221 NES_CQP_QP_TERM_DONT_SEND_TERM_MSG, 0, 0);
3222}
3223
3224/* Cleanup after a terminate sent or received */
3225static void nes_terminate_done(struct nes_qp *nesqp, int timeout_occurred)
3226{
3227 u32 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
3228 unsigned long flags;
3229 struct nes_vnic *nesvnic = to_nesvnic(nesqp->ibqp.device);
3230 struct nes_device *nesdev = nesvnic->nesdev;
3231 u8 first_time = 0;
3232
3233 spin_lock_irqsave(&nesqp->lock, flags);
3234 if (nesqp->hte_added) {
3235 nesqp->hte_added = 0;
3236 next_iwarp_state |= NES_CQP_QP_DEL_HTE;
3237 }
3238
3239 first_time = (nesqp->term_flags & NES_TERM_DONE) == 0;
3240 nesqp->term_flags |= NES_TERM_DONE;
3241 spin_unlock_irqrestore(&nesqp->lock, flags);
3242
3243 /* Make sure we go through this only once */
3244 if (first_time) {
3245 if (timeout_occurred == 0)
3246 del_timer(&nesqp->terminate_timer);
3247 else
3248 next_iwarp_state |= NES_CQP_QP_RESET;
3249
3250 nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
3251 nes_cm_disconn(nesqp);
3252 }
3253}
3254
3255static void nes_terminate_received(struct nes_device *nesdev,
3256 struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
3257{
3258 u32 aeq_info;
3259 u8 *pkt;
3260 u32 *mpa;
3261 u8 ddp_ctl;
3262 u8 rdma_ctl;
3263 u16 aeq_id = 0;
3264
3265 aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
3266 if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
3267 /* Terminate is not a performance path so the silicon */
3268 /* did not validate the frame - do it now */
3269 pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
3270 mpa = (u32 *)locate_mpa(pkt, aeq_info);
3271 ddp_ctl = (be32_to_cpu(mpa[0]) >> 8) & 0xff;
3272 rdma_ctl = be32_to_cpu(mpa[0]) & 0xff;
3273 if ((ddp_ctl & 0xc0) != 0x40)
3274 aeq_id = NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC;
3275 else if ((ddp_ctl & 0x03) != 1)
3276 aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION;
3277 else if (be32_to_cpu(mpa[2]) != 2)
3278 aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_QN;
3279 else if (be32_to_cpu(mpa[3]) != 1)
3280 aeq_id = NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN;
3281 else if (be32_to_cpu(mpa[4]) != 0)
3282 aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_MO;
3283 else if ((rdma_ctl & 0xc0) != 0x40)
3284 aeq_id = NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION;
3285
3286 if (aeq_id) {
3287 /* Bad terminate recvd - send back a terminate */
3288 aeq_info = (aeq_info & 0xffff0000) | aeq_id;
3289 aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
3290 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
3291 return;
3292 }
3293 }
3294
3295 nesqp->term_flags |= NES_TERM_RCVD;
3296 nesqp->terminate_eventtype = IB_EVENT_QP_FATAL;
3297 nes_terminate_start_timer(nesqp);
3298 nes_terminate_send_fin(nesdev, nesqp, aeqe);
3299}
3300
/**
 * nes_terminate_timeout - timer callback for a terminate that never completed
 * @context: the struct nes_qp pointer stored in the timer's data field
 *
 * Runs the terminate cleanup with the timeout flag set.
 */
static void nes_terminate_timeout(unsigned long context)
{
	nes_terminate_done((struct nes_qp *)context, 1);
}
3308
3309/* Set a timer in case hw cannot complete the terminate sequence */
3310static void nes_terminate_start_timer(struct nes_qp *nesqp)
3311{
3312 init_timer(&nesqp->terminate_timer);
3313 nesqp->terminate_timer.function = nes_terminate_timeout;
3314 nesqp->terminate_timer.expires = jiffies + HZ;
3315 nesqp->terminate_timer.data = (unsigned long)nesqp;
3316 add_timer(&nesqp->terminate_timer);
3317}
3318
2906/** 3319/**
2907 * nes_process_iwarp_aeqe 3320 * nes_process_iwarp_aeqe
2908 */ 3321 */
@@ -2910,28 +3323,27 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
2910 struct nes_hw_aeqe *aeqe) 3323 struct nes_hw_aeqe *aeqe)
2911{ 3324{
2912 u64 context; 3325 u64 context;
2913 u64 aeqe_context = 0;
2914 unsigned long flags; 3326 unsigned long flags;
2915 struct nes_qp *nesqp; 3327 struct nes_qp *nesqp;
3328 struct nes_hw_cq *hw_cq;
3329 struct nes_cq *nescq;
2916 int resource_allocated; 3330 int resource_allocated;
2917 /* struct iw_cm_id *cm_id; */
2918 struct nes_adapter *nesadapter = nesdev->nesadapter; 3331 struct nes_adapter *nesadapter = nesdev->nesadapter;
2919 struct ib_event ibevent;
2920 /* struct iw_cm_event cm_event; */
2921 u32 aeq_info; 3332 u32 aeq_info;
2922 u32 next_iwarp_state = 0; 3333 u32 next_iwarp_state = 0;
2923 u16 async_event_id; 3334 u16 async_event_id;
2924 u8 tcp_state; 3335 u8 tcp_state;
2925 u8 iwarp_state; 3336 u8 iwarp_state;
3337 int must_disconn = 1;
3338 int must_terminate = 0;
3339 struct ib_event ibevent;
2926 3340
2927 nes_debug(NES_DBG_AEQ, "\n"); 3341 nes_debug(NES_DBG_AEQ, "\n");
2928 aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); 3342 aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
2929 if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) { 3343 if ((NES_AEQE_INBOUND_RDMA & aeq_info) || (!(NES_AEQE_QP & aeq_info))) {
2930 context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); 3344 context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
2931 context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; 3345 context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
2932 } else { 3346 } else {
2933 aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
2934 aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
2935 context = (unsigned long)nesadapter->qp_table[le32_to_cpu( 3347 context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
2936 aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN]; 3348 aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
2937 BUG_ON(!context); 3349 BUG_ON(!context);
@@ -2948,7 +3360,11 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
2948 3360
2949 switch (async_event_id) { 3361 switch (async_event_id) {
2950 case NES_AEQE_AEID_LLP_FIN_RECEIVED: 3362 case NES_AEQE_AEID_LLP_FIN_RECEIVED:
2951 nesqp = *((struct nes_qp **)&context); 3363 nesqp = (struct nes_qp *)(unsigned long)context;
3364
3365 if (nesqp->term_flags)
3366 return; /* Ignore it, wait for close complete */
3367
2952 if (atomic_inc_return(&nesqp->close_timer_started) == 1) { 3368 if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
2953 nesqp->cm_id->add_ref(nesqp->cm_id); 3369 nesqp->cm_id->add_ref(nesqp->cm_id);
2954 schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, 3370 schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
@@ -2959,18 +3375,24 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
2959 nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), 3375 nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
2960 async_event_id, nesqp->last_aeq, tcp_state); 3376 async_event_id, nesqp->last_aeq, tcp_state);
2961 } 3377 }
3378
2962 if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) || 3379 if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
2963 (nesqp->ibqp_state != IB_QPS_RTS)) { 3380 (nesqp->ibqp_state != IB_QPS_RTS)) {
2964 /* FIN Received but tcp state or IB state moved on, 3381 /* FIN Received but tcp state or IB state moved on,
2965 should expect a close complete */ 3382 should expect a close complete */
2966 return; 3383 return;
2967 } 3384 }
3385
2968 case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: 3386 case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
3387 nesqp = (struct nes_qp *)(unsigned long)context;
3388 if (nesqp->term_flags) {
3389 nes_terminate_done(nesqp, 0);
3390 return;
3391 }
3392
2969 case NES_AEQE_AEID_LLP_CONNECTION_RESET: 3393 case NES_AEQE_AEID_LLP_CONNECTION_RESET:
2970 case NES_AEQE_AEID_TERMINATE_SENT:
2971 case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
2972 case NES_AEQE_AEID_RESET_SENT: 3394 case NES_AEQE_AEID_RESET_SENT:
2973 nesqp = *((struct nes_qp **)&context); 3395 nesqp = (struct nes_qp *)(unsigned long)context;
2974 if (async_event_id == NES_AEQE_AEID_RESET_SENT) { 3396 if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
2975 tcp_state = NES_AEQE_TCP_STATE_CLOSED; 3397 tcp_state = NES_AEQE_TCP_STATE_CLOSED;
2976 } 3398 }
@@ -2982,12 +3404,7 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
2982 if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) || 3404 if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
2983 (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) { 3405 (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
2984 nesqp->hte_added = 0; 3406 nesqp->hte_added = 0;
2985 spin_unlock_irqrestore(&nesqp->lock, flags); 3407 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
2986 nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u to remove hte\n",
2987 nesqp->hwqp.qp_id);
2988 nes_hw_modify_qp(nesdev, nesqp,
2989 NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE, 0);
2990 spin_lock_irqsave(&nesqp->lock, flags);
2991 } 3408 }
2992 3409
2993 if ((nesqp->ibqp_state == IB_QPS_RTS) && 3410 if ((nesqp->ibqp_state == IB_QPS_RTS) &&
@@ -2999,151 +3416,106 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
2999 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; 3416 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
3000 break; 3417 break;
3001 case NES_AEQE_IWARP_STATE_TERMINATE: 3418 case NES_AEQE_IWARP_STATE_TERMINATE:
3002 next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; 3419 must_disconn = 0; /* terminate path takes care of disconn */
3003 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE; 3420 if (nesqp->term_flags == 0)
3004 if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { 3421 must_terminate = 1;
3005 next_iwarp_state |= 0x02000000;
3006 nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
3007 }
3008 break; 3422 break;
3009 default:
3010 next_iwarp_state = 0;
3011 }
3012 spin_unlock_irqrestore(&nesqp->lock, flags);
3013 if (next_iwarp_state) {
3014 nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
3015 " also added another reference\n",
3016 nesqp->hwqp.qp_id, next_iwarp_state);
3017 nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
3018 } 3423 }
3019 nes_cm_disconn(nesqp);
3020 } else { 3424 } else {
3021 if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) { 3425 if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) {
3022 /* FIN Received but ib state not RTS, 3426 /* FIN Received but ib state not RTS,
3023 close complete will be on its way */ 3427 close complete will be on its way */
3024 spin_unlock_irqrestore(&nesqp->lock, flags); 3428 must_disconn = 0;
3025 return;
3026 }
3027 spin_unlock_irqrestore(&nesqp->lock, flags);
3028 if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
3029 next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000;
3030 nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
3031 nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
3032 " also added another reference\n",
3033 nesqp->hwqp.qp_id, next_iwarp_state);
3034 nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
3035 } 3429 }
3036 nes_cm_disconn(nesqp);
3037 } 3430 }
3038 break;
3039 case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
3040 nesqp = *((struct nes_qp **)&context);
3041 spin_lock_irqsave(&nesqp->lock, flags);
3042 nesqp->hw_iwarp_state = iwarp_state;
3043 nesqp->hw_tcp_state = tcp_state;
3044 nesqp->last_aeq = async_event_id;
3045 spin_unlock_irqrestore(&nesqp->lock, flags); 3431 spin_unlock_irqrestore(&nesqp->lock, flags);
3046 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TERMINATE_RECEIVED" 3432
3047 " event on QP%u \n Q2 Data:\n", 3433 if (must_terminate)
3048 nesqp->hwqp.qp_id); 3434 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
3049 if (nesqp->ibqp.event_handler) { 3435 else if (must_disconn) {
3050 ibevent.device = nesqp->ibqp.device; 3436 if (next_iwarp_state) {
3051 ibevent.element.qp = &nesqp->ibqp; 3437 nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X\n",
3052 ibevent.event = IB_EVENT_QP_FATAL; 3438 nesqp->hwqp.qp_id, next_iwarp_state);
3053 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); 3439 nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
3054 } 3440 }
3055 if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
3056 ((nesqp->ibqp_state == IB_QPS_RTS)&&
3057 (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
3058 nes_cm_disconn(nesqp); 3441 nes_cm_disconn(nesqp);
3059 } else {
3060 nesqp->in_disconnect = 0;
3061 wake_up(&nesqp->kick_waitq);
3062 } 3442 }
3063 break; 3443 break;
3064 case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES: 3444
3065 nesqp = *((struct nes_qp **)&context); 3445 case NES_AEQE_AEID_TERMINATE_SENT:
3066 spin_lock_irqsave(&nesqp->lock, flags); 3446 nesqp = (struct nes_qp *)(unsigned long)context;
3067 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR; 3447 nes_terminate_send_fin(nesdev, nesqp, aeqe);
3068 nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
3069 nesqp->last_aeq = async_event_id;
3070 if (nesqp->cm_id) {
3071 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
3072 " event on QP%u, remote IP = 0x%08X \n",
3073 nesqp->hwqp.qp_id,
3074 ntohl(nesqp->cm_id->remote_addr.sin_addr.s_addr));
3075 } else {
3076 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
3077 " event on QP%u \n",
3078 nesqp->hwqp.qp_id);
3079 }
3080 spin_unlock_irqrestore(&nesqp->lock, flags);
3081 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_RESET;
3082 nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
3083 if (nesqp->ibqp.event_handler) {
3084 ibevent.device = nesqp->ibqp.device;
3085 ibevent.element.qp = &nesqp->ibqp;
3086 ibevent.event = IB_EVENT_QP_FATAL;
3087 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
3088 }
3089 break; 3448 break;
3090 case NES_AEQE_AEID_AMP_BAD_STAG_INDEX: 3449
3091 if (NES_AEQE_INBOUND_RDMA&aeq_info) { 3450 case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
3092 nesqp = nesadapter->qp_table[le32_to_cpu( 3451 nesqp = (struct nes_qp *)(unsigned long)context;
3093 aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; 3452 nes_terminate_received(nesdev, nesqp, aeqe);
3094 } else {
3095 /* TODO: get the actual WQE and mask off wqe index */
3096 context &= ~((u64)511);
3097 nesqp = *((struct nes_qp **)&context);
3098 }
3099 spin_lock_irqsave(&nesqp->lock, flags);
3100 nesqp->hw_iwarp_state = iwarp_state;
3101 nesqp->hw_tcp_state = tcp_state;
3102 nesqp->last_aeq = async_event_id;
3103 spin_unlock_irqrestore(&nesqp->lock, flags);
3104 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_BAD_STAG_INDEX event on QP%u\n",
3105 nesqp->hwqp.qp_id);
3106 if (nesqp->ibqp.event_handler) {
3107 ibevent.device = nesqp->ibqp.device;
3108 ibevent.element.qp = &nesqp->ibqp;
3109 ibevent.event = IB_EVENT_QP_ACCESS_ERR;
3110 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
3111 }
3112 break; 3453 break;
3454
3455 case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
3456 case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
3113 case NES_AEQE_AEID_AMP_UNALLOCATED_STAG: 3457 case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
3114 nesqp = *((struct nes_qp **)&context); 3458 case NES_AEQE_AEID_AMP_INVALID_STAG:
3115 spin_lock_irqsave(&nesqp->lock, flags); 3459 case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
3116 nesqp->hw_iwarp_state = iwarp_state; 3460 case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
3117 nesqp->hw_tcp_state = tcp_state;
3118 nesqp->last_aeq = async_event_id;
3119 spin_unlock_irqrestore(&nesqp->lock, flags);
3120 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_UNALLOCATED_STAG event on QP%u\n",
3121 nesqp->hwqp.qp_id);
3122 if (nesqp->ibqp.event_handler) {
3123 ibevent.device = nesqp->ibqp.device;
3124 ibevent.element.qp = &nesqp->ibqp;
3125 ibevent.event = IB_EVENT_QP_ACCESS_ERR;
3126 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
3127 }
3128 break;
3129 case NES_AEQE_AEID_PRIV_OPERATION_DENIED: 3461 case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
3130 nesqp = nesadapter->qp_table[le32_to_cpu(aeqe->aeqe_words 3462 case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
3131 [NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; 3463 case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
3132 spin_lock_irqsave(&nesqp->lock, flags); 3464 case NES_AEQE_AEID_AMP_TO_WRAP:
3133 nesqp->hw_iwarp_state = iwarp_state; 3465 nesqp = (struct nes_qp *)(unsigned long)context;
3134 nesqp->hw_tcp_state = tcp_state; 3466 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
3135 nesqp->last_aeq = async_event_id; 3467 break;
3136 spin_unlock_irqrestore(&nesqp->lock, flags); 3468
3137 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_PRIV_OPERATION_DENIED event on QP%u," 3469 case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
3138 " nesqp = %p, AE reported %p\n", 3470 case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
3139 nesqp->hwqp.qp_id, nesqp, *((struct nes_qp **)&context)); 3471 case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
3140 if (nesqp->ibqp.event_handler) { 3472 case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
3141 ibevent.device = nesqp->ibqp.device; 3473 nesqp = (struct nes_qp *)(unsigned long)context;
3142 ibevent.element.qp = &nesqp->ibqp; 3474 if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
3143 ibevent.event = IB_EVENT_QP_ACCESS_ERR; 3475 aeq_info &= 0xffff0000;
3144 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); 3476 aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
3477 aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
3145 } 3478 }
3479
3480 case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
3481 case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
3482 case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
3483 case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
3484 case NES_AEQE_AEID_AMP_BAD_QP:
3485 case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
3486 case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
3487 case NES_AEQE_AEID_DDP_NO_L_BIT:
3488 case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
3489 case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
3490 case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
3491 case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
3492 case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
3493 case NES_AEQE_AEID_AMP_BAD_PD:
3494 case NES_AEQE_AEID_AMP_FASTREG_SHARED:
3495 case NES_AEQE_AEID_AMP_FASTREG_VALID_STAG:
3496 case NES_AEQE_AEID_AMP_FASTREG_MW_STAG:
3497 case NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS:
3498 case NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW:
3499 case NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH:
3500 case NES_AEQE_AEID_AMP_INVALIDATE_SHARED:
3501 case NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS:
3502 case NES_AEQE_AEID_AMP_MWBIND_VALID_STAG:
3503 case NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG:
3504 case NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG:
3505 case NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG:
3506 case NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS:
3507 case NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS:
3508 case NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT:
3509 case NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED:
3510 case NES_AEQE_AEID_BAD_CLOSE:
3511 case NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO:
3512 case NES_AEQE_AEID_STAG_ZERO_INVALID:
3513 case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
3514 case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
3515 nesqp = (struct nes_qp *)(unsigned long)context;
3516 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
3146 break; 3517 break;
3518
3147 case NES_AEQE_AEID_CQ_OPERATION_ERROR: 3519 case NES_AEQE_AEID_CQ_OPERATION_ERROR:
3148 context <<= 1; 3520 context <<= 1;
3149 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n", 3521 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n",
@@ -3153,83 +3525,19 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3153 if (resource_allocated) { 3525 if (resource_allocated) {
3154 printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n", 3526 printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n",
3155 __func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])); 3527 __func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
3528 hw_cq = (struct nes_hw_cq *)(unsigned long)context;
3529 if (hw_cq) {
3530 nescq = container_of(hw_cq, struct nes_cq, hw_cq);
3531 if (nescq->ibcq.event_handler) {
3532 ibevent.device = nescq->ibcq.device;
3533 ibevent.event = IB_EVENT_CQ_ERR;
3534 ibevent.element.cq = &nescq->ibcq;
3535 nescq->ibcq.event_handler(&ibevent, nescq->ibcq.cq_context);
3536 }
3537 }
3156 } 3538 }
3157 break; 3539 break;
3158 case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: 3540
3159 nesqp = nesadapter->qp_table[le32_to_cpu(
3160 aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
3161 spin_lock_irqsave(&nesqp->lock, flags);
3162 nesqp->hw_iwarp_state = iwarp_state;
3163 nesqp->hw_tcp_state = tcp_state;
3164 nesqp->last_aeq = async_event_id;
3165 spin_unlock_irqrestore(&nesqp->lock, flags);
3166 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG"
3167 "_FOR_AVAILABLE_BUFFER event on QP%u\n",
3168 nesqp->hwqp.qp_id);
3169 if (nesqp->ibqp.event_handler) {
3170 ibevent.device = nesqp->ibqp.device;
3171 ibevent.element.qp = &nesqp->ibqp;
3172 ibevent.event = IB_EVENT_QP_ACCESS_ERR;
3173 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
3174 }
3175 /* tell cm to disconnect, cm will queue work to thread */
3176 nes_cm_disconn(nesqp);
3177 break;
3178 case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
3179 nesqp = *((struct nes_qp **)&context);
3180 spin_lock_irqsave(&nesqp->lock, flags);
3181 nesqp->hw_iwarp_state = iwarp_state;
3182 nesqp->hw_tcp_state = tcp_state;
3183 nesqp->last_aeq = async_event_id;
3184 spin_unlock_irqrestore(&nesqp->lock, flags);
3185 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_INVALID_MSN"
3186 "_NO_BUFFER_AVAILABLE event on QP%u\n",
3187 nesqp->hwqp.qp_id);
3188 if (nesqp->ibqp.event_handler) {
3189 ibevent.device = nesqp->ibqp.device;
3190 ibevent.element.qp = &nesqp->ibqp;
3191 ibevent.event = IB_EVENT_QP_FATAL;
3192 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
3193 }
3194 /* tell cm to disconnect, cm will queue work to thread */
3195 nes_cm_disconn(nesqp);
3196 break;
3197 case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
3198 nesqp = *((struct nes_qp **)&context);
3199 spin_lock_irqsave(&nesqp->lock, flags);
3200 nesqp->hw_iwarp_state = iwarp_state;
3201 nesqp->hw_tcp_state = tcp_state;
3202 nesqp->last_aeq = async_event_id;
3203 spin_unlock_irqrestore(&nesqp->lock, flags);
3204 nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR"
3205 " event on QP%u \n Q2 Data:\n",
3206 nesqp->hwqp.qp_id);
3207 if (nesqp->ibqp.event_handler) {
3208 ibevent.device = nesqp->ibqp.device;
3209 ibevent.element.qp = &nesqp->ibqp;
3210 ibevent.event = IB_EVENT_QP_FATAL;
3211 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
3212 }
3213 /* tell cm to disconnect, cm will queue work to thread */
3214 nes_cm_disconn(nesqp);
3215 break;
3216 /* TODO: additional AEs need to be here */
3217 case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
3218 nesqp = *((struct nes_qp **)&context);
3219 spin_lock_irqsave(&nesqp->lock, flags);
3220 nesqp->hw_iwarp_state = iwarp_state;
3221 nesqp->hw_tcp_state = tcp_state;
3222 nesqp->last_aeq = async_event_id;
3223 spin_unlock_irqrestore(&nesqp->lock, flags);
3224 if (nesqp->ibqp.event_handler) {
3225 ibevent.device = nesqp->ibqp.device;
3226 ibevent.element.qp = &nesqp->ibqp;
3227 ibevent.event = IB_EVENT_QP_ACCESS_ERR;
3228 nesqp->ibqp.event_handler(&ibevent,
3229 nesqp->ibqp.qp_context);
3230 }
3231 nes_cm_disconn(nesqp);
3232 break;
3233 default: 3541 default:
3234 nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n", 3542 nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n",
3235 async_event_id); 3543 async_event_id);
@@ -3238,7 +3546,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3238 3546
3239} 3547}
3240 3548
3241
3242/** 3549/**
3243 * nes_iwarp_ce_handler 3550 * nes_iwarp_ce_handler
3244 */ 3551 */
@@ -3373,6 +3680,8 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
3373{ 3680{
3374 struct nes_cqp_request *cqp_request; 3681 struct nes_cqp_request *cqp_request;
3375 struct nes_hw_cqp_wqe *cqp_wqe; 3682 struct nes_hw_cqp_wqe *cqp_wqe;
3683 u32 sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
3684 u32 rq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
3376 int ret; 3685 int ret;
3377 3686
3378 cqp_request = nes_get_cqp_request(nesdev); 3687 cqp_request = nes_get_cqp_request(nesdev);
@@ -3389,6 +3698,24 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
3389 cqp_wqe = &cqp_request->cqp_wqe; 3698 cqp_wqe = &cqp_request->cqp_wqe;
3390 nes_fill_init_cqp_wqe(cqp_wqe, nesdev); 3699 nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
3391 3700
3701 /* If wqe in error was identified, set code to be put into cqe */
3702 if ((nesqp->term_sq_flush_code) && (which_wq & NES_CQP_FLUSH_SQ)) {
3703 which_wq |= NES_CQP_FLUSH_MAJ_MIN;
3704 sq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_sq_flush_code;
3705 nesqp->term_sq_flush_code = 0;
3706 }
3707
3708 if ((nesqp->term_rq_flush_code) && (which_wq & NES_CQP_FLUSH_RQ)) {
3709 which_wq |= NES_CQP_FLUSH_MAJ_MIN;
3710 rq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_rq_flush_code;
3711 nesqp->term_rq_flush_code = 0;
3712 }
3713
3714 if (which_wq & NES_CQP_FLUSH_MAJ_MIN) {
3715 cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_SQ_CODE] = cpu_to_le32(sq_code);
3716 cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_RQ_CODE] = cpu_to_le32(rq_code);
3717 }
3718
3392 cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = 3719 cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
3393 cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq); 3720 cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
3394 cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id); 3721 cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index c3654c6383fe..f28a41ba9fa1 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -241,6 +241,7 @@ enum nes_cqp_stag_wqeword_idx {
241}; 241};
242 242
243#define NES_CQP_OP_IWARP_STATE_SHIFT 28 243#define NES_CQP_OP_IWARP_STATE_SHIFT 28
244#define NES_CQP_OP_TERMLEN_SHIFT 28
244 245
245enum nes_cqp_qp_bits { 246enum nes_cqp_qp_bits {
246 NES_CQP_QP_ARP_VALID = (1<<8), 247 NES_CQP_QP_ARP_VALID = (1<<8),
@@ -265,12 +266,16 @@ enum nes_cqp_qp_bits {
265 NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT), 266 NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT),
266 NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT), 267 NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT),
267 NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT), 268 NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT),
269 NES_CQP_QP_TERM_DONT_SEND_FIN = (1<<24),
270 NES_CQP_QP_TERM_DONT_SEND_TERM_MSG = (1<<25),
268 NES_CQP_QP_RESET = (1<<31), 271 NES_CQP_QP_RESET = (1<<31),
269}; 272};
270 273
271enum nes_cqp_qp_wqe_word_idx { 274enum nes_cqp_qp_wqe_word_idx {
272 NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6, 275 NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6,
273 NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7, 276 NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7,
277 NES_CQP_QP_WQE_FLUSH_SQ_CODE = 8,
278 NES_CQP_QP_WQE_FLUSH_RQ_CODE = 9,
274 NES_CQP_QP_WQE_NEW_MSS_IDX = 15, 279 NES_CQP_QP_WQE_NEW_MSS_IDX = 15,
275}; 280};
276 281
@@ -361,6 +366,7 @@ enum nes_cqp_arp_bits {
361enum nes_cqp_flush_bits { 366enum nes_cqp_flush_bits {
362 NES_CQP_FLUSH_SQ = (1<<30), 367 NES_CQP_FLUSH_SQ = (1<<30),
363 NES_CQP_FLUSH_RQ = (1<<31), 368 NES_CQP_FLUSH_RQ = (1<<31),
369 NES_CQP_FLUSH_MAJ_MIN = (1<<28),
364}; 370};
365 371
366enum nes_cqe_opcode_bits { 372enum nes_cqe_opcode_bits {
@@ -633,11 +639,14 @@ enum nes_aeqe_bits {
633 NES_AEQE_INBOUND_RDMA = (1<<19), 639 NES_AEQE_INBOUND_RDMA = (1<<19),
634 NES_AEQE_IWARP_STATE_MASK = (7<<20), 640 NES_AEQE_IWARP_STATE_MASK = (7<<20),
635 NES_AEQE_TCP_STATE_MASK = (0xf<<24), 641 NES_AEQE_TCP_STATE_MASK = (0xf<<24),
642 NES_AEQE_Q2_DATA_WRITTEN = (0x3<<28),
636 NES_AEQE_VALID = (1<<31), 643 NES_AEQE_VALID = (1<<31),
637}; 644};
638 645
639#define NES_AEQE_IWARP_STATE_SHIFT 20 646#define NES_AEQE_IWARP_STATE_SHIFT 20
640#define NES_AEQE_TCP_STATE_SHIFT 24 647#define NES_AEQE_TCP_STATE_SHIFT 24
648#define NES_AEQE_Q2_DATA_ETHERNET (1<<28)
649#define NES_AEQE_Q2_DATA_MPA (1<<29)
641 650
642enum nes_aeqe_iwarp_state { 651enum nes_aeqe_iwarp_state {
643 NES_AEQE_IWARP_STATE_NON_EXISTANT = 0, 652 NES_AEQE_IWARP_STATE_NON_EXISTANT = 0,
@@ -751,6 +760,15 @@ enum nes_iwarp_sq_wqe_bits {
751 NES_IWARP_SQ_OP_NOP = 12, 760 NES_IWARP_SQ_OP_NOP = 12,
752}; 761};
753 762
/*
 * Major codes for a CQE flush code word. flush_wqes() places
 * NES_IWARP_CQE_MAJOR_FLUSH in the upper 16 bits (<< 16) and ORs a minor
 * code into the lower bits.
 * NOTE(review): flush_wqes() uses CQE_MAJOR_DRV, not NES_IWARP_CQE_MAJOR_DRV,
 * for driver-supplied codes - presumably the same value; confirm.
 */
763enum nes_iwarp_cqe_major_code {
764 NES_IWARP_CQE_MAJOR_FLUSH = 1,
765 NES_IWARP_CQE_MAJOR_DRV = 0x8000
766};
767
/*
 * Minor codes for a CQE flush code word; flush_wqes() ORs
 * NES_IWARP_CQE_MINOR_FLUSH into the low bits of its default SQ/RQ codes.
 */
768enum nes_iwarp_cqe_minor_code {
769 NES_IWARP_CQE_MINOR_FLUSH = 1
770};
771
754#define NES_EEPROM_READ_REQUEST (1<<16) 772#define NES_EEPROM_READ_REQUEST (1<<16)
755#define NES_MAC_ADDR_VALID (1<<20) 773#define NES_MAC_ADDR_VALID (1<<20)
756 774
@@ -1119,6 +1137,7 @@ struct nes_adapter {
1119 u8 netdev_max; /* from host nic address count in EEPROM */ 1137 u8 netdev_max; /* from host nic address count in EEPROM */
1120 u8 port_count; 1138 u8 port_count;
1121 u8 virtwq; 1139 u8 virtwq;
1140 u8 send_term_ok;
1122 u8 et_use_adaptive_rx_coalesce; 1141 u8 et_use_adaptive_rx_coalesce;
1123 u8 adapter_fcn_count; 1142 u8 adapter_fcn_count;
1124 u8 pft_mcast_map[NES_PFT_SIZE]; 1143 u8 pft_mcast_map[NES_PFT_SIZE];
@@ -1217,6 +1236,90 @@ struct nes_ib_device {
1217 u32 num_pd; 1236 u32 num_pd;
1218}; 1237};
1219 1238
1239enum nes_hdrct_flags {
1240 DDP_LEN_FLAG = 0x80,
1241 DDP_HDR_FLAG = 0x40,
1242 RDMA_HDR_FLAG = 0x20
1243};
1244
1245enum nes_term_layers {
1246 LAYER_RDMA = 0,
1247 LAYER_DDP = 1,
1248 LAYER_MPA = 2
1249};
1250
1251enum nes_term_error_types {
1252 RDMAP_CATASTROPHIC = 0,
1253 RDMAP_REMOTE_PROT = 1,
1254 RDMAP_REMOTE_OP = 2,
1255 DDP_CATASTROPHIC = 0,
1256 DDP_TAGGED_BUFFER = 1,
1257 DDP_UNTAGGED_BUFFER = 2,
1258 DDP_LLP = 3
1259};
1260
1261enum nes_term_rdma_errors {
1262 RDMAP_INV_STAG = 0x00,
1263 RDMAP_INV_BOUNDS = 0x01,
1264 RDMAP_ACCESS = 0x02,
1265 RDMAP_UNASSOC_STAG = 0x03,
1266 RDMAP_TO_WRAP = 0x04,
1267 RDMAP_INV_RDMAP_VER = 0x05,
1268 RDMAP_UNEXPECTED_OP = 0x06,
1269 RDMAP_CATASTROPHIC_LOCAL = 0x07,
1270 RDMAP_CATASTROPHIC_GLOBAL = 0x08,
1271 RDMAP_CANT_INV_STAG = 0x09,
1272 RDMAP_UNSPECIFIED = 0xff
1273};
1274
1275enum nes_term_ddp_errors {
1276 DDP_CATASTROPHIC_LOCAL = 0x00,
1277 DDP_TAGGED_INV_STAG = 0x00,
1278 DDP_TAGGED_BOUNDS = 0x01,
1279 DDP_TAGGED_UNASSOC_STAG = 0x02,
1280 DDP_TAGGED_TO_WRAP = 0x03,
1281 DDP_TAGGED_INV_DDP_VER = 0x04,
1282 DDP_UNTAGGED_INV_QN = 0x01,
1283 DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
1284 DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
1285 DDP_UNTAGGED_INV_MO = 0x04,
1286 DDP_UNTAGGED_INV_TOO_LONG = 0x05,
1287 DDP_UNTAGGED_INV_DDP_VER = 0x06
1288};
1289
1290enum nes_term_mpa_errors {
1291 MPA_CLOSED = 0x01,
1292 MPA_CRC = 0x02,
1293 MPA_MARKER = 0x03,
1294 MPA_REQ_RSP = 0x04,
1295};
1296
1297struct nes_terminate_hdr {
1298 u8 layer_etype;
1299 u8 error_code;
1300 u8 hdrct;
1301 u8 rsvd;
1302};
1303
1304/* Used to determine how to fill in terminate error codes */
1305#define IWARP_OPCODE_WRITE 0
1306#define IWARP_OPCODE_READREQ 1
1307#define IWARP_OPCODE_READRSP 2
1308#define IWARP_OPCODE_SEND 3
1309#define IWARP_OPCODE_SEND_INV 4
1310#define IWARP_OPCODE_SEND_SE 5
1311#define IWARP_OPCODE_SEND_SE_INV 6
1312#define IWARP_OPCODE_TERM 7
1313
1314/* These values are used only during terminate processing */
1315#define TERM_DDP_LEN_TAGGED 14
1316#define TERM_DDP_LEN_UNTAGGED 18
1317#define TERM_RDMA_LEN 28
1318#define RDMA_OPCODE_MASK 0x0f
1319#define RDMA_READ_REQ_OPCODE 1
1320#define BAD_FRAME_OFFSET 64
1321#define CQE_MAJOR_DRV 0x8000
1322
1220#define nes_vlan_rx vlan_hwaccel_receive_skb 1323#define nes_vlan_rx vlan_hwaccel_receive_skb
1221#define nes_netif_rx netif_receive_skb 1324#define nes_netif_rx netif_receive_skb
1222 1325
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index a282031d15c7..9687c397ce1a 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -183,6 +183,9 @@ int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesada
183 } else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) { 183 } else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) {
184 nesadapter->virtwq = 1; 184 nesadapter->virtwq = 1;
185 } 185 }
186 if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3))
187 nesadapter->send_term_ok = 1;
188
186 nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) + 189 nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) +
187 (u32)((u8)eeprom_data); 190 (u32)((u8)eeprom_data);
188 191
@@ -548,7 +551,7 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
548 spin_unlock_irqrestore(&nesdev->cqp.lock, flags); 551 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
549 } 552 }
550 if (cqp_request == NULL) { 553 if (cqp_request == NULL) {
551 cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL); 554 cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_ATOMIC);
552 if (cqp_request) { 555 if (cqp_request) {
553 cqp_request->dynamic = 1; 556 cqp_request->dynamic = 1;
554 INIT_LIST_HEAD(&cqp_request->list); 557 INIT_LIST_HEAD(&cqp_request->list);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 21e0fd336cf7..a680c42d6e8c 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -667,15 +667,32 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
667 */ 667 */
668static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) 668static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
669{ 669{
670 struct nes_vnic *nesvnic = to_nesvnic(ibdev);
671 struct net_device *netdev = nesvnic->netdev;
672
670 memset(props, 0, sizeof(*props)); 673 memset(props, 0, sizeof(*props));
671 674
672 props->max_mtu = IB_MTU_2048; 675 props->max_mtu = IB_MTU_4096;
673 props->active_mtu = IB_MTU_2048; 676
677 if (netdev->mtu >= 4096)
678 props->active_mtu = IB_MTU_4096;
679 else if (netdev->mtu >= 2048)
680 props->active_mtu = IB_MTU_2048;
681 else if (netdev->mtu >= 1024)
682 props->active_mtu = IB_MTU_1024;
683 else if (netdev->mtu >= 512)
684 props->active_mtu = IB_MTU_512;
685 else
686 props->active_mtu = IB_MTU_256;
687
674 props->lid = 1; 688 props->lid = 1;
675 props->lmc = 0; 689 props->lmc = 0;
676 props->sm_lid = 0; 690 props->sm_lid = 0;
677 props->sm_sl = 0; 691 props->sm_sl = 0;
678 props->state = IB_PORT_ACTIVE; 692 if (nesvnic->linkup)
693 props->state = IB_PORT_ACTIVE;
694 else
695 props->state = IB_PORT_DOWN;
679 props->phys_state = 0; 696 props->phys_state = 0;
680 props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | 697 props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
681 IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; 698 IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
@@ -1506,12 +1523,45 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1506 1523
1507 1524
1508/** 1525/**
1526 * nes_clean_cq
1527 */
1528static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq)
1529{
1530 u32 cq_head;
1531 u32 lo;
1532 u32 hi;
1533 u64 u64temp;
1534 unsigned long flags = 0;
1535
1536 spin_lock_irqsave(&nescq->lock, flags);
1537
1538 cq_head = nescq->hw_cq.cq_head;
1539 while (le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
1540 rmb();
1541 lo = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
1542 hi = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]);
1543 u64temp = (((u64)hi) << 32) | ((u64)lo);
1544 u64temp &= ~(NES_SW_CONTEXT_ALIGN-1);
1545 if (u64temp == (u64)(unsigned long)nesqp) {
1546 /* Zero the context value so cqe will be ignored */
1547 nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = 0;
1548 nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX] = 0;
1549 }
1550
1551 if (++cq_head >= nescq->hw_cq.cq_size)
1552 cq_head = 0;
1553 }
1554
1555 spin_unlock_irqrestore(&nescq->lock, flags);
1556}
1557
1558
1559/**
1509 * nes_destroy_qp 1560 * nes_destroy_qp
1510 */ 1561 */
1511static int nes_destroy_qp(struct ib_qp *ibqp) 1562static int nes_destroy_qp(struct ib_qp *ibqp)
1512{ 1563{
1513 struct nes_qp *nesqp = to_nesqp(ibqp); 1564 struct nes_qp *nesqp = to_nesqp(ibqp);
1514 /* struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); */
1515 struct nes_ucontext *nes_ucontext; 1565 struct nes_ucontext *nes_ucontext;
1516 struct ib_qp_attr attr; 1566 struct ib_qp_attr attr;
1517 struct iw_cm_id *cm_id; 1567 struct iw_cm_id *cm_id;
@@ -1548,7 +1598,6 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
1548 nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret); 1598 nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret);
1549 } 1599 }
1550 1600
1551
1552 if (nesqp->user_mode) { 1601 if (nesqp->user_mode) {
1553 if ((ibqp->uobject)&&(ibqp->uobject->context)) { 1602 if ((ibqp->uobject)&&(ibqp->uobject->context)) {
1554 nes_ucontext = to_nesucontext(ibqp->uobject->context); 1603 nes_ucontext = to_nesucontext(ibqp->uobject->context);
@@ -1560,6 +1609,13 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
1560 } 1609 }
1561 if (nesqp->pbl_pbase) 1610 if (nesqp->pbl_pbase)
1562 kunmap(nesqp->page); 1611 kunmap(nesqp->page);
1612 } else {
1613 /* Clean any pending completions from the cq(s) */
1614 if (nesqp->nesscq)
1615 nes_clean_cq(nesqp, nesqp->nesscq);
1616
1617 if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq))
1618 nes_clean_cq(nesqp, nesqp->nesrcq);
1563 } 1619 }
1564 1620
1565 nes_rem_ref(&nesqp->ibqp); 1621 nes_rem_ref(&nesqp->ibqp);
@@ -2884,7 +2940,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2884 * nes_hw_modify_qp 2940 * nes_hw_modify_qp
2885 */ 2941 */
2886int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, 2942int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
2887 u32 next_iwarp_state, u32 wait_completion) 2943 u32 next_iwarp_state, u32 termlen, u32 wait_completion)
2888{ 2944{
2889 struct nes_hw_cqp_wqe *cqp_wqe; 2945 struct nes_hw_cqp_wqe *cqp_wqe;
2890 /* struct iw_cm_id *cm_id = nesqp->cm_id; */ 2946 /* struct iw_cm_id *cm_id = nesqp->cm_id; */
@@ -2916,6 +2972,13 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
2916 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); 2972 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
2917 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase); 2973 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
2918 2974
2975 /* If sending a terminate message, fill in the length (in words) */
2976 if (((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == NES_CQP_QP_IWARP_STATE_TERMINATE) &&
2977 !(next_iwarp_state & NES_CQP_QP_TERM_DONT_SEND_TERM_MSG)) {
2978 termlen = ((termlen + 3) >> 2) << NES_CQP_OP_TERMLEN_SHIFT;
2979 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_NEW_MSS_IDX, termlen);
2980 }
2981
2919 atomic_set(&cqp_request->refcount, 2); 2982 atomic_set(&cqp_request->refcount, 2);
2920 nes_post_cqp_request(nesdev, cqp_request); 2983 nes_post_cqp_request(nesdev, cqp_request);
2921 2984
@@ -3086,6 +3149,9 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3086 } 3149 }
3087 nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n", 3150 nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
3088 nesqp->hwqp.qp_id); 3151 nesqp->hwqp.qp_id);
3152 if (nesqp->term_flags)
3153 del_timer(&nesqp->terminate_timer);
3154
3089 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; 3155 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
3090 /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ 3156 /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
3091 if (nesqp->hte_added) { 3157 if (nesqp->hte_added) {
@@ -3163,7 +3229,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3163 3229
3164 if (issue_modify_qp) { 3230 if (issue_modify_qp) {
3165 nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n"); 3231 nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n");
3166 ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 1); 3232 ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 1);
3167 if (ret) 3233 if (ret)
3168 nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)" 3234 nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)"
3169 " failed for QP%u.\n", 3235 " failed for QP%u.\n",
@@ -3328,6 +3394,12 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3328 head = nesqp->hwqp.sq_head; 3394 head = nesqp->hwqp.sq_head;
3329 3395
3330 while (ib_wr) { 3396 while (ib_wr) {
3397 /* Check for QP error */
3398 if (nesqp->term_flags) {
3399 err = -EINVAL;
3400 break;
3401 }
3402
3331 /* Check for SQ overflow */ 3403 /* Check for SQ overflow */
3332 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { 3404 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
3333 err = -EINVAL; 3405 err = -EINVAL;
@@ -3484,6 +3556,12 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
3484 head = nesqp->hwqp.rq_head; 3556 head = nesqp->hwqp.rq_head;
3485 3557
3486 while (ib_wr) { 3558 while (ib_wr) {
3559 /* Check for QP error */
3560 if (nesqp->term_flags) {
3561 err = -EINVAL;
3562 break;
3563 }
3564
3487 if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { 3565 if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
3488 err = -EINVAL; 3566 err = -EINVAL;
3489 break; 3567 break;
@@ -3547,7 +3625,6 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
3547{ 3625{
3548 u64 u64temp; 3626 u64 u64temp;
3549 u64 wrid; 3627 u64 wrid;
3550 /* u64 u64temp; */
3551 unsigned long flags = 0; 3628 unsigned long flags = 0;
3552 struct nes_vnic *nesvnic = to_nesvnic(ibcq->device); 3629 struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
3553 struct nes_device *nesdev = nesvnic->nesdev; 3630 struct nes_device *nesdev = nesvnic->nesdev;
@@ -3555,12 +3632,13 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
3555 struct nes_qp *nesqp; 3632 struct nes_qp *nesqp;
3556 struct nes_hw_cqe cqe; 3633 struct nes_hw_cqe cqe;
3557 u32 head; 3634 u32 head;
3558 u32 wq_tail; 3635 u32 wq_tail = 0;
3559 u32 cq_size; 3636 u32 cq_size;
3560 u32 cqe_count = 0; 3637 u32 cqe_count = 0;
3561 u32 wqe_index; 3638 u32 wqe_index;
3562 u32 u32temp; 3639 u32 u32temp;
3563 /* u32 counter; */ 3640 u32 move_cq_head = 1;
3641 u32 err_code;
3564 3642
3565 nes_debug(NES_DBG_CQ, "\n"); 3643 nes_debug(NES_DBG_CQ, "\n");
3566 3644
@@ -3570,29 +3648,40 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
3570 cq_size = nescq->hw_cq.cq_size; 3648 cq_size = nescq->hw_cq.cq_size;
3571 3649
3572 while (cqe_count < num_entries) { 3650 while (cqe_count < num_entries) {
3573 if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & 3651 if ((le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
3574 NES_CQE_VALID) { 3652 NES_CQE_VALID) == 0)
3575 /* 3653 break;
3576 * Make sure we read CQ entry contents *after* 3654
3577 * we've checked the valid bit. 3655 /*
3578 */ 3656 * Make sure we read CQ entry contents *after*
3579 rmb(); 3657 * we've checked the valid bit.
3580 3658 */
3581 cqe = nescq->hw_cq.cq_vbase[head]; 3659 rmb();
3582 nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; 3660
3583 u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); 3661 cqe = nescq->hw_cq.cq_vbase[head];
3584 wqe_index = u32temp & 3662 u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
3585 (nesdev->nesadapter->max_qp_wr - 1); 3663 wqe_index = u32temp & (nesdev->nesadapter->max_qp_wr - 1);
3586 u32temp &= ~(NES_SW_CONTEXT_ALIGN-1); 3664 u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
3587 /* parse CQE, get completion context from WQE (either rq or sq */ 3665 /* parse CQE, get completion context from WQE (either rq or sq) */
3588 u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) | 3666 u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
3589 ((u64)u32temp); 3667 ((u64)u32temp);
3590 nesqp = *((struct nes_qp **)&u64temp); 3668
3669 if (u64temp) {
3670 nesqp = (struct nes_qp *)(unsigned long)u64temp;
3591 memset(entry, 0, sizeof *entry); 3671 memset(entry, 0, sizeof *entry);
3592 if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) { 3672 if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) {
3593 entry->status = IB_WC_SUCCESS; 3673 entry->status = IB_WC_SUCCESS;
3594 } else { 3674 } else {
3595 entry->status = IB_WC_WR_FLUSH_ERR; 3675 err_code = le32_to_cpu(cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]);
3676 if (NES_IWARP_CQE_MAJOR_DRV == (err_code >> 16)) {
3677 entry->status = err_code & 0x0000ffff;
3678
3679 /* The rest of the cqe's will be marked as flushed */
3680 nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX] =
3681 cpu_to_le32((NES_IWARP_CQE_MAJOR_FLUSH << 16) |
3682 NES_IWARP_CQE_MINOR_FLUSH);
3683 } else
3684 entry->status = IB_WC_WR_FLUSH_ERR;
3596 } 3685 }
3597 3686
3598 entry->qp = &nesqp->ibqp; 3687 entry->qp = &nesqp->ibqp;
@@ -3601,20 +3690,18 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
3601 if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) { 3690 if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) {
3602 if (nesqp->skip_lsmm) { 3691 if (nesqp->skip_lsmm) {
3603 nesqp->skip_lsmm = 0; 3692 nesqp->skip_lsmm = 0;
3604 wq_tail = nesqp->hwqp.sq_tail++; 3693 nesqp->hwqp.sq_tail++;
3605 } 3694 }
3606 3695
3607 /* Working on a SQ Completion*/ 3696 /* Working on a SQ Completion*/
3608 wq_tail = wqe_index; 3697 wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
3609 nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
3610 wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].
3611 wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) | 3698 wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
3612 ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail]. 3699 ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
3613 wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX]))); 3700 wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX])));
3614 entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail]. 3701 entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
3615 wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]); 3702 wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]);
3616 3703
3617 switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail]. 3704 switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
3618 wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) { 3705 wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) {
3619 case NES_IWARP_SQ_OP_RDMAW: 3706 case NES_IWARP_SQ_OP_RDMAW:
3620 nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n"); 3707 nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n");
@@ -3623,7 +3710,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
3623 case NES_IWARP_SQ_OP_RDMAR: 3710 case NES_IWARP_SQ_OP_RDMAR:
3624 nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n"); 3711 nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n");
3625 entry->opcode = IB_WC_RDMA_READ; 3712 entry->opcode = IB_WC_RDMA_READ;
3626 entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail]. 3713 entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
3627 wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]); 3714 wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]);
3628 break; 3715 break;
3629 case NES_IWARP_SQ_OP_SENDINV: 3716 case NES_IWARP_SQ_OP_SENDINV:
@@ -3634,33 +3721,54 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
3634 entry->opcode = IB_WC_SEND; 3721 entry->opcode = IB_WC_SEND;
3635 break; 3722 break;
3636 } 3723 }
3724
3725 nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
3726 if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)) {
3727 move_cq_head = 0;
3728 wq_tail = nesqp->hwqp.sq_tail;
3729 }
3637 } else { 3730 } else {
3638 /* Working on a RQ Completion*/ 3731 /* Working on a RQ Completion*/
3639 wq_tail = wqe_index;
3640 nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
3641 entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]); 3732 entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]);
3642 wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) | 3733 wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
3643 ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32); 3734 ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
3644 entry->opcode = IB_WC_RECV; 3735 entry->opcode = IB_WC_RECV;
3736
3737 nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
3738 if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) {
3739 move_cq_head = 0;
3740 wq_tail = nesqp->hwqp.rq_tail;
3741 }
3645 } 3742 }
3743
3646 entry->wr_id = wrid; 3744 entry->wr_id = wrid;
3745 entry++;
3746 cqe_count++;
3747 }
3647 3748
3749 if (move_cq_head) {
3750 nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
3648 if (++head >= cq_size) 3751 if (++head >= cq_size)
3649 head = 0; 3752 head = 0;
3650 cqe_count++;
3651 nescq->polled_completions++; 3753 nescq->polled_completions++;
3754
3652 if ((nescq->polled_completions > (cq_size / 2)) || 3755 if ((nescq->polled_completions > (cq_size / 2)) ||
3653 (nescq->polled_completions == 255)) { 3756 (nescq->polled_completions == 255)) {
3654 nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes" 3757 nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes"
3655 " are pending %u of %u.\n", 3758 " are pending %u of %u.\n",
3656 nescq->hw_cq.cq_number, nescq->polled_completions, cq_size); 3759 nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
3657 nes_write32(nesdev->regs+NES_CQE_ALLOC, 3760 nes_write32(nesdev->regs+NES_CQE_ALLOC,
3658 nescq->hw_cq.cq_number | (nescq->polled_completions << 16)); 3761 nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
3659 nescq->polled_completions = 0; 3762 nescq->polled_completions = 0;
3660 } 3763 }
3661 entry++; 3764 } else {
3662 } else 3765 /* Update the wqe index and set status to flush */
3663 break; 3766 wqe_index = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
3767 wqe_index = (wqe_index & (~(nesdev->nesadapter->max_qp_wr - 1))) | wq_tail;
3768 nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] =
3769 cpu_to_le32(wqe_index);
3770 move_cq_head = 1; /* ready for next pass */
3771 }
3664 } 3772 }
3665 3773
3666 if (nescq->polled_completions) { 3774 if (nescq->polled_completions) {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 41c07f29f7c9..89822d75f82e 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -40,6 +40,10 @@ struct nes_device;
40#define NES_MAX_USER_DB_REGIONS 4096 40#define NES_MAX_USER_DB_REGIONS 4096
41#define NES_MAX_USER_WQ_REGIONS 4096 41#define NES_MAX_USER_WQ_REGIONS 4096
42 42
43#define NES_TERM_SENT 0x01
44#define NES_TERM_RCVD 0x02
45#define NES_TERM_DONE 0x04
46
43struct nes_ucontext { 47struct nes_ucontext {
44 struct ib_ucontext ibucontext; 48 struct ib_ucontext ibucontext;
45 struct nes_device *nesdev; 49 struct nes_device *nesdev;
@@ -119,6 +123,11 @@ struct nes_wq {
119 spinlock_t lock; 123 spinlock_t lock;
120}; 124};
121 125
126struct disconn_work {
127 struct work_struct work;
128 struct nes_qp *nesqp;
129};
130
122struct iw_cm_id; 131struct iw_cm_id;
123struct ietf_mpa_frame; 132struct ietf_mpa_frame;
124 133
@@ -127,7 +136,6 @@ struct nes_qp {
127 void *allocated_buffer; 136 void *allocated_buffer;
128 struct iw_cm_id *cm_id; 137 struct iw_cm_id *cm_id;
129 struct workqueue_struct *wq; 138 struct workqueue_struct *wq;
130 struct work_struct disconn_work;
131 struct nes_cq *nesscq; 139 struct nes_cq *nesscq;
132 struct nes_cq *nesrcq; 140 struct nes_cq *nesrcq;
133 struct nes_pd *nespd; 141 struct nes_pd *nespd;
@@ -155,9 +163,13 @@ struct nes_qp {
155 void *pbl_vbase; 163 void *pbl_vbase;
156 dma_addr_t pbl_pbase; 164 dma_addr_t pbl_pbase;
157 struct page *page; 165 struct page *page;
166 struct timer_list terminate_timer;
167 enum ib_event_type terminate_eventtype;
158 wait_queue_head_t kick_waitq; 168 wait_queue_head_t kick_waitq;
159 u16 in_disconnect; 169 u16 in_disconnect;
160 u16 private_data_len; 170 u16 private_data_len;
171 u16 term_sq_flush_code;
172 u16 term_rq_flush_code;
161 u8 active_conn; 173 u8 active_conn;
162 u8 skip_lsmm; 174 u8 skip_lsmm;
163 u8 user_mode; 175 u8 user_mode;
@@ -165,7 +177,7 @@ struct nes_qp {
165 u8 hw_iwarp_state; 177 u8 hw_iwarp_state;
166 u8 flush_issued; 178 u8 flush_issued;
167 u8 hw_tcp_state; 179 u8 hw_tcp_state;
168 u8 disconn_pending; 180 u8 term_flags;
169 u8 destroyed; 181 u8 destroyed;
170}; 182};
171#endif /* NES_VERBS_H */ 183#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 181b1f32325f..8f4b4fca2a1d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -31,7 +31,6 @@
31 */ 31 */
32 32
33#include <rdma/ib_cm.h> 33#include <rdma/ib_cm.h>
34#include <rdma/ib_cache.h>
35#include <net/dst.h> 34#include <net/dst.h>
36#include <net/icmp.h> 35#include <net/icmp.h>
37#include <linux/icmpv6.h> 36#include <linux/icmpv6.h>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index e7e5adf84e84..e35f4a0ea9d5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -36,7 +36,6 @@
36#include <linux/delay.h> 36#include <linux/delay.h>
37#include <linux/dma-mapping.h> 37#include <linux/dma-mapping.h>
38 38
39#include <rdma/ib_cache.h>
40#include <linux/ip.h> 39#include <linux/ip.h>
41#include <linux/tcp.h> 40#include <linux/tcp.h>
42 41
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e319d91f60a6..2bf5116deec4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -604,8 +604,11 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
604 skb_queue_len(&neigh->queue)); 604 skb_queue_len(&neigh->queue));
605 goto err_drop; 605 goto err_drop;
606 } 606 }
607 } else 607 } else {
608 spin_unlock_irqrestore(&priv->lock, flags);
608 ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha)); 609 ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha));
610 return;
611 }
609 } else { 612 } else {
610 neigh->ah = NULL; 613 neigh->ah = NULL;
611 614
@@ -688,7 +691,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
688 ipoib_dbg(priv, "Send unicast ARP to %04x\n", 691 ipoib_dbg(priv, "Send unicast ARP to %04x\n",
689 be16_to_cpu(path->pathrec.dlid)); 692 be16_to_cpu(path->pathrec.dlid));
690 693
694 spin_unlock_irqrestore(&priv->lock, flags);
691 ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr)); 695 ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
696 return;
692 } else if ((path->query || !path_rec_start(dev, path)) && 697 } else if ((path->query || !path_rec_start(dev, path)) &&
693 skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { 698 skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
694 /* put pseudoheader back on for next time */ 699 /* put pseudoheader back on for next time */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index a0e97532e714..25874fc680c9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -720,7 +720,9 @@ out:
720 } 720 }
721 } 721 }
722 722
723 spin_unlock_irqrestore(&priv->lock, flags);
723 ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); 724 ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
725 return;
724 } 726 }
725 727
726unlock: 728unlock:
@@ -758,6 +760,20 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
758 } 760 }
759} 761}
760 762
763static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
764 const u8 *broadcast)
765{
766 if (addrlen != INFINIBAND_ALEN)
767 return 0;
768 /* reserved QPN, prefix, scope */
769 if (memcmp(addr, broadcast, 6))
770 return 0;
771 /* signature lower, pkey */
772 if (memcmp(addr + 7, broadcast + 7, 3))
773 return 0;
774 return 1;
775}
776
761void ipoib_mcast_restart_task(struct work_struct *work) 777void ipoib_mcast_restart_task(struct work_struct *work)
762{ 778{
763 struct ipoib_dev_priv *priv = 779 struct ipoib_dev_priv *priv =
@@ -791,6 +807,11 @@ void ipoib_mcast_restart_task(struct work_struct *work)
791 for (mclist = dev->mc_list; mclist; mclist = mclist->next) { 807 for (mclist = dev->mc_list; mclist; mclist = mclist->next) {
792 union ib_gid mgid; 808 union ib_gid mgid;
793 809
810 if (!ipoib_mcast_addr_is_valid(mclist->dmi_addr,
811 mclist->dmi_addrlen,
812 dev->broadcast))
813 continue;
814
794 memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); 815 memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid);
795 816
796 mcast = __ipoib_mcast_find(dev, &mgid); 817 mcast = __ipoib_mcast_find(dev, &mgid);
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 6e186b1a062d..652bd33109e3 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -582,7 +582,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
582 break; 582 break;
583 case STATUSTYPE_TABLE: 583 case STATUSTYPE_TABLE:
584 sz = 0; 584 sz = 0;
585 table_args = strstr(lc->usr_argv_str, " "); 585 table_args = strchr(lc->usr_argv_str, ' ');
586 BUG_ON(!table_args); /* There will always be a ' ' */ 586 BUG_ON(!table_args); /* There will always be a ' ' */
587 table_args++; 587 table_args++;
588 588
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index fb5df5c6203e..c97ab82ec743 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -1286,6 +1286,7 @@ static int cxgb_open(struct net_device *dev)
1286 if (!other_ports) 1286 if (!other_ports)
1287 schedule_chk_task(adapter); 1287 schedule_chk_task(adapter);
1288 1288
1289 cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_UP, pi->port_id);
1289 return 0; 1290 return 0;
1290} 1291}
1291 1292
@@ -1318,6 +1319,7 @@ static int cxgb_close(struct net_device *dev)
1318 if (!adapter->open_device_map) 1319 if (!adapter->open_device_map)
1319 cxgb_down(adapter); 1320 cxgb_down(adapter);
1320 1321
1322 cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_DOWN, pi->port_id);
1321 return 0; 1323 return 0;
1322} 1324}
1323 1325
@@ -2717,7 +2719,7 @@ static int t3_adapter_error(struct adapter *adapter, int reset)
2717 2719
2718 if (is_offload(adapter) && 2720 if (is_offload(adapter) &&
2719 test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) { 2721 test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) {
2720 cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0); 2722 cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
2721 offload_close(&adapter->tdev); 2723 offload_close(&adapter->tdev);
2722 } 2724 }
2723 2725
@@ -2782,7 +2784,7 @@ static void t3_resume_ports(struct adapter *adapter)
2782 } 2784 }
2783 2785
2784 if (is_offload(adapter) && !ofld_disable) 2786 if (is_offload(adapter) && !ofld_disable)
2785 cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0); 2787 cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
2786} 2788}
2787 2789
2788/* 2790/*
diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
index f9f54b57b28c..75064eea1d87 100644
--- a/drivers/net/cxgb3/cxgb3_offload.c
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -153,14 +153,14 @@ void cxgb3_remove_clients(struct t3cdev *tdev)
153 mutex_unlock(&cxgb3_db_lock); 153 mutex_unlock(&cxgb3_db_lock);
154} 154}
155 155
156void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error) 156void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port)
157{ 157{
158 struct cxgb3_client *client; 158 struct cxgb3_client *client;
159 159
160 mutex_lock(&cxgb3_db_lock); 160 mutex_lock(&cxgb3_db_lock);
161 list_for_each_entry(client, &client_list, client_list) { 161 list_for_each_entry(client, &client_list, client_list) {
162 if (client->err_handler) 162 if (client->event_handler)
163 client->err_handler(tdev, status, error); 163 client->event_handler(tdev, event, port);
164 } 164 }
165 mutex_unlock(&cxgb3_db_lock); 165 mutex_unlock(&cxgb3_db_lock);
166} 166}
diff --git a/drivers/net/cxgb3/cxgb3_offload.h b/drivers/net/cxgb3/cxgb3_offload.h
index 55945f422aec..670aa62042da 100644
--- a/drivers/net/cxgb3/cxgb3_offload.h
+++ b/drivers/net/cxgb3/cxgb3_offload.h
@@ -64,14 +64,16 @@ void cxgb3_register_client(struct cxgb3_client *client);
64void cxgb3_unregister_client(struct cxgb3_client *client); 64void cxgb3_unregister_client(struct cxgb3_client *client);
65void cxgb3_add_clients(struct t3cdev *tdev); 65void cxgb3_add_clients(struct t3cdev *tdev);
66void cxgb3_remove_clients(struct t3cdev *tdev); 66void cxgb3_remove_clients(struct t3cdev *tdev);
67void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error); 67void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port);
68 68
69typedef int (*cxgb3_cpl_handler_func)(struct t3cdev *dev, 69typedef int (*cxgb3_cpl_handler_func)(struct t3cdev *dev,
70 struct sk_buff *skb, void *ctx); 70 struct sk_buff *skb, void *ctx);
71 71
72enum { 72enum {
73 OFFLOAD_STATUS_UP, 73 OFFLOAD_STATUS_UP,
74 OFFLOAD_STATUS_DOWN 74 OFFLOAD_STATUS_DOWN,
75 OFFLOAD_PORT_DOWN,
76 OFFLOAD_PORT_UP
75}; 77};
76 78
77struct cxgb3_client { 79struct cxgb3_client {
@@ -82,7 +84,7 @@ struct cxgb3_client {
82 int (*redirect)(void *ctx, struct dst_entry *old, 84 int (*redirect)(void *ctx, struct dst_entry *old,
83 struct dst_entry *new, struct l2t_entry *l2t); 85 struct dst_entry *new, struct l2t_entry *l2t);
84 struct list_head client_list; 86 struct list_head client_list;
85 void (*err_handler)(struct t3cdev *tdev, u32 status, u32 error); 87 void (*event_handler)(struct t3cdev *tdev, u32 event, u32 port);
86}; 88};
87 89
88/* 90/*
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index ac57b6a42c6e..ccfe276943f0 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -34,7 +34,6 @@
34 * SOFTWARE. 34 * SOFTWARE.
35 */ 35 */
36 36
37#include <linux/init.h>
38#include <linux/hardirq.h> 37#include <linux/hardirq.h>
39 38
40#include <linux/mlx4/cmd.h> 39#include <linux/mlx4/cmd.h>
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index b9ceddde46c0..bffb7995cb70 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -31,7 +31,6 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/init.h>
35#include <linux/interrupt.h> 34#include <linux/interrupt.h>
36#include <linux/mm.h> 35#include <linux/mm.h>
37#include <linux/dma-mapping.h> 36#include <linux/dma-mapping.h>
@@ -42,6 +41,10 @@
42#include "fw.h" 41#include "fw.h"
43 42
44enum { 43enum {
44 MLX4_IRQNAME_SIZE = 64
45};
46
47enum {
45 MLX4_NUM_ASYNC_EQE = 0x100, 48 MLX4_NUM_ASYNC_EQE = 0x100,
46 MLX4_NUM_SPARE_EQE = 0x80, 49 MLX4_NUM_SPARE_EQE = 0x80,
47 MLX4_EQ_ENTRY_SIZE = 0x20 50 MLX4_EQ_ENTRY_SIZE = 0x20
@@ -526,48 +529,6 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
526 iounmap(priv->clr_base); 529 iounmap(priv->clr_base);
527} 530}
528 531
529int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
530{
531 struct mlx4_priv *priv = mlx4_priv(dev);
532 int ret;
533
534 /*
535 * We assume that mapping one page is enough for the whole EQ
536 * context table. This is fine with all current HCAs, because
537 * we only use 32 EQs and each EQ uses 64 bytes of context
538 * memory, or 1 KB total.
539 */
540 priv->eq_table.icm_virt = icm_virt;
541 priv->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
542 if (!priv->eq_table.icm_page)
543 return -ENOMEM;
544 priv->eq_table.icm_dma = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0,
545 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
546 if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) {
547 __free_page(priv->eq_table.icm_page);
548 return -ENOMEM;
549 }
550
551 ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt);
552 if (ret) {
553 pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
554 PCI_DMA_BIDIRECTIONAL);
555 __free_page(priv->eq_table.icm_page);
556 }
557
558 return ret;
559}
560
561void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
562{
563 struct mlx4_priv *priv = mlx4_priv(dev);
564
565 mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1);
566 pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
567 PCI_DMA_BIDIRECTIONAL);
568 __free_page(priv->eq_table.icm_page);
569}
570
571int mlx4_alloc_eq_table(struct mlx4_dev *dev) 532int mlx4_alloc_eq_table(struct mlx4_dev *dev)
572{ 533{
573 struct mlx4_priv *priv = mlx4_priv(dev); 534 struct mlx4_priv *priv = mlx4_priv(dev);
@@ -615,7 +576,9 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
615 priv->eq_table.clr_int = priv->clr_base + 576 priv->eq_table.clr_int = priv->clr_base +
616 (priv->eq_table.inta_pin < 32 ? 4 : 0); 577 (priv->eq_table.inta_pin < 32 ? 4 : 0);
617 578
618 priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL); 579 priv->eq_table.irq_names =
580 kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1),
581 GFP_KERNEL);
619 if (!priv->eq_table.irq_names) { 582 if (!priv->eq_table.irq_names) {
620 err = -ENOMEM; 583 err = -ENOMEM;
621 goto err_out_bitmap; 584 goto err_out_bitmap;
@@ -638,17 +601,25 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
638 goto err_out_comp; 601 goto err_out_comp;
639 602
640 if (dev->flags & MLX4_FLAG_MSI_X) { 603 if (dev->flags & MLX4_FLAG_MSI_X) {
641 static const char async_eq_name[] = "mlx4-async";
642 const char *eq_name; 604 const char *eq_name;
643 605
644 for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { 606 for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
645 if (i < dev->caps.num_comp_vectors) { 607 if (i < dev->caps.num_comp_vectors) {
646 snprintf(priv->eq_table.irq_names + i * 16, 16, 608 snprintf(priv->eq_table.irq_names +
647 "mlx4-comp-%d", i); 609 i * MLX4_IRQNAME_SIZE,
648 eq_name = priv->eq_table.irq_names + i * 16; 610 MLX4_IRQNAME_SIZE,
649 } else 611 "mlx4-comp-%d@pci:%s", i,
650 eq_name = async_eq_name; 612 pci_name(dev->pdev));
613 } else {
614 snprintf(priv->eq_table.irq_names +
615 i * MLX4_IRQNAME_SIZE,
616 MLX4_IRQNAME_SIZE,
617 "mlx4-async@pci:%s",
618 pci_name(dev->pdev));
619 }
651 620
621 eq_name = priv->eq_table.irq_names +
622 i * MLX4_IRQNAME_SIZE;
652 err = request_irq(priv->eq_table.eq[i].irq, 623 err = request_irq(priv->eq_table.eq[i].irq,
653 mlx4_msi_x_interrupt, 0, eq_name, 624 mlx4_msi_x_interrupt, 0, eq_name,
654 priv->eq_table.eq + i); 625 priv->eq_table.eq + i);
@@ -658,8 +629,12 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
658 priv->eq_table.eq[i].have_irq = 1; 629 priv->eq_table.eq[i].have_irq = 1;
659 } 630 }
660 } else { 631 } else {
632 snprintf(priv->eq_table.irq_names,
633 MLX4_IRQNAME_SIZE,
634 DRV_NAME "@pci:%s",
635 pci_name(dev->pdev));
661 err = request_irq(dev->pdev->irq, mlx4_interrupt, 636 err = request_irq(dev->pdev->irq, mlx4_interrupt,
662 IRQF_SHARED, DRV_NAME, dev); 637 IRQF_SHARED, priv->eq_table.irq_names, dev);
663 if (err) 638 if (err)
664 goto err_out_async; 639 goto err_out_async;
665 640
diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c
index baf4bf66062c..04b382fcb8c8 100644
--- a/drivers/net/mlx4/icm.c
+++ b/drivers/net/mlx4/icm.c
@@ -31,7 +31,6 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/init.h>
35#include <linux/errno.h> 34#include <linux/errno.h>
36#include <linux/mm.h> 35#include <linux/mm.h>
37#include <linux/scatterlist.h> 36#include <linux/scatterlist.h>
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index dac621b1e9fc..3dd481e77f92 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -525,7 +525,10 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
525 goto err_unmap_aux; 525 goto err_unmap_aux;
526 } 526 }
527 527
528 err = mlx4_map_eq_icm(dev, init_hca->eqc_base); 528 err = mlx4_init_icm_table(dev, &priv->eq_table.table,
529 init_hca->eqc_base, dev_cap->eqc_entry_sz,
530 dev->caps.num_eqs, dev->caps.num_eqs,
531 0, 0);
529 if (err) { 532 if (err) {
530 mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); 533 mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
531 goto err_unmap_cmpt; 534 goto err_unmap_cmpt;
@@ -668,7 +671,7 @@ err_unmap_mtt:
668 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 671 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
669 672
670err_unmap_eq: 673err_unmap_eq:
671 mlx4_unmap_eq_icm(dev); 674 mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
672 675
673err_unmap_cmpt: 676err_unmap_cmpt:
674 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 677 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
@@ -698,11 +701,11 @@ static void mlx4_free_icms(struct mlx4_dev *dev)
698 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 701 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
699 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 702 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
700 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 703 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
704 mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
701 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 705 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
702 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 706 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
703 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 707 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
704 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 708 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
705 mlx4_unmap_eq_icm(dev);
706 709
707 mlx4_UNMAP_ICM_AUX(dev); 710 mlx4_UNMAP_ICM_AUX(dev);
708 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 711 mlx4_free_icm(dev, priv->fw.aux_icm, 0);
@@ -786,7 +789,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
786 return 0; 789 return 0;
787 790
788err_close: 791err_close:
789 mlx4_close_hca(dev); 792 mlx4_CLOSE_HCA(dev, 0);
790 793
791err_free_icm: 794err_free_icm:
792 mlx4_free_icms(dev); 795 mlx4_free_icms(dev);
@@ -1070,18 +1073,12 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
1070 goto err_disable_pdev; 1073 goto err_disable_pdev;
1071 } 1074 }
1072 1075
1073 err = pci_request_region(pdev, 0, DRV_NAME); 1076 err = pci_request_regions(pdev, DRV_NAME);
1074 if (err) { 1077 if (err) {
1075 dev_err(&pdev->dev, "Cannot request control region, aborting.\n"); 1078 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
1076 goto err_disable_pdev; 1079 goto err_disable_pdev;
1077 } 1080 }
1078 1081
1079 err = pci_request_region(pdev, 2, DRV_NAME);
1080 if (err) {
1081 dev_err(&pdev->dev, "Cannot request UAR region, aborting.\n");
1082 goto err_release_bar0;
1083 }
1084
1085 pci_set_master(pdev); 1082 pci_set_master(pdev);
1086 1083
1087 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 1084 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1090,7 +1087,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
1090 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 1087 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1091 if (err) { 1088 if (err) {
1092 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); 1089 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
1093 goto err_release_bar2; 1090 goto err_release_regions;
1094 } 1091 }
1095 } 1092 }
1096 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 1093 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1101,7 +1098,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
1101 if (err) { 1098 if (err) {
1102 dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " 1099 dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
1103 "aborting.\n"); 1100 "aborting.\n");
1104 goto err_release_bar2; 1101 goto err_release_regions;
1105 } 1102 }
1106 } 1103 }
1107 1104
@@ -1110,7 +1107,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
1110 dev_err(&pdev->dev, "Device struct alloc failed, " 1107 dev_err(&pdev->dev, "Device struct alloc failed, "
1111 "aborting.\n"); 1108 "aborting.\n");
1112 err = -ENOMEM; 1109 err = -ENOMEM;
1113 goto err_release_bar2; 1110 goto err_release_regions;
1114 } 1111 }
1115 1112
1116 dev = &priv->dev; 1113 dev = &priv->dev;
@@ -1205,11 +1202,8 @@ err_cmd:
1205err_free_dev: 1202err_free_dev:
1206 kfree(priv); 1203 kfree(priv);
1207 1204
1208err_release_bar2: 1205err_release_regions:
1209 pci_release_region(pdev, 2); 1206 pci_release_regions(pdev);
1210
1211err_release_bar0:
1212 pci_release_region(pdev, 0);
1213 1207
1214err_disable_pdev: 1208err_disable_pdev:
1215 pci_disable_device(pdev); 1209 pci_disable_device(pdev);
@@ -1265,8 +1259,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
1265 pci_disable_msix(pdev); 1259 pci_disable_msix(pdev);
1266 1260
1267 kfree(priv); 1261 kfree(priv);
1268 pci_release_region(pdev, 2); 1262 pci_release_regions(pdev);
1269 pci_release_region(pdev, 0);
1270 pci_disable_device(pdev); 1263 pci_disable_device(pdev);
1271 pci_set_drvdata(pdev, NULL); 1264 pci_set_drvdata(pdev, NULL);
1272 } 1265 }
diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c
index 6053c357a470..5ccbce9866fe 100644
--- a/drivers/net/mlx4/mcg.c
+++ b/drivers/net/mlx4/mcg.c
@@ -31,7 +31,6 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/init.h>
35#include <linux/string.h> 34#include <linux/string.h>
36#include <linux/slab.h> 35#include <linux/slab.h>
37 36
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 5bd79c2b184f..bc72d6e4919b 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -205,9 +205,7 @@ struct mlx4_eq_table {
205 void __iomem **uar_map; 205 void __iomem **uar_map;
206 u32 clr_mask; 206 u32 clr_mask;
207 struct mlx4_eq *eq; 207 struct mlx4_eq *eq;
208 u64 icm_virt; 208 struct mlx4_icm_table table;
209 struct page *icm_page;
210 dma_addr_t icm_dma;
211 struct mlx4_icm_table cmpt_table; 209 struct mlx4_icm_table cmpt_table;
212 int have_irq; 210 int have_irq;
213 u8 inta_pin; 211 u8 inta_pin;
@@ -373,9 +371,6 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
373 struct mlx4_dev_cap *dev_cap, 371 struct mlx4_dev_cap *dev_cap,
374 struct mlx4_init_hca_param *init_hca); 372 struct mlx4_init_hca_param *init_hca);
375 373
376int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt);
377void mlx4_unmap_eq_icm(struct mlx4_dev *dev);
378
379int mlx4_cmd_init(struct mlx4_dev *dev); 374int mlx4_cmd_init(struct mlx4_dev *dev);
380void mlx4_cmd_cleanup(struct mlx4_dev *dev); 375void mlx4_cmd_cleanup(struct mlx4_dev *dev);
381void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); 376void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index f96948be0a44..ca7ab8e7b4cc 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -32,7 +32,6 @@
32 * SOFTWARE. 32 * SOFTWARE.
33 */ 33 */
34 34
35#include <linux/init.h>
36#include <linux/errno.h> 35#include <linux/errno.h>
37 36
38#include <linux/mlx4/cmd.h> 37#include <linux/mlx4/cmd.h>
diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
index 26d1a7a9e375..c4988d6bd5b2 100644
--- a/drivers/net/mlx4/pd.c
+++ b/drivers/net/mlx4/pd.c
@@ -31,7 +31,6 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/init.h>
35#include <linux/errno.h> 34#include <linux/errno.h>
36 35
37#include <asm/page.h> 36#include <asm/page.h>
diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c
index bd22df95adf9..ca25b9dc8378 100644
--- a/drivers/net/mlx4/profile.c
+++ b/drivers/net/mlx4/profile.c
@@ -32,8 +32,6 @@
32 * SOFTWARE. 32 * SOFTWARE.
33 */ 33 */
34 34
35#include <linux/init.h>
36
37#include "mlx4.h" 35#include "mlx4.h"
38#include "fw.h" 36#include "fw.h"
39 37
diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c
index 1c565ef8d179..42ab9fc01d3e 100644
--- a/drivers/net/mlx4/qp.c
+++ b/drivers/net/mlx4/qp.c
@@ -33,8 +33,6 @@
33 * SOFTWARE. 33 * SOFTWARE.
34 */ 34 */
35 35
36#include <linux/init.h>
37
38#include <linux/mlx4/cmd.h> 36#include <linux/mlx4/cmd.h>
39#include <linux/mlx4/qp.h> 37#include <linux/mlx4/qp.h>
40 38
diff --git a/drivers/net/mlx4/reset.c b/drivers/net/mlx4/reset.c
index 3951b884c0fb..e5741dab3825 100644
--- a/drivers/net/mlx4/reset.c
+++ b/drivers/net/mlx4/reset.c
@@ -31,7 +31,6 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/init.h>
35#include <linux/errno.h> 34#include <linux/errno.h>
36#include <linux/pci.h> 35#include <linux/pci.h>
37#include <linux/delay.h> 36#include <linux/delay.h>
diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c
index fe9f218691f5..1377d0dc8f1f 100644
--- a/drivers/net/mlx4/srq.c
+++ b/drivers/net/mlx4/srq.c
@@ -31,8 +31,6 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/init.h>
35
36#include <linux/mlx4/cmd.h> 34#include <linux/mlx4/cmd.h>
37 35
38#include "mlx4.h" 36#include "mlx4.h"
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 42b6c6319bc2..87214a257d2a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -130,17 +130,10 @@ static inline struct tun_sock *tun_sk(struct sock *sk)
130static int tun_attach(struct tun_struct *tun, struct file *file) 130static int tun_attach(struct tun_struct *tun, struct file *file)
131{ 131{
132 struct tun_file *tfile = file->private_data; 132 struct tun_file *tfile = file->private_data;
133 const struct cred *cred = current_cred();
134 int err; 133 int err;
135 134
136 ASSERT_RTNL(); 135 ASSERT_RTNL();
137 136
138 /* Check permissions */
139 if (((tun->owner != -1 && cred->euid != tun->owner) ||
140 (tun->group != -1 && !in_egroup_p(tun->group))) &&
141 !capable(CAP_NET_ADMIN))
142 return -EPERM;
143
144 netif_tx_lock_bh(tun->dev); 137 netif_tx_lock_bh(tun->dev);
145 138
146 err = -EINVAL; 139 err = -EINVAL;
@@ -926,6 +919,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
926 919
927 dev = __dev_get_by_name(net, ifr->ifr_name); 920 dev = __dev_get_by_name(net, ifr->ifr_name);
928 if (dev) { 921 if (dev) {
922 const struct cred *cred = current_cred();
923
929 if (ifr->ifr_flags & IFF_TUN_EXCL) 924 if (ifr->ifr_flags & IFF_TUN_EXCL)
930 return -EBUSY; 925 return -EBUSY;
931 if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) 926 if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
@@ -935,6 +930,14 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
935 else 930 else
936 return -EINVAL; 931 return -EINVAL;
937 932
933 if (((tun->owner != -1 && cred->euid != tun->owner) ||
934 (tun->group != -1 && !in_egroup_p(tun->group))) &&
935 !capable(CAP_NET_ADMIN))
936 return -EPERM;
937 err = security_tun_dev_attach(tun->sk);
938 if (err < 0)
939 return err;
940
938 err = tun_attach(tun, file); 941 err = tun_attach(tun, file);
939 if (err < 0) 942 if (err < 0)
940 return err; 943 return err;
@@ -947,6 +950,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
947 950
948 if (!capable(CAP_NET_ADMIN)) 951 if (!capable(CAP_NET_ADMIN))
949 return -EPERM; 952 return -EPERM;
953 err = security_tun_dev_create();
954 if (err < 0)
955 return err;
950 956
951 /* Set dev type */ 957 /* Set dev type */
952 if (ifr->ifr_flags & IFF_TUN) { 958 if (ifr->ifr_flags & IFF_TUN) {
@@ -989,6 +995,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
989 tun->sk = sk; 995 tun->sk = sk;
990 container_of(sk, struct tun_sock, sk)->tun = tun; 996 container_of(sk, struct tun_sock, sk)->tun = tun;
991 997
998 security_tun_dev_post_create(sk);
999
992 tun_net_init(dev); 1000 tun_net_init(dev);
993 1001
994 if (strchr(dev->name, '%')) { 1002 if (strchr(dev->name, '%')) {
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 3f62dd50bbbe..e109da4583a8 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -669,14 +669,14 @@ static void dasd_profile_end(struct dasd_block *block,
669 * memory and 2) dasd_smalloc_request uses the static ccw memory 669 * memory and 2) dasd_smalloc_request uses the static ccw memory
670 * that gets allocated for each device. 670 * that gets allocated for each device.
671 */ 671 */
672struct dasd_ccw_req *dasd_kmalloc_request(char *magic, int cplength, 672struct dasd_ccw_req *dasd_kmalloc_request(int magic, int cplength,
673 int datasize, 673 int datasize,
674 struct dasd_device *device) 674 struct dasd_device *device)
675{ 675{
676 struct dasd_ccw_req *cqr; 676 struct dasd_ccw_req *cqr;
677 677
678 /* Sanity checks */ 678 /* Sanity checks */
679 BUG_ON( magic == NULL || datasize > PAGE_SIZE || 679 BUG_ON(datasize > PAGE_SIZE ||
680 (cplength*sizeof(struct ccw1)) > PAGE_SIZE); 680 (cplength*sizeof(struct ccw1)) > PAGE_SIZE);
681 681
682 cqr = kzalloc(sizeof(struct dasd_ccw_req), GFP_ATOMIC); 682 cqr = kzalloc(sizeof(struct dasd_ccw_req), GFP_ATOMIC);
@@ -700,14 +700,13 @@ struct dasd_ccw_req *dasd_kmalloc_request(char *magic, int cplength,
700 return ERR_PTR(-ENOMEM); 700 return ERR_PTR(-ENOMEM);
701 } 701 }
702 } 702 }
703 strncpy((char *) &cqr->magic, magic, 4); 703 cqr->magic = magic;
704 ASCEBC((char *) &cqr->magic, 4);
705 set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); 704 set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
706 dasd_get_device(device); 705 dasd_get_device(device);
707 return cqr; 706 return cqr;
708} 707}
709 708
710struct dasd_ccw_req *dasd_smalloc_request(char *magic, int cplength, 709struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength,
711 int datasize, 710 int datasize,
712 struct dasd_device *device) 711 struct dasd_device *device)
713{ 712{
@@ -717,7 +716,7 @@ struct dasd_ccw_req *dasd_smalloc_request(char *magic, int cplength,
717 int size; 716 int size;
718 717
719 /* Sanity checks */ 718 /* Sanity checks */
720 BUG_ON( magic == NULL || datasize > PAGE_SIZE || 719 BUG_ON(datasize > PAGE_SIZE ||
721 (cplength*sizeof(struct ccw1)) > PAGE_SIZE); 720 (cplength*sizeof(struct ccw1)) > PAGE_SIZE);
722 721
723 size = (sizeof(struct dasd_ccw_req) + 7L) & -8L; 722 size = (sizeof(struct dasd_ccw_req) + 7L) & -8L;
@@ -744,8 +743,7 @@ struct dasd_ccw_req *dasd_smalloc_request(char *magic, int cplength,
744 cqr->data = data; 743 cqr->data = data;
745 memset(cqr->data, 0, datasize); 744 memset(cqr->data, 0, datasize);
746 } 745 }
747 strncpy((char *) &cqr->magic, magic, 4); 746 cqr->magic = magic;
748 ASCEBC((char *) &cqr->magic, 4);
749 set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); 747 set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
750 dasd_get_device(device); 748 dasd_get_device(device);
751 return cqr; 749 return cqr;
@@ -899,9 +897,6 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
899 switch (rc) { 897 switch (rc) {
900 case 0: 898 case 0:
901 cqr->status = DASD_CQR_IN_IO; 899 cqr->status = DASD_CQR_IN_IO;
902 DBF_DEV_EVENT(DBF_DEBUG, device,
903 "start_IO: request %p started successful",
904 cqr);
905 break; 900 break;
906 case -EBUSY: 901 case -EBUSY:
907 DBF_DEV_EVENT(DBF_DEBUG, device, "%s", 902 DBF_DEV_EVENT(DBF_DEBUG, device, "%s",
@@ -1699,8 +1694,11 @@ static void __dasd_process_request_queue(struct dasd_block *block)
1699 * for that. State DASD_STATE_ONLINE is normal block device 1694 * for that. State DASD_STATE_ONLINE is normal block device
1700 * operation. 1695 * operation.
1701 */ 1696 */
1702 if (basedev->state < DASD_STATE_READY) 1697 if (basedev->state < DASD_STATE_READY) {
1698 while ((req = blk_fetch_request(block->request_queue)))
1699 __blk_end_request_all(req, -EIO);
1703 return; 1700 return;
1701 }
1704 /* Now we try to fetch requests from the request queue */ 1702 /* Now we try to fetch requests from the request queue */
1705 while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) { 1703 while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) {
1706 if (basedev->features & DASD_FEATURE_READONLY && 1704 if (basedev->features & DASD_FEATURE_READONLY &&
@@ -2530,7 +2528,7 @@ EXPORT_SYMBOL_GPL(dasd_generic_restore_device);
2530static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, 2528static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
2531 void *rdc_buffer, 2529 void *rdc_buffer,
2532 int rdc_buffer_size, 2530 int rdc_buffer_size,
2533 char *magic) 2531 int magic)
2534{ 2532{
2535 struct dasd_ccw_req *cqr; 2533 struct dasd_ccw_req *cqr;
2536 struct ccw1 *ccw; 2534 struct ccw1 *ccw;
@@ -2561,7 +2559,7 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
2561} 2559}
2562 2560
2563 2561
2564int dasd_generic_read_dev_chars(struct dasd_device *device, char *magic, 2562int dasd_generic_read_dev_chars(struct dasd_device *device, int magic,
2565 void *rdc_buffer, int rdc_buffer_size) 2563 void *rdc_buffer, int rdc_buffer_size)
2566{ 2564{
2567 int ret; 2565 int ret;
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index 27991b692056..e8ff7b0c961d 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -7,7 +7,7 @@
7 * 7 *
8 */ 8 */
9 9
10#define KMSG_COMPONENT "dasd" 10#define KMSG_COMPONENT "dasd-eckd"
11 11
12#include <linux/timer.h> 12#include <linux/timer.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index 5b7bbc87593b..70a008c00522 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -5,7 +5,7 @@
5 * Author(s): Stefan Weinhuber <wein@de.ibm.com> 5 * Author(s): Stefan Weinhuber <wein@de.ibm.com>
6 */ 6 */
7 7
8#define KMSG_COMPONENT "dasd" 8#define KMSG_COMPONENT "dasd-eckd"
9 9
10#include <linux/list.h> 10#include <linux/list.h>
11#include <asm/ebcdic.h> 11#include <asm/ebcdic.h>
@@ -379,8 +379,7 @@ static int read_unit_address_configuration(struct dasd_device *device,
379 int rc; 379 int rc;
380 unsigned long flags; 380 unsigned long flags;
381 381
382 cqr = dasd_kmalloc_request("ECKD", 382 cqr = dasd_kmalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
383 1 /* PSF */ + 1 /* RSSD */ ,
384 (sizeof(struct dasd_psf_prssd_data)), 383 (sizeof(struct dasd_psf_prssd_data)),
385 device); 384 device);
386 if (IS_ERR(cqr)) 385 if (IS_ERR(cqr))
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 644086ba2ede..4e49b4a6c880 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -8,7 +8,7 @@
8 * 8 *
9 */ 9 */
10 10
11#define KMSG_COMPONENT "dasd" 11#define KMSG_COMPONENT "dasd-diag"
12 12
13#include <linux/stddef.h> 13#include <linux/stddef.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
@@ -523,8 +523,7 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev,
523 /* Build the request */ 523 /* Build the request */
524 datasize = sizeof(struct dasd_diag_req) + 524 datasize = sizeof(struct dasd_diag_req) +
525 count*sizeof(struct dasd_diag_bio); 525 count*sizeof(struct dasd_diag_bio);
526 cqr = dasd_smalloc_request(dasd_diag_discipline.name, 0, 526 cqr = dasd_smalloc_request(DASD_DIAG_MAGIC, 0, datasize, memdev);
527 datasize, memdev);
528 if (IS_ERR(cqr)) 527 if (IS_ERR(cqr))
529 return cqr; 528 return cqr;
530 529
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index c11770f5b368..a1ce573648a2 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -10,7 +10,7 @@
10 * Author.........: Nigel Hislop <hislop_nigel@emc.com> 10 * Author.........: Nigel Hislop <hislop_nigel@emc.com>
11 */ 11 */
12 12
13#define KMSG_COMPONENT "dasd" 13#define KMSG_COMPONENT "dasd-eckd"
14 14
15#include <linux/stddef.h> 15#include <linux/stddef.h>
16#include <linux/kernel.h> 16#include <linux/kernel.h>
@@ -730,7 +730,8 @@ static struct dasd_ccw_req *dasd_eckd_build_rcd_lpm(struct dasd_device *device,
730 struct dasd_ccw_req *cqr; 730 struct dasd_ccw_req *cqr;
731 struct ccw1 *ccw; 731 struct ccw1 *ccw;
732 732
733 cqr = dasd_smalloc_request("ECKD", 1 /* RCD */, ciw->count, device); 733 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* RCD */, ciw->count,
734 device);
734 735
735 if (IS_ERR(cqr)) { 736 if (IS_ERR(cqr)) {
736 DBF_DEV_EVENT(DBF_WARNING, device, "%s", 737 DBF_DEV_EVENT(DBF_WARNING, device, "%s",
@@ -934,8 +935,7 @@ static int dasd_eckd_read_features(struct dasd_device *device)
934 struct dasd_eckd_private *private; 935 struct dasd_eckd_private *private;
935 936
936 private = (struct dasd_eckd_private *) device->private; 937 private = (struct dasd_eckd_private *) device->private;
937 cqr = dasd_smalloc_request(dasd_eckd_discipline.name, 938 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
938 1 /* PSF */ + 1 /* RSSD */ ,
939 (sizeof(struct dasd_psf_prssd_data) + 939 (sizeof(struct dasd_psf_prssd_data) +
940 sizeof(struct dasd_rssd_features)), 940 sizeof(struct dasd_rssd_features)),
941 device); 941 device);
@@ -998,7 +998,7 @@ static struct dasd_ccw_req *dasd_eckd_build_psf_ssc(struct dasd_device *device,
998 struct dasd_psf_ssc_data *psf_ssc_data; 998 struct dasd_psf_ssc_data *psf_ssc_data;
999 struct ccw1 *ccw; 999 struct ccw1 *ccw;
1000 1000
1001 cqr = dasd_smalloc_request("ECKD", 1 /* PSF */ , 1001 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ ,
1002 sizeof(struct dasd_psf_ssc_data), 1002 sizeof(struct dasd_psf_ssc_data),
1003 device); 1003 device);
1004 1004
@@ -1149,8 +1149,8 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
1149 goto out_err3; 1149 goto out_err3;
1150 1150
1151 /* Read Device Characteristics */ 1151 /* Read Device Characteristics */
1152 rc = dasd_generic_read_dev_chars(device, "ECKD", &private->rdc_data, 1152 rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC,
1153 64); 1153 &private->rdc_data, 64);
1154 if (rc) { 1154 if (rc) {
1155 DBF_EVENT(DBF_WARNING, 1155 DBF_EVENT(DBF_WARNING,
1156 "Read device characteristics failed, rc=%d for " 1156 "Read device characteristics failed, rc=%d for "
@@ -1217,8 +1217,7 @@ dasd_eckd_analysis_ccw(struct dasd_device *device)
1217 1217
1218 cplength = 8; 1218 cplength = 8;
1219 datasize = sizeof(struct DE_eckd_data) + 2*sizeof(struct LO_eckd_data); 1219 datasize = sizeof(struct DE_eckd_data) + 2*sizeof(struct LO_eckd_data);
1220 cqr = dasd_smalloc_request(dasd_eckd_discipline.name, 1220 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device);
1221 cplength, datasize, device);
1222 if (IS_ERR(cqr)) 1221 if (IS_ERR(cqr))
1223 return cqr; 1222 return cqr;
1224 ccw = cqr->cpaddr; 1223 ccw = cqr->cpaddr;
@@ -1499,8 +1498,7 @@ dasd_eckd_format_device(struct dasd_device * device,
1499 return ERR_PTR(-EINVAL); 1498 return ERR_PTR(-EINVAL);
1500 } 1499 }
1501 /* Allocate the format ccw request. */ 1500 /* Allocate the format ccw request. */
1502 fcp = dasd_smalloc_request(dasd_eckd_discipline.name, 1501 fcp = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device);
1503 cplength, datasize, device);
1504 if (IS_ERR(fcp)) 1502 if (IS_ERR(fcp))
1505 return fcp; 1503 return fcp;
1506 1504
@@ -1783,8 +1781,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single(
1783 datasize += count*sizeof(struct LO_eckd_data); 1781 datasize += count*sizeof(struct LO_eckd_data);
1784 } 1782 }
1785 /* Allocate the ccw request. */ 1783 /* Allocate the ccw request. */
1786 cqr = dasd_smalloc_request(dasd_eckd_discipline.name, 1784 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize,
1787 cplength, datasize, startdev); 1785 startdev);
1788 if (IS_ERR(cqr)) 1786 if (IS_ERR(cqr))
1789 return cqr; 1787 return cqr;
1790 ccw = cqr->cpaddr; 1788 ccw = cqr->cpaddr;
@@ -1948,8 +1946,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track(
1948 cidaw * sizeof(unsigned long long); 1946 cidaw * sizeof(unsigned long long);
1949 1947
1950 /* Allocate the ccw request. */ 1948 /* Allocate the ccw request. */
1951 cqr = dasd_smalloc_request(dasd_eckd_discipline.name, 1949 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize,
1952 cplength, datasize, startdev); 1950 startdev);
1953 if (IS_ERR(cqr)) 1951 if (IS_ERR(cqr))
1954 return cqr; 1952 return cqr;
1955 ccw = cqr->cpaddr; 1953 ccw = cqr->cpaddr;
@@ -2249,8 +2247,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track(
2249 2247
2250 /* Allocate the ccw request. */ 2248 /* Allocate the ccw request. */
2251 itcw_size = itcw_calc_size(0, ctidaw, 0); 2249 itcw_size = itcw_calc_size(0, ctidaw, 0);
2252 cqr = dasd_smalloc_request(dasd_eckd_discipline.name, 2250 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 0, itcw_size, startdev);
2253 0, itcw_size, startdev);
2254 if (IS_ERR(cqr)) 2251 if (IS_ERR(cqr))
2255 return cqr; 2252 return cqr;
2256 2253
@@ -2557,8 +2554,7 @@ dasd_eckd_release(struct dasd_device *device)
2557 if (!capable(CAP_SYS_ADMIN)) 2554 if (!capable(CAP_SYS_ADMIN))
2558 return -EACCES; 2555 return -EACCES;
2559 2556
2560 cqr = dasd_smalloc_request(dasd_eckd_discipline.name, 2557 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device);
2561 1, 32, device);
2562 if (IS_ERR(cqr)) { 2558 if (IS_ERR(cqr)) {
2563 DBF_DEV_EVENT(DBF_WARNING, device, "%s", 2559 DBF_DEV_EVENT(DBF_WARNING, device, "%s",
2564 "Could not allocate initialization request"); 2560 "Could not allocate initialization request");
@@ -2600,8 +2596,7 @@ dasd_eckd_reserve(struct dasd_device *device)
2600 if (!capable(CAP_SYS_ADMIN)) 2596 if (!capable(CAP_SYS_ADMIN))
2601 return -EACCES; 2597 return -EACCES;
2602 2598
2603 cqr = dasd_smalloc_request(dasd_eckd_discipline.name, 2599 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device);
2604 1, 32, device);
2605 if (IS_ERR(cqr)) { 2600 if (IS_ERR(cqr)) {
2606 DBF_DEV_EVENT(DBF_WARNING, device, "%s", 2601 DBF_DEV_EVENT(DBF_WARNING, device, "%s",
2607 "Could not allocate initialization request"); 2602 "Could not allocate initialization request");
@@ -2642,8 +2637,7 @@ dasd_eckd_steal_lock(struct dasd_device *device)
2642 if (!capable(CAP_SYS_ADMIN)) 2637 if (!capable(CAP_SYS_ADMIN))
2643 return -EACCES; 2638 return -EACCES;
2644 2639
2645 cqr = dasd_smalloc_request(dasd_eckd_discipline.name, 2640 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device);
2646 1, 32, device);
2647 if (IS_ERR(cqr)) { 2641 if (IS_ERR(cqr)) {
2648 DBF_DEV_EVENT(DBF_WARNING, device, "%s", 2642 DBF_DEV_EVENT(DBF_WARNING, device, "%s",
2649 "Could not allocate initialization request"); 2643 "Could not allocate initialization request");
@@ -2681,8 +2675,7 @@ dasd_eckd_performance(struct dasd_device *device, void __user *argp)
2681 struct ccw1 *ccw; 2675 struct ccw1 *ccw;
2682 int rc; 2676 int rc;
2683 2677
2684 cqr = dasd_smalloc_request(dasd_eckd_discipline.name, 2678 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
2685 1 /* PSF */ + 1 /* RSSD */ ,
2686 (sizeof(struct dasd_psf_prssd_data) + 2679 (sizeof(struct dasd_psf_prssd_data) +
2687 sizeof(struct dasd_rssd_perf_stats_t)), 2680 sizeof(struct dasd_rssd_perf_stats_t)),
2688 device); 2681 device);
@@ -2828,7 +2821,7 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp)
2828 } 2821 }
2829 2822
2830 /* setup CCWs for PSF + RSSD */ 2823 /* setup CCWs for PSF + RSSD */
2831 cqr = dasd_smalloc_request("ECKD", 2 , 0, device); 2824 cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 2 , 0, device);
2832 if (IS_ERR(cqr)) { 2825 if (IS_ERR(cqr)) {
2833 DBF_DEV_EVENT(DBF_WARNING, device, "%s", 2826 DBF_DEV_EVENT(DBF_WARNING, device, "%s",
2834 "Could not allocate initialization request"); 2827 "Could not allocate initialization request");
@@ -3254,7 +3247,7 @@ int dasd_eckd_restore_device(struct dasd_device *device)
3254 3247
3255 /* Read Device Characteristics */ 3248 /* Read Device Characteristics */
3256 memset(&private->rdc_data, 0, sizeof(private->rdc_data)); 3249 memset(&private->rdc_data, 0, sizeof(private->rdc_data));
3257 rc = dasd_generic_read_dev_chars(device, "ECKD", 3250 rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC,
3258 &private->rdc_data, 64); 3251 &private->rdc_data, 64);
3259 if (rc) { 3252 if (rc) {
3260 DBF_EVENT(DBF_WARNING, 3253 DBF_EVENT(DBF_WARNING,
diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c
index c24c8c30380d..d96039eae59b 100644
--- a/drivers/s390/block/dasd_eer.c
+++ b/drivers/s390/block/dasd_eer.c
@@ -6,7 +6,7 @@
6 * Author(s): Stefan Weinhuber <wein@de.ibm.com> 6 * Author(s): Stefan Weinhuber <wein@de.ibm.com>
7 */ 7 */
8 8
9#define KMSG_COMPONENT "dasd" 9#define KMSG_COMPONENT "dasd-eckd"
10 10
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
@@ -464,7 +464,7 @@ int dasd_eer_enable(struct dasd_device *device)
464 if (!device->discipline || strcmp(device->discipline->name, "ECKD")) 464 if (!device->discipline || strcmp(device->discipline->name, "ECKD"))
465 return -EPERM; /* FIXME: -EMEDIUMTYPE ? */ 465 return -EPERM; /* FIXME: -EMEDIUMTYPE ? */
466 466
467 cqr = dasd_kmalloc_request("ECKD", 1 /* SNSS */, 467 cqr = dasd_kmalloc_request(DASD_ECKD_MAGIC, 1 /* SNSS */,
468 SNSS_DATA_SIZE, device); 468 SNSS_DATA_SIZE, device);
469 if (IS_ERR(cqr)) 469 if (IS_ERR(cqr))
470 return -ENOMEM; 470 return -ENOMEM;
diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c
index cb8f9cef7429..7656384a811d 100644
--- a/drivers/s390/block/dasd_erp.c
+++ b/drivers/s390/block/dasd_erp.c
@@ -99,8 +99,8 @@ dasd_default_erp_action(struct dasd_ccw_req *cqr)
99 cqr->lpm = LPM_ANYPATH; 99 cqr->lpm = LPM_ANYPATH;
100 cqr->status = DASD_CQR_FILLED; 100 cqr->status = DASD_CQR_FILLED;
101 } else { 101 } else {
102 dev_err(&device->cdev->dev, 102 pr_err("%s: default ERP has run out of retries and failed\n",
103 "default ERP has run out of retries and failed\n"); 103 dev_name(&device->cdev->dev));
104 cqr->status = DASD_CQR_FAILED; 104 cqr->status = DASD_CQR_FAILED;
105 cqr->stopclk = get_clock(); 105 cqr->stopclk = get_clock();
106 } 106 }
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 31849ad5e59f..f245377e8e27 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -5,7 +5,7 @@
5 * Copyright IBM Corp. 1999, 2009 5 * Copyright IBM Corp. 1999, 2009
6 */ 6 */
7 7
8#define KMSG_COMPONENT "dasd" 8#define KMSG_COMPONENT "dasd-fba"
9 9
10#include <linux/stddef.h> 10#include <linux/stddef.h>
11#include <linux/kernel.h> 11#include <linux/kernel.h>
@@ -152,8 +152,8 @@ dasd_fba_check_characteristics(struct dasd_device *device)
152 block->base = device; 152 block->base = device;
153 153
154 /* Read Device Characteristics */ 154 /* Read Device Characteristics */
155 rc = dasd_generic_read_dev_chars(device, "FBA ", &private->rdc_data, 155 rc = dasd_generic_read_dev_chars(device, DASD_FBA_MAGIC,
156 32); 156 &private->rdc_data, 32);
157 if (rc) { 157 if (rc) {
158 DBF_EVENT(DBF_WARNING, "Read device characteristics returned " 158 DBF_EVENT(DBF_WARNING, "Read device characteristics returned "
159 "error %d for device: %s", 159 "error %d for device: %s",
@@ -305,8 +305,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
305 datasize += (count - 1)*sizeof(struct LO_fba_data); 305 datasize += (count - 1)*sizeof(struct LO_fba_data);
306 } 306 }
307 /* Allocate the ccw request. */ 307 /* Allocate the ccw request. */
308 cqr = dasd_smalloc_request(dasd_fba_discipline.name, 308 cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev);
309 cplength, datasize, memdev);
310 if (IS_ERR(cqr)) 309 if (IS_ERR(cqr))
311 return cqr; 310 return cqr;
312 ccw = cqr->cpaddr; 311 ccw = cqr->cpaddr;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index b699ca356ac5..5e47a1ee52b9 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -59,6 +59,11 @@
59#include <asm/dasd.h> 59#include <asm/dasd.h>
60#include <asm/idals.h> 60#include <asm/idals.h>
61 61
62/* DASD discipline magic */
63#define DASD_ECKD_MAGIC 0xC5C3D2C4
64#define DASD_DIAG_MAGIC 0xC4C9C1C7
65#define DASD_FBA_MAGIC 0xC6C2C140
66
62/* 67/*
63 * SECTION: Type definitions 68 * SECTION: Type definitions
64 */ 69 */
@@ -540,9 +545,9 @@ extern struct block_device_operations dasd_device_operations;
540extern struct kmem_cache *dasd_page_cache; 545extern struct kmem_cache *dasd_page_cache;
541 546
542struct dasd_ccw_req * 547struct dasd_ccw_req *
543dasd_kmalloc_request(char *, int, int, struct dasd_device *); 548dasd_kmalloc_request(int , int, int, struct dasd_device *);
544struct dasd_ccw_req * 549struct dasd_ccw_req *
545dasd_smalloc_request(char *, int, int, struct dasd_device *); 550dasd_smalloc_request(int , int, int, struct dasd_device *);
546void dasd_kfree_request(struct dasd_ccw_req *, struct dasd_device *); 551void dasd_kfree_request(struct dasd_ccw_req *, struct dasd_device *);
547void dasd_sfree_request(struct dasd_ccw_req *, struct dasd_device *); 552void dasd_sfree_request(struct dasd_ccw_req *, struct dasd_device *);
548 553
@@ -587,7 +592,7 @@ void dasd_generic_handle_state_change(struct dasd_device *);
587int dasd_generic_pm_freeze(struct ccw_device *); 592int dasd_generic_pm_freeze(struct ccw_device *);
588int dasd_generic_restore_device(struct ccw_device *); 593int dasd_generic_restore_device(struct ccw_device *);
589 594
590int dasd_generic_read_dev_chars(struct dasd_device *, char *, void *, int); 595int dasd_generic_read_dev_chars(struct dasd_device *, int, void *, int);
591char *dasd_get_sense(struct irb *); 596char *dasd_get_sense(struct irb *);
592 597
593/* externals in dasd_devmap.c */ 598/* externals in dasd_devmap.c */
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index df918ef27965..f756a1b0c57a 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -98,8 +98,8 @@ static int dasd_ioctl_quiesce(struct dasd_block *block)
98 if (!capable (CAP_SYS_ADMIN)) 98 if (!capable (CAP_SYS_ADMIN))
99 return -EACCES; 99 return -EACCES;
100 100
101 dev_info(&base->cdev->dev, "The DASD has been put in the quiesce " 101 pr_info("%s: The DASD has been put in the quiesce "
102 "state\n"); 102 "state\n", dev_name(&base->cdev->dev));
103 spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); 103 spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
104 base->stopped |= DASD_STOPPED_QUIESCE; 104 base->stopped |= DASD_STOPPED_QUIESCE;
105 spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); 105 spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
@@ -119,8 +119,8 @@ static int dasd_ioctl_resume(struct dasd_block *block)
119 if (!capable (CAP_SYS_ADMIN)) 119 if (!capable (CAP_SYS_ADMIN))
120 return -EACCES; 120 return -EACCES;
121 121
122 dev_info(&base->cdev->dev, "I/O operations have been resumed " 122 pr_info("%s: I/O operations have been resumed "
123 "on the DASD\n"); 123 "on the DASD\n", dev_name(&base->cdev->dev));
124 spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); 124 spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
125 base->stopped &= ~DASD_STOPPED_QUIESCE; 125 base->stopped &= ~DASD_STOPPED_QUIESCE;
126 spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); 126 spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
@@ -146,8 +146,8 @@ static int dasd_format(struct dasd_block *block, struct format_data_t *fdata)
146 return -EPERM; 146 return -EPERM;
147 147
148 if (base->state != DASD_STATE_BASIC) { 148 if (base->state != DASD_STATE_BASIC) {
149 dev_warn(&base->cdev->dev, 149 pr_warning("%s: The DASD cannot be formatted while it is "
150 "The DASD cannot be formatted while it is enabled\n"); 150 "enabled\n", dev_name(&base->cdev->dev));
151 return -EBUSY; 151 return -EBUSY;
152 } 152 }
153 153
@@ -175,9 +175,9 @@ static int dasd_format(struct dasd_block *block, struct format_data_t *fdata)
175 dasd_sfree_request(cqr, cqr->memdev); 175 dasd_sfree_request(cqr, cqr->memdev);
176 if (rc) { 176 if (rc) {
177 if (rc != -ERESTARTSYS) 177 if (rc != -ERESTARTSYS)
178 dev_err(&base->cdev->dev, 178 pr_err("%s: Formatting unit %d failed with "
179 "Formatting unit %d failed with " 179 "rc=%d\n", dev_name(&base->cdev->dev),
180 "rc=%d\n", fdata->start_unit, rc); 180 fdata->start_unit, rc);
181 return rc; 181 return rc;
182 } 182 }
183 fdata->start_unit++; 183 fdata->start_unit++;
@@ -204,9 +204,9 @@ dasd_ioctl_format(struct block_device *bdev, void __user *argp)
204 if (copy_from_user(&fdata, argp, sizeof(struct format_data_t))) 204 if (copy_from_user(&fdata, argp, sizeof(struct format_data_t)))
205 return -EFAULT; 205 return -EFAULT;
206 if (bdev != bdev->bd_contains) { 206 if (bdev != bdev->bd_contains) {
207 dev_warn(&block->base->cdev->dev, 207 pr_warning("%s: The specified DASD is a partition and cannot "
208 "The specified DASD is a partition and cannot be " 208 "be formatted\n",
209 "formatted\n"); 209 dev_name(&block->base->cdev->dev));
210 return -EINVAL; 210 return -EINVAL;
211 } 211 }
212 return dasd_format(block, &fdata); 212 return dasd_format(block, &fdata);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index db442cd6621e..ee604e92a5fa 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -42,7 +42,6 @@
42#include <linux/suspend.h> 42#include <linux/suspend.h>
43#include <linux/platform_device.h> 43#include <linux/platform_device.h>
44#include <asm/uaccess.h> 44#include <asm/uaccess.h>
45#include <asm/checksum.h>
46 45
47#define XPRAM_NAME "xpram" 46#define XPRAM_NAME "xpram"
48#define XPRAM_DEVS 1 /* one partition */ 47#define XPRAM_DEVS 1 /* one partition */
@@ -51,7 +50,6 @@
51typedef struct { 50typedef struct {
52 unsigned int size; /* size of xpram segment in pages */ 51 unsigned int size; /* size of xpram segment in pages */
53 unsigned int offset; /* start page of xpram segment */ 52 unsigned int offset; /* start page of xpram segment */
54 unsigned int csum; /* partition checksum for suspend */
55} xpram_device_t; 53} xpram_device_t;
56 54
57static xpram_device_t xpram_devices[XPRAM_MAX_DEVS]; 55static xpram_device_t xpram_devices[XPRAM_MAX_DEVS];
@@ -387,58 +385,6 @@ out:
387} 385}
388 386
389/* 387/*
390 * Save checksums for all partitions.
391 */
392static int xpram_save_checksums(void)
393{
394 unsigned long mem_page;
395 int rc, i;
396
397 rc = 0;
398 mem_page = (unsigned long) __get_free_page(GFP_KERNEL);
399 if (!mem_page)
400 return -ENOMEM;
401 for (i = 0; i < xpram_devs; i++) {
402 rc = xpram_page_in(mem_page, xpram_devices[i].offset);
403 if (rc)
404 goto fail;
405 xpram_devices[i].csum = csum_partial((const void *) mem_page,
406 PAGE_SIZE, 0);
407 }
408fail:
409 free_page(mem_page);
410 return rc ? -ENXIO : 0;
411}
412
413/*
414 * Verify checksums for all partitions.
415 */
416static int xpram_validate_checksums(void)
417{
418 unsigned long mem_page;
419 unsigned int csum;
420 int rc, i;
421
422 rc = 0;
423 mem_page = (unsigned long) __get_free_page(GFP_KERNEL);
424 if (!mem_page)
425 return -ENOMEM;
426 for (i = 0; i < xpram_devs; i++) {
427 rc = xpram_page_in(mem_page, xpram_devices[i].offset);
428 if (rc)
429 goto fail;
430 csum = csum_partial((const void *) mem_page, PAGE_SIZE, 0);
431 if (xpram_devices[i].csum != csum) {
432 rc = -EINVAL;
433 goto fail;
434 }
435 }
436fail:
437 free_page(mem_page);
438 return rc ? -ENXIO : 0;
439}
440
441/*
442 * Resume failed: Print error message and call panic. 388 * Resume failed: Print error message and call panic.
443 */ 389 */
444static void xpram_resume_error(const char *message) 390static void xpram_resume_error(const char *message)
@@ -458,21 +404,10 @@ static int xpram_restore(struct device *dev)
458 xpram_resume_error("xpram disappeared"); 404 xpram_resume_error("xpram disappeared");
459 if (xpram_pages != xpram_highest_page_index() + 1) 405 if (xpram_pages != xpram_highest_page_index() + 1)
460 xpram_resume_error("Size of xpram changed"); 406 xpram_resume_error("Size of xpram changed");
461 if (xpram_validate_checksums())
462 xpram_resume_error("Data of xpram changed");
463 return 0; 407 return 0;
464} 408}
465 409
466/*
467 * Save necessary state in suspend.
468 */
469static int xpram_freeze(struct device *dev)
470{
471 return xpram_save_checksums();
472}
473
474static struct dev_pm_ops xpram_pm_ops = { 410static struct dev_pm_ops xpram_pm_ops = {
475 .freeze = xpram_freeze,
476 .restore = xpram_restore, 411 .restore = xpram_restore,
477}; 412};
478 413
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index 0769ced52dbd..4e34d3686c23 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -82,6 +82,16 @@ config SCLP_CPI
82 You should only select this option if you know what you are doing, 82 You should only select this option if you know what you are doing,
83 need this feature and intend to run your kernel in LPAR. 83 need this feature and intend to run your kernel in LPAR.
84 84
85config SCLP_ASYNC
86 tristate "Support for Call Home via Asynchronous SCLP Records"
87 depends on S390
88 help
89 This option enables the call home function, which is able to inform
90 the service element and connected organisations about a kernel panic.
91 You should only select this option if you know what you are doing,
92 want to inform other people about your kernel panics,
93 need this feature and intend to run your kernel in LPAR.
94
85config S390_TAPE 95config S390_TAPE
86 tristate "S/390 tape device support" 96 tristate "S/390 tape device support"
87 depends on CCW 97 depends on CCW
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 7e73e39a1741..efb500ab66c0 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_SCLP_TTY) += sclp_tty.o
16obj-$(CONFIG_SCLP_CONSOLE) += sclp_con.o 16obj-$(CONFIG_SCLP_CONSOLE) += sclp_con.o
17obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o 17obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o
18obj-$(CONFIG_SCLP_CPI) += sclp_cpi.o 18obj-$(CONFIG_SCLP_CPI) += sclp_cpi.o
19obj-$(CONFIG_SCLP_ASYNC) += sclp_async.o
19 20
20obj-$(CONFIG_ZVM_WATCHDOG) += vmwatchdog.o 21obj-$(CONFIG_ZVM_WATCHDOG) += vmwatchdog.o
21obj-$(CONFIG_VMLOGRDR) += vmlogrdr.o 22obj-$(CONFIG_VMLOGRDR) += vmlogrdr.o
diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c
index 3234e90bd7f9..89ece1c235aa 100644
--- a/drivers/s390/char/monreader.c
+++ b/drivers/s390/char/monreader.c
@@ -581,7 +581,7 @@ static int __init mon_init(void)
581 monreader_device->release = (void (*)(struct device *))kfree; 581 monreader_device->release = (void (*)(struct device *))kfree;
582 rc = device_register(monreader_device); 582 rc = device_register(monreader_device);
583 if (rc) { 583 if (rc) {
584 kfree(monreader_device); 584 put_device(monreader_device);
585 goto out_driver; 585 goto out_driver;
586 } 586 }
587 587
diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h
index 60e7cb07095b..6bb5a6bdfab5 100644
--- a/drivers/s390/char/sclp.h
+++ b/drivers/s390/char/sclp.h
@@ -27,6 +27,7 @@
27#define EVTYP_VT220MSG 0x1A 27#define EVTYP_VT220MSG 0x1A
28#define EVTYP_CONFMGMDATA 0x04 28#define EVTYP_CONFMGMDATA 0x04
29#define EVTYP_SDIAS 0x1C 29#define EVTYP_SDIAS 0x1C
30#define EVTYP_ASYNC 0x0A
30 31
31#define EVTYP_OPCMD_MASK 0x80000000 32#define EVTYP_OPCMD_MASK 0x80000000
32#define EVTYP_MSG_MASK 0x40000000 33#define EVTYP_MSG_MASK 0x40000000
@@ -38,6 +39,7 @@
38#define EVTYP_VT220MSG_MASK 0x00000040 39#define EVTYP_VT220MSG_MASK 0x00000040
39#define EVTYP_CONFMGMDATA_MASK 0x10000000 40#define EVTYP_CONFMGMDATA_MASK 0x10000000
40#define EVTYP_SDIAS_MASK 0x00000010 41#define EVTYP_SDIAS_MASK 0x00000010
42#define EVTYP_ASYNC_MASK 0x00400000
41 43
42#define GNRLMSGFLGS_DOM 0x8000 44#define GNRLMSGFLGS_DOM 0x8000
43#define GNRLMSGFLGS_SNDALRM 0x4000 45#define GNRLMSGFLGS_SNDALRM 0x4000
@@ -85,12 +87,12 @@ struct sccb_header {
85} __attribute__((packed)); 87} __attribute__((packed));
86 88
87extern u64 sclp_facilities; 89extern u64 sclp_facilities;
88
89#define SCLP_HAS_CHP_INFO (sclp_facilities & 0x8000000000000000ULL) 90#define SCLP_HAS_CHP_INFO (sclp_facilities & 0x8000000000000000ULL)
90#define SCLP_HAS_CHP_RECONFIG (sclp_facilities & 0x2000000000000000ULL) 91#define SCLP_HAS_CHP_RECONFIG (sclp_facilities & 0x2000000000000000ULL)
91#define SCLP_HAS_CPU_INFO (sclp_facilities & 0x0800000000000000ULL) 92#define SCLP_HAS_CPU_INFO (sclp_facilities & 0x0800000000000000ULL)
92#define SCLP_HAS_CPU_RECONFIG (sclp_facilities & 0x0400000000000000ULL) 93#define SCLP_HAS_CPU_RECONFIG (sclp_facilities & 0x0400000000000000ULL)
93 94
95
94struct gds_subvector { 96struct gds_subvector {
95 u8 length; 97 u8 length;
96 u8 key; 98 u8 key;
diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c
new file mode 100644
index 000000000000..daaec185ed36
--- /dev/null
+++ b/drivers/s390/char/sclp_async.c
@@ -0,0 +1,224 @@
1/*
2 * Enable Asynchronous Notification via SCLP.
3 *
4 * Copyright IBM Corp. 2009
5 * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
6 *
7 */
8
9#include <linux/init.h>
10#include <linux/module.h>
11#include <linux/device.h>
12#include <linux/stat.h>
13#include <linux/string.h>
14#include <linux/ctype.h>
15#include <linux/kmod.h>
16#include <linux/err.h>
17#include <linux/errno.h>
18#include <linux/proc_fs.h>
19#include <linux/sysctl.h>
20#include <linux/utsname.h>
21#include "sclp.h"
22
/* Function code stored in the SCCB header for a write request. */
#define SCLP_NORMAL_WRITE 0x00

/* Forward declaration: the panic notifier calls this before its definition. */
static int sclp_async_send_wait(char *message);

/* Non-zero while call home is switched on; changed by the callhome sysctl. */
static int callhome_enabled;

/* Request descriptor and SCCB page; both are allocated in sclp_async_init(). */
static struct sclp_req *request;
static struct sclp_async_sccb *sccb;

/* Handle returned by register_sysctl_table(), kept for unregistering. */
static struct ctl_table_header *callhome_sysctl_header;

/* Held while sclp_async_send_wait() polls for completion of the request. */
static DEFINE_SPINLOCK(sclp_async_lock);

/* Node name captured at init time and appended to the panic message. */
static char nodename[64];
31
32struct async_evbuf {
33 struct evbuf_header header;
34 u64 reserved;
35 u8 rflags;
36 u8 empty;
37 u8 rtype;
38 u8 otype;
39 char comp_id[12];
40 char data[3000]; /* there is still some space left */
41} __attribute__((packed));
42
43struct sclp_async_sccb {
44 struct sccb_header header;
45 struct async_evbuf evbuf;
46} __attribute__((packed));
47
48static struct sclp_register sclp_async_register = {
49 .send_mask = EVTYP_ASYNC_MASK,
50};
51
52static int call_home_on_panic(struct notifier_block *self,
53 unsigned long event, void *data)
54{
55 strncat(data, nodename, strlen(nodename));
56 sclp_async_send_wait(data);
57 return NOTIFY_DONE;
58}
59
60static struct notifier_block call_home_panic_nb = {
61 .notifier_call = call_home_on_panic,
62 .priority = INT_MAX,
63};
64
65static int proc_handler_callhome(ctl_table *ctl, int write, struct file *filp,
66 void __user *buffer, size_t *count,
67 loff_t *ppos)
68{
69 unsigned long val;
70 int len, rc;
71 char buf[2];
72
73 if (!*count | (*ppos && !write)) {
74 *count = 0;
75 return 0;
76 }
77 if (!write) {
78 len = sprintf(buf, "%d\n", callhome_enabled);
79 buf[len] = '\0';
80 rc = copy_to_user(buffer, buf, sizeof(buf));
81 if (rc != 0)
82 return -EFAULT;
83 } else {
84 len = *count;
85 rc = copy_from_user(buf, buffer, sizeof(buf));
86 if (rc != 0)
87 return -EFAULT;
88 if (strict_strtoul(buf, 0, &val) != 0)
89 return -EINVAL;
90 if (val != 0 && val != 1)
91 return -EINVAL;
92 callhome_enabled = val;
93 }
94 *count = len;
95 *ppos += len;
96 return 0;
97}
98
99static struct ctl_table callhome_table[] = {
100 {
101 .procname = "callhome",
102 .mode = 0644,
103 .proc_handler = &proc_handler_callhome,
104 },
105 { .ctl_name = 0 }
106};
107
108static struct ctl_table kern_dir_table[] = {
109 {
110 .ctl_name = CTL_KERN,
111 .procname = "kernel",
112 .maxlen = 0,
113 .mode = 0555,
114 .child = callhome_table,
115 },
116 { .ctl_name = 0 }
117};
118
119/*
120 * Function used to transfer asynchronous notification
121 * records which waits for send completion
122 */
123static int sclp_async_send_wait(char *message)
124{
125 struct async_evbuf *evb;
126 int rc;
127 unsigned long flags;
128
129 if (!callhome_enabled)
130 return 0;
131 sccb->evbuf.header.type = EVTYP_ASYNC;
132 sccb->evbuf.rtype = 0xA5;
133 sccb->evbuf.otype = 0x00;
134 evb = &sccb->evbuf;
135 request->command = SCLP_CMDW_WRITE_EVENT_DATA;
136 request->sccb = sccb;
137 request->status = SCLP_REQ_FILLED;
138 strncpy(sccb->evbuf.data, message, sizeof(sccb->evbuf.data));
139 /*
140 * Retain Queue
141 * e.g. 5639CC140 500 Red Hat RHEL5 Linux for zSeries (RHEL AS)
142 */
143 strncpy(sccb->evbuf.comp_id, "000000000", sizeof(sccb->evbuf.comp_id));
144 sccb->evbuf.header.length = sizeof(sccb->evbuf);
145 sccb->header.length = sizeof(sccb->evbuf) + sizeof(sccb->header);
146 sccb->header.function_code = SCLP_NORMAL_WRITE;
147 rc = sclp_add_request(request);
148 if (rc)
149 return rc;
150 spin_lock_irqsave(&sclp_async_lock, flags);
151 while (request->status != SCLP_REQ_DONE &&
152 request->status != SCLP_REQ_FAILED) {
153 sclp_sync_wait();
154 }
155 spin_unlock_irqrestore(&sclp_async_lock, flags);
156 if (request->status != SCLP_REQ_DONE)
157 return -EIO;
158 rc = ((struct sclp_async_sccb *)
159 request->sccb)->header.response_code;
160 if (rc != 0x0020)
161 return -EIO;
162 if (evb->header.flags != 0x80)
163 return -EIO;
164 return rc;
165}
166
167static int __init sclp_async_init(void)
168{
169 int rc;
170
171 rc = sclp_register(&sclp_async_register);
172 if (rc)
173 return rc;
174 callhome_sysctl_header = register_sysctl_table(kern_dir_table);
175 if (!callhome_sysctl_header) {
176 rc = -ENOMEM;
177 goto out_sclp;
178 }
179 if (!(sclp_async_register.sclp_receive_mask & EVTYP_ASYNC_MASK)) {
180 rc = -EOPNOTSUPP;
181 goto out_sclp;
182 }
183 rc = -ENOMEM;
184 request = kzalloc(sizeof(struct sclp_req), GFP_KERNEL);
185 if (!request)
186 goto out_sys;
187 sccb = (struct sclp_async_sccb *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
188 if (!sccb)
189 goto out_mem;
190 rc = atomic_notifier_chain_register(&panic_notifier_list,
191 &call_home_panic_nb);
192 if (rc)
193 goto out_mem;
194
195 strncpy(nodename, init_utsname()->nodename, 64);
196 return 0;
197
198out_mem:
199 kfree(request);
200 free_page((unsigned long) sccb);
201out_sys:
202 unregister_sysctl_table(callhome_sysctl_header);
203out_sclp:
204 sclp_unregister(&sclp_async_register);
205 return rc;
206
207}
208module_init(sclp_async_init);
209
210static void __exit sclp_async_exit(void)
211{
212 atomic_notifier_chain_unregister(&panic_notifier_list,
213 &call_home_panic_nb);
214 unregister_sysctl_table(callhome_sysctl_header);
215 sclp_unregister(&sclp_async_register);
216 free_page((unsigned long) sccb);
217 kfree(request);
218}
219module_exit(sclp_async_exit);
220
221MODULE_AUTHOR("Copyright IBM Corp. 2009");
222MODULE_AUTHOR("Hans-Joachim Picht <hans@linux.vnet.ibm.com>");
223MODULE_LICENSE("GPL");
224MODULE_DESCRIPTION("SCLP Asynchronous Notification Records");
diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index 5a519fac37b7..2fe45ff77b75 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c
@@ -8,7 +8,7 @@
8 * Martin Schwidefsky <schwidefsky@de.ibm.com> 8 * Martin Schwidefsky <schwidefsky@de.ibm.com>
9 */ 9 */
10 10
11#define KMSG_COMPONENT "tape" 11#define KMSG_COMPONENT "tape_34xx"
12 12
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/init.h> 14#include <linux/init.h>
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index 418f72dd39b4..e4cc3aae9162 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -8,7 +8,7 @@
8 * Martin Schwidefsky <schwidefsky@de.ibm.com> 8 * Martin Schwidefsky <schwidefsky@de.ibm.com>
9 */ 9 */
10 10
11#define KMSG_COMPONENT "tape" 11#define KMSG_COMPONENT "tape_3590"
12 12
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/init.h> 14#include <linux/init.h>
@@ -39,8 +39,6 @@ EXPORT_SYMBOL(TAPE_DBF_AREA);
39 * - Read Alternate: implemented 39 * - Read Alternate: implemented
40 *******************************************************************/ 40 *******************************************************************/
41 41
42#define KMSG_COMPONENT "tape"
43
44static const char *tape_3590_msg[TAPE_3590_MAX_MSG] = { 42static const char *tape_3590_msg[TAPE_3590_MAX_MSG] = {
45 [0x00] = "", 43 [0x00] = "",
46 [0x10] = "Lost Sense", 44 [0x10] = "Lost Sense",
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 47ff695255ea..4cb9e70507ab 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -302,8 +302,6 @@ tapeblock_revalidate_disk(struct gendisk *disk)
302 if (!device->blk_data.medium_changed) 302 if (!device->blk_data.medium_changed)
303 return 0; 303 return 0;
304 304
305 dev_info(&device->cdev->dev, "Determining the size of the recorded "
306 "area...\n");
307 rc = tape_mtop(device, MTFSFM, 1); 305 rc = tape_mtop(device, MTFSFM, 1);
308 if (rc) 306 if (rc)
309 return rc; 307 return rc;
@@ -312,6 +310,8 @@ tapeblock_revalidate_disk(struct gendisk *disk)
312 if (rc < 0) 310 if (rc < 0)
313 return rc; 311 return rc;
314 312
313 pr_info("%s: Determining the size of the recorded area...\n",
314 dev_name(&device->cdev->dev));
315 DBF_LH(3, "Image file ends at %d\n", rc); 315 DBF_LH(3, "Image file ends at %d\n", rc);
316 nr_of_blks = rc; 316 nr_of_blks = rc;
317 317
@@ -330,8 +330,8 @@ tapeblock_revalidate_disk(struct gendisk *disk)
330 device->bof = rc; 330 device->bof = rc;
331 nr_of_blks -= rc; 331 nr_of_blks -= rc;
332 332
333 dev_info(&device->cdev->dev, "The size of the recorded area is %i " 333 pr_info("%s: The size of the recorded area is %i blocks\n",
334 "blocks\n", nr_of_blks); 334 dev_name(&device->cdev->dev), nr_of_blks);
335 set_capacity(device->blk_data.disk, 335 set_capacity(device->blk_data.disk,
336 nr_of_blks*(TAPEBLOCK_HSEC_SIZE/512)); 336 nr_of_blks*(TAPEBLOCK_HSEC_SIZE/512));
337 337
@@ -366,8 +366,8 @@ tapeblock_open(struct block_device *bdev, fmode_t mode)
366 366
367 if (device->required_tapemarks) { 367 if (device->required_tapemarks) {
368 DBF_EVENT(2, "TBLOCK: missing tapemarks\n"); 368 DBF_EVENT(2, "TBLOCK: missing tapemarks\n");
369 dev_warn(&device->cdev->dev, "Opening the tape failed because" 369 pr_warning("%s: Opening the tape failed because of missing "
370 " of missing end-of-file marks\n"); 370 "end-of-file marks\n", dev_name(&device->cdev->dev));
371 rc = -EPERM; 371 rc = -EPERM;
372 goto put_device; 372 goto put_device;
373 } 373 }
diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c
index 1d420d947596..5cd31e071647 100644
--- a/drivers/s390/char/tape_core.c
+++ b/drivers/s390/char/tape_core.c
@@ -214,13 +214,15 @@ tape_med_state_set(struct tape_device *device, enum tape_medium_state newstate)
214 switch(newstate){ 214 switch(newstate){
215 case MS_UNLOADED: 215 case MS_UNLOADED:
216 device->tape_generic_status |= GMT_DR_OPEN(~0); 216 device->tape_generic_status |= GMT_DR_OPEN(~0);
217 dev_info(&device->cdev->dev, "The tape cartridge has been " 217 if (device->medium_state == MS_LOADED)
218 "successfully unloaded\n"); 218 pr_info("%s: The tape cartridge has been successfully "
219 "unloaded\n", dev_name(&device->cdev->dev));
219 break; 220 break;
220 case MS_LOADED: 221 case MS_LOADED:
221 device->tape_generic_status &= ~GMT_DR_OPEN(~0); 222 device->tape_generic_status &= ~GMT_DR_OPEN(~0);
222 dev_info(&device->cdev->dev, "A tape cartridge has been " 223 if (device->medium_state == MS_UNLOADED)
223 "mounted\n"); 224 pr_info("%s: A tape cartridge has been mounted\n",
225 dev_name(&device->cdev->dev));
224 break; 226 break;
225 default: 227 default:
226 // print nothing 228 // print nothing
@@ -358,11 +360,11 @@ tape_generic_online(struct tape_device *device,
358 360
359out_char: 361out_char:
360 tapechar_cleanup_device(device); 362 tapechar_cleanup_device(device);
363out_minor:
364 tape_remove_minor(device);
361out_discipline: 365out_discipline:
362 device->discipline->cleanup_device(device); 366 device->discipline->cleanup_device(device);
363 device->discipline = NULL; 367 device->discipline = NULL;
364out_minor:
365 tape_remove_minor(device);
366out: 368out:
367 module_put(discipline->owner); 369 module_put(discipline->owner);
368 return rc; 370 return rc;
@@ -654,8 +656,8 @@ tape_generic_remove(struct ccw_device *cdev)
654 */ 656 */
655 DBF_EVENT(3, "(%08x): Drive in use vanished!\n", 657 DBF_EVENT(3, "(%08x): Drive in use vanished!\n",
656 device->cdev_id); 658 device->cdev_id);
657 dev_warn(&device->cdev->dev, "A tape unit was detached" 659 pr_warning("%s: A tape unit was detached while in "
658 " while in use\n"); 660 "use\n", dev_name(&device->cdev->dev));
659 tape_state_set(device, TS_NOT_OPER); 661 tape_state_set(device, TS_NOT_OPER);
660 __tape_discard_requests(device); 662 __tape_discard_requests(device);
661 spin_unlock_irq(get_ccwdev_lock(device->cdev)); 663 spin_unlock_irq(get_ccwdev_lock(device->cdev));
diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c
index 1a9420ba518d..750354ad16e5 100644
--- a/drivers/s390/char/tape_std.c
+++ b/drivers/s390/char/tape_std.c
@@ -68,7 +68,7 @@ tape_std_assign(struct tape_device *device)
68 * to another host (actually this shouldn't happen but it does). 68 * to another host (actually this shouldn't happen but it does).
69 * So we set up a timeout for this call. 69 * So we set up a timeout for this call.
70 */ 70 */
71 init_timer(&timeout); 71 init_timer_on_stack(&timeout);
72 timeout.function = tape_std_assign_timeout; 72 timeout.function = tape_std_assign_timeout;
73 timeout.data = (unsigned long) request; 73 timeout.data = (unsigned long) request;
74 timeout.expires = jiffies + 2 * HZ; 74 timeout.expires = jiffies + 2 * HZ;
diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index c20a4fe6da51..d1a142fa3eb4 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -765,8 +765,10 @@ static int vmlogrdr_register_device(struct vmlogrdr_priv_t *priv)
765 } else 765 } else
766 return -ENOMEM; 766 return -ENOMEM;
767 ret = device_register(dev); 767 ret = device_register(dev);
768 if (ret) 768 if (ret) {
769 put_device(dev);
769 return ret; 770 return ret;
771 }
770 772
771 ret = sysfs_create_group(&dev->kobj, &vmlogrdr_attr_group); 773 ret = sysfs_create_group(&dev->kobj, &vmlogrdr_attr_group);
772 if (ret) { 774 if (ret) {
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 31b902e94f7b..77571b68539a 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -1026,9 +1026,15 @@ static int __init ur_init(void)
1026 1026
1027 debug_set_level(vmur_dbf, 6); 1027 debug_set_level(vmur_dbf, 6);
1028 1028
1029 vmur_class = class_create(THIS_MODULE, "vmur");
1030 if (IS_ERR(vmur_class)) {
1031 rc = PTR_ERR(vmur_class);
1032 goto fail_free_dbf;
1033 }
1034
1029 rc = ccw_driver_register(&ur_driver); 1035 rc = ccw_driver_register(&ur_driver);
1030 if (rc) 1036 if (rc)
1031 goto fail_free_dbf; 1037 goto fail_class_destroy;
1032 1038
1033 rc = alloc_chrdev_region(&dev, 0, NUM_MINORS, "vmur"); 1039 rc = alloc_chrdev_region(&dev, 0, NUM_MINORS, "vmur");
1034 if (rc) { 1040 if (rc) {
@@ -1038,18 +1044,13 @@ static int __init ur_init(void)
1038 } 1044 }
1039 ur_first_dev_maj_min = MKDEV(MAJOR(dev), 0); 1045 ur_first_dev_maj_min = MKDEV(MAJOR(dev), 0);
1040 1046
1041 vmur_class = class_create(THIS_MODULE, "vmur");
1042 if (IS_ERR(vmur_class)) {
1043 rc = PTR_ERR(vmur_class);
1044 goto fail_unregister_region;
1045 }
1046 pr_info("%s loaded.\n", ur_banner); 1047 pr_info("%s loaded.\n", ur_banner);
1047 return 0; 1048 return 0;
1048 1049
1049fail_unregister_region:
1050 unregister_chrdev_region(ur_first_dev_maj_min, NUM_MINORS);
1051fail_unregister_driver: 1050fail_unregister_driver:
1052 ccw_driver_unregister(&ur_driver); 1051 ccw_driver_unregister(&ur_driver);
1052fail_class_destroy:
1053 class_destroy(vmur_class);
1053fail_free_dbf: 1054fail_free_dbf:
1054 debug_unregister(vmur_dbf); 1055 debug_unregister(vmur_dbf);
1055 return rc; 1056 return rc;
@@ -1057,9 +1058,9 @@ fail_free_dbf:
1057 1058
1058static void __exit ur_exit(void) 1059static void __exit ur_exit(void)
1059{ 1060{
1060 class_destroy(vmur_class);
1061 unregister_chrdev_region(ur_first_dev_maj_min, NUM_MINORS); 1061 unregister_chrdev_region(ur_first_dev_maj_min, NUM_MINORS);
1062 ccw_driver_unregister(&ur_driver); 1062 ccw_driver_unregister(&ur_driver);
1063 class_destroy(vmur_class);
1063 debug_unregister(vmur_dbf); 1064 debug_unregister(vmur_dbf);
1064 pr_info("%s unloaded.\n", ur_banner); 1065 pr_info("%s unloaded.\n", ur_banner);
1065} 1066}
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 1bbae433fbd8..c431198bdbc4 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -275,7 +275,7 @@ struct zcore_header {
275 u32 num_pages; 275 u32 num_pages;
276 u32 pad1; 276 u32 pad1;
277 u64 tod; 277 u64 tod;
278 cpuid_t cpu_id; 278 struct cpuid cpu_id;
279 u32 arch_id; 279 u32 arch_id;
280 u32 volnr; 280 u32 volnr;
281 u32 build_arch; 281 u32 build_arch;
diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index adb3dd301528..fa4c9662f65e 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the S/390 common i/o drivers 2# Makefile for the S/390 common i/o drivers
3# 3#
4 4
5obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o idset.o isc.o scsw.o \ 5obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o idset.o isc.o \
6 fcx.o itcw.o crw.o 6 fcx.o itcw.o crw.o
7ccw_device-objs += device.o device_fsm.o device_ops.o 7ccw_device-objs += device.o device_fsm.o device_ops.o
8ccw_device-objs += device_id.o device_pgid.o device_status.o 8ccw_device-objs += device_id.o device_pgid.o device_status.o
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 3e5f304ad88f..40002830d48a 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -417,7 +417,8 @@ int chp_new(struct chp_id chpid)
417 if (ret) { 417 if (ret) {
418 CIO_MSG_EVENT(0, "Could not register chp%x.%02x: %d\n", 418 CIO_MSG_EVENT(0, "Could not register chp%x.%02x: %d\n",
419 chpid.cssid, chpid.id, ret); 419 chpid.cssid, chpid.id, ret);
420 goto out_free; 420 put_device(&chp->dev);
421 goto out;
421 } 422 }
422 ret = sysfs_create_group(&chp->dev.kobj, &chp_attr_group); 423 ret = sysfs_create_group(&chp->dev.kobj, &chp_attr_group);
423 if (ret) { 424 if (ret) {
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 425e8f89a6c5..37aa611d4ac5 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -37,29 +37,6 @@ struct channel_path_desc {
37 37
38struct channel_path; 38struct channel_path;
39 39
40struct css_general_char {
41 u64 : 12;
42 u32 dynio : 1; /* bit 12 */
43 u32 : 28;
44 u32 aif : 1; /* bit 41 */
45 u32 : 3;
46 u32 mcss : 1; /* bit 45 */
47 u32 fcs : 1; /* bit 46 */
48 u32 : 1;
49 u32 ext_mb : 1; /* bit 48 */
50 u32 : 7;
51 u32 aif_tdd : 1; /* bit 56 */
52 u32 : 1;
53 u32 qebsm : 1; /* bit 58 */
54 u32 : 8;
55 u32 aif_osa : 1; /* bit 67 */
56 u32 : 14;
57 u32 cib : 1; /* bit 82 */
58 u32 : 5;
59 u32 fcx : 1; /* bit 88 */
60 u32 : 7;
61}__attribute__((packed));
62
63struct css_chsc_char { 40struct css_chsc_char {
64 u64 res; 41 u64 res;
65 u64 : 20; 42 u64 : 20;
@@ -72,7 +49,6 @@ struct css_chsc_char {
72 u32 : 19; 49 u32 : 19;
73}__attribute__((packed)); 50}__attribute__((packed));
74 51
75extern struct css_general_char css_general_characteristics;
76extern struct css_chsc_char css_chsc_characteristics; 52extern struct css_chsc_char css_chsc_characteristics;
77 53
78struct chsc_ssd_info { 54struct chsc_ssd_info {
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 5ec7789bd9d8..138124fcfcad 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -139,12 +139,11 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */
139 __u8 lpm, /* logical path mask */ 139 __u8 lpm, /* logical path mask */
140 __u8 key) /* storage key */ 140 __u8 key) /* storage key */
141{ 141{
142 char dbf_txt[15];
143 int ccode; 142 int ccode;
144 union orb *orb; 143 union orb *orb;
145 144
146 CIO_TRACE_EVENT(4, "stIO"); 145 CIO_TRACE_EVENT(5, "stIO");
147 CIO_TRACE_EVENT(4, dev_name(&sch->dev)); 146 CIO_TRACE_EVENT(5, dev_name(&sch->dev));
148 147
149 orb = &to_io_private(sch)->orb; 148 orb = &to_io_private(sch)->orb;
150 memset(orb, 0, sizeof(union orb)); 149 memset(orb, 0, sizeof(union orb));
@@ -169,8 +168,7 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */
169 ccode = ssch(sch->schid, orb); 168 ccode = ssch(sch->schid, orb);
170 169
171 /* process condition code */ 170 /* process condition code */
172 sprintf(dbf_txt, "ccode:%d", ccode); 171 CIO_HEX_EVENT(5, &ccode, sizeof(ccode));
173 CIO_TRACE_EVENT(4, dbf_txt);
174 172
175 switch (ccode) { 173 switch (ccode) {
176 case 0: 174 case 0:
@@ -201,16 +199,14 @@ cio_start (struct subchannel *sch, struct ccw1 *cpa, __u8 lpm)
201int 199int
202cio_resume (struct subchannel *sch) 200cio_resume (struct subchannel *sch)
203{ 201{
204 char dbf_txt[15];
205 int ccode; 202 int ccode;
206 203
207 CIO_TRACE_EVENT (4, "resIO"); 204 CIO_TRACE_EVENT(4, "resIO");
208 CIO_TRACE_EVENT(4, dev_name(&sch->dev)); 205 CIO_TRACE_EVENT(4, dev_name(&sch->dev));
209 206
210 ccode = rsch (sch->schid); 207 ccode = rsch (sch->schid);
211 208
212 sprintf (dbf_txt, "ccode:%d", ccode); 209 CIO_HEX_EVENT(4, &ccode, sizeof(ccode));
213 CIO_TRACE_EVENT (4, dbf_txt);
214 210
215 switch (ccode) { 211 switch (ccode) {
216 case 0: 212 case 0:
@@ -235,13 +231,12 @@ cio_resume (struct subchannel *sch)
235int 231int
236cio_halt(struct subchannel *sch) 232cio_halt(struct subchannel *sch)
237{ 233{
238 char dbf_txt[15];
239 int ccode; 234 int ccode;
240 235
241 if (!sch) 236 if (!sch)
242 return -ENODEV; 237 return -ENODEV;
243 238
244 CIO_TRACE_EVENT (2, "haltIO"); 239 CIO_TRACE_EVENT(2, "haltIO");
245 CIO_TRACE_EVENT(2, dev_name(&sch->dev)); 240 CIO_TRACE_EVENT(2, dev_name(&sch->dev));
246 241
247 /* 242 /*
@@ -249,8 +244,7 @@ cio_halt(struct subchannel *sch)
249 */ 244 */
250 ccode = hsch (sch->schid); 245 ccode = hsch (sch->schid);
251 246
252 sprintf (dbf_txt, "ccode:%d", ccode); 247 CIO_HEX_EVENT(2, &ccode, sizeof(ccode));
253 CIO_TRACE_EVENT (2, dbf_txt);
254 248
255 switch (ccode) { 249 switch (ccode) {
256 case 0: 250 case 0:
@@ -270,13 +264,12 @@ cio_halt(struct subchannel *sch)
270int 264int
271cio_clear(struct subchannel *sch) 265cio_clear(struct subchannel *sch)
272{ 266{
273 char dbf_txt[15];
274 int ccode; 267 int ccode;
275 268
276 if (!sch) 269 if (!sch)
277 return -ENODEV; 270 return -ENODEV;
278 271
279 CIO_TRACE_EVENT (2, "clearIO"); 272 CIO_TRACE_EVENT(2, "clearIO");
280 CIO_TRACE_EVENT(2, dev_name(&sch->dev)); 273 CIO_TRACE_EVENT(2, dev_name(&sch->dev));
281 274
282 /* 275 /*
@@ -284,8 +277,7 @@ cio_clear(struct subchannel *sch)
284 */ 277 */
285 ccode = csch (sch->schid); 278 ccode = csch (sch->schid);
286 279
287 sprintf (dbf_txt, "ccode:%d", ccode); 280 CIO_HEX_EVENT(2, &ccode, sizeof(ccode));
288 CIO_TRACE_EVENT (2, dbf_txt);
289 281
290 switch (ccode) { 282 switch (ccode) {
291 case 0: 283 case 0:
@@ -306,19 +298,17 @@ cio_clear(struct subchannel *sch)
306int 298int
307cio_cancel (struct subchannel *sch) 299cio_cancel (struct subchannel *sch)
308{ 300{
309 char dbf_txt[15];
310 int ccode; 301 int ccode;
311 302
312 if (!sch) 303 if (!sch)
313 return -ENODEV; 304 return -ENODEV;
314 305
315 CIO_TRACE_EVENT (2, "cancelIO"); 306 CIO_TRACE_EVENT(2, "cancelIO");
316 CIO_TRACE_EVENT(2, dev_name(&sch->dev)); 307 CIO_TRACE_EVENT(2, dev_name(&sch->dev));
317 308
318 ccode = xsch (sch->schid); 309 ccode = xsch (sch->schid);
319 310
320 sprintf (dbf_txt, "ccode:%d", ccode); 311 CIO_HEX_EVENT(2, &ccode, sizeof(ccode));
321 CIO_TRACE_EVENT (2, dbf_txt);
322 312
323 switch (ccode) { 313 switch (ccode) {
324 case 0: /* success */ 314 case 0: /* success */
@@ -429,11 +419,10 @@ EXPORT_SYMBOL_GPL(cio_update_schib);
429 */ 419 */
430int cio_enable_subchannel(struct subchannel *sch, u32 intparm) 420int cio_enable_subchannel(struct subchannel *sch, u32 intparm)
431{ 421{
432 char dbf_txt[15];
433 int retry; 422 int retry;
434 int ret; 423 int ret;
435 424
436 CIO_TRACE_EVENT (2, "ensch"); 425 CIO_TRACE_EVENT(2, "ensch");
437 CIO_TRACE_EVENT(2, dev_name(&sch->dev)); 426 CIO_TRACE_EVENT(2, dev_name(&sch->dev));
438 427
439 if (sch_is_pseudo_sch(sch)) 428 if (sch_is_pseudo_sch(sch))
@@ -460,8 +449,7 @@ int cio_enable_subchannel(struct subchannel *sch, u32 intparm)
460 } else 449 } else
461 break; 450 break;
462 } 451 }
463 sprintf (dbf_txt, "ret:%d", ret); 452 CIO_HEX_EVENT(2, &ret, sizeof(ret));
464 CIO_TRACE_EVENT (2, dbf_txt);
465 return ret; 453 return ret;
466} 454}
467EXPORT_SYMBOL_GPL(cio_enable_subchannel); 455EXPORT_SYMBOL_GPL(cio_enable_subchannel);
@@ -472,11 +460,10 @@ EXPORT_SYMBOL_GPL(cio_enable_subchannel);
472 */ 460 */
473int cio_disable_subchannel(struct subchannel *sch) 461int cio_disable_subchannel(struct subchannel *sch)
474{ 462{
475 char dbf_txt[15];
476 int retry; 463 int retry;
477 int ret; 464 int ret;
478 465
479 CIO_TRACE_EVENT (2, "dissch"); 466 CIO_TRACE_EVENT(2, "dissch");
480 CIO_TRACE_EVENT(2, dev_name(&sch->dev)); 467 CIO_TRACE_EVENT(2, dev_name(&sch->dev));
481 468
482 if (sch_is_pseudo_sch(sch)) 469 if (sch_is_pseudo_sch(sch))
@@ -495,8 +482,7 @@ int cio_disable_subchannel(struct subchannel *sch)
495 } else 482 } else
496 break; 483 break;
497 } 484 }
498 sprintf (dbf_txt, "ret:%d", ret); 485 CIO_HEX_EVENT(2, &ret, sizeof(ret));
499 CIO_TRACE_EVENT (2, dbf_txt);
500 return ret; 486 return ret;
501} 487}
502EXPORT_SYMBOL_GPL(cio_disable_subchannel); 488EXPORT_SYMBOL_GPL(cio_disable_subchannel);
@@ -578,11 +564,6 @@ int cio_validate_subchannel(struct subchannel *sch, struct subchannel_id schid)
578 goto out; 564 goto out;
579 } 565 }
580 mutex_init(&sch->reg_mutex); 566 mutex_init(&sch->reg_mutex);
581 /* Set a name for the subchannel */
582 if (cio_is_console(schid))
583 sch->dev.init_name = cio_get_console_sch_name(schid);
584 else
585 dev_set_name(&sch->dev, "0.%x.%04x", schid.ssid, schid.sch_no);
586 567
587 /* 568 /*
588 * The first subchannel that is not-operational (ccode==3) 569 * The first subchannel that is not-operational (ccode==3)
@@ -686,7 +667,6 @@ void __irq_entry do_IRQ(struct pt_regs *regs)
686 667
687#ifdef CONFIG_CCW_CONSOLE 668#ifdef CONFIG_CCW_CONSOLE
688static struct subchannel console_subchannel; 669static struct subchannel console_subchannel;
689static char console_sch_name[10] = "0.x.xxxx";
690static struct io_subchannel_private console_priv; 670static struct io_subchannel_private console_priv;
691static int console_subchannel_in_use; 671static int console_subchannel_in_use;
692 672
@@ -873,12 +853,6 @@ cio_get_console_subchannel(void)
873 return &console_subchannel; 853 return &console_subchannel;
874} 854}
875 855
876const char *cio_get_console_sch_name(struct subchannel_id schid)
877{
878 snprintf(console_sch_name, 10, "0.%x.%04x", schid.ssid, schid.sch_no);
879 return (const char *)console_sch_name;
880}
881
882#endif 856#endif
883static int 857static int
884__disable_subchannel_easy(struct subchannel_id schid, struct schib *schib) 858__disable_subchannel_easy(struct subchannel_id schid, struct schib *schib)
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index 5150fba742ac..2e43558c704b 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -133,15 +133,11 @@ extern int cio_is_console(struct subchannel_id);
133extern struct subchannel *cio_get_console_subchannel(void); 133extern struct subchannel *cio_get_console_subchannel(void);
134extern spinlock_t * cio_get_console_lock(void); 134extern spinlock_t * cio_get_console_lock(void);
135extern void *cio_get_console_priv(void); 135extern void *cio_get_console_priv(void);
136extern const char *cio_get_console_sch_name(struct subchannel_id schid);
137extern const char *cio_get_console_cdev_name(struct subchannel *sch);
138#else 136#else
139#define cio_is_console(schid) 0 137#define cio_is_console(schid) 0
140#define cio_get_console_subchannel() NULL 138#define cio_get_console_subchannel() NULL
141#define cio_get_console_lock() NULL 139#define cio_get_console_lock() NULL
142#define cio_get_console_priv() NULL 140#define cio_get_console_priv() NULL
143#define cio_get_console_sch_name(schid) NULL
144#define cio_get_console_cdev_name(sch) NULL
145#endif 141#endif
146 142
147#endif 143#endif
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 85d43c6bcb66..e995123fd805 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -152,24 +152,15 @@ css_alloc_subchannel(struct subchannel_id schid)
152} 152}
153 153
154static void 154static void
155css_free_subchannel(struct subchannel *sch)
156{
157 if (sch) {
158 /* Reset intparm to zeroes. */
159 sch->config.intparm = 0;
160 cio_commit_config(sch);
161 kfree(sch->lock);
162 kfree(sch);
163 }
164}
165
166static void
167css_subchannel_release(struct device *dev) 155css_subchannel_release(struct device *dev)
168{ 156{
169 struct subchannel *sch; 157 struct subchannel *sch;
170 158
171 sch = to_subchannel(dev); 159 sch = to_subchannel(dev);
172 if (!cio_is_console(sch->schid)) { 160 if (!cio_is_console(sch->schid)) {
161 /* Reset intparm to zeroes. */
162 sch->config.intparm = 0;
163 cio_commit_config(sch);
173 kfree(sch->lock); 164 kfree(sch->lock);
174 kfree(sch); 165 kfree(sch);
175 } 166 }
@@ -180,6 +171,8 @@ static int css_sch_device_register(struct subchannel *sch)
180 int ret; 171 int ret;
181 172
182 mutex_lock(&sch->reg_mutex); 173 mutex_lock(&sch->reg_mutex);
174 dev_set_name(&sch->dev, "0.%x.%04x", sch->schid.ssid,
175 sch->schid.sch_no);
183 ret = device_register(&sch->dev); 176 ret = device_register(&sch->dev);
184 mutex_unlock(&sch->reg_mutex); 177 mutex_unlock(&sch->reg_mutex);
185 return ret; 178 return ret;
@@ -327,7 +320,7 @@ int css_probe_device(struct subchannel_id schid)
327 return PTR_ERR(sch); 320 return PTR_ERR(sch);
328 ret = css_register_subchannel(sch); 321 ret = css_register_subchannel(sch);
329 if (ret) 322 if (ret)
330 css_free_subchannel(sch); 323 put_device(&sch->dev);
331 return ret; 324 return ret;
332} 325}
333 326
@@ -644,7 +637,10 @@ __init_channel_subsystem(struct subchannel_id schid, void *data)
644 * not working) so we do it now. This is true e.g. for the 637 * not working) so we do it now. This is true e.g. for the
645 * console subchannel. 638 * console subchannel.
646 */ 639 */
647 css_register_subchannel(sch); 640 if (css_register_subchannel(sch)) {
641 if (!cio_is_console(schid))
642 put_device(&sch->dev);
643 }
648 return 0; 644 return 0;
649} 645}
650 646
@@ -661,8 +657,8 @@ css_generate_pgid(struct channel_subsystem *css, u32 tod_high)
661 css->global_pgid.pgid_high.cpu_addr = 0; 657 css->global_pgid.pgid_high.cpu_addr = 0;
662#endif 658#endif
663 } 659 }
664 css->global_pgid.cpu_id = ((cpuid_t *) __LC_CPUID)->ident; 660 css->global_pgid.cpu_id = S390_lowcore.cpu_id.ident;
665 css->global_pgid.cpu_model = ((cpuid_t *) __LC_CPUID)->machine; 661 css->global_pgid.cpu_model = S390_lowcore.cpu_id.machine;
666 css->global_pgid.tod_high = tod_high; 662 css->global_pgid.tod_high = tod_high;
667 663
668} 664}
@@ -920,8 +916,10 @@ init_channel_subsystem (void)
920 goto out_device; 916 goto out_device;
921 } 917 }
922 ret = device_register(&css->pseudo_subchannel->dev); 918 ret = device_register(&css->pseudo_subchannel->dev);
923 if (ret) 919 if (ret) {
920 put_device(&css->pseudo_subchannel->dev);
924 goto out_file; 921 goto out_file;
922 }
925 } 923 }
926 ret = register_reboot_notifier(&css_reboot_notifier); 924 ret = register_reboot_notifier(&css_reboot_notifier);
927 if (ret) 925 if (ret)
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index d593bc76afe3..0f95405c2c5e 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -307,8 +307,11 @@ int ccw_device_is_orphan(struct ccw_device *cdev)
307 307
308static void ccw_device_unregister(struct ccw_device *cdev) 308static void ccw_device_unregister(struct ccw_device *cdev)
309{ 309{
310 if (test_and_clear_bit(1, &cdev->private->registered)) 310 if (test_and_clear_bit(1, &cdev->private->registered)) {
311 device_del(&cdev->dev); 311 device_del(&cdev->dev);
312 /* Release reference from device_initialize(). */
313 put_device(&cdev->dev);
314 }
312} 315}
313 316
314static void ccw_device_remove_orphan_cb(struct work_struct *work) 317static void ccw_device_remove_orphan_cb(struct work_struct *work)
@@ -319,7 +322,6 @@ static void ccw_device_remove_orphan_cb(struct work_struct *work)
319 priv = container_of(work, struct ccw_device_private, kick_work); 322 priv = container_of(work, struct ccw_device_private, kick_work);
320 cdev = priv->cdev; 323 cdev = priv->cdev;
321 ccw_device_unregister(cdev); 324 ccw_device_unregister(cdev);
322 put_device(&cdev->dev);
323 /* Release cdev reference for workqueue processing. */ 325 /* Release cdev reference for workqueue processing. */
324 put_device(&cdev->dev); 326 put_device(&cdev->dev);
325} 327}
@@ -333,15 +335,15 @@ ccw_device_remove_disconnected(struct ccw_device *cdev)
333 * Forced offline in disconnected state means 335 * Forced offline in disconnected state means
334 * 'throw away device'. 336 * 'throw away device'.
335 */ 337 */
336 /* Get cdev reference for workqueue processing. */
337 if (!get_device(&cdev->dev))
338 return;
339 if (ccw_device_is_orphan(cdev)) { 338 if (ccw_device_is_orphan(cdev)) {
340 /* 339 /*
341 * Deregister ccw device. 340 * Deregister ccw device.
342 * Unfortunately, we cannot do this directly from the 341 * Unfortunately, we cannot do this directly from the
343 * attribute method. 342 * attribute method.
344 */ 343 */
344 /* Get cdev reference for workqueue processing. */
345 if (!get_device(&cdev->dev))
346 return;
345 spin_lock_irqsave(cdev->ccwlock, flags); 347 spin_lock_irqsave(cdev->ccwlock, flags);
346 cdev->private->state = DEV_STATE_NOT_OPER; 348 cdev->private->state = DEV_STATE_NOT_OPER;
347 spin_unlock_irqrestore(cdev->ccwlock, flags); 349 spin_unlock_irqrestore(cdev->ccwlock, flags);
@@ -380,30 +382,34 @@ int ccw_device_set_offline(struct ccw_device *cdev)
380 } 382 }
381 cdev->online = 0; 383 cdev->online = 0;
382 spin_lock_irq(cdev->ccwlock); 384 spin_lock_irq(cdev->ccwlock);
383 ret = ccw_device_offline(cdev); 385 /* Wait until a final state or DISCONNECTED is reached */
384 if (ret == -ENODEV) { 386 while (!dev_fsm_final_state(cdev) &&
385 if (cdev->private->state != DEV_STATE_NOT_OPER) { 387 cdev->private->state != DEV_STATE_DISCONNECTED) {
386 cdev->private->state = DEV_STATE_OFFLINE;
387 dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
388 }
389 spin_unlock_irq(cdev->ccwlock); 388 spin_unlock_irq(cdev->ccwlock);
390 /* Give up reference from ccw_device_set_online(). */ 389 wait_event(cdev->private->wait_q, (dev_fsm_final_state(cdev) ||
391 put_device(&cdev->dev); 390 cdev->private->state == DEV_STATE_DISCONNECTED));
392 return ret; 391 spin_lock_irq(cdev->ccwlock);
393 } 392 }
393 ret = ccw_device_offline(cdev);
394 if (ret)
395 goto error;
394 spin_unlock_irq(cdev->ccwlock); 396 spin_unlock_irq(cdev->ccwlock);
395 if (ret == 0) { 397 wait_event(cdev->private->wait_q, (dev_fsm_final_state(cdev) ||
396 wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); 398 cdev->private->state == DEV_STATE_DISCONNECTED));
397 /* Give up reference from ccw_device_set_online(). */ 399 /* Give up reference from ccw_device_set_online(). */
398 put_device(&cdev->dev); 400 put_device(&cdev->dev);
399 } else { 401 return 0;
400 CIO_MSG_EVENT(0, "ccw_device_offline returned %d, " 402
401 "device 0.%x.%04x\n", 403error:
402 ret, cdev->private->dev_id.ssid, 404 CIO_MSG_EVENT(0, "ccw_device_offline returned %d, device 0.%x.%04x\n",
403 cdev->private->dev_id.devno); 405 ret, cdev->private->dev_id.ssid,
404 cdev->online = 1; 406 cdev->private->dev_id.devno);
405 } 407 cdev->private->state = DEV_STATE_OFFLINE;
406 return ret; 408 dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
409 spin_unlock_irq(cdev->ccwlock);
410 /* Give up reference from ccw_device_set_online(). */
411 put_device(&cdev->dev);
412 return -ENODEV;
407} 413}
408 414
409/** 415/**
@@ -421,6 +427,7 @@ int ccw_device_set_offline(struct ccw_device *cdev)
421int ccw_device_set_online(struct ccw_device *cdev) 427int ccw_device_set_online(struct ccw_device *cdev)
422{ 428{
423 int ret; 429 int ret;
430 int ret2;
424 431
425 if (!cdev) 432 if (!cdev)
426 return -ENODEV; 433 return -ENODEV;
@@ -444,28 +451,53 @@ int ccw_device_set_online(struct ccw_device *cdev)
444 put_device(&cdev->dev); 451 put_device(&cdev->dev);
445 return ret; 452 return ret;
446 } 453 }
447 if (cdev->private->state != DEV_STATE_ONLINE) { 454 spin_lock_irq(cdev->ccwlock);
455 /* Check if online processing was successful */
456 if ((cdev->private->state != DEV_STATE_ONLINE) &&
457 (cdev->private->state != DEV_STATE_W4SENSE)) {
458 spin_unlock_irq(cdev->ccwlock);
448 /* Give up online reference since onlining failed. */ 459 /* Give up online reference since onlining failed. */
449 put_device(&cdev->dev); 460 put_device(&cdev->dev);
450 return -ENODEV; 461 return -ENODEV;
451 } 462 }
452 if (!cdev->drv->set_online || cdev->drv->set_online(cdev) == 0) { 463 spin_unlock_irq(cdev->ccwlock);
453 cdev->online = 1; 464 if (cdev->drv->set_online)
454 return 0; 465 ret = cdev->drv->set_online(cdev);
455 } 466 if (ret)
467 goto rollback;
468 cdev->online = 1;
469 return 0;
470
471rollback:
456 spin_lock_irq(cdev->ccwlock); 472 spin_lock_irq(cdev->ccwlock);
457 ret = ccw_device_offline(cdev); 473 /* Wait until a final state or DISCONNECTED is reached */
474 while (!dev_fsm_final_state(cdev) &&
475 cdev->private->state != DEV_STATE_DISCONNECTED) {
476 spin_unlock_irq(cdev->ccwlock);
477 wait_event(cdev->private->wait_q, (dev_fsm_final_state(cdev) ||
478 cdev->private->state == DEV_STATE_DISCONNECTED));
479 spin_lock_irq(cdev->ccwlock);
480 }
481 ret2 = ccw_device_offline(cdev);
482 if (ret2)
483 goto error;
458 spin_unlock_irq(cdev->ccwlock); 484 spin_unlock_irq(cdev->ccwlock);
459 if (ret == 0) 485 wait_event(cdev->private->wait_q, (dev_fsm_final_state(cdev) ||
460 wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev)); 486 cdev->private->state == DEV_STATE_DISCONNECTED));
461 else
462 CIO_MSG_EVENT(0, "ccw_device_offline returned %d, "
463 "device 0.%x.%04x\n",
464 ret, cdev->private->dev_id.ssid,
465 cdev->private->dev_id.devno);
466 /* Give up online reference since onlining failed. */ 487 /* Give up online reference since onlining failed. */
467 put_device(&cdev->dev); 488 put_device(&cdev->dev);
468 return (ret == 0) ? -ENODEV : ret; 489 return ret;
490
491error:
492 CIO_MSG_EVENT(0, "rollback ccw_device_offline returned %d, "
493 "device 0.%x.%04x\n",
494 ret2, cdev->private->dev_id.ssid,
495 cdev->private->dev_id.devno);
496 cdev->private->state = DEV_STATE_OFFLINE;
497 spin_unlock_irq(cdev->ccwlock);
498 /* Give up online reference since onlining failed. */
499 put_device(&cdev->dev);
500 return ret;
469} 501}
470 502
471static int online_store_handle_offline(struct ccw_device *cdev) 503static int online_store_handle_offline(struct ccw_device *cdev)
@@ -637,8 +669,12 @@ static int ccw_device_register(struct ccw_device *cdev)
637 int ret; 669 int ret;
638 670
639 dev->bus = &ccw_bus_type; 671 dev->bus = &ccw_bus_type;
640 672 ret = dev_set_name(&cdev->dev, "0.%x.%04x", cdev->private->dev_id.ssid,
641 if ((ret = device_add(dev))) 673 cdev->private->dev_id.devno);
674 if (ret)
675 return ret;
676 ret = device_add(dev);
677 if (ret)
642 return ret; 678 return ret;
643 679
644 set_bit(1, &cdev->private->registered); 680 set_bit(1, &cdev->private->registered);
@@ -1024,9 +1060,6 @@ static void ccw_device_call_sch_unregister(struct work_struct *work)
1024 return; 1060 return;
1025 sch = to_subchannel(cdev->dev.parent); 1061 sch = to_subchannel(cdev->dev.parent);
1026 css_sch_device_unregister(sch); 1062 css_sch_device_unregister(sch);
1027 /* Reset intparm to zeroes. */
1028 sch->config.intparm = 0;
1029 cio_commit_config(sch);
1030 /* Release cdev reference for workqueue processing.*/ 1063 /* Release cdev reference for workqueue processing.*/
1031 put_device(&cdev->dev); 1064 put_device(&cdev->dev);
1032 /* Release subchannel reference for local processing. */ 1065 /* Release subchannel reference for local processing. */
@@ -1035,6 +1068,9 @@ static void ccw_device_call_sch_unregister(struct work_struct *work)
1035 1068
1036void ccw_device_schedule_sch_unregister(struct ccw_device *cdev) 1069void ccw_device_schedule_sch_unregister(struct ccw_device *cdev)
1037{ 1070{
1071 /* Get cdev reference for workqueue processing. */
1072 if (!get_device(&cdev->dev))
1073 return;
1038 PREPARE_WORK(&cdev->private->kick_work, 1074 PREPARE_WORK(&cdev->private->kick_work,
1039 ccw_device_call_sch_unregister); 1075 ccw_device_call_sch_unregister);
1040 queue_work(slow_path_wq, &cdev->private->kick_work); 1076 queue_work(slow_path_wq, &cdev->private->kick_work);
@@ -1055,9 +1091,6 @@ io_subchannel_recog_done(struct ccw_device *cdev)
1055 /* Device did not respond in time. */ 1091 /* Device did not respond in time. */
1056 case DEV_STATE_NOT_OPER: 1092 case DEV_STATE_NOT_OPER:
1057 cdev->private->flags.recog_done = 1; 1093 cdev->private->flags.recog_done = 1;
1058 /* Remove device found not operational. */
1059 if (!get_device(&cdev->dev))
1060 break;
1061 ccw_device_schedule_sch_unregister(cdev); 1094 ccw_device_schedule_sch_unregister(cdev);
1062 if (atomic_dec_and_test(&ccw_device_init_count)) 1095 if (atomic_dec_and_test(&ccw_device_init_count))
1063 wake_up(&ccw_device_init_wq); 1096 wake_up(&ccw_device_init_wq);
@@ -1095,13 +1128,6 @@ io_subchannel_recog(struct ccw_device *cdev, struct subchannel *sch)
1095 init_waitqueue_head(&priv->wait_q); 1128 init_waitqueue_head(&priv->wait_q);
1096 init_timer(&priv->timer); 1129 init_timer(&priv->timer);
1097 1130
1098 /* Set an initial name for the device. */
1099 if (cio_is_console(sch->schid))
1100 cdev->dev.init_name = cio_get_console_cdev_name(sch);
1101 else
1102 dev_set_name(&cdev->dev, "0.%x.%04x",
1103 sch->schid.ssid, sch->schib.pmcw.dev);
1104
1105 /* Increase counter of devices currently in recognition. */ 1131 /* Increase counter of devices currently in recognition. */
1106 atomic_inc(&ccw_device_init_count); 1132 atomic_inc(&ccw_device_init_count);
1107 1133
@@ -1171,8 +1197,8 @@ static void io_subchannel_irq(struct subchannel *sch)
1171 1197
1172 cdev = sch_get_cdev(sch); 1198 cdev = sch_get_cdev(sch);
1173 1199
1174 CIO_TRACE_EVENT(3, "IRQ"); 1200 CIO_TRACE_EVENT(6, "IRQ");
1175 CIO_TRACE_EVENT(3, dev_name(&sch->dev)); 1201 CIO_TRACE_EVENT(6, dev_name(&sch->dev));
1176 if (cdev) 1202 if (cdev)
1177 dev_fsm_event(cdev, DEV_EVENT_INTERRUPT); 1203 dev_fsm_event(cdev, DEV_EVENT_INTERRUPT);
1178} 1204}
@@ -1210,9 +1236,6 @@ static void io_subchannel_do_unreg(struct work_struct *work)
1210 1236
1211 sch = container_of(work, struct subchannel, work); 1237 sch = container_of(work, struct subchannel, work);
1212 css_sch_device_unregister(sch); 1238 css_sch_device_unregister(sch);
1213 /* Reset intparm to zeroes. */
1214 sch->config.intparm = 0;
1215 cio_commit_config(sch);
1216 put_device(&sch->dev); 1239 put_device(&sch->dev);
1217} 1240}
1218 1241
@@ -1334,7 +1357,6 @@ io_subchannel_remove (struct subchannel *sch)
1334 cdev->private->state = DEV_STATE_NOT_OPER; 1357 cdev->private->state = DEV_STATE_NOT_OPER;
1335 spin_unlock_irqrestore(cdev->ccwlock, flags); 1358 spin_unlock_irqrestore(cdev->ccwlock, flags);
1336 ccw_device_unregister(cdev); 1359 ccw_device_unregister(cdev);
1337 put_device(&cdev->dev);
1338 kfree(sch->private); 1360 kfree(sch->private);
1339 sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group); 1361 sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group);
1340 return 0; 1362 return 0;
@@ -1571,8 +1593,6 @@ static int purge_fn(struct device *dev, void *data)
1571 spin_unlock_irq(cdev->ccwlock); 1593 spin_unlock_irq(cdev->ccwlock);
1572 if (!unreg) 1594 if (!unreg)
1573 goto out; 1595 goto out;
1574 if (!get_device(&cdev->dev))
1575 goto out;
1576 CIO_MSG_EVENT(3, "ccw: purging 0.%x.%04x\n", priv->dev_id.ssid, 1596 CIO_MSG_EVENT(3, "ccw: purging 0.%x.%04x\n", priv->dev_id.ssid,
1577 priv->dev_id.devno); 1597 priv->dev_id.devno);
1578 ccw_device_schedule_sch_unregister(cdev); 1598 ccw_device_schedule_sch_unregister(cdev);
@@ -1688,10 +1708,6 @@ static int io_subchannel_sch_event(struct subchannel *sch, int slow)
1688 spin_unlock_irqrestore(sch->lock, flags); 1708 spin_unlock_irqrestore(sch->lock, flags);
1689 css_sch_device_unregister(sch); 1709 css_sch_device_unregister(sch);
1690 spin_lock_irqsave(sch->lock, flags); 1710 spin_lock_irqsave(sch->lock, flags);
1691
1692 /* Reset intparm to zeroes. */
1693 sch->config.intparm = 0;
1694 cio_commit_config(sch);
1695 break; 1711 break;
1696 case REPROBE: 1712 case REPROBE:
1697 ccw_device_trigger_reprobe(cdev); 1713 ccw_device_trigger_reprobe(cdev);
@@ -1712,7 +1728,6 @@ static int io_subchannel_sch_event(struct subchannel *sch, int slow)
1712 1728
1713#ifdef CONFIG_CCW_CONSOLE 1729#ifdef CONFIG_CCW_CONSOLE
1714static struct ccw_device console_cdev; 1730static struct ccw_device console_cdev;
1715static char console_cdev_name[10] = "0.x.xxxx";
1716static struct ccw_device_private console_private; 1731static struct ccw_device_private console_private;
1717static int console_cdev_in_use; 1732static int console_cdev_in_use;
1718 1733
@@ -1796,13 +1811,6 @@ int ccw_device_force_console(void)
1796 return ccw_device_pm_restore(&console_cdev.dev); 1811 return ccw_device_pm_restore(&console_cdev.dev);
1797} 1812}
1798EXPORT_SYMBOL_GPL(ccw_device_force_console); 1813EXPORT_SYMBOL_GPL(ccw_device_force_console);
1799
1800const char *cio_get_console_cdev_name(struct subchannel *sch)
1801{
1802 snprintf(console_cdev_name, 10, "0.%x.%04x",
1803 sch->schid.ssid, sch->schib.pmcw.dev);
1804 return (const char *)console_cdev_name;
1805}
1806#endif 1814#endif
1807 1815
1808/* 1816/*
@@ -2020,7 +2028,9 @@ static void __ccw_device_pm_restore(struct ccw_device *cdev)
2020 spin_unlock_irq(sch->lock); 2028 spin_unlock_irq(sch->lock);
2021 if (ret) { 2029 if (ret) {
2022 CIO_MSG_EVENT(0, "Couldn't start recognition for device " 2030 CIO_MSG_EVENT(0, "Couldn't start recognition for device "
2023 "%s (ret=%d)\n", dev_name(&cdev->dev), ret); 2031 "0.%x.%04x (ret=%d)\n",
2032 cdev->private->dev_id.ssid,
2033 cdev->private->dev_id.devno, ret);
2024 spin_lock_irq(sch->lock); 2034 spin_lock_irq(sch->lock);
2025 cdev->private->state = DEV_STATE_DISCONNECTED; 2035 cdev->private->state = DEV_STATE_DISCONNECTED;
2026 spin_unlock_irq(sch->lock); 2036 spin_unlock_irq(sch->lock);
@@ -2083,8 +2093,9 @@ static int ccw_device_pm_restore(struct device *dev)
2083 } 2093 }
2084 /* check if the device id has changed */ 2094 /* check if the device id has changed */
2085 if (sch->schib.pmcw.dev != cdev->private->dev_id.devno) { 2095 if (sch->schib.pmcw.dev != cdev->private->dev_id.devno) {
2086 CIO_MSG_EVENT(0, "resume: sch %s: failed (devno changed from " 2096 CIO_MSG_EVENT(0, "resume: sch 0.%x.%04x: failed (devno "
2087 "%04x to %04x)\n", dev_name(&sch->dev), 2097 "changed from %04x to %04x)\n",
2098 sch->schid.ssid, sch->schid.sch_no,
2088 cdev->private->dev_id.devno, 2099 cdev->private->dev_id.devno,
2089 sch->schib.pmcw.dev); 2100 sch->schib.pmcw.dev);
2090 goto out_unreg_unlock; 2101 goto out_unreg_unlock;
@@ -2117,8 +2128,9 @@ static int ccw_device_pm_restore(struct device *dev)
2117 if (cm_enabled) { 2128 if (cm_enabled) {
2118 ret = ccw_set_cmf(cdev, 1); 2129 ret = ccw_set_cmf(cdev, 1);
2119 if (ret) { 2130 if (ret) {
2120 CIO_MSG_EVENT(2, "resume: cdev %s: cmf failed " 2131 CIO_MSG_EVENT(2, "resume: cdev 0.%x.%04x: cmf failed "
2121 "(rc=%d)\n", dev_name(&cdev->dev), ret); 2132 "(rc=%d)\n", cdev->private->dev_id.ssid,
2133 cdev->private->dev_id.devno, ret);
2122 ret = 0; 2134 ret = 0;
2123 } 2135 }
2124 } 2136 }
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 3db88c52d287..e728ce447f6e 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -394,6 +394,13 @@ ccw_device_done(struct ccw_device *cdev, int state)
394 ccw_device_schedule_sch_unregister(cdev); 394 ccw_device_schedule_sch_unregister(cdev);
395 cdev->private->flags.donotify = 0; 395 cdev->private->flags.donotify = 0;
396 } 396 }
397 if (state == DEV_STATE_NOT_OPER) {
398 CIO_MSG_EVENT(0, "Device %04x gone on subchannel %04x\n",
399 cdev->private->dev_id.devno, sch->schid.sch_no);
400 if (!ccw_device_notify(cdev, CIO_GONE))
401 ccw_device_schedule_sch_unregister(cdev);
402 cdev->private->flags.donotify = 0;
403 }
397 404
398 if (cdev->private->flags.donotify) { 405 if (cdev->private->flags.donotify) {
399 cdev->private->flags.donotify = 0; 406 cdev->private->flags.donotify = 0;
@@ -731,6 +738,17 @@ static void ccw_device_generic_notoper(struct ccw_device *cdev,
731} 738}
732 739
733/* 740/*
741 * Handle path verification event in offline state.
742 */
743static void ccw_device_offline_verify(struct ccw_device *cdev,
744 enum dev_event dev_event)
745{
746 struct subchannel *sch = to_subchannel(cdev->dev.parent);
747
748 css_schedule_eval(sch->schid);
749}
750
751/*
734 * Handle path verification event. 752 * Handle path verification event.
735 */ 753 */
736static void 754static void
@@ -887,6 +905,8 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event)
887 } 905 }
888call_handler: 906call_handler:
889 cdev->private->state = DEV_STATE_ONLINE; 907 cdev->private->state = DEV_STATE_ONLINE;
908 /* In case sensing interfered with setting the device online */
909 wake_up(&cdev->private->wait_q);
890 /* Call the handler. */ 910 /* Call the handler. */
891 if (ccw_device_call_handler(cdev) && cdev->private->flags.doverify) 911 if (ccw_device_call_handler(cdev) && cdev->private->flags.doverify)
892 /* Start delayed path verification. */ 912 /* Start delayed path verification. */
@@ -1149,7 +1169,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = {
1149 [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, 1169 [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper,
1150 [DEV_EVENT_INTERRUPT] = ccw_device_offline_irq, 1170 [DEV_EVENT_INTERRUPT] = ccw_device_offline_irq,
1151 [DEV_EVENT_TIMEOUT] = ccw_device_nop, 1171 [DEV_EVENT_TIMEOUT] = ccw_device_nop,
1152 [DEV_EVENT_VERIFY] = ccw_device_nop, 1172 [DEV_EVENT_VERIFY] = ccw_device_offline_verify,
1153 }, 1173 },
1154 [DEV_STATE_VERIFY] = { 1174 [DEV_STATE_VERIFY] = {
1155 [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, 1175 [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper,
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index b1241f8fae88..ff7748a9199d 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * linux/drivers/s390/cio/qdio.h 2 * linux/drivers/s390/cio/qdio.h
3 * 3 *
4 * Copyright 2000,2008 IBM Corp. 4 * Copyright 2000,2009 IBM Corp.
5 * Author(s): Utz Bacher <utz.bacher@de.ibm.com> 5 * Author(s): Utz Bacher <utz.bacher@de.ibm.com>
6 * Jan Glauber <jang@linux.vnet.ibm.com> 6 * Jan Glauber <jang@linux.vnet.ibm.com>
7 */ 7 */
@@ -246,6 +246,7 @@ struct qdio_q {
246 atomic_t nr_buf_used; 246 atomic_t nr_buf_used;
247 247
248 struct qdio_irq *irq_ptr; 248 struct qdio_irq *irq_ptr;
249 struct dentry *debugfs_q;
249 struct tasklet_struct tasklet; 250 struct tasklet_struct tasklet;
250 251
251 /* error condition during a data transfer */ 252 /* error condition during a data transfer */
@@ -267,6 +268,7 @@ struct qdio_irq {
267 struct qib qib; 268 struct qib qib;
268 u32 *dsci; /* address of device state change indicator */ 269 u32 *dsci; /* address of device state change indicator */
269 struct ccw_device *cdev; 270 struct ccw_device *cdev;
271 struct dentry *debugfs_dev;
270 272
271 unsigned long int_parm; 273 unsigned long int_parm;
272 struct subchannel_id schid; 274 struct subchannel_id schid;
diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c
index b8626d4df116..1b78f639ead3 100644
--- a/drivers/s390/cio/qdio_debug.c
+++ b/drivers/s390/cio/qdio_debug.c
@@ -1,14 +1,12 @@
1/* 1/*
2 * drivers/s390/cio/qdio_debug.c 2 * drivers/s390/cio/qdio_debug.c
3 * 3 *
4 * Copyright IBM Corp. 2008 4 * Copyright IBM Corp. 2008,2009
5 * 5 *
6 * Author: Jan Glauber (jang@linux.vnet.ibm.com) 6 * Author: Jan Glauber (jang@linux.vnet.ibm.com)
7 */ 7 */
8#include <linux/proc_fs.h>
9#include <linux/seq_file.h> 8#include <linux/seq_file.h>
10#include <linux/debugfs.h> 9#include <linux/debugfs.h>
11#include <asm/qdio.h>
12#include <asm/debug.h> 10#include <asm/debug.h>
13#include "qdio_debug.h" 11#include "qdio_debug.h"
14#include "qdio.h" 12#include "qdio.h"
@@ -17,10 +15,7 @@ debug_info_t *qdio_dbf_setup;
17debug_info_t *qdio_dbf_error; 15debug_info_t *qdio_dbf_error;
18 16
19static struct dentry *debugfs_root; 17static struct dentry *debugfs_root;
20#define MAX_DEBUGFS_QUEUES 32 18#define QDIO_DEBUGFS_NAME_LEN 10
21static struct dentry *debugfs_queues[MAX_DEBUGFS_QUEUES] = { NULL };
22static DEFINE_MUTEX(debugfs_mutex);
23#define QDIO_DEBUGFS_NAME_LEN 40
24 19
25void qdio_allocate_dbf(struct qdio_initialize *init_data, 20void qdio_allocate_dbf(struct qdio_initialize *init_data,
26 struct qdio_irq *irq_ptr) 21 struct qdio_irq *irq_ptr)
@@ -130,20 +125,6 @@ static int qstat_seq_open(struct inode *inode, struct file *filp)
130 filp->f_path.dentry->d_inode->i_private); 125 filp->f_path.dentry->d_inode->i_private);
131} 126}
132 127
133static void remove_debugfs_entry(struct qdio_q *q)
134{
135 int i;
136
137 for (i = 0; i < MAX_DEBUGFS_QUEUES; i++) {
138 if (!debugfs_queues[i])
139 continue;
140 if (debugfs_queues[i]->d_inode->i_private == q) {
141 debugfs_remove(debugfs_queues[i]);
142 debugfs_queues[i] = NULL;
143 }
144 }
145}
146
147static struct file_operations debugfs_fops = { 128static struct file_operations debugfs_fops = {
148 .owner = THIS_MODULE, 129 .owner = THIS_MODULE,
149 .open = qstat_seq_open, 130 .open = qstat_seq_open,
@@ -155,22 +136,15 @@ static struct file_operations debugfs_fops = {
155 136
156static void setup_debugfs_entry(struct qdio_q *q, struct ccw_device *cdev) 137static void setup_debugfs_entry(struct qdio_q *q, struct ccw_device *cdev)
157{ 138{
158 int i = 0;
159 char name[QDIO_DEBUGFS_NAME_LEN]; 139 char name[QDIO_DEBUGFS_NAME_LEN];
160 140
161 while (debugfs_queues[i] != NULL) { 141 snprintf(name, QDIO_DEBUGFS_NAME_LEN, "%s_%d",
162 i++;
163 if (i >= MAX_DEBUGFS_QUEUES)
164 return;
165 }
166 snprintf(name, QDIO_DEBUGFS_NAME_LEN, "%s_%s_%d",
167 dev_name(&cdev->dev),
168 q->is_input_q ? "input" : "output", 142 q->is_input_q ? "input" : "output",
169 q->nr); 143 q->nr);
170 debugfs_queues[i] = debugfs_create_file(name, S_IFREG | S_IRUGO | S_IWUSR, 144 q->debugfs_q = debugfs_create_file(name, S_IFREG | S_IRUGO | S_IWUSR,
171 debugfs_root, q, &debugfs_fops); 145 q->irq_ptr->debugfs_dev, q, &debugfs_fops);
172 if (IS_ERR(debugfs_queues[i])) 146 if (IS_ERR(q->debugfs_q))
173 debugfs_queues[i] = NULL; 147 q->debugfs_q = NULL;
174} 148}
175 149
176void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev) 150void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev)
@@ -178,12 +152,14 @@ void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev)
178 struct qdio_q *q; 152 struct qdio_q *q;
179 int i; 153 int i;
180 154
181 mutex_lock(&debugfs_mutex); 155 irq_ptr->debugfs_dev = debugfs_create_dir(dev_name(&cdev->dev),
156 debugfs_root);
157 if (IS_ERR(irq_ptr->debugfs_dev))
158 irq_ptr->debugfs_dev = NULL;
182 for_each_input_queue(irq_ptr, q, i) 159 for_each_input_queue(irq_ptr, q, i)
183 setup_debugfs_entry(q, cdev); 160 setup_debugfs_entry(q, cdev);
184 for_each_output_queue(irq_ptr, q, i) 161 for_each_output_queue(irq_ptr, q, i)
185 setup_debugfs_entry(q, cdev); 162 setup_debugfs_entry(q, cdev);
186 mutex_unlock(&debugfs_mutex);
187} 163}
188 164
189void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev) 165void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev)
@@ -191,17 +167,16 @@ void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cd
191 struct qdio_q *q; 167 struct qdio_q *q;
192 int i; 168 int i;
193 169
194 mutex_lock(&debugfs_mutex);
195 for_each_input_queue(irq_ptr, q, i) 170 for_each_input_queue(irq_ptr, q, i)
196 remove_debugfs_entry(q); 171 debugfs_remove(q->debugfs_q);
197 for_each_output_queue(irq_ptr, q, i) 172 for_each_output_queue(irq_ptr, q, i)
198 remove_debugfs_entry(q); 173 debugfs_remove(q->debugfs_q);
199 mutex_unlock(&debugfs_mutex); 174 debugfs_remove(irq_ptr->debugfs_dev);
200} 175}
201 176
202int __init qdio_debug_init(void) 177int __init qdio_debug_init(void)
203{ 178{
204 debugfs_root = debugfs_create_dir("qdio_queues", NULL); 179 debugfs_root = debugfs_create_dir("qdio", NULL);
205 180
206 qdio_dbf_setup = debug_register("qdio_setup", 16, 1, 16); 181 qdio_dbf_setup = debug_register("qdio_setup", 16, 1, 16);
207 debug_register_view(qdio_dbf_setup, &debug_hex_ascii_view); 182 debug_register_view(qdio_dbf_setup, &debug_hex_ascii_view);
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 0038750ad945..9aef402a5f1b 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -798,8 +798,10 @@ static void __tiqdio_inbound_processing(struct qdio_q *q)
798 798
799 if (!qdio_inbound_q_done(q)) { 799 if (!qdio_inbound_q_done(q)) {
800 qdio_perf_stat_inc(&perf_stats.thinint_inbound_loop); 800 qdio_perf_stat_inc(&perf_stats.thinint_inbound_loop);
801 if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) 801 if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
802 tasklet_schedule(&q->tasklet); 802 tasklet_schedule(&q->tasklet);
803 return;
804 }
803 } 805 }
804 806
805 qdio_stop_polling(q); 807 qdio_stop_polling(q);
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index ed3dcdea7fe1..090b32a339c6 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -648,7 +648,9 @@ static int ap_bus_suspend(struct device *dev, pm_message_t state)
648 /* Poll on the device until all requests are finished. */ 648 /* Poll on the device until all requests are finished. */
649 do { 649 do {
650 flags = 0; 650 flags = 0;
651 spin_lock_bh(&ap_dev->lock);
651 __ap_poll_device(ap_dev, &flags); 652 __ap_poll_device(ap_dev, &flags);
653 spin_unlock_bh(&ap_dev->lock);
652 } while ((flags & 1) || (flags & 2)); 654 } while ((flags & 1) || (flags & 2));
653 655
654 ap_device_remove(dev); 656 ap_device_remove(dev);
@@ -1109,12 +1111,15 @@ static void ap_scan_bus(struct work_struct *unused)
1109 1111
1110 ap_dev->device.bus = &ap_bus_type; 1112 ap_dev->device.bus = &ap_bus_type;
1111 ap_dev->device.parent = ap_root_device; 1113 ap_dev->device.parent = ap_root_device;
1112 dev_set_name(&ap_dev->device, "card%02x", 1114 if (dev_set_name(&ap_dev->device, "card%02x",
1113 AP_QID_DEVICE(ap_dev->qid)); 1115 AP_QID_DEVICE(ap_dev->qid))) {
1116 kfree(ap_dev);
1117 continue;
1118 }
1114 ap_dev->device.release = ap_device_release; 1119 ap_dev->device.release = ap_device_release;
1115 rc = device_register(&ap_dev->device); 1120 rc = device_register(&ap_dev->device);
1116 if (rc) { 1121 if (rc) {
1117 kfree(ap_dev); 1122 put_device(&ap_dev->device);
1118 continue; 1123 continue;
1119 } 1124 }
1120 /* Add device attributes. */ 1125 /* Add device attributes. */
@@ -1407,14 +1412,12 @@ static void ap_reset(struct ap_device *ap_dev)
1407 1412
1408static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags) 1413static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags)
1409{ 1414{
1410 spin_lock(&ap_dev->lock);
1411 if (!ap_dev->unregistered) { 1415 if (!ap_dev->unregistered) {
1412 if (ap_poll_queue(ap_dev, flags)) 1416 if (ap_poll_queue(ap_dev, flags))
1413 ap_dev->unregistered = 1; 1417 ap_dev->unregistered = 1;
1414 if (ap_dev->reset == AP_RESET_DO) 1418 if (ap_dev->reset == AP_RESET_DO)
1415 ap_reset(ap_dev); 1419 ap_reset(ap_dev);
1416 } 1420 }
1417 spin_unlock(&ap_dev->lock);
1418 return 0; 1421 return 0;
1419} 1422}
1420 1423
@@ -1441,7 +1444,9 @@ static void ap_poll_all(unsigned long dummy)
1441 flags = 0; 1444 flags = 0;
1442 spin_lock(&ap_device_list_lock); 1445 spin_lock(&ap_device_list_lock);
1443 list_for_each_entry(ap_dev, &ap_device_list, list) { 1446 list_for_each_entry(ap_dev, &ap_device_list, list) {
1447 spin_lock(&ap_dev->lock);
1444 __ap_poll_device(ap_dev, &flags); 1448 __ap_poll_device(ap_dev, &flags);
1449 spin_unlock(&ap_dev->lock);
1445 } 1450 }
1446 spin_unlock(&ap_device_list_lock); 1451 spin_unlock(&ap_device_list_lock);
1447 } while (flags & 1); 1452 } while (flags & 1);
@@ -1487,7 +1492,9 @@ static int ap_poll_thread(void *data)
1487 flags = 0; 1492 flags = 0;
1488 spin_lock_bh(&ap_device_list_lock); 1493 spin_lock_bh(&ap_device_list_lock);
1489 list_for_each_entry(ap_dev, &ap_device_list, list) { 1494 list_for_each_entry(ap_dev, &ap_device_list, list) {
1495 spin_lock(&ap_dev->lock);
1490 __ap_poll_device(ap_dev, &flags); 1496 __ap_poll_device(ap_dev, &flags);
1497 spin_unlock(&ap_dev->lock);
1491 } 1498 }
1492 spin_unlock_bh(&ap_device_list_lock); 1499 spin_unlock_bh(&ap_device_list_lock);
1493 } 1500 }
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index e38e5d306faf..2930fc763ac5 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -403,10 +403,14 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
403 return len; 403 return len;
404} 404}
405 405
406void __init s390_virtio_console_init(void) 406static int __init s390_virtio_console_init(void)
407{ 407{
408 virtio_cons_early_init(early_put_chars); 408 if (!MACHINE_IS_KVM)
409 return -ENODEV;
410 return virtio_cons_early_init(early_put_chars);
409} 411}
412console_initcall(s390_virtio_console_init);
413
410 414
411/* 415/*
412 * We do this after core stuff, but before the drivers. 416 * We do this after core stuff, but before the drivers.
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 8c36eafcfbfe..87dff11061b0 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -1839,9 +1839,10 @@ static int netiucv_register_device(struct net_device *ndev)
1839 return -ENOMEM; 1839 return -ENOMEM;
1840 1840
1841 ret = device_register(dev); 1841 ret = device_register(dev);
1842 1842 if (ret) {
1843 if (ret) 1843 put_device(dev);
1844 return ret; 1844 return ret;
1845 }
1845 ret = netiucv_add_files(dev); 1846 ret = netiucv_add_files(dev);
1846 if (ret) 1847 if (ret)
1847 goto out_unreg; 1848 goto out_unreg;
@@ -2226,8 +2227,10 @@ static int __init netiucv_init(void)
2226 netiucv_dev->release = (void (*)(struct device *))kfree; 2227 netiucv_dev->release = (void (*)(struct device *))kfree;
2227 netiucv_dev->driver = &netiucv_driver; 2228 netiucv_dev->driver = &netiucv_driver;
2228 rc = device_register(netiucv_dev); 2229 rc = device_register(netiucv_dev);
2229 if (rc) 2230 if (rc) {
2231 put_device(netiucv_dev);
2230 goto out_driver; 2232 goto out_driver;
2233 }
2231 netiucv_banner(); 2234 netiucv_banner();
2232 return rc; 2235 return rc;
2233 2236
diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c
index e76a320d373b..102000d1af6f 100644
--- a/drivers/s390/net/smsgiucv.c
+++ b/drivers/s390/net/smsgiucv.c
@@ -219,13 +219,13 @@ static int __init smsg_init(void)
219 smsg_dev->driver = &smsg_driver; 219 smsg_dev->driver = &smsg_driver;
220 rc = device_register(smsg_dev); 220 rc = device_register(smsg_dev);
221 if (rc) 221 if (rc)
222 goto out_free_dev; 222 goto out_put;
223 223
224 cpcmd("SET SMSG IUCV", NULL, 0, NULL); 224 cpcmd("SET SMSG IUCV", NULL, 0, NULL);
225 return 0; 225 return 0;
226 226
227out_free_dev: 227out_put:
228 kfree(smsg_dev); 228 put_device(smsg_dev);
229out_free_path: 229out_free_path:
230 iucv_path_free(smsg_path); 230 iucv_path_free(smsg_path);
231 smsg_path = NULL; 231 smsg_path = NULL;
diff --git a/drivers/scsi/cxgb3i/cxgb3i_init.c b/drivers/scsi/cxgb3i/cxgb3i_init.c
index 042d9bce9914..d0ab23a58355 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_init.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_init.c
@@ -26,7 +26,7 @@ MODULE_VERSION(DRV_MODULE_VERSION);
26 26
27static void open_s3_dev(struct t3cdev *); 27static void open_s3_dev(struct t3cdev *);
28static void close_s3_dev(struct t3cdev *); 28static void close_s3_dev(struct t3cdev *);
29static void s3_err_handler(struct t3cdev *tdev, u32 status, u32 error); 29static void s3_event_handler(struct t3cdev *tdev, u32 event, u32 port);
30 30
31static cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS]; 31static cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS];
32static struct cxgb3_client t3c_client = { 32static struct cxgb3_client t3c_client = {
@@ -34,7 +34,7 @@ static struct cxgb3_client t3c_client = {
34 .handlers = cxgb3i_cpl_handlers, 34 .handlers = cxgb3i_cpl_handlers,
35 .add = open_s3_dev, 35 .add = open_s3_dev,
36 .remove = close_s3_dev, 36 .remove = close_s3_dev,
37 .err_handler = s3_err_handler, 37 .event_handler = s3_event_handler,
38}; 38};
39 39
40/** 40/**
@@ -66,16 +66,16 @@ static void close_s3_dev(struct t3cdev *t3dev)
66 cxgb3i_ddp_cleanup(t3dev); 66 cxgb3i_ddp_cleanup(t3dev);
67} 67}
68 68
69static void s3_err_handler(struct t3cdev *tdev, u32 status, u32 error) 69static void s3_event_handler(struct t3cdev *tdev, u32 event, u32 port)
70{ 70{
71 struct cxgb3i_adapter *snic = cxgb3i_adapter_find_by_tdev(tdev); 71 struct cxgb3i_adapter *snic = cxgb3i_adapter_find_by_tdev(tdev);
72 72
73 cxgb3i_log_info("snic 0x%p, tdev 0x%p, status 0x%x, err 0x%x.\n", 73 cxgb3i_log_info("snic 0x%p, tdev 0x%p, event 0x%x, port 0x%x.\n",
74 snic, tdev, status, error); 74 snic, tdev, event, port);
75 if (!snic) 75 if (!snic)
76 return; 76 return;
77 77
78 switch (status) { 78 switch (event) {
79 case OFFLOAD_STATUS_DOWN: 79 case OFFLOAD_STATUS_DOWN:
80 snic->flags |= CXGB3I_ADAPTER_FLAG_RESET; 80 snic->flags |= CXGB3I_ADAPTER_FLAG_RESET;
81 break; 81 break;
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 9d7c99394ec6..640f65c6ef84 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -1752,12 +1752,12 @@ static int comedi_open(struct inode *inode, struct file *file)
1752 mutex_lock(&dev->mutex); 1752 mutex_lock(&dev->mutex);
1753 if (dev->attached) 1753 if (dev->attached)
1754 goto ok; 1754 goto ok;
1755 if (!capable(CAP_SYS_MODULE) && dev->in_request_module) { 1755 if (!capable(CAP_NET_ADMIN) && dev->in_request_module) {
1756 DPRINTK("in request module\n"); 1756 DPRINTK("in request module\n");
1757 mutex_unlock(&dev->mutex); 1757 mutex_unlock(&dev->mutex);
1758 return -ENODEV; 1758 return -ENODEV;
1759 } 1759 }
1760 if (capable(CAP_SYS_MODULE) && dev->in_request_module) 1760 if (capable(CAP_NET_ADMIN) && dev->in_request_module)
1761 goto ok; 1761 goto ok;
1762 1762
1763 dev->in_request_module = 1; 1763 dev->in_request_module = 1;
@@ -1770,8 +1770,8 @@ static int comedi_open(struct inode *inode, struct file *file)
1770 1770
1771 dev->in_request_module = 0; 1771 dev->in_request_module = 0;
1772 1772
1773 if (!dev->attached && !capable(CAP_SYS_MODULE)) { 1773 if (!dev->attached && !capable(CAP_NET_ADMIN)) {
1774 DPRINTK("not attached and not CAP_SYS_MODULE\n"); 1774 DPRINTK("not attached and not CAP_NET_ADMIN\n");
1775 mutex_unlock(&dev->mutex); 1775 mutex_unlock(&dev->mutex);
1776 return -ENODEV; 1776 return -ENODEV;
1777 } 1777 }
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index 7b605795b770..e63c9bea6c54 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -1950,14 +1950,7 @@ static int pohmelfs_get_sb(struct file_system_type *fs_type,
1950 */ 1950 */
1951static void pohmelfs_kill_super(struct super_block *sb) 1951static void pohmelfs_kill_super(struct super_block *sb)
1952{ 1952{
1953 struct writeback_control wbc = { 1953 sync_inodes_sb(sb);
1954 .sync_mode = WB_SYNC_ALL,
1955 .range_start = 0,
1956 .range_end = LLONG_MAX,
1957 .nr_to_write = LONG_MAX,
1958 };
1959 generic_sync_sb_inodes(sb, &wbc);
1960
1961 kill_anon_super(sb); 1954 kill_anon_super(sb);
1962} 1955}
1963 1956
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b7c1603cd4bd..7c1e65d54872 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -501,22 +501,22 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
501 } 501 }
502 } 502 }
503 503
504 /* 504 if (last_bss > elf_bss) {
505 * Now fill out the bss section. First pad the last page up 505 /*
506 * to the page boundary, and then perform a mmap to make sure 506 * Now fill out the bss section. First pad the last page up
507 * that there are zero-mapped pages up to and including the 507 * to the page boundary, and then perform a mmap to make sure
508 * last bss page. 508 * that there are zero-mapped pages up to and including the
509 */ 509 * last bss page.
510 if (padzero(elf_bss)) { 510 */
511 error = -EFAULT; 511 if (padzero(elf_bss)) {
512 goto out_close; 512 error = -EFAULT;
513 } 513 goto out_close;
514 }
514 515
515 /* What we have mapped so far */ 516 /* What we have mapped so far */
516 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); 517 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
517 518
518 /* Map the last of the bss segment */ 519 /* Map the last of the bss segment */
519 if (last_bss > elf_bss) {
520 down_write(&current->mm->mmap_sem); 520 down_write(&current->mm->mmap_sem);
521 error = do_brk(elf_bss, last_bss - elf_bss); 521 error = do_brk(elf_bss, last_bss - elf_bss);
522 up_write(&current->mm->mmap_sem); 522 up_write(&current->mm->mmap_sem);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e83be2e4602c..15831d5c7367 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1352,6 +1352,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1352{ 1352{
1353 int err; 1353 int err;
1354 1354
1355 bdi->name = "btrfs";
1355 bdi->capabilities = BDI_CAP_MAP_COPY; 1356 bdi->capabilities = BDI_CAP_MAP_COPY;
1356 err = bdi_init(bdi); 1357 err = bdi_init(bdi);
1357 if (err) 1358 if (err)
diff --git a/fs/buffer.c b/fs/buffer.c
index 28f320fac4d4..90a98865b0cc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -281,7 +281,7 @@ static void free_more_memory(void)
281 struct zone *zone; 281 struct zone *zone;
282 int nid; 282 int nid;
283 283
284 wakeup_pdflush(1024); 284 wakeup_flusher_threads(1024);
285 yield(); 285 yield();
286 286
287 for_each_online_node(nid) { 287 for_each_online_node(nid) {
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 2f18c1e4e301..3cbc57f932d2 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -31,6 +31,7 @@
31 * - no readahead or I/O queue unplugging required 31 * - no readahead or I/O queue unplugging required
32 */ 32 */
33struct backing_dev_info directly_mappable_cdev_bdi = { 33struct backing_dev_info directly_mappable_cdev_bdi = {
34 .name = "char",
34 .capabilities = ( 35 .capabilities = (
35#ifdef CONFIG_MMU 36#ifdef CONFIG_MMU
36 /* permit private copies of the data to be taken */ 37 /* permit private copies of the data to be taken */
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4921e7426d95..a2f746066c5d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -51,6 +51,7 @@ static const struct address_space_operations configfs_aops = {
51}; 51};
52 52
53static struct backing_dev_info configfs_backing_dev_info = { 53static struct backing_dev_info configfs_backing_dev_info = {
54 .name = "configfs",
54 .ra_pages = 0, /* No readahead */ 55 .ra_pages = 0, /* No readahead */
55 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 56 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
56}; 57};
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d636e1297cad..a63d44256a70 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -230,7 +230,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
230 return error; 230 return error;
231} 231}
232 232
233static int 233int
234ext2_check_acl(struct inode *inode, int mask) 234ext2_check_acl(struct inode *inode, int mask)
235{ 235{
236 struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); 236 struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
@@ -246,12 +246,6 @@ ext2_check_acl(struct inode *inode, int mask)
246 return -EAGAIN; 246 return -EAGAIN;
247} 247}
248 248
249int
250ext2_permission(struct inode *inode, int mask)
251{
252 return generic_permission(inode, mask, ext2_check_acl);
253}
254
255/* 249/*
256 * Initialize the ACLs of a new inode. Called from ext2_new_inode. 250 * Initialize the ACLs of a new inode. Called from ext2_new_inode.
257 * 251 *
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index ecefe478898f..3ff6cbb9ac44 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,13 +54,13 @@ static inline int ext2_acl_count(size_t size)
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext2_permission (struct inode *, int); 57extern int ext2_check_acl (struct inode *, int);
58extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
59extern int ext2_init_acl (struct inode *, struct inode *); 59extern int ext2_init_acl (struct inode *, struct inode *);
60 60
61#else 61#else
62#include <linux/sched.h> 62#include <linux/sched.h>
63#define ext2_permission NULL 63#define ext2_check_acl NULL
64#define ext2_get_acl NULL 64#define ext2_get_acl NULL
65#define ext2_set_acl NULL 65#define ext2_set_acl NULL
66 66
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2b9e47dc9222..a2f3afd1a1c1 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -85,6 +85,6 @@ const struct inode_operations ext2_file_inode_operations = {
85 .removexattr = generic_removexattr, 85 .removexattr = generic_removexattr,
86#endif 86#endif
87 .setattr = ext2_setattr, 87 .setattr = ext2_setattr,
88 .permission = ext2_permission, 88 .check_acl = ext2_check_acl,
89 .fiemap = ext2_fiemap, 89 .fiemap = ext2_fiemap,
90}; 90};
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 78d9b925fc94..23701f289e98 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -400,7 +400,7 @@ const struct inode_operations ext2_dir_inode_operations = {
400 .removexattr = generic_removexattr, 400 .removexattr = generic_removexattr,
401#endif 401#endif
402 .setattr = ext2_setattr, 402 .setattr = ext2_setattr,
403 .permission = ext2_permission, 403 .check_acl = ext2_check_acl,
404}; 404};
405 405
406const struct inode_operations ext2_special_inode_operations = { 406const struct inode_operations ext2_special_inode_operations = {
@@ -411,5 +411,5 @@ const struct inode_operations ext2_special_inode_operations = {
411 .removexattr = generic_removexattr, 411 .removexattr = generic_removexattr,
412#endif 412#endif
413 .setattr = ext2_setattr, 413 .setattr = ext2_setattr,
414 .permission = ext2_permission, 414 .check_acl = ext2_check_acl,
415}; 415};
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e167bae37ef0..c9b0df376b5f 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -238,7 +238,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
238 return error; 238 return error;
239} 239}
240 240
241static int 241int
242ext3_check_acl(struct inode *inode, int mask) 242ext3_check_acl(struct inode *inode, int mask)
243{ 243{
244 struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); 244 struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
@@ -254,12 +254,6 @@ ext3_check_acl(struct inode *inode, int mask)
254 return -EAGAIN; 254 return -EAGAIN;
255} 255}
256 256
257int
258ext3_permission(struct inode *inode, int mask)
259{
260 return generic_permission(inode, mask, ext3_check_acl);
261}
262
263/* 257/*
264 * Initialize the ACLs of a new inode. Called from ext3_new_inode. 258 * Initialize the ACLs of a new inode. Called from ext3_new_inode.
265 * 259 *
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 07d15a3a5969..597334626de9 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size)
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext3_permission (struct inode *, int); 57extern int ext3_check_acl (struct inode *, int);
58extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
60 60
61#else /* CONFIG_EXT3_FS_POSIX_ACL */ 61#else /* CONFIG_EXT3_FS_POSIX_ACL */
62#include <linux/sched.h> 62#include <linux/sched.h>
63#define ext3_permission NULL 63#define ext3_check_acl NULL
64 64
65static inline int 65static inline int
66ext3_acl_chmod(struct inode *inode) 66ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5b49704b231b..299253214789 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -137,7 +137,7 @@ const struct inode_operations ext3_file_inode_operations = {
137 .listxattr = ext3_listxattr, 137 .listxattr = ext3_listxattr,
138 .removexattr = generic_removexattr, 138 .removexattr = generic_removexattr,
139#endif 139#endif
140 .permission = ext3_permission, 140 .check_acl = ext3_check_acl,
141 .fiemap = ext3_fiemap, 141 .fiemap = ext3_fiemap,
142}; 142};
143 143
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 6ff7b9730234..aad6400c9b77 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2445,7 +2445,7 @@ const struct inode_operations ext3_dir_inode_operations = {
2445 .listxattr = ext3_listxattr, 2445 .listxattr = ext3_listxattr,
2446 .removexattr = generic_removexattr, 2446 .removexattr = generic_removexattr,
2447#endif 2447#endif
2448 .permission = ext3_permission, 2448 .check_acl = ext3_check_acl,
2449}; 2449};
2450 2450
2451const struct inode_operations ext3_special_inode_operations = { 2451const struct inode_operations ext3_special_inode_operations = {
@@ -2456,5 +2456,5 @@ const struct inode_operations ext3_special_inode_operations = {
2456 .listxattr = ext3_listxattr, 2456 .listxattr = ext3_listxattr,
2457 .removexattr = generic_removexattr, 2457 .removexattr = generic_removexattr,
2458#endif 2458#endif
2459 .permission = ext3_permission, 2459 .check_acl = ext3_check_acl,
2460}; 2460};
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index f6d8967149ca..0df88b2a69b0 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -236,7 +236,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
236 return error; 236 return error;
237} 237}
238 238
239static int 239int
240ext4_check_acl(struct inode *inode, int mask) 240ext4_check_acl(struct inode *inode, int mask)
241{ 241{
242 struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); 242 struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
@@ -252,12 +252,6 @@ ext4_check_acl(struct inode *inode, int mask)
252 return -EAGAIN; 252 return -EAGAIN;
253} 253}
254 254
255int
256ext4_permission(struct inode *inode, int mask)
257{
258 return generic_permission(inode, mask, ext4_check_acl);
259}
260
261/* 255/*
262 * Initialize the ACLs of a new inode. Called from ext4_new_inode. 256 * Initialize the ACLs of a new inode. Called from ext4_new_inode.
263 * 257 *
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 949789d2bba6..9d843d5deac4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,13 +54,13 @@ static inline int ext4_acl_count(size_t size)
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext4_permission(struct inode *, int); 57extern int ext4_check_acl(struct inode *, int);
58extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); 59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
60 60
61#else /* CONFIG_EXT4_FS_POSIX_ACL */ 61#else /* CONFIG_EXT4_FS_POSIX_ACL */
62#include <linux/sched.h> 62#include <linux/sched.h>
63#define ext4_permission NULL 63#define ext4_check_acl NULL
64 64
65static inline int 65static inline int
66ext4_acl_chmod(struct inode *inode) 66ext4_acl_chmod(struct inode *inode)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3f1873fef1c6..27f3c5354c0e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -207,7 +207,7 @@ const struct inode_operations ext4_file_inode_operations = {
207 .listxattr = ext4_listxattr, 207 .listxattr = ext4_listxattr,
208 .removexattr = generic_removexattr, 208 .removexattr = generic_removexattr,
209#endif 209#endif
210 .permission = ext4_permission, 210 .check_acl = ext4_check_acl,
211 .fallocate = ext4_fallocate, 211 .fallocate = ext4_fallocate,
212 .fiemap = ext4_fiemap, 212 .fiemap = ext4_fiemap,
213}; 213};
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index de04013d16ff..114abe5d2c1d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2536,7 +2536,7 @@ const struct inode_operations ext4_dir_inode_operations = {
2536 .listxattr = ext4_listxattr, 2536 .listxattr = ext4_listxattr,
2537 .removexattr = generic_removexattr, 2537 .removexattr = generic_removexattr,
2538#endif 2538#endif
2539 .permission = ext4_permission, 2539 .check_acl = ext4_check_acl,
2540 .fiemap = ext4_fiemap, 2540 .fiemap = ext4_fiemap,
2541}; 2541};
2542 2542
@@ -2548,5 +2548,5 @@ const struct inode_operations ext4_special_inode_operations = {
2548 .listxattr = ext4_listxattr, 2548 .listxattr = ext4_listxattr,
2549 .removexattr = generic_removexattr, 2549 .removexattr = generic_removexattr,
2550#endif 2550#endif
2551 .permission = ext4_permission, 2551 .check_acl = ext4_check_acl,
2552}; 2552};
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c54226be5294..da86ef58e427 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -19,171 +19,223 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/kthread.h>
23#include <linux/freezer.h>
22#include <linux/writeback.h> 24#include <linux/writeback.h>
23#include <linux/blkdev.h> 25#include <linux/blkdev.h>
24#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
25#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
26#include "internal.h" 28#include "internal.h"
27 29
30#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
28 31
29/** 32/*
30 * writeback_acquire - attempt to get exclusive writeback access to a device 33 * We don't actually have pdflush, but this one is exported though /proc...
31 * @bdi: the device's backing_dev_info structure
32 *
33 * It is a waste of resources to have more than one pdflush thread blocked on
34 * a single request queue. Exclusion at the request_queue level is obtained
35 * via a flag in the request_queue's backing_dev_info.state.
36 *
37 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
38 * unless they implement their own. Which is somewhat inefficient, as this
39 * may prevent concurrent writeback against multiple devices.
40 */ 34 */
41static int writeback_acquire(struct backing_dev_info *bdi) 35int nr_pdflush_threads;
36
37/*
38 * Work items for the bdi_writeback threads
39 */
40struct bdi_work {
41 struct list_head list;
42 struct list_head wait_list;
43 struct rcu_head rcu_head;
44
45 unsigned long seen;
46 atomic_t pending;
47
48 struct super_block *sb;
49 unsigned long nr_pages;
50 enum writeback_sync_modes sync_mode;
51
52 unsigned long state;
53};
54
55enum {
56 WS_USED_B = 0,
57 WS_ONSTACK_B,
58};
59
60#define WS_USED (1 << WS_USED_B)
61#define WS_ONSTACK (1 << WS_ONSTACK_B)
62
63static inline bool bdi_work_on_stack(struct bdi_work *work)
42{ 64{
43 return !test_and_set_bit(BDI_pdflush, &bdi->state); 65 return test_bit(WS_ONSTACK_B, &work->state);
66}
67
68static inline void bdi_work_init(struct bdi_work *work,
69 struct writeback_control *wbc)
70{
71 INIT_RCU_HEAD(&work->rcu_head);
72 work->sb = wbc->sb;
73 work->nr_pages = wbc->nr_to_write;
74 work->sync_mode = wbc->sync_mode;
75 work->state = WS_USED;
76}
77
78static inline void bdi_work_init_on_stack(struct bdi_work *work,
79 struct writeback_control *wbc)
80{
81 bdi_work_init(work, wbc);
82 work->state |= WS_ONSTACK;
44} 83}
45 84
46/** 85/**
47 * writeback_in_progress - determine whether there is writeback in progress 86 * writeback_in_progress - determine whether there is writeback in progress
48 * @bdi: the device's backing_dev_info structure. 87 * @bdi: the device's backing_dev_info structure.
49 * 88 *
50 * Determine whether there is writeback in progress against a backing device. 89 * Determine whether there is writeback waiting to be handled against a
90 * backing device.
51 */ 91 */
52int writeback_in_progress(struct backing_dev_info *bdi) 92int writeback_in_progress(struct backing_dev_info *bdi)
53{ 93{
54 return test_bit(BDI_pdflush, &bdi->state); 94 return !list_empty(&bdi->work_list);
55} 95}
56 96
57/** 97static void bdi_work_clear(struct bdi_work *work)
58 * writeback_release - relinquish exclusive writeback access against a device.
59 * @bdi: the device's backing_dev_info structure
60 */
61static void writeback_release(struct backing_dev_info *bdi)
62{ 98{
63 BUG_ON(!writeback_in_progress(bdi)); 99 clear_bit(WS_USED_B, &work->state);
64 clear_bit(BDI_pdflush, &bdi->state); 100 smp_mb__after_clear_bit();
101 wake_up_bit(&work->state, WS_USED_B);
65} 102}
66 103
67static noinline void block_dump___mark_inode_dirty(struct inode *inode) 104static void bdi_work_free(struct rcu_head *head)
68{ 105{
69 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 106 struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
70 struct dentry *dentry;
71 const char *name = "?";
72 107
73 dentry = d_find_alias(inode); 108 if (!bdi_work_on_stack(work))
74 if (dentry) { 109 kfree(work);
75 spin_lock(&dentry->d_lock); 110 else
76 name = (const char *) dentry->d_name.name; 111 bdi_work_clear(work);
77 }
78 printk(KERN_DEBUG
79 "%s(%d): dirtied inode %lu (%s) on %s\n",
80 current->comm, task_pid_nr(current), inode->i_ino,
81 name, inode->i_sb->s_id);
82 if (dentry) {
83 spin_unlock(&dentry->d_lock);
84 dput(dentry);
85 }
86 }
87} 112}
88 113
89/** 114static void wb_work_complete(struct bdi_work *work)
90 * __mark_inode_dirty - internal function
91 * @inode: inode to mark
92 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
93 * Mark an inode as dirty. Callers should use mark_inode_dirty or
94 * mark_inode_dirty_sync.
95 *
96 * Put the inode on the super block's dirty list.
97 *
98 * CAREFUL! We mark it dirty unconditionally, but move it onto the
99 * dirty list only if it is hashed or if it refers to a blockdev.
100 * If it was not hashed, it will never be added to the dirty list
101 * even if it is later hashed, as it will have been marked dirty already.
102 *
103 * In short, make sure you hash any inodes _before_ you start marking
104 * them dirty.
105 *
106 * This function *must* be atomic for the I_DIRTY_PAGES case -
107 * set_page_dirty() is called under spinlock in several places.
108 *
109 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
110 * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
111 * the kernel-internal blockdev inode represents the dirtying time of the
112 * blockdev's pages. This is why for I_DIRTY_PAGES we always use
113 * page->mapping->host, so the page-dirtying time is recorded in the internal
114 * blockdev inode.
115 */
116void __mark_inode_dirty(struct inode *inode, int flags)
117{ 115{
118 struct super_block *sb = inode->i_sb; 116 const enum writeback_sync_modes sync_mode = work->sync_mode;
119 117
120 /* 118 /*
121 * Don't do this for I_DIRTY_PAGES - that doesn't actually 119 * For allocated work, we can clear the done/seen bit right here.
122 * dirty the inode itself 120 * For on-stack work, we need to postpone both the clear and free
121 * to after the RCU grace period, since the stack could be invalidated
122 * as soon as bdi_work_clear() has done the wakeup.
123 */ 123 */
124 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 124 if (!bdi_work_on_stack(work))
125 if (sb->s_op->dirty_inode) 125 bdi_work_clear(work);
126 sb->s_op->dirty_inode(inode); 126 if (sync_mode == WB_SYNC_NONE || bdi_work_on_stack(work))
127 } 127 call_rcu(&work->rcu_head, bdi_work_free);
128}
128 129
130static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
131{
129 /* 132 /*
130 * make sure that changes are seen by all cpus before we test i_state 133 * The caller has retrieved the work arguments from this work,
131 * -- mikulas 134 * drop our reference. If this is the last ref, delete and free it
132 */ 135 */
133 smp_mb(); 136 if (atomic_dec_and_test(&work->pending)) {
137 struct backing_dev_info *bdi = wb->bdi;
134 138
135 /* avoid the locking if we can */ 139 spin_lock(&bdi->wb_lock);
136 if ((inode->i_state & flags) == flags) 140 list_del_rcu(&work->list);
137 return; 141 spin_unlock(&bdi->wb_lock);
138 142
139 if (unlikely(block_dump)) 143 wb_work_complete(work);
140 block_dump___mark_inode_dirty(inode); 144 }
141 145}
142 spin_lock(&inode_lock);
143 if ((inode->i_state & flags) != flags) {
144 const int was_dirty = inode->i_state & I_DIRTY;
145 146
146 inode->i_state |= flags; 147static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
148{
149 if (work) {
150 work->seen = bdi->wb_mask;
151 BUG_ON(!work->seen);
152 atomic_set(&work->pending, bdi->wb_cnt);
153 BUG_ON(!bdi->wb_cnt);
147 154
148 /* 155 /*
149 * If the inode is being synced, just update its dirty state. 156 * Make sure stores are seen before it appears on the list
150 * The unlocker will place the inode on the appropriate
151 * superblock list, based upon its state.
152 */ 157 */
153 if (inode->i_state & I_SYNC) 158 smp_mb();
154 goto out;
155 159
156 /* 160 spin_lock(&bdi->wb_lock);
157 * Only add valid (hashed) inodes to the superblock's 161 list_add_tail_rcu(&work->list, &bdi->work_list);
158 * dirty list. Add blockdev inodes as well. 162 spin_unlock(&bdi->wb_lock);
159 */ 163 }
160 if (!S_ISBLK(inode->i_mode)) { 164
161 if (hlist_unhashed(&inode->i_hash)) 165 /*
162 goto out; 166 * If the default thread isn't there, make sure we add it. When
163 } 167 * it gets created and wakes up, we'll run this work.
164 if (inode->i_state & (I_FREEING|I_CLEAR)) 168 */
165 goto out; 169 if (unlikely(list_empty_careful(&bdi->wb_list)))
170 wake_up_process(default_backing_dev_info.wb.task);
171 else {
172 struct bdi_writeback *wb = &bdi->wb;
166 173
167 /* 174 /*
168 * If the inode was already on s_dirty/s_io/s_more_io, don't 175 * If we failed allocating the bdi work item, wake up the wb
169 * reposition it (that would break s_dirty time-ordering). 176 * thread always. As a safety precaution, it'll flush out
177 * everything
170 */ 178 */
171 if (!was_dirty) { 179 if (!wb_has_dirty_io(wb)) {
172 inode->dirtied_when = jiffies; 180 if (work)
173 list_move(&inode->i_list, &sb->s_dirty); 181 wb_clear_pending(wb, work);
174 } 182 } else if (wb->task)
183 wake_up_process(wb->task);
175 } 184 }
176out:
177 spin_unlock(&inode_lock);
178} 185}
179 186
180EXPORT_SYMBOL(__mark_inode_dirty); 187/*
188 * Used for on-stack allocated work items. The caller needs to wait until
189 * the wb threads have acked the work before it's safe to continue.
190 */
191static void bdi_wait_on_work_clear(struct bdi_work *work)
192{
193 wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
194 TASK_UNINTERRUPTIBLE);
195}
181 196
182static int write_inode(struct inode *inode, int sync) 197static struct bdi_work *bdi_alloc_work(struct writeback_control *wbc)
183{ 198{
184 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) 199 struct bdi_work *work;
185 return inode->i_sb->s_op->write_inode(inode, sync); 200
186 return 0; 201 work = kmalloc(sizeof(*work), GFP_ATOMIC);
202 if (work)
203 bdi_work_init(work, wbc);
204
205 return work;
206}
207
208void bdi_start_writeback(struct writeback_control *wbc)
209{
210 const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
211 struct bdi_work work_stack, *work = NULL;
212
213 if (!must_wait)
214 work = bdi_alloc_work(wbc);
215
216 if (!work) {
217 work = &work_stack;
218 bdi_work_init_on_stack(work, wbc);
219 }
220
221 bdi_queue_work(wbc->bdi, work);
222
223 /*
224 * If the sync mode is WB_SYNC_ALL, block waiting for the work to
225 * complete. If not, we only need to wait for the work to be started,
226 * if we allocated it on-stack. We use the same mechanism, if the
227 * wait bit is set in the bdi_work struct, then threads will not
228 * clear pending until after they are done.
229 *
230 * Note that work == &work_stack if must_wait is true, so we don't
231 * need to do call_rcu() here ever, since the completion path will
232 * have done that for us.
233 */
234 if (must_wait || work == &work_stack) {
235 bdi_wait_on_work_clear(work);
236 if (work != &work_stack)
237 call_rcu(&work->rcu_head, bdi_work_free);
238 }
187} 239}
188 240
189/* 241/*
@@ -191,31 +243,32 @@ static int write_inode(struct inode *inode, int sync)
191 * furthest end of its superblock's dirty-inode list. 243 * furthest end of its superblock's dirty-inode list.
192 * 244 *
193 * Before stamping the inode's ->dirtied_when, we check to see whether it is 245 * Before stamping the inode's ->dirtied_when, we check to see whether it is
194 * already the most-recently-dirtied inode on the s_dirty list. If that is 246 * already the most-recently-dirtied inode on the b_dirty list. If that is
195 * the case then the inode must have been redirtied while it was being written 247 * the case then the inode must have been redirtied while it was being written
196 * out and we don't reset its dirtied_when. 248 * out and we don't reset its dirtied_when.
197 */ 249 */
198static void redirty_tail(struct inode *inode) 250static void redirty_tail(struct inode *inode)
199{ 251{
200 struct super_block *sb = inode->i_sb; 252 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
201 253
202 if (!list_empty(&sb->s_dirty)) { 254 if (!list_empty(&wb->b_dirty)) {
203 struct inode *tail_inode; 255 struct inode *tail;
204 256
205 tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); 257 tail = list_entry(wb->b_dirty.next, struct inode, i_list);
206 if (time_before(inode->dirtied_when, 258 if (time_before(inode->dirtied_when, tail->dirtied_when))
207 tail_inode->dirtied_when))
208 inode->dirtied_when = jiffies; 259 inode->dirtied_when = jiffies;
209 } 260 }
210 list_move(&inode->i_list, &sb->s_dirty); 261 list_move(&inode->i_list, &wb->b_dirty);
211} 262}
212 263
213/* 264/*
214 * requeue inode for re-scanning after sb->s_io list is exhausted. 265 * requeue inode for re-scanning after bdi->b_io list is exhausted.
215 */ 266 */
216static void requeue_io(struct inode *inode) 267static void requeue_io(struct inode *inode)
217{ 268{
218 list_move(&inode->i_list, &inode->i_sb->s_more_io); 269 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
270
271 list_move(&inode->i_list, &wb->b_more_io);
219} 272}
220 273
221static void inode_sync_complete(struct inode *inode) 274static void inode_sync_complete(struct inode *inode)
@@ -262,20 +315,18 @@ static void move_expired_inodes(struct list_head *delaying_queue,
262/* 315/*
263 * Queue all expired dirty inodes for io, eldest first. 316 * Queue all expired dirty inodes for io, eldest first.
264 */ 317 */
265static void queue_io(struct super_block *sb, 318static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
266 unsigned long *older_than_this)
267{ 319{
268 list_splice_init(&sb->s_more_io, sb->s_io.prev); 320 list_splice_init(&wb->b_more_io, wb->b_io.prev);
269 move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this); 321 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
270} 322}
271 323
272int sb_has_dirty_inodes(struct super_block *sb) 324static int write_inode(struct inode *inode, int sync)
273{ 325{
274 return !list_empty(&sb->s_dirty) || 326 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
275 !list_empty(&sb->s_io) || 327 return inode->i_sb->s_op->write_inode(inode, sync);
276 !list_empty(&sb->s_more_io); 328 return 0;
277} 329}
278EXPORT_SYMBOL(sb_has_dirty_inodes);
279 330
280/* 331/*
281 * Wait for writeback on an inode to complete. 332 * Wait for writeback on an inode to complete.
@@ -322,11 +373,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
322 if (inode->i_state & I_SYNC) { 373 if (inode->i_state & I_SYNC) {
323 /* 374 /*
324 * If this inode is locked for writeback and we are not doing 375 * If this inode is locked for writeback and we are not doing
325 * writeback-for-data-integrity, move it to s_more_io so that 376 * writeback-for-data-integrity, move it to b_more_io so that
326 * writeback can proceed with the other inodes on s_io. 377 * writeback can proceed with the other inodes on s_io.
327 * 378 *
328 * We'll have another go at writing back this inode when we 379 * We'll have another go at writing back this inode when we
329 * completed a full scan of s_io. 380 * completed a full scan of b_io.
330 */ 381 */
331 if (!wait) { 382 if (!wait) {
332 requeue_io(inode); 383 requeue_io(inode);
@@ -371,11 +422,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
371 /* 422 /*
372 * We didn't write back all the pages. nfs_writepages() 423 * We didn't write back all the pages. nfs_writepages()
373 * sometimes bales out without doing anything. Redirty 424 * sometimes bales out without doing anything. Redirty
374 * the inode; Move it from s_io onto s_more_io/s_dirty. 425 * the inode; Move it from b_io onto b_more_io/b_dirty.
375 */ 426 */
376 /* 427 /*
377 * akpm: if the caller was the kupdate function we put 428 * akpm: if the caller was the kupdate function we put
378 * this inode at the head of s_dirty so it gets first 429 * this inode at the head of b_dirty so it gets first
379 * consideration. Otherwise, move it to the tail, for 430 * consideration. Otherwise, move it to the tail, for
380 * the reasons described there. I'm not really sure 431 * the reasons described there. I'm not really sure
381 * how much sense this makes. Presumably I had a good 432 * how much sense this makes. Presumably I had a good
@@ -385,7 +436,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
385 if (wbc->for_kupdate) { 436 if (wbc->for_kupdate) {
386 /* 437 /*
387 * For the kupdate function we move the inode 438 * For the kupdate function we move the inode
388 * to s_more_io so it will get more writeout as 439 * to b_more_io so it will get more writeout as
389 * soon as the queue becomes uncongested. 440 * soon as the queue becomes uncongested.
390 */ 441 */
391 inode->i_state |= I_DIRTY_PAGES; 442 inode->i_state |= I_DIRTY_PAGES;
@@ -434,50 +485,84 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
434} 485}
435 486
436/* 487/*
437 * Write out a superblock's list of dirty inodes. A wait will be performed 488 * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
438 * upon no inodes, all inodes or the final one, depending upon sync_mode. 489 * before calling writeback. So make sure that we do pin it, so it doesn't
439 * 490 * go away while we are writing inodes from it.
440 * If older_than_this is non-NULL, then only write out inodes which
441 * had their first dirtying at a time earlier than *older_than_this.
442 *
443 * If we're a pdflush thread, then implement pdflush collision avoidance
444 * against the entire list.
445 * 491 *
446 * If `bdi' is non-zero then we're being asked to writeback a specific queue. 492 * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
447 * This function assumes that the blockdev superblock's inodes are backed by 493 * 1 if we failed.
448 * a variety of queues, so all inodes are searched. For other superblocks,
449 * assume that all inodes are backed by the same queue.
450 *
451 * FIXME: this linear search could get expensive with many fileystems. But
452 * how to fix? We need to go from an address_space to all inodes which share
453 * a queue with that address_space. (Easy: have a global "dirty superblocks"
454 * list).
455 *
456 * The inodes to be written are parked on sb->s_io. They are moved back onto
457 * sb->s_dirty as they are selected for writing. This way, none can be missed
458 * on the writer throttling path, and we get decent balancing between many
459 * throttled threads: we don't want them all piling up on inode_sync_wait.
460 */ 494 */
461void generic_sync_sb_inodes(struct super_block *sb, 495static int pin_sb_for_writeback(struct writeback_control *wbc,
496 struct inode *inode)
497{
498 struct super_block *sb = inode->i_sb;
499
500 /*
501 * Caller must already hold the ref for this
502 */
503 if (wbc->sync_mode == WB_SYNC_ALL) {
504 WARN_ON(!rwsem_is_locked(&sb->s_umount));
505 return 0;
506 }
507
508 spin_lock(&sb_lock);
509 sb->s_count++;
510 if (down_read_trylock(&sb->s_umount)) {
511 if (sb->s_root) {
512 spin_unlock(&sb_lock);
513 return 0;
514 }
515 /*
516 * umounted, drop rwsem again and fall through to failure
517 */
518 up_read(&sb->s_umount);
519 }
520
521 sb->s_count--;
522 spin_unlock(&sb_lock);
523 return 1;
524}
525
526static void unpin_sb_for_writeback(struct writeback_control *wbc,
527 struct inode *inode)
528{
529 struct super_block *sb = inode->i_sb;
530
531 if (wbc->sync_mode == WB_SYNC_ALL)
532 return;
533
534 up_read(&sb->s_umount);
535 put_super(sb);
536}
537
538static void writeback_inodes_wb(struct bdi_writeback *wb,
462 struct writeback_control *wbc) 539 struct writeback_control *wbc)
463{ 540{
541 struct super_block *sb = wbc->sb;
542 const int is_blkdev_sb = sb_is_blkdev_sb(sb);
464 const unsigned long start = jiffies; /* livelock avoidance */ 543 const unsigned long start = jiffies; /* livelock avoidance */
465 int sync = wbc->sync_mode == WB_SYNC_ALL;
466 544
467 spin_lock(&inode_lock); 545 spin_lock(&inode_lock);
468 if (!wbc->for_kupdate || list_empty(&sb->s_io))
469 queue_io(sb, wbc->older_than_this);
470 546
471 while (!list_empty(&sb->s_io)) { 547 if (!wbc->for_kupdate || list_empty(&wb->b_io))
472 struct inode *inode = list_entry(sb->s_io.prev, 548 queue_io(wb, wbc->older_than_this);
549
550 while (!list_empty(&wb->b_io)) {
551 struct inode *inode = list_entry(wb->b_io.prev,
473 struct inode, i_list); 552 struct inode, i_list);
474 struct address_space *mapping = inode->i_mapping;
475 struct backing_dev_info *bdi = mapping->backing_dev_info;
476 long pages_skipped; 553 long pages_skipped;
477 554
478 if (!bdi_cap_writeback_dirty(bdi)) { 555 /*
556 * super block given and doesn't match, skip this inode
557 */
558 if (sb && sb != inode->i_sb) {
559 redirty_tail(inode);
560 continue;
561 }
562
563 if (!bdi_cap_writeback_dirty(wb->bdi)) {
479 redirty_tail(inode); 564 redirty_tail(inode);
480 if (sb_is_blkdev_sb(sb)) { 565 if (is_blkdev_sb) {
481 /* 566 /*
482 * Dirty memory-backed blockdev: the ramdisk 567 * Dirty memory-backed blockdev: the ramdisk
483 * driver does this. Skip just this inode 568 * driver does this. Skip just this inode
@@ -497,21 +582,14 @@ void generic_sync_sb_inodes(struct super_block *sb,
497 continue; 582 continue;
498 } 583 }
499 584
500 if (wbc->nonblocking && bdi_write_congested(bdi)) { 585 if (wbc->nonblocking && bdi_write_congested(wb->bdi)) {
501 wbc->encountered_congestion = 1; 586 wbc->encountered_congestion = 1;
502 if (!sb_is_blkdev_sb(sb)) 587 if (!is_blkdev_sb)
503 break; /* Skip a congested fs */ 588 break; /* Skip a congested fs */
504 requeue_io(inode); 589 requeue_io(inode);
505 continue; /* Skip a congested blockdev */ 590 continue; /* Skip a congested blockdev */
506 } 591 }
507 592
508 if (wbc->bdi && bdi != wbc->bdi) {
509 if (!sb_is_blkdev_sb(sb))
510 break; /* fs has the wrong queue */
511 requeue_io(inode);
512 continue; /* blockdev has wrong queue */
513 }
514
515 /* 593 /*
516 * Was this inode dirtied after sync_sb_inodes was called? 594 * Was this inode dirtied after sync_sb_inodes was called?
517 * This keeps sync from extra jobs and livelock. 595 * This keeps sync from extra jobs and livelock.
@@ -519,16 +597,16 @@ void generic_sync_sb_inodes(struct super_block *sb,
519 if (inode_dirtied_after(inode, start)) 597 if (inode_dirtied_after(inode, start))
520 break; 598 break;
521 599
522 /* Is another pdflush already flushing this queue? */ 600 if (pin_sb_for_writeback(wbc, inode)) {
523 if (current_is_pdflush() && !writeback_acquire(bdi)) 601 requeue_io(inode);
524 break; 602 continue;
603 }
525 604
526 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); 605 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
527 __iget(inode); 606 __iget(inode);
528 pages_skipped = wbc->pages_skipped; 607 pages_skipped = wbc->pages_skipped;
529 writeback_single_inode(inode, wbc); 608 writeback_single_inode(inode, wbc);
530 if (current_is_pdflush()) 609 unpin_sb_for_writeback(wbc, inode);
531 writeback_release(bdi);
532 if (wbc->pages_skipped != pages_skipped) { 610 if (wbc->pages_skipped != pages_skipped) {
533 /* 611 /*
534 * writeback is not making progress due to locked 612 * writeback is not making progress due to locked
@@ -544,144 +622,571 @@ void generic_sync_sb_inodes(struct super_block *sb,
544 wbc->more_io = 1; 622 wbc->more_io = 1;
545 break; 623 break;
546 } 624 }
547 if (!list_empty(&sb->s_more_io)) 625 if (!list_empty(&wb->b_more_io))
548 wbc->more_io = 1; 626 wbc->more_io = 1;
549 } 627 }
550 628
551 if (sync) { 629 spin_unlock(&inode_lock);
552 struct inode *inode, *old_inode = NULL; 630 /* Leave any unwritten inodes on b_io */
631}
632
633void writeback_inodes_wbc(struct writeback_control *wbc)
634{
635 struct backing_dev_info *bdi = wbc->bdi;
553 636
637 writeback_inodes_wb(&bdi->wb, wbc);
638}
639
640/*
641 * The maximum number of pages to writeout in a single bdi flush/kupdate
642 * operation. We do this so we don't hold I_SYNC against an inode for
643 * enormous amounts of time, which would block a userspace task which has
644 * been forced to throttle against that inode. Also, the code reevaluates
645 * the dirty each time it has written this many pages.
646 */
647#define MAX_WRITEBACK_PAGES 1024
648
649static inline bool over_bground_thresh(void)
650{
651 unsigned long background_thresh, dirty_thresh;
652
653 get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
654
655 return (global_page_state(NR_FILE_DIRTY) +
656 global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
657}
658
659/*
660 * Explicit flushing or periodic writeback of "old" data.
661 *
662 * Define "old": the first time one of an inode's pages is dirtied, we mark the
663 * dirtying-time in the inode's address_space. So this periodic writeback code
664 * just walks the superblock inode list, writing back any inodes which are
665 * older than a specific point in time.
666 *
667 * Try to run once per dirty_writeback_interval. But if a writeback event
668 * takes longer than a dirty_writeback_interval interval, then leave a
669 * one-second gap.
670 *
671 * older_than_this takes precedence over nr_to_write. So we'll only write back
672 * all dirty pages if they are all attached to "old" mappings.
673 */
674static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
675 struct super_block *sb,
676 enum writeback_sync_modes sync_mode, int for_kupdate)
677{
678 struct writeback_control wbc = {
679 .bdi = wb->bdi,
680 .sb = sb,
681 .sync_mode = sync_mode,
682 .older_than_this = NULL,
683 .for_kupdate = for_kupdate,
684 .range_cyclic = 1,
685 };
686 unsigned long oldest_jif;
687 long wrote = 0;
688
689 if (wbc.for_kupdate) {
690 wbc.older_than_this = &oldest_jif;
691 oldest_jif = jiffies -
692 msecs_to_jiffies(dirty_expire_interval * 10);
693 }
694
695 for (;;) {
554 /* 696 /*
555 * Data integrity sync. Must wait for all pages under writeback, 697 * Don't flush anything for non-integrity writeback where
556 * because there may have been pages dirtied before our sync 698 * no nr_pages was given
557 * call, but which had writeout started before we write it out.
558 * In which case, the inode may not be on the dirty list, but
559 * we still have to wait for that writeout.
560 */ 699 */
561 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 700 if (!for_kupdate && nr_pages <= 0 && sync_mode == WB_SYNC_NONE)
562 struct address_space *mapping; 701 break;
563 702
564 if (inode->i_state & 703 /*
565 (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 704 * If no specific pages were given and this is just a
566 continue; 705 * periodic background writeout and we are below the
567 mapping = inode->i_mapping; 706 * background dirty threshold, don't do anything
568 if (mapping->nrpages == 0) 707 */
708 if (for_kupdate && nr_pages <= 0 && !over_bground_thresh())
709 break;
710
711 wbc.more_io = 0;
712 wbc.encountered_congestion = 0;
713 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
714 wbc.pages_skipped = 0;
715 writeback_inodes_wb(wb, &wbc);
716 nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
717 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
718
719 /*
720 * If we ran out of stuff to write, bail unless more_io got set
721 */
722 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
723 if (wbc.more_io && !wbc.for_kupdate)
569 continue; 724 continue;
570 __iget(inode); 725 break;
571 spin_unlock(&inode_lock); 726 }
727 }
728
729 return wrote;
730}
731
732/*
733 * Return the next bdi_work struct that hasn't been processed by this
734 * wb thread yet
735 */
736static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
737 struct bdi_writeback *wb)
738{
739 struct bdi_work *work, *ret = NULL;
740
741 rcu_read_lock();
742
743 list_for_each_entry_rcu(work, &bdi->work_list, list) {
744 if (!test_and_clear_bit(wb->nr, &work->seen))
745 continue;
746
747 ret = work;
748 break;
749 }
750
751 rcu_read_unlock();
752 return ret;
753}
754
755static long wb_check_old_data_flush(struct bdi_writeback *wb)
756{
757 unsigned long expired;
758 long nr_pages;
759
760 expired = wb->last_old_flush +
761 msecs_to_jiffies(dirty_writeback_interval * 10);
762 if (time_before(jiffies, expired))
763 return 0;
764
765 wb->last_old_flush = jiffies;
766 nr_pages = global_page_state(NR_FILE_DIRTY) +
767 global_page_state(NR_UNSTABLE_NFS) +
768 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
769
770 if (nr_pages)
771 return wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1);
772
773 return 0;
774}
775
776/*
777 * Retrieve work items and do the writeback they describe
778 */
779long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
780{
781 struct backing_dev_info *bdi = wb->bdi;
782 struct bdi_work *work;
783 long nr_pages, wrote = 0;
784
785 while ((work = get_next_work_item(bdi, wb)) != NULL) {
786 enum writeback_sync_modes sync_mode;
787
788 nr_pages = work->nr_pages;
789
790 /*
791 * Override sync mode, in case we must wait for completion
792 */
793 if (force_wait)
794 work->sync_mode = sync_mode = WB_SYNC_ALL;
795 else
796 sync_mode = work->sync_mode;
797
798 /*
799 * If this isn't a data integrity operation, just notify
800 * that we have seen this work and we are now starting it.
801 */
802 if (sync_mode == WB_SYNC_NONE)
803 wb_clear_pending(wb, work);
804
805 wrote += wb_writeback(wb, nr_pages, work->sb, sync_mode, 0);
806
807 /*
808 * This is a data integrity writeback, so only do the
809 * notification when we have completed the work.
810 */
811 if (sync_mode == WB_SYNC_ALL)
812 wb_clear_pending(wb, work);
813 }
814
815 /*
816 * Check for periodic writeback, kupdated() style
817 */
818 wrote += wb_check_old_data_flush(wb);
819
820 return wrote;
821}
822
823/*
824 * Handle writeback of dirty data for the device backed by this bdi. Also
825 * wakes up periodically and does kupdated style flushing.
826 */
827int bdi_writeback_task(struct bdi_writeback *wb)
828{
829 unsigned long last_active = jiffies;
830 unsigned long wait_jiffies = -1UL;
831 long pages_written;
832
833 while (!kthread_should_stop()) {
834 pages_written = wb_do_writeback(wb, 0);
835
836 if (pages_written)
837 last_active = jiffies;
838 else if (wait_jiffies != -1UL) {
839 unsigned long max_idle;
840
572 /* 841 /*
573 * We hold a reference to 'inode' so it couldn't have 842 * Longest period of inactivity that we tolerate. If we
574 * been removed from s_inodes list while we dropped the 843 * see dirty data again later, the task will get
575 * inode_lock. We cannot iput the inode now as we can 844 * recreated automatically.
576 * be holding the last reference and we cannot iput it
577 * under inode_lock. So we keep the reference and iput
578 * it later.
579 */ 845 */
580 iput(old_inode); 846 max_idle = max(5UL * 60 * HZ, wait_jiffies);
581 old_inode = inode; 847 if (time_after(jiffies, max_idle + last_active))
848 break;
849 }
582 850
583 filemap_fdatawait(mapping); 851 wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
852 set_current_state(TASK_INTERRUPTIBLE);
853 schedule_timeout(wait_jiffies);
854 try_to_freeze();
855 }
584 856
585 cond_resched(); 857 return 0;
858}
859
860/*
861 * Schedule writeback for all backing devices. Expensive! If this is a data
862 * integrity operation, writeback will be complete when this returns. If
863 * we are simply called for WB_SYNC_NONE, then writeback will merely be
864 * scheduled to run.
865 */
866static void bdi_writeback_all(struct writeback_control *wbc)
867{
868 const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
869 struct backing_dev_info *bdi;
870 struct bdi_work *work;
871 LIST_HEAD(list);
872
873restart:
874 spin_lock(&bdi_lock);
875
876 list_for_each_entry(bdi, &bdi_list, bdi_list) {
877 struct bdi_work *work;
586 878
587 spin_lock(&inode_lock); 879 if (!bdi_has_dirty_io(bdi))
880 continue;
881
882 /*
883 * If work allocation fails, do the writes inline. We drop
884 * the lock and restart the list writeout. This should be OK,
885 * since this happens rarely and because the writeout should
886 * eventually make more free memory available.
887 */
888 work = bdi_alloc_work(wbc);
889 if (!work) {
890 struct writeback_control __wbc;
891
892 /*
893 * Not a data integrity writeout, just continue
894 */
895 if (!must_wait)
896 continue;
897
898 spin_unlock(&bdi_lock);
899 __wbc = *wbc;
900 __wbc.bdi = bdi;
901 writeback_inodes_wbc(&__wbc);
902 goto restart;
588 } 903 }
589 spin_unlock(&inode_lock); 904 if (must_wait)
590 iput(old_inode); 905 list_add_tail(&work->wait_list, &list);
591 } else 906
592 spin_unlock(&inode_lock); 907 bdi_queue_work(bdi, work);
908 }
909
910 spin_unlock(&bdi_lock);
593 911
594 return; /* Leave any unwritten inodes on s_io */ 912 /*
913 * If this is for WB_SYNC_ALL, wait for pending work to complete
914 * before returning.
915 */
916 while (!list_empty(&list)) {
917 work = list_entry(list.next, struct bdi_work, wait_list);
918 list_del(&work->wait_list);
919 bdi_wait_on_work_clear(work);
920 call_rcu(&work->rcu_head, bdi_work_free);
921 }
595} 922}
596EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
597 923
598static void sync_sb_inodes(struct super_block *sb, 924/*
599 struct writeback_control *wbc) 925 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
926 * the whole world.
927 */
928void wakeup_flusher_threads(long nr_pages)
600{ 929{
601 generic_sync_sb_inodes(sb, wbc); 930 struct writeback_control wbc = {
931 .sync_mode = WB_SYNC_NONE,
932 .older_than_this = NULL,
933 .range_cyclic = 1,
934 };
935
936 if (nr_pages == 0)
937 nr_pages = global_page_state(NR_FILE_DIRTY) +
938 global_page_state(NR_UNSTABLE_NFS);
939 wbc.nr_to_write = nr_pages;
940 bdi_writeback_all(&wbc);
602} 941}
603 942
604/* 943static noinline void block_dump___mark_inode_dirty(struct inode *inode)
605 * Start writeback of dirty pagecache data against all unlocked inodes. 944{
945 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
946 struct dentry *dentry;
947 const char *name = "?";
948
949 dentry = d_find_alias(inode);
950 if (dentry) {
951 spin_lock(&dentry->d_lock);
952 name = (const char *) dentry->d_name.name;
953 }
954 printk(KERN_DEBUG
955 "%s(%d): dirtied inode %lu (%s) on %s\n",
956 current->comm, task_pid_nr(current), inode->i_ino,
957 name, inode->i_sb->s_id);
958 if (dentry) {
959 spin_unlock(&dentry->d_lock);
960 dput(dentry);
961 }
962 }
963}
964
965/**
966 * __mark_inode_dirty - internal function
967 * @inode: inode to mark
968 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
969 * Mark an inode as dirty. Callers should use mark_inode_dirty or
970 * mark_inode_dirty_sync.
606 * 971 *
607 * Note: 972 * Put the inode on the super block's dirty list.
608 * We don't need to grab a reference to superblock here. If it has non-empty 973 *
609 * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed 974 * CAREFUL! We mark it dirty unconditionally, but move it onto the
610 * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all 975 * dirty list only if it is hashed or if it refers to a blockdev.
611 * empty. Since __sync_single_inode() regains inode_lock before it finally moves 976 * If it was not hashed, it will never be added to the dirty list
612 * inode from superblock lists we are OK. 977 * even if it is later hashed, as it will have been marked dirty already.
613 * 978 *
614 * If `older_than_this' is non-zero then only flush inodes which have a 979 * In short, make sure you hash any inodes _before_ you start marking
615 * flushtime older than *older_than_this. 980 * them dirty.
616 * 981 *
617 * If `bdi' is non-zero then we will scan the first inode against each 982 * This function *must* be atomic for the I_DIRTY_PAGES case -
618 * superblock until we find the matching ones. One group will be the dirty 983 * set_page_dirty() is called under spinlock in several places.
619 * inodes against a filesystem. Then when we hit the dummy blockdev superblock, 984 *
620 * sync_sb_inodes will seekout the blockdev which matches `bdi'. Maybe not 985 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
621 * super-efficient but we're about to do a ton of I/O... 986 * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
987 * the kernel-internal blockdev inode represents the dirtying time of the
988 * blockdev's pages. This is why for I_DIRTY_PAGES we always use
989 * page->mapping->host, so the page-dirtying time is recorded in the internal
990 * blockdev inode.
622 */ 991 */
623void 992void __mark_inode_dirty(struct inode *inode, int flags)
624writeback_inodes(struct writeback_control *wbc)
625{ 993{
626 struct super_block *sb; 994 struct super_block *sb = inode->i_sb;
627 995
628 might_sleep(); 996 /*
629 spin_lock(&sb_lock); 997 * Don't do this for I_DIRTY_PAGES - that doesn't actually
630restart: 998 * dirty the inode itself
631 list_for_each_entry_reverse(sb, &super_blocks, s_list) { 999 */
632 if (sb_has_dirty_inodes(sb)) { 1000 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
633 /* we're making our own get_super here */ 1001 if (sb->s_op->dirty_inode)
634 sb->s_count++; 1002 sb->s_op->dirty_inode(inode);
635 spin_unlock(&sb_lock); 1003 }
636 /* 1004
637 * If we can't get the readlock, there's no sense in 1005 /*
638 * waiting around, most of the time the FS is going to 1006 * make sure that changes are seen by all cpus before we test i_state
639 * be unmounted by the time it is released. 1007 * -- mikulas
640 */ 1008 */
641 if (down_read_trylock(&sb->s_umount)) { 1009 smp_mb();
642 if (sb->s_root) 1010
643 sync_sb_inodes(sb, wbc); 1011 /* avoid the locking if we can */
644 up_read(&sb->s_umount); 1012 if ((inode->i_state & flags) == flags)
1013 return;
1014
1015 if (unlikely(block_dump))
1016 block_dump___mark_inode_dirty(inode);
1017
1018 spin_lock(&inode_lock);
1019 if ((inode->i_state & flags) != flags) {
1020 const int was_dirty = inode->i_state & I_DIRTY;
1021
1022 inode->i_state |= flags;
1023
1024 /*
1025 * If the inode is being synced, just update its dirty state.
1026 * The unlocker will place the inode on the appropriate
1027 * superblock list, based upon its state.
1028 */
1029 if (inode->i_state & I_SYNC)
1030 goto out;
1031
1032 /*
1033 * Only add valid (hashed) inodes to the superblock's
1034 * dirty list. Add blockdev inodes as well.
1035 */
1036 if (!S_ISBLK(inode->i_mode)) {
1037 if (hlist_unhashed(&inode->i_hash))
1038 goto out;
1039 }
1040 if (inode->i_state & (I_FREEING|I_CLEAR))
1041 goto out;
1042
1043 /*
1044 * If the inode was already on b_dirty/b_io/b_more_io, don't
1045 * reposition it (that would break b_dirty time-ordering).
1046 */
1047 if (!was_dirty) {
1048 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
1049 struct backing_dev_info *bdi = wb->bdi;
1050
1051 if (bdi_cap_writeback_dirty(bdi) &&
1052 !test_bit(BDI_registered, &bdi->state)) {
1053 WARN_ON(1);
1054 printk(KERN_ERR "bdi-%s not registered\n",
1055 bdi->name);
645 } 1056 }
646 spin_lock(&sb_lock); 1057
647 if (__put_super_and_need_restart(sb)) 1058 inode->dirtied_when = jiffies;
648 goto restart; 1059 list_move(&inode->i_list, &wb->b_dirty);
649 } 1060 }
650 if (wbc->nr_to_write <= 0)
651 break;
652 } 1061 }
653 spin_unlock(&sb_lock); 1062out:
1063 spin_unlock(&inode_lock);
654} 1064}
1065EXPORT_SYMBOL(__mark_inode_dirty);
655 1066
656/* 1067/*
657 * writeback and wait upon the filesystem's dirty inodes. The caller will 1068 * Write out a superblock's list of dirty inodes. A wait will be performed
658 * do this in two passes - one to write, and one to wait. 1069 * upon no inodes, all inodes or the final one, depending upon sync_mode.
1070 *
1071 * If older_than_this is non-NULL, then only write out inodes which
1072 * had their first dirtying at a time earlier than *older_than_this.
659 * 1073 *
660 * A finite limit is set on the number of pages which will be written. 1074 * If we're a pdlfush thread, then implement pdflush collision avoidance
661 * To prevent infinite livelock of sys_sync(). 1075 * against the entire list.
662 * 1076 *
663 * We add in the number of potentially dirty inodes, because each inode write 1077 * If `bdi' is non-zero then we're being asked to writeback a specific queue.
664 * can dirty pagecache in the underlying blockdev. 1078 * This function assumes that the blockdev superblock's inodes are backed by
1079 * a variety of queues, so all inodes are searched. For other superblocks,
1080 * assume that all inodes are backed by the same queue.
1081 *
1082 * The inodes to be written are parked on bdi->b_io. They are moved back onto
1083 * bdi->b_dirty as they are selected for writing. This way, none can be missed
1084 * on the writer throttling path, and we get decent balancing between many
1085 * throttled threads: we don't want them all piling up on inode_sync_wait.
665 */ 1086 */
666void sync_inodes_sb(struct super_block *sb, int wait) 1087static void wait_sb_inodes(struct writeback_control *wbc)
1088{
1089 struct inode *inode, *old_inode = NULL;
1090
1091 /*
1092 * We need to be protected against the filesystem going from
1093 * r/o to r/w or vice versa.
1094 */
1095 WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount));
1096
1097 spin_lock(&inode_lock);
1098
1099 /*
1100 * Data integrity sync. Must wait for all pages under writeback,
1101 * because there may have been pages dirtied before our sync
1102 * call, but which had writeout started before we write it out.
1103 * In which case, the inode may not be on the dirty list, but
1104 * we still have to wait for that writeout.
1105 */
1106 list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) {
1107 struct address_space *mapping;
1108
1109 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
1110 continue;
1111 mapping = inode->i_mapping;
1112 if (mapping->nrpages == 0)
1113 continue;
1114 __iget(inode);
1115 spin_unlock(&inode_lock);
1116 /*
1117 * We hold a reference to 'inode' so it couldn't have
1118 * been removed from s_inodes list while we dropped the
1119 * inode_lock. We cannot iput the inode now as we can
1120 * be holding the last reference and we cannot iput it
1121 * under inode_lock. So we keep the reference and iput
1122 * it later.
1123 */
1124 iput(old_inode);
1125 old_inode = inode;
1126
1127 filemap_fdatawait(mapping);
1128
1129 cond_resched();
1130
1131 spin_lock(&inode_lock);
1132 }
1133 spin_unlock(&inode_lock);
1134 iput(old_inode);
1135}
1136
1137/**
1138 * writeback_inodes_sb - writeback dirty inodes from given super_block
1139 * @sb: the superblock
1140 *
1141 * Start writeback on some inodes on this super_block. No guarantees are made
1142 * on how many (if any) will be written, and this function does not wait
1143 * for IO completion of submitted IO. The number of pages submitted is
1144 * returned.
1145 */
1146long writeback_inodes_sb(struct super_block *sb)
667{ 1147{
668 struct writeback_control wbc = { 1148 struct writeback_control wbc = {
669 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, 1149 .sb = sb,
1150 .sync_mode = WB_SYNC_NONE,
670 .range_start = 0, 1151 .range_start = 0,
671 .range_end = LLONG_MAX, 1152 .range_end = LLONG_MAX,
672 }; 1153 };
1154 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1155 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1156 long nr_to_write;
673 1157
674 if (!wait) { 1158 nr_to_write = nr_dirty + nr_unstable +
675 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
676 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
677
678 wbc.nr_to_write = nr_dirty + nr_unstable +
679 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 1159 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
680 } else
681 wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
682 1160
683 sync_sb_inodes(sb, &wbc); 1161 wbc.nr_to_write = nr_to_write;
1162 bdi_writeback_all(&wbc);
1163 return nr_to_write - wbc.nr_to_write;
1164}
1165EXPORT_SYMBOL(writeback_inodes_sb);
1166
1167/**
1168 * sync_inodes_sb - sync sb inode pages
1169 * @sb: the superblock
1170 *
1171 * This function writes and waits on any dirty inode belonging to this
1172 * super_block. The number of pages synced is returned.
1173 */
1174long sync_inodes_sb(struct super_block *sb)
1175{
1176 struct writeback_control wbc = {
1177 .sb = sb,
1178 .sync_mode = WB_SYNC_ALL,
1179 .range_start = 0,
1180 .range_end = LLONG_MAX,
1181 };
1182 long nr_to_write = LONG_MAX; /* doesn't actually matter */
1183
1184 wbc.nr_to_write = nr_to_write;
1185 bdi_writeback_all(&wbc);
1186 wait_sb_inodes(&wbc);
1187 return nr_to_write - wbc.nr_to_write;
684} 1188}
1189EXPORT_SYMBOL(sync_inodes_sb);
685 1190
686/** 1191/**
687 * write_inode_now - write an inode to disk 1192 * write_inode_now - write an inode to disk
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f91ccc4a189d..4567db6f9430 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -801,6 +801,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
801{ 801{
802 int err; 802 int err;
803 803
804 fc->bdi.name = "fuse";
804 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 805 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
805 fc->bdi.unplug_io_fn = default_unplug_io_fn; 806 fc->bdi.unplug_io_fn = default_unplug_io_fn;
806 /* fuse does it's own writeback accounting */ 807 /* fuse does it's own writeback accounting */
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cb88dac8ccaa..a93b885311d8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -44,6 +44,7 @@ static const struct inode_operations hugetlbfs_dir_inode_operations;
44static const struct inode_operations hugetlbfs_inode_operations; 44static const struct inode_operations hugetlbfs_inode_operations;
45 45
46static struct backing_dev_info hugetlbfs_backing_dev_info = { 46static struct backing_dev_info hugetlbfs_backing_dev_info = {
47 .name = "hugetlbfs",
47 .ra_pages = 0, /* No readahead */ 48 .ra_pages = 0, /* No readahead */
48 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 49 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
49}; 50};
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 8fcb6239218e..7edb62e97419 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -258,7 +258,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
258 return rc; 258 return rc;
259} 259}
260 260
261static int jffs2_check_acl(struct inode *inode, int mask) 261int jffs2_check_acl(struct inode *inode, int mask)
262{ 262{
263 struct posix_acl *acl; 263 struct posix_acl *acl;
264 int rc; 264 int rc;
@@ -274,11 +274,6 @@ static int jffs2_check_acl(struct inode *inode, int mask)
274 return -EAGAIN; 274 return -EAGAIN;
275} 275}
276 276
277int jffs2_permission(struct inode *inode, int mask)
278{
279 return generic_permission(inode, mask, jffs2_check_acl);
280}
281
282int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) 277int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
283{ 278{
284 struct posix_acl *acl, *clone; 279 struct posix_acl *acl, *clone;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index fc929f2a14f6..f0ba63e3c36b 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29extern int jffs2_permission(struct inode *, int); 29extern int jffs2_check_acl(struct inode *, int);
30extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
32extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
@@ -36,7 +36,7 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
36 36
37#else 37#else
38 38
39#define jffs2_permission (NULL) 39#define jffs2_check_acl (NULL)
40#define jffs2_acl_chmod(inode) (0) 40#define jffs2_acl_chmod(inode) (0)
41#define jffs2_init_acl_pre(dir_i,inode,mode) (0) 41#define jffs2_init_acl_pre(dir_i,inode,mode) (0)
42#define jffs2_init_acl_post(inode) (0) 42#define jffs2_init_acl_post(inode) (0)
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 6f60cc910f4c..7aa4417e085f 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -55,7 +55,7 @@ const struct inode_operations jffs2_dir_inode_operations =
55 .rmdir = jffs2_rmdir, 55 .rmdir = jffs2_rmdir,
56 .mknod = jffs2_mknod, 56 .mknod = jffs2_mknod,
57 .rename = jffs2_rename, 57 .rename = jffs2_rename,
58 .permission = jffs2_permission, 58 .check_acl = jffs2_check_acl,
59 .setattr = jffs2_setattr, 59 .setattr = jffs2_setattr,
60 .setxattr = jffs2_setxattr, 60 .setxattr = jffs2_setxattr,
61 .getxattr = jffs2_getxattr, 61 .getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 23c947539864..b7b74e299142 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -56,7 +56,7 @@ const struct file_operations jffs2_file_operations =
56 56
57const struct inode_operations jffs2_file_inode_operations = 57const struct inode_operations jffs2_file_inode_operations =
58{ 58{
59 .permission = jffs2_permission, 59 .check_acl = jffs2_check_acl,
60 .setattr = jffs2_setattr, 60 .setattr = jffs2_setattr,
61 .setxattr = jffs2_setxattr, 61 .setxattr = jffs2_setxattr,
62 .getxattr = jffs2_getxattr, 62 .getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index b7339c3b6ad9..4ec11e8bda8c 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -21,7 +21,7 @@ const struct inode_operations jffs2_symlink_inode_operations =
21{ 21{
22 .readlink = generic_readlink, 22 .readlink = generic_readlink,
23 .follow_link = jffs2_follow_link, 23 .follow_link = jffs2_follow_link,
24 .permission = jffs2_permission, 24 .check_acl = jffs2_check_acl,
25 .setattr = jffs2_setattr, 25 .setattr = jffs2_setattr,
26 .setxattr = jffs2_setxattr, 26 .setxattr = jffs2_setxattr,
27 .getxattr = jffs2_getxattr, 27 .getxattr = jffs2_getxattr,
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index a29c7c3e3fb8..d66477c34306 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,7 +114,7 @@ out:
114 return rc; 114 return rc;
115} 115}
116 116
117static int jfs_check_acl(struct inode *inode, int mask) 117int jfs_check_acl(struct inode *inode, int mask)
118{ 118{
119 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 119 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
120 120
@@ -129,11 +129,6 @@ static int jfs_check_acl(struct inode *inode, int mask)
129 return -EAGAIN; 129 return -EAGAIN;
130} 130}
131 131
132int jfs_permission(struct inode *inode, int mask)
133{
134 return generic_permission(inode, mask, jfs_check_acl);
135}
136
137int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) 132int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
138{ 133{
139 struct posix_acl *acl = NULL; 134 struct posix_acl *acl = NULL;
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 7f6063acaa3b..2b70fa78e4a7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -96,7 +96,7 @@ const struct inode_operations jfs_file_inode_operations = {
96 .removexattr = jfs_removexattr, 96 .removexattr = jfs_removexattr,
97#ifdef CONFIG_JFS_POSIX_ACL 97#ifdef CONFIG_JFS_POSIX_ACL
98 .setattr = jfs_setattr, 98 .setattr = jfs_setattr,
99 .permission = jfs_permission, 99 .check_acl = jfs_check_acl,
100#endif 100#endif
101}; 101};
102 102
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 88475f10a389..b07bd417ef85 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23int jfs_permission(struct inode *, int); 23int jfs_check_acl(struct inode *, int);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_setattr(struct dentry *, struct iattr *); 25int jfs_setattr(struct dentry *, struct iattr *);
26 26
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 514ee2edb92a..c79a4270f083 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1543,7 +1543,7 @@ const struct inode_operations jfs_dir_inode_operations = {
1543 .removexattr = jfs_removexattr, 1543 .removexattr = jfs_removexattr,
1544#ifdef CONFIG_JFS_POSIX_ACL 1544#ifdef CONFIG_JFS_POSIX_ACL
1545 .setattr = jfs_setattr, 1545 .setattr = jfs_setattr,
1546 .permission = jfs_permission, 1546 .check_acl = jfs_check_acl,
1547#endif 1547#endif
1548}; 1548};
1549 1549
diff --git a/fs/locks.c b/fs/locks.c
index b6440f52178f..52366e877d76 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1591,7 +1591,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1591 if (can_sleep) 1591 if (can_sleep)
1592 lock->fl_flags |= FL_SLEEP; 1592 lock->fl_flags |= FL_SLEEP;
1593 1593
1594 error = security_file_lock(filp, cmd); 1594 error = security_file_lock(filp, lock->fl_type);
1595 if (error) 1595 if (error)
1596 goto out_free; 1596 goto out_free;
1597 1597
diff --git a/fs/namei.c b/fs/namei.c
index 1f13751693a5..d11f404667e9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,19 +169,10 @@ void putname(const char *name)
169EXPORT_SYMBOL(putname); 169EXPORT_SYMBOL(putname);
170#endif 170#endif
171 171
172 172/*
173/** 173 * This does basic POSIX ACL permission checking
174 * generic_permission - check for access rights on a Posix-like filesystem
175 * @inode: inode to check access rights for
176 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
177 * @check_acl: optional callback to check for Posix ACLs
178 *
179 * Used to check for read/write/execute permissions on a file.
180 * We use "fsuid" for this, letting us set arbitrary permissions
181 * for filesystem access without changing the "normal" uids which
182 * are used for other things..
183 */ 174 */
184int generic_permission(struct inode *inode, int mask, 175static int acl_permission_check(struct inode *inode, int mask,
185 int (*check_acl)(struct inode *inode, int mask)) 176 int (*check_acl)(struct inode *inode, int mask))
186{ 177{
187 umode_t mode = inode->i_mode; 178 umode_t mode = inode->i_mode;
@@ -193,9 +184,7 @@ int generic_permission(struct inode *inode, int mask,
193 else { 184 else {
194 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 185 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
195 int error = check_acl(inode, mask); 186 int error = check_acl(inode, mask);
196 if (error == -EACCES) 187 if (error != -EAGAIN)
197 goto check_capabilities;
198 else if (error != -EAGAIN)
199 return error; 188 return error;
200 } 189 }
201 190
@@ -208,8 +197,32 @@ int generic_permission(struct inode *inode, int mask,
208 */ 197 */
209 if ((mask & ~mode) == 0) 198 if ((mask & ~mode) == 0)
210 return 0; 199 return 0;
200 return -EACCES;
201}
202
203/**
204 * generic_permission - check for access rights on a Posix-like filesystem
205 * @inode: inode to check access rights for
206 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
207 * @check_acl: optional callback to check for Posix ACLs
208 *
209 * Used to check for read/write/execute permissions on a file.
210 * We use "fsuid" for this, letting us set arbitrary permissions
211 * for filesystem access without changing the "normal" uids which
212 * are used for other things..
213 */
214int generic_permission(struct inode *inode, int mask,
215 int (*check_acl)(struct inode *inode, int mask))
216{
217 int ret;
218
219 /*
220 * Do the basic POSIX ACL permission checks.
221 */
222 ret = acl_permission_check(inode, mask, check_acl);
223 if (ret != -EACCES)
224 return ret;
211 225
212 check_capabilities:
213 /* 226 /*
214 * Read/write DACs are always overridable. 227 * Read/write DACs are always overridable.
215 * Executable DACs are overridable if at least one exec bit is set. 228 * Executable DACs are overridable if at least one exec bit is set.
@@ -262,7 +275,7 @@ int inode_permission(struct inode *inode, int mask)
262 if (inode->i_op->permission) 275 if (inode->i_op->permission)
263 retval = inode->i_op->permission(inode, mask); 276 retval = inode->i_op->permission(inode, mask);
264 else 277 else
265 retval = generic_permission(inode, mask, NULL); 278 retval = generic_permission(inode, mask, inode->i_op->check_acl);
266 279
267 if (retval) 280 if (retval)
268 return retval; 281 return retval;
@@ -432,29 +445,22 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
432 */ 445 */
433static int exec_permission_lite(struct inode *inode) 446static int exec_permission_lite(struct inode *inode)
434{ 447{
435 umode_t mode = inode->i_mode; 448 int ret;
436
437 if (inode->i_op->permission)
438 return -EAGAIN;
439 449
440 if (current_fsuid() == inode->i_uid) 450 if (inode->i_op->permission) {
441 mode >>= 6; 451 ret = inode->i_op->permission(inode, MAY_EXEC);
442 else if (in_group_p(inode->i_gid)) 452 if (!ret)
443 mode >>= 3; 453 goto ok;
444 454 return ret;
445 if (mode & MAY_EXEC) 455 }
446 goto ok; 456 ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl);
447 457 if (!ret)
448 if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
449 goto ok;
450
451 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
452 goto ok; 458 goto ok;
453 459
454 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) 460 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
455 goto ok; 461 goto ok;
456 462
457 return -EACCES; 463 return ret;
458ok: 464ok:
459 return security_inode_permission(inode, MAY_EXEC); 465 return security_inode_permission(inode, MAY_EXEC);
460} 466}
@@ -853,12 +859,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
853 859
854 nd->flags |= LOOKUP_CONTINUE; 860 nd->flags |= LOOKUP_CONTINUE;
855 err = exec_permission_lite(inode); 861 err = exec_permission_lite(inode);
856 if (err == -EAGAIN)
857 err = inode_permission(nd->path.dentry->d_inode,
858 MAY_EXEC);
859 if (!err)
860 err = ima_path_check(&nd->path, MAY_EXEC,
861 IMA_COUNT_UPDATE);
862 if (err) 862 if (err)
863 break; 863 break;
864 864
@@ -1533,9 +1533,11 @@ int may_open(struct path *path, int acc_mode, int flag)
1533 if (error) 1533 if (error)
1534 return error; 1534 return error;
1535 1535
1536 error = ima_path_check(path, 1536 error = ima_path_check(path, acc_mode ?
1537 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC), 1537 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
1538 ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
1538 IMA_COUNT_UPDATE); 1539 IMA_COUNT_UPDATE);
1540
1539 if (error) 1541 if (error)
1540 return error; 1542 return error;
1541 /* 1543 /*
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8d25ccb2d51d..c6be84a161f6 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -879,6 +879,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
879 server->rsize = NFS_MAX_FILE_IO_SIZE; 879 server->rsize = NFS_MAX_FILE_IO_SIZE;
880 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 880 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
881 881
882 server->backing_dev_info.name = "nfs";
882 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; 883 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
883 884
884 if (server->wsize > max_rpc_payload) 885 if (server->wsize > max_rpc_payload)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 5573508f707f..36fcabbf5186 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
34 int flags = nfsexp_flags(rqstp, exp); 34 int flags = nfsexp_flags(rqstp, exp);
35 int ret; 35 int ret;
36 36
37 validate_process_creds();
38
37 /* discard any old override before preparing the new set */ 39 /* discard any old override before preparing the new set */
38 revert_creds(get_cred(current->real_cred)); 40 revert_creds(get_cred(current->real_cred));
39 new = prepare_creds(); 41 new = prepare_creds();
@@ -86,8 +88,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
86 else 88 else
87 new->cap_effective = cap_raise_nfsd_set(new->cap_effective, 89 new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
88 new->cap_permitted); 90 new->cap_permitted);
91 validate_process_creds();
89 put_cred(override_creds(new)); 92 put_cred(override_creds(new));
90 put_cred(new); 93 put_cred(new);
94 validate_process_creds();
91 return 0; 95 return 0;
92 96
93oom: 97oom:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 492c79b7800b..24d58adfe5fd 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -496,7 +496,9 @@ nfsd(void *vrqstp)
496 /* Lock the export hash tables for reading. */ 496 /* Lock the export hash tables for reading. */
497 exp_readlock(); 497 exp_readlock();
498 498
499 validate_process_creds();
499 svc_process(rqstp); 500 svc_process(rqstp);
501 validate_process_creds();
500 502
501 /* Unlock export hash tables */ 503 /* Unlock export hash tables */
502 exp_readunlock(); 504 exp_readunlock();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 23341c1063bc..8fa09bfbcba7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -684,6 +684,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
684 __be32 err; 684 __be32 err;
685 int host_err; 685 int host_err;
686 686
687 validate_process_creds();
688
687 /* 689 /*
688 * If we get here, then the client has already done an "open", 690 * If we get here, then the client has already done an "open",
689 * and (hopefully) checked permission - so allow OWNER_OVERRIDE 691 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
@@ -740,6 +742,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
740out_nfserr: 742out_nfserr:
741 err = nfserrno(host_err); 743 err = nfserrno(host_err);
742out: 744out:
745 validate_process_creds();
743 return err; 746 return err;
744} 747}
745 748
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 1c9efb406a96..02bf17808bdc 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -325,6 +325,7 @@ clear_fields:
325} 325}
326 326
327static struct backing_dev_info dlmfs_backing_dev_info = { 327static struct backing_dev_info dlmfs_backing_dev_info = {
328 .name = "ocfs2-dlmfs",
328 .ra_pages = 0, /* No readahead */ 329 .ra_pages = 0, /* No readahead */
329 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 330 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
330}; 331};
diff --git a/fs/open.c b/fs/open.c
index dd98e8076024..31191bf513e4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -199,7 +199,7 @@ out:
199int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 199int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
200 struct file *filp) 200 struct file *filp)
201{ 201{
202 int err; 202 int ret;
203 struct iattr newattrs; 203 struct iattr newattrs;
204 204
205 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ 205 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
@@ -214,12 +214,14 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
214 } 214 }
215 215
216 /* Remove suid/sgid on truncate too */ 216 /* Remove suid/sgid on truncate too */
217 newattrs.ia_valid |= should_remove_suid(dentry); 217 ret = should_remove_suid(dentry);
218 if (ret)
219 newattrs.ia_valid |= ret | ATTR_FORCE;
218 220
219 mutex_lock(&dentry->d_inode->i_mutex); 221 mutex_lock(&dentry->d_inode->i_mutex);
220 err = notify_change(dentry, &newattrs); 222 ret = notify_change(dentry, &newattrs);
221 mutex_unlock(&dentry->d_inode->i_mutex); 223 mutex_unlock(&dentry->d_inode->i_mutex);
222 return err; 224 return ret;
223} 225}
224 226
225static long do_sys_truncate(const char __user *pathname, loff_t length) 227static long do_sys_truncate(const char __user *pathname, loff_t length)
@@ -957,6 +959,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
957 int error; 959 int error;
958 struct file *f; 960 struct file *f;
959 961
962 validate_creds(cred);
963
960 /* 964 /*
961 * We must always pass in a valid mount pointer. Historically 965 * We must always pass in a valid mount pointer. Historically
962 * callers got away with not passing it, but we must enforce this at 966 * callers got away with not passing it, but we must enforce this at
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 0ff7566c767c..a7f0110fca4c 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -46,6 +46,7 @@ static const struct super_operations ramfs_ops;
46static const struct inode_operations ramfs_dir_inode_operations; 46static const struct inode_operations ramfs_dir_inode_operations;
47 47
48static struct backing_dev_info ramfs_backing_dev_info = { 48static struct backing_dev_info ramfs_backing_dev_info = {
49 .name = "ramfs",
49 .ra_pages = 0, /* No readahead */ 50 .ra_pages = 0, /* No readahead */
50 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | 51 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
51 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | 52 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
diff --git a/fs/super.c b/fs/super.c
index 2761d3e22ed9..9cda337ddae2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -62,9 +62,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
62 s = NULL; 62 s = NULL;
63 goto out; 63 goto out;
64 } 64 }
65 INIT_LIST_HEAD(&s->s_dirty);
66 INIT_LIST_HEAD(&s->s_io);
67 INIT_LIST_HEAD(&s->s_more_io);
68 INIT_LIST_HEAD(&s->s_files); 65 INIT_LIST_HEAD(&s->s_files);
69 INIT_LIST_HEAD(&s->s_instances); 66 INIT_LIST_HEAD(&s->s_instances);
70 INIT_HLIST_HEAD(&s->s_anon); 67 INIT_HLIST_HEAD(&s->s_anon);
@@ -171,7 +168,7 @@ int __put_super_and_need_restart(struct super_block *sb)
171 * Drops a temporary reference, frees superblock if there's no 168 * Drops a temporary reference, frees superblock if there's no
172 * references left. 169 * references left.
173 */ 170 */
174static void put_super(struct super_block *sb) 171void put_super(struct super_block *sb)
175{ 172{
176 spin_lock(&sb_lock); 173 spin_lock(&sb_lock);
177 __put_super(sb); 174 __put_super(sb);
diff --git a/fs/sync.c b/fs/sync.c
index 3422ba61d86d..103cc7fdd3df 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -19,20 +19,22 @@
19 SYNC_FILE_RANGE_WAIT_AFTER) 19 SYNC_FILE_RANGE_WAIT_AFTER)
20 20
21/* 21/*
22 * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) 22 * Do the filesystem syncing work. For simple filesystems
23 * just dirties buffers with inodes so we have to submit IO for these buffers 23 * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to
24 * via __sync_blockdev(). This also speeds up the wait == 1 case since in that 24 * submit IO for these buffers via __sync_blockdev(). This also speeds up the
25 * case write_inode() functions do sync_dirty_buffer() and thus effectively 25 * wait == 1 case since in that case write_inode() functions do
26 * write one block at a time. 26 * sync_dirty_buffer() and thus effectively write one block at a time.
27 */ 27 */
28static int __sync_filesystem(struct super_block *sb, int wait) 28static int __sync_filesystem(struct super_block *sb, int wait)
29{ 29{
30 /* Avoid doing twice syncing and cache pruning for quota sync */ 30 /* Avoid doing twice syncing and cache pruning for quota sync */
31 if (!wait) 31 if (!wait) {
32 writeout_quota_sb(sb, -1); 32 writeout_quota_sb(sb, -1);
33 else 33 writeback_inodes_sb(sb);
34 } else {
34 sync_quota_sb(sb, -1); 35 sync_quota_sb(sb, -1);
35 sync_inodes_sb(sb, wait); 36 sync_inodes_sb(sb);
37 }
36 if (sb->s_op->sync_fs) 38 if (sb->s_op->sync_fs)
37 sb->s_op->sync_fs(sb, wait); 39 sb->s_op->sync_fs(sb, wait);
38 return __sync_blockdev(sb->s_bdev, wait); 40 return __sync_blockdev(sb->s_bdev, wait);
@@ -118,7 +120,7 @@ restart:
118 */ 120 */
119SYSCALL_DEFINE0(sync) 121SYSCALL_DEFINE0(sync)
120{ 122{
121 wakeup_pdflush(0); 123 wakeup_flusher_threads(0);
122 sync_filesystems(0); 124 sync_filesystems(0);
123 sync_filesystems(1); 125 sync_filesystems(1);
124 if (unlikely(laptop_mode)) 126 if (unlikely(laptop_mode))
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 14f2d71ea3ce..0050fc40e8c9 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -760,6 +760,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
760const struct inode_operations sysfs_dir_inode_operations = { 760const struct inode_operations sysfs_dir_inode_operations = {
761 .lookup = sysfs_lookup, 761 .lookup = sysfs_lookup,
762 .setattr = sysfs_setattr, 762 .setattr = sysfs_setattr,
763 .setxattr = sysfs_setxattr,
763}; 764};
764 765
765static void remove_dir(struct sysfs_dirent *sd) 766static void remove_dir(struct sysfs_dirent *sd)
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 555f0ff988df..e28cecf179f5 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -18,6 +18,8 @@
18#include <linux/capability.h> 18#include <linux/capability.h>
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/xattr.h>
22#include <linux/security.h>
21#include "sysfs.h" 23#include "sysfs.h"
22 24
23extern struct super_block * sysfs_sb; 25extern struct super_block * sysfs_sb;
@@ -29,12 +31,14 @@ static const struct address_space_operations sysfs_aops = {
29}; 31};
30 32
31static struct backing_dev_info sysfs_backing_dev_info = { 33static struct backing_dev_info sysfs_backing_dev_info = {
34 .name = "sysfs",
32 .ra_pages = 0, /* No readahead */ 35 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 36 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
34}; 37};
35 38
36static const struct inode_operations sysfs_inode_operations ={ 39static const struct inode_operations sysfs_inode_operations ={
37 .setattr = sysfs_setattr, 40 .setattr = sysfs_setattr,
41 .setxattr = sysfs_setxattr,
38}; 42};
39 43
40int __init sysfs_inode_init(void) 44int __init sysfs_inode_init(void)
@@ -42,18 +46,37 @@ int __init sysfs_inode_init(void)
42 return bdi_init(&sysfs_backing_dev_info); 46 return bdi_init(&sysfs_backing_dev_info);
43} 47}
44 48
49struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
50{
51 struct sysfs_inode_attrs *attrs;
52 struct iattr *iattrs;
53
54 attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
55 if (!attrs)
56 return NULL;
57 iattrs = &attrs->ia_iattr;
58
59 /* assign default attributes */
60 iattrs->ia_mode = sd->s_mode;
61 iattrs->ia_uid = 0;
62 iattrs->ia_gid = 0;
63 iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
64
65 return attrs;
66}
45int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) 67int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
46{ 68{
47 struct inode * inode = dentry->d_inode; 69 struct inode * inode = dentry->d_inode;
48 struct sysfs_dirent * sd = dentry->d_fsdata; 70 struct sysfs_dirent * sd = dentry->d_fsdata;
49 struct iattr * sd_iattr; 71 struct sysfs_inode_attrs *sd_attrs;
72 struct iattr *iattrs;
50 unsigned int ia_valid = iattr->ia_valid; 73 unsigned int ia_valid = iattr->ia_valid;
51 int error; 74 int error;
52 75
53 if (!sd) 76 if (!sd)
54 return -EINVAL; 77 return -EINVAL;
55 78
56 sd_iattr = sd->s_iattr; 79 sd_attrs = sd->s_iattr;
57 80
58 error = inode_change_ok(inode, iattr); 81 error = inode_change_ok(inode, iattr);
59 if (error) 82 if (error)
@@ -65,42 +88,77 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
65 if (error) 88 if (error)
66 return error; 89 return error;
67 90
68 if (!sd_iattr) { 91 if (!sd_attrs) {
69 /* setting attributes for the first time, allocate now */ 92 /* setting attributes for the first time, allocate now */
70 sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); 93 sd_attrs = sysfs_init_inode_attrs(sd);
71 if (!sd_iattr) 94 if (!sd_attrs)
72 return -ENOMEM; 95 return -ENOMEM;
73 /* assign default attributes */ 96 sd->s_iattr = sd_attrs;
74 sd_iattr->ia_mode = sd->s_mode; 97 } else {
75 sd_iattr->ia_uid = 0; 98 /* attributes were changed at least once in past */
76 sd_iattr->ia_gid = 0; 99 iattrs = &sd_attrs->ia_iattr;
77 sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; 100
78 sd->s_iattr = sd_iattr; 101 if (ia_valid & ATTR_UID)
102 iattrs->ia_uid = iattr->ia_uid;
103 if (ia_valid & ATTR_GID)
104 iattrs->ia_gid = iattr->ia_gid;
105 if (ia_valid & ATTR_ATIME)
106 iattrs->ia_atime = timespec_trunc(iattr->ia_atime,
107 inode->i_sb->s_time_gran);
108 if (ia_valid & ATTR_MTIME)
109 iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime,
110 inode->i_sb->s_time_gran);
111 if (ia_valid & ATTR_CTIME)
112 iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime,
113 inode->i_sb->s_time_gran);
114 if (ia_valid & ATTR_MODE) {
115 umode_t mode = iattr->ia_mode;
116
117 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
118 mode &= ~S_ISGID;
119 iattrs->ia_mode = sd->s_mode = mode;
120 }
79 } 121 }
122 return error;
123}
80 124
81 /* attributes were changed atleast once in past */ 125int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
82 126 size_t size, int flags)
83 if (ia_valid & ATTR_UID) 127{
84 sd_iattr->ia_uid = iattr->ia_uid; 128 struct sysfs_dirent *sd = dentry->d_fsdata;
85 if (ia_valid & ATTR_GID) 129 struct sysfs_inode_attrs *iattrs;
86 sd_iattr->ia_gid = iattr->ia_gid; 130 void *secdata;
87 if (ia_valid & ATTR_ATIME) 131 int error;
88 sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime, 132 u32 secdata_len = 0;
89 inode->i_sb->s_time_gran); 133
90 if (ia_valid & ATTR_MTIME) 134 if (!sd)
91 sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime, 135 return -EINVAL;
92 inode->i_sb->s_time_gran); 136 if (!sd->s_iattr)
93 if (ia_valid & ATTR_CTIME) 137 sd->s_iattr = sysfs_init_inode_attrs(sd);
94 sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime, 138 if (!sd->s_iattr)
95 inode->i_sb->s_time_gran); 139 return -ENOMEM;
96 if (ia_valid & ATTR_MODE) { 140
97 umode_t mode = iattr->ia_mode; 141 iattrs = sd->s_iattr;
98 142
99 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) 143 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
100 mode &= ~S_ISGID; 144 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
101 sd_iattr->ia_mode = sd->s_mode = mode; 145 error = security_inode_setsecurity(dentry->d_inode, suffix,
102 } 146 value, size, flags);
147 if (error)
148 goto out;
149 error = security_inode_getsecctx(dentry->d_inode,
150 &secdata, &secdata_len);
151 if (error)
152 goto out;
153 if (iattrs->ia_secdata)
154 security_release_secctx(iattrs->ia_secdata,
155 iattrs->ia_secdata_len);
156 iattrs->ia_secdata = secdata;
157 iattrs->ia_secdata_len = secdata_len;
103 158
159 } else
160 return -EINVAL;
161out:
104 return error; 162 return error;
105} 163}
106 164
@@ -146,6 +204,7 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd)
146static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) 204static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
147{ 205{
148 struct bin_attribute *bin_attr; 206 struct bin_attribute *bin_attr;
207 struct sysfs_inode_attrs *iattrs;
149 208
150 inode->i_private = sysfs_get(sd); 209 inode->i_private = sysfs_get(sd);
151 inode->i_mapping->a_ops = &sysfs_aops; 210 inode->i_mapping->a_ops = &sysfs_aops;
@@ -154,16 +213,20 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
154 inode->i_ino = sd->s_ino; 213 inode->i_ino = sd->s_ino;
155 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); 214 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
156 215
157 if (sd->s_iattr) { 216 iattrs = sd->s_iattr;
217 if (iattrs) {
158 /* sysfs_dirent has non-default attributes 218 /* sysfs_dirent has non-default attributes
159 * get them for the new inode from persistent copy 219 * get them for the new inode from persistent copy
160 * in sysfs_dirent 220 * in sysfs_dirent
161 */ 221 */
162 set_inode_attr(inode, sd->s_iattr); 222 set_inode_attr(inode, &iattrs->ia_iattr);
223 if (iattrs->ia_secdata)
224 security_inode_notifysecctx(inode,
225 iattrs->ia_secdata,
226 iattrs->ia_secdata_len);
163 } else 227 } else
164 set_default_inode_attr(inode, sd->s_mode); 228 set_default_inode_attr(inode, sd->s_mode);
165 229
166
167 /* initialize inode according to type */ 230 /* initialize inode according to type */
168 switch (sysfs_type(sd)) { 231 switch (sysfs_type(sd)) {
169 case SYSFS_DIR: 232 case SYSFS_DIR:
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 1d897ad808e0..c5081ad77026 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -16,6 +16,7 @@
16#include <linux/kobject.h> 16#include <linux/kobject.h>
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mutex.h> 18#include <linux/mutex.h>
19#include <linux/security.h>
19 20
20#include "sysfs.h" 21#include "sysfs.h"
21 22
@@ -209,6 +210,7 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co
209} 210}
210 211
211const struct inode_operations sysfs_symlink_inode_operations = { 212const struct inode_operations sysfs_symlink_inode_operations = {
213 .setxattr = sysfs_setxattr,
212 .readlink = generic_readlink, 214 .readlink = generic_readlink,
213 .follow_link = sysfs_follow_link, 215 .follow_link = sysfs_follow_link,
214 .put_link = sysfs_put_link, 216 .put_link = sysfs_put_link,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3fa0d98481e2..af4c4e7482ac 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,8 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/fs.h>
12
11struct sysfs_open_dirent; 13struct sysfs_open_dirent;
12 14
13/* type-specific structures for sysfs_dirent->s_* union members */ 15/* type-specific structures for sysfs_dirent->s_* union members */
@@ -31,6 +33,12 @@ struct sysfs_elem_bin_attr {
31 struct hlist_head buffers; 33 struct hlist_head buffers;
32}; 34};
33 35
36struct sysfs_inode_attrs {
37 struct iattr ia_iattr;
38 void *ia_secdata;
39 u32 ia_secdata_len;
40};
41
34/* 42/*
35 * sysfs_dirent - the building block of sysfs hierarchy. Each and 43 * sysfs_dirent - the building block of sysfs hierarchy. Each and
36 * every sysfs node is represented by single sysfs_dirent. 44 * every sysfs node is represented by single sysfs_dirent.
@@ -56,7 +64,7 @@ struct sysfs_dirent {
56 unsigned int s_flags; 64 unsigned int s_flags;
57 ino_t s_ino; 65 ino_t s_ino;
58 umode_t s_mode; 66 umode_t s_mode;
59 struct iattr *s_iattr; 67 struct sysfs_inode_attrs *s_iattr;
60}; 68};
61 69
62#define SD_DEACTIVATED_BIAS INT_MIN 70#define SD_DEACTIVATED_BIAS INT_MIN
@@ -148,6 +156,8 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
148struct inode *sysfs_get_inode(struct sysfs_dirent *sd); 156struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
149void sysfs_delete_inode(struct inode *inode); 157void sysfs_delete_inode(struct inode *inode);
150int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 158int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
159int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
160 size_t size, int flags);
151int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); 161int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
152int sysfs_inode_init(void); 162int sysfs_inode_init(void);
153 163
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index eaf6d891d46f..1c8991b0db13 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -65,26 +65,14 @@
65static int shrink_liability(struct ubifs_info *c, int nr_to_write) 65static int shrink_liability(struct ubifs_info *c, int nr_to_write)
66{ 66{
67 int nr_written; 67 int nr_written;
68 struct writeback_control wbc = {
69 .sync_mode = WB_SYNC_NONE,
70 .range_end = LLONG_MAX,
71 .nr_to_write = nr_to_write,
72 };
73
74 generic_sync_sb_inodes(c->vfs_sb, &wbc);
75 nr_written = nr_to_write - wbc.nr_to_write;
76 68
69 nr_written = writeback_inodes_sb(c->vfs_sb);
77 if (!nr_written) { 70 if (!nr_written) {
78 /* 71 /*
79 * Re-try again but wait on pages/inodes which are being 72 * Re-try again but wait on pages/inodes which are being
80 * written-back concurrently (e.g., by pdflush). 73 * written-back concurrently (e.g., by pdflush).
81 */ 74 */
82 memset(&wbc, 0, sizeof(struct writeback_control)); 75 nr_written = sync_inodes_sb(c->vfs_sb);
83 wbc.sync_mode = WB_SYNC_ALL;
84 wbc.range_end = LLONG_MAX;
85 wbc.nr_to_write = nr_to_write;
86 generic_sync_sb_inodes(c->vfs_sb, &wbc);
87 nr_written = nr_to_write - wbc.nr_to_write;
88 } 76 }
89 77
90 dbg_budg("%d pages were written back", nr_written); 78 dbg_budg("%d pages were written back", nr_written);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 26d2e0d80465..51763aa8f4de 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -438,12 +438,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
438{ 438{
439 int i, err; 439 int i, err;
440 struct ubifs_info *c = sb->s_fs_info; 440 struct ubifs_info *c = sb->s_fs_info;
441 struct writeback_control wbc = {
442 .sync_mode = WB_SYNC_ALL,
443 .range_start = 0,
444 .range_end = LLONG_MAX,
445 .nr_to_write = LONG_MAX,
446 };
447 441
448 /* 442 /*
449 * Zero @wait is just an advisory thing to help the file system shove 443 * Zero @wait is just an advisory thing to help the file system shove
@@ -462,7 +456,7 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
462 * the user be able to get more accurate results of 'statfs()' after 456 * the user be able to get more accurate results of 'statfs()' after
463 * they synchronize the file system. 457 * they synchronize the file system.
464 */ 458 */
465 generic_sync_sb_inodes(sb, &wbc); 459 sync_inodes_sb(sb);
466 460
467 /* 461 /*
468 * Synchronize write buffers, because 'ubifs_run_commit()' does not 462 * Synchronize write buffers, because 'ubifs_run_commit()' does not
@@ -1971,6 +1965,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1971 * 1965 *
1972 * Read-ahead will be disabled because @c->bdi.ra_pages is 0. 1966 * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
1973 */ 1967 */
1968 c->bdi.name = "ubifs",
1974 c->bdi.capabilities = BDI_CAP_MAP_COPY; 1969 c->bdi.capabilities = BDI_CAP_MAP_COPY;
1975 c->bdi.unplug_io_fn = default_unplug_io_fn; 1970 c->bdi.unplug_io_fn = default_unplug_io_fn;
1976 err = bdi_init(&c->bdi); 1971 err = bdi_init(&c->bdi);
diff --git a/fs/xattr.c b/fs/xattr.c
index 1c3d0af59ddf..6d4f6d3449fb 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -66,22 +66,28 @@ xattr_permission(struct inode *inode, const char *name, int mask)
66 return inode_permission(inode, mask); 66 return inode_permission(inode, mask);
67} 67}
68 68
69int 69/**
70vfs_setxattr(struct dentry *dentry, const char *name, const void *value, 70 * __vfs_setxattr_noperm - perform setxattr operation without performing
71 size_t size, int flags) 71 * permission checks.
72 *
73 * @dentry - object to perform setxattr on
74 * @name - xattr name to set
75 * @value - value to set @name to
76 * @size - size of @value
77 * @flags - flags to pass into filesystem operations
78 *
79 * returns the result of the internal setxattr or setsecurity operations.
80 *
81 * This function requires the caller to lock the inode's i_mutex before it
82 * is executed. It also assumes that the caller will make the appropriate
83 * permission checks.
84 */
85int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
86 const void *value, size_t size, int flags)
72{ 87{
73 struct inode *inode = dentry->d_inode; 88 struct inode *inode = dentry->d_inode;
74 int error; 89 int error = -EOPNOTSUPP;
75
76 error = xattr_permission(inode, name, MAY_WRITE);
77 if (error)
78 return error;
79 90
80 mutex_lock(&inode->i_mutex);
81 error = security_inode_setxattr(dentry, name, value, size, flags);
82 if (error)
83 goto out;
84 error = -EOPNOTSUPP;
85 if (inode->i_op->setxattr) { 91 if (inode->i_op->setxattr) {
86 error = inode->i_op->setxattr(dentry, name, value, size, flags); 92 error = inode->i_op->setxattr(dentry, name, value, size, flags);
87 if (!error) { 93 if (!error) {
@@ -97,6 +103,29 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
97 if (!error) 103 if (!error)
98 fsnotify_xattr(dentry); 104 fsnotify_xattr(dentry);
99 } 105 }
106
107 return error;
108}
109
110
111int
112vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
113 size_t size, int flags)
114{
115 struct inode *inode = dentry->d_inode;
116 int error;
117
118 error = xattr_permission(inode, name, MAY_WRITE);
119 if (error)
120 return error;
121
122 mutex_lock(&inode->i_mutex);
123 error = security_inode_setxattr(dentry, name, value, size, flags);
124 if (error)
125 goto out;
126
127 error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
128
100out: 129out:
101 mutex_unlock(&inode->i_mutex); 130 mutex_unlock(&inode->i_mutex);
102 return error; 131 return error;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 8070b34cc287..6c32f1d63d8c 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -485,14 +485,6 @@ xfs_vn_put_link(
485} 485}
486 486
487STATIC int 487STATIC int
488xfs_vn_permission(
489 struct inode *inode,
490 int mask)
491{
492 return generic_permission(inode, mask, xfs_check_acl);
493}
494
495STATIC int
496xfs_vn_getattr( 488xfs_vn_getattr(
497 struct vfsmount *mnt, 489 struct vfsmount *mnt,
498 struct dentry *dentry, 490 struct dentry *dentry,
@@ -696,7 +688,7 @@ xfs_vn_fiemap(
696} 688}
697 689
698static const struct inode_operations xfs_inode_operations = { 690static const struct inode_operations xfs_inode_operations = {
699 .permission = xfs_vn_permission, 691 .check_acl = xfs_check_acl,
700 .truncate = xfs_vn_truncate, 692 .truncate = xfs_vn_truncate,
701 .getattr = xfs_vn_getattr, 693 .getattr = xfs_vn_getattr,
702 .setattr = xfs_vn_setattr, 694 .setattr = xfs_vn_setattr,
@@ -724,7 +716,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
724 .rmdir = xfs_vn_unlink, 716 .rmdir = xfs_vn_unlink,
725 .mknod = xfs_vn_mknod, 717 .mknod = xfs_vn_mknod,
726 .rename = xfs_vn_rename, 718 .rename = xfs_vn_rename,
727 .permission = xfs_vn_permission, 719 .check_acl = xfs_check_acl,
728 .getattr = xfs_vn_getattr, 720 .getattr = xfs_vn_getattr,
729 .setattr = xfs_vn_setattr, 721 .setattr = xfs_vn_setattr,
730 .setxattr = generic_setxattr, 722 .setxattr = generic_setxattr,
@@ -749,7 +741,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
749 .rmdir = xfs_vn_unlink, 741 .rmdir = xfs_vn_unlink,
750 .mknod = xfs_vn_mknod, 742 .mknod = xfs_vn_mknod,
751 .rename = xfs_vn_rename, 743 .rename = xfs_vn_rename,
752 .permission = xfs_vn_permission, 744 .check_acl = xfs_check_acl,
753 .getattr = xfs_vn_getattr, 745 .getattr = xfs_vn_getattr,
754 .setattr = xfs_vn_setattr, 746 .setattr = xfs_vn_setattr,
755 .setxattr = generic_setxattr, 747 .setxattr = generic_setxattr,
@@ -762,7 +754,7 @@ static const struct inode_operations xfs_symlink_inode_operations = {
762 .readlink = generic_readlink, 754 .readlink = generic_readlink,
763 .follow_link = xfs_vn_follow_link, 755 .follow_link = xfs_vn_follow_link,
764 .put_link = xfs_vn_put_link, 756 .put_link = xfs_vn_put_link,
765 .permission = xfs_vn_permission, 757 .check_acl = xfs_check_acl,
766 .getattr = xfs_vn_getattr, 758 .getattr = xfs_vn_getattr,
767 .setattr = xfs_vn_setattr, 759 .setattr = xfs_vn_setattr,
768 .setxattr = generic_setxattr, 760 .setxattr = generic_setxattr,
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 1d52425a6118..f169bcb90b58 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -13,6 +13,8 @@
13#include <linux/proportions.h> 13#include <linux/proportions.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/sched.h>
17#include <linux/writeback.h>
16#include <asm/atomic.h> 18#include <asm/atomic.h>
17 19
18struct page; 20struct page;
@@ -23,9 +25,11 @@ struct dentry;
23 * Bits in backing_dev_info.state 25 * Bits in backing_dev_info.state
24 */ 26 */
25enum bdi_state { 27enum bdi_state {
26 BDI_pdflush, /* A pdflush thread is working this device */ 28 BDI_pending, /* On its way to being activated */
29 BDI_wb_alloc, /* Default embedded wb allocated */
27 BDI_async_congested, /* The async (write) queue is getting full */ 30 BDI_async_congested, /* The async (write) queue is getting full */
28 BDI_sync_congested, /* The sync queue is getting full */ 31 BDI_sync_congested, /* The sync queue is getting full */
32 BDI_registered, /* bdi_register() was done */
29 BDI_unused, /* Available bits start here */ 33 BDI_unused, /* Available bits start here */
30}; 34};
31 35
@@ -39,7 +43,22 @@ enum bdi_stat_item {
39 43
40#define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) 44#define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
41 45
46struct bdi_writeback {
47 struct list_head list; /* hangs off the bdi */
48
49 struct backing_dev_info *bdi; /* our parent bdi */
50 unsigned int nr;
51
52 unsigned long last_old_flush; /* last old data flush */
53
54 struct task_struct *task; /* writeback task */
55 struct list_head b_dirty; /* dirty inodes */
56 struct list_head b_io; /* parked for writeback */
57 struct list_head b_more_io; /* parked for more writeback */
58};
59
42struct backing_dev_info { 60struct backing_dev_info {
61 struct list_head bdi_list;
43 unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ 62 unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */
44 unsigned long state; /* Always use atomic bitops on this */ 63 unsigned long state; /* Always use atomic bitops on this */
45 unsigned int capabilities; /* Device capabilities */ 64 unsigned int capabilities; /* Device capabilities */
@@ -48,6 +67,8 @@ struct backing_dev_info {
48 void (*unplug_io_fn)(struct backing_dev_info *, struct page *); 67 void (*unplug_io_fn)(struct backing_dev_info *, struct page *);
49 void *unplug_io_data; 68 void *unplug_io_data;
50 69
70 char *name;
71
51 struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS]; 72 struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS];
52 73
53 struct prop_local_percpu completions; 74 struct prop_local_percpu completions;
@@ -56,6 +77,14 @@ struct backing_dev_info {
56 unsigned int min_ratio; 77 unsigned int min_ratio;
57 unsigned int max_ratio, max_prop_frac; 78 unsigned int max_ratio, max_prop_frac;
58 79
80 struct bdi_writeback wb; /* default writeback info for this bdi */
81 spinlock_t wb_lock; /* protects update side of wb_list */
82 struct list_head wb_list; /* the flusher threads hanging off this bdi */
83 unsigned long wb_mask; /* bitmask of registered tasks */
84 unsigned int wb_cnt; /* number of registered tasks */
85
86 struct list_head work_list;
87
59 struct device *dev; 88 struct device *dev;
60 89
61#ifdef CONFIG_DEBUG_FS 90#ifdef CONFIG_DEBUG_FS
@@ -71,6 +100,19 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
71 const char *fmt, ...); 100 const char *fmt, ...);
72int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); 101int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
73void bdi_unregister(struct backing_dev_info *bdi); 102void bdi_unregister(struct backing_dev_info *bdi);
103void bdi_start_writeback(struct writeback_control *wbc);
104int bdi_writeback_task(struct bdi_writeback *wb);
105int bdi_has_dirty_io(struct backing_dev_info *bdi);
106
107extern spinlock_t bdi_lock;
108extern struct list_head bdi_list;
109
110static inline int wb_has_dirty_io(struct bdi_writeback *wb)
111{
112 return !list_empty(&wb->b_dirty) ||
113 !list_empty(&wb->b_io) ||
114 !list_empty(&wb->b_more_io);
115}
74 116
75static inline void __add_bdi_stat(struct backing_dev_info *bdi, 117static inline void __add_bdi_stat(struct backing_dev_info *bdi,
76 enum bdi_stat_item item, s64 amount) 118 enum bdi_stat_item item, s64 amount)
@@ -261,6 +303,11 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
261 return bdi->capabilities & BDI_CAP_SWAP_BACKED; 303 return bdi->capabilities & BDI_CAP_SWAP_BACKED;
262} 304}
263 305
306static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
307{
308 return bdi == &default_backing_dev_info;
309}
310
264static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) 311static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
265{ 312{
266 return bdi_cap_writeback_dirty(mapping->backing_dev_info); 313 return bdi_cap_writeback_dirty(mapping->backing_dev_info);
@@ -276,4 +323,10 @@ static inline bool mapping_cap_swap_backed(struct address_space *mapping)
276 return bdi_cap_swap_backed(mapping->backing_dev_info); 323 return bdi_cap_swap_backed(mapping->backing_dev_info);
277} 324}
278 325
326static inline int bdi_sched_wait(void *word)
327{
328 schedule();
329 return 0;
330}
331
279#endif /* _LINUX_BACKING_DEV_H */ 332#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4fa999696310..24520a539c6f 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -114,6 +114,13 @@ struct thread_group_cred {
114 */ 114 */
115struct cred { 115struct cred {
116 atomic_t usage; 116 atomic_t usage;
117#ifdef CONFIG_DEBUG_CREDENTIALS
118 atomic_t subscribers; /* number of processes subscribed */
119 void *put_addr;
120 unsigned magic;
121#define CRED_MAGIC 0x43736564
122#define CRED_MAGIC_DEAD 0x44656144
123#endif
117 uid_t uid; /* real UID of the task */ 124 uid_t uid; /* real UID of the task */
118 gid_t gid; /* real GID of the task */ 125 gid_t gid; /* real GID of the task */
119 uid_t suid; /* saved UID of the task */ 126 uid_t suid; /* saved UID of the task */
@@ -143,7 +150,9 @@ struct cred {
143}; 150};
144 151
145extern void __put_cred(struct cred *); 152extern void __put_cred(struct cred *);
153extern void exit_creds(struct task_struct *);
146extern int copy_creds(struct task_struct *, unsigned long); 154extern int copy_creds(struct task_struct *, unsigned long);
155extern struct cred *cred_alloc_blank(void);
147extern struct cred *prepare_creds(void); 156extern struct cred *prepare_creds(void);
148extern struct cred *prepare_exec_creds(void); 157extern struct cred *prepare_exec_creds(void);
149extern struct cred *prepare_usermodehelper_creds(void); 158extern struct cred *prepare_usermodehelper_creds(void);
@@ -158,6 +167,60 @@ extern int set_security_override_from_ctx(struct cred *, const char *);
158extern int set_create_files_as(struct cred *, struct inode *); 167extern int set_create_files_as(struct cred *, struct inode *);
159extern void __init cred_init(void); 168extern void __init cred_init(void);
160 169
170/*
171 * check for validity of credentials
172 */
173#ifdef CONFIG_DEBUG_CREDENTIALS
174extern void __invalid_creds(const struct cred *, const char *, unsigned);
175extern void __validate_process_creds(struct task_struct *,
176 const char *, unsigned);
177
178static inline bool creds_are_invalid(const struct cred *cred)
179{
180 if (cred->magic != CRED_MAGIC)
181 return true;
182 if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
183 return true;
184#ifdef CONFIG_SECURITY_SELINUX
185 if ((unsigned long) cred->security < PAGE_SIZE)
186 return true;
187 if ((*(u32*)cred->security & 0xffffff00) ==
188 (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
189 return true;
190#endif
191 return false;
192}
193
194static inline void __validate_creds(const struct cred *cred,
195 const char *file, unsigned line)
196{
197 if (unlikely(creds_are_invalid(cred)))
198 __invalid_creds(cred, file, line);
199}
200
201#define validate_creds(cred) \
202do { \
203 __validate_creds((cred), __FILE__, __LINE__); \
204} while(0)
205
206#define validate_process_creds() \
207do { \
208 __validate_process_creds(current, __FILE__, __LINE__); \
209} while(0)
210
211extern void validate_creds_for_do_exit(struct task_struct *);
212#else
213static inline void validate_creds(const struct cred *cred)
214{
215}
216static inline void validate_creds_for_do_exit(struct task_struct *tsk)
217{
218}
219static inline void validate_process_creds(void)
220{
221}
222#endif
223
161/** 224/**
162 * get_new_cred - Get a reference on a new set of credentials 225 * get_new_cred - Get a reference on a new set of credentials
163 * @cred: The new credentials to reference 226 * @cred: The new credentials to reference
@@ -186,7 +249,9 @@ static inline struct cred *get_new_cred(struct cred *cred)
186 */ 249 */
187static inline const struct cred *get_cred(const struct cred *cred) 250static inline const struct cred *get_cred(const struct cred *cred)
188{ 251{
189 return get_new_cred((struct cred *) cred); 252 struct cred *nonconst_cred = (struct cred *) cred;
253 validate_creds(cred);
254 return get_new_cred(nonconst_cred);
190} 255}
191 256
192/** 257/**
@@ -204,7 +269,7 @@ static inline void put_cred(const struct cred *_cred)
204{ 269{
205 struct cred *cred = (struct cred *) _cred; 270 struct cred *cred = (struct cred *) _cred;
206 271
207 BUG_ON(atomic_read(&(cred)->usage) <= 0); 272 validate_creds(cred);
208 if (atomic_dec_and_test(&(cred)->usage)) 273 if (atomic_dec_and_test(&(cred)->usage))
209 __put_cred(cred); 274 __put_cred(cred);
210} 275}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3972ffb597c5..a79f48373e7e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -715,7 +715,7 @@ struct posix_acl;
715 715
716struct inode { 716struct inode {
717 struct hlist_node i_hash; 717 struct hlist_node i_hash;
718 struct list_head i_list; 718 struct list_head i_list; /* backing dev IO list */
719 struct list_head i_sb_list; 719 struct list_head i_sb_list;
720 struct list_head i_dentry; 720 struct list_head i_dentry;
721 unsigned long i_ino; 721 unsigned long i_ino;
@@ -1336,9 +1336,6 @@ struct super_block {
1336 struct xattr_handler **s_xattr; 1336 struct xattr_handler **s_xattr;
1337 1337
1338 struct list_head s_inodes; /* all inodes */ 1338 struct list_head s_inodes; /* all inodes */
1339 struct list_head s_dirty; /* dirty inodes */
1340 struct list_head s_io; /* parked for writeback */
1341 struct list_head s_more_io; /* parked for more writeback */
1342 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ 1339 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
1343 struct list_head s_files; 1340 struct list_head s_files;
1344 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ 1341 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
@@ -1528,6 +1525,7 @@ struct inode_operations {
1528 void (*put_link) (struct dentry *, struct nameidata *, void *); 1525 void (*put_link) (struct dentry *, struct nameidata *, void *);
1529 void (*truncate) (struct inode *); 1526 void (*truncate) (struct inode *);
1530 int (*permission) (struct inode *, int); 1527 int (*permission) (struct inode *, int);
1528 int (*check_acl)(struct inode *, int);
1531 int (*setattr) (struct dentry *, struct iattr *); 1529 int (*setattr) (struct dentry *, struct iattr *);
1532 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); 1530 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
1533 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 1531 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -1788,6 +1786,7 @@ extern int get_sb_pseudo(struct file_system_type *, char *,
1788 struct vfsmount *mnt); 1786 struct vfsmount *mnt);
1789extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); 1787extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
1790int __put_super_and_need_restart(struct super_block *sb); 1788int __put_super_and_need_restart(struct super_block *sb);
1789void put_super(struct super_block *sb);
1791 1790
1792/* Alas, no aliases. Too much hassle with bringing module.h everywhere */ 1791/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
1793#define fops_get(fops) \ 1792#define fops_get(fops) \
@@ -2083,8 +2082,6 @@ static inline void invalidate_remote_inode(struct inode *inode)
2083extern int invalidate_inode_pages2(struct address_space *mapping); 2082extern int invalidate_inode_pages2(struct address_space *mapping);
2084extern int invalidate_inode_pages2_range(struct address_space *mapping, 2083extern int invalidate_inode_pages2_range(struct address_space *mapping,
2085 pgoff_t start, pgoff_t end); 2084 pgoff_t start, pgoff_t end);
2086extern void generic_sync_sb_inodes(struct super_block *sb,
2087 struct writeback_control *wbc);
2088extern int write_inode_now(struct inode *, int); 2085extern int write_inode_now(struct inode *, int);
2089extern int filemap_fdatawrite(struct address_space *); 2086extern int filemap_fdatawrite(struct address_space *);
2090extern int filemap_flush(struct address_space *); 2087extern int filemap_flush(struct address_space *);
@@ -2199,7 +2196,6 @@ extern int bdev_read_only(struct block_device *);
2199extern int set_blocksize(struct block_device *, int); 2196extern int set_blocksize(struct block_device *, int);
2200extern int sb_set_blocksize(struct super_block *, int); 2197extern int sb_set_blocksize(struct super_block *, int);
2201extern int sb_min_blocksize(struct super_block *, int); 2198extern int sb_min_blocksize(struct super_block *, int);
2202extern int sb_has_dirty_inodes(struct super_block *);
2203 2199
2204extern int generic_file_mmap(struct file *, struct vm_area_struct *); 2200extern int generic_file_mmap(struct file *, struct vm_area_struct *);
2205extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 2201extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
diff --git a/include/linux/key.h b/include/linux/key.h
index e544f466d69a..cd50dfa1d4c2 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -129,7 +129,10 @@ struct key {
129 struct rw_semaphore sem; /* change vs change sem */ 129 struct rw_semaphore sem; /* change vs change sem */
130 struct key_user *user; /* owner of this key */ 130 struct key_user *user; /* owner of this key */
131 void *security; /* security data for this key */ 131 void *security; /* security data for this key */
132 time_t expiry; /* time at which key expires (or 0) */ 132 union {
133 time_t expiry; /* time at which key expires (or 0) */
134 time_t revoked_at; /* time at which key was revoked */
135 };
133 uid_t uid; 136 uid_t uid;
134 gid_t gid; 137 gid_t gid;
135 key_perm_t perm; /* access permissions */ 138 key_perm_t perm; /* access permissions */
@@ -275,6 +278,8 @@ static inline key_serial_t key_serial(struct key *key)
275extern ctl_table key_sysctls[]; 278extern ctl_table key_sysctls[];
276#endif 279#endif
277 280
281extern void key_replace_session_keyring(void);
282
278/* 283/*
279 * the userspace interface 284 * the userspace interface
280 */ 285 */
@@ -297,6 +302,7 @@ extern void key_init(void);
297#define key_fsuid_changed(t) do { } while(0) 302#define key_fsuid_changed(t) do { } while(0)
298#define key_fsgid_changed(t) do { } while(0) 303#define key_fsgid_changed(t) do { } while(0)
299#define key_init() do { } while(0) 304#define key_init() do { } while(0)
305#define key_replace_session_keyring() do { } while(0)
300 306
301#endif /* CONFIG_KEYS */ 307#endif /* CONFIG_KEYS */
302#endif /* __KERNEL__ */ 308#endif /* __KERNEL__ */
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
index c0688eb72093..bd383f1944fb 100644
--- a/include/linux/keyctl.h
+++ b/include/linux/keyctl.h
@@ -52,5 +52,6 @@
52#define KEYCTL_SET_TIMEOUT 15 /* set key timeout */ 52#define KEYCTL_SET_TIMEOUT 15 /* set key timeout */
53#define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */ 53#define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */
54#define KEYCTL_GET_SECURITY 17 /* get key security label */ 54#define KEYCTL_GET_SECURITY 17 /* get key security label */
55#define KEYCTL_SESSION_TO_PARENT 18 /* apply session keyring to parent process */
55 56
56#endif /* _LINUX_KEYCTL_H */ 57#endif /* _LINUX_KEYCTL_H */
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 47b39b7c7e84..dc2fd545db00 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -34,6 +34,8 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n);
34int kmemcheck_show_addr(unsigned long address); 34int kmemcheck_show_addr(unsigned long address);
35int kmemcheck_hide_addr(unsigned long address); 35int kmemcheck_hide_addr(unsigned long address);
36 36
37bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size);
38
37#else 39#else
38#define kmemcheck_enabled 0 40#define kmemcheck_enabled 0
39 41
@@ -99,6 +101,11 @@ static inline void kmemcheck_mark_initialized_pages(struct page *p,
99{ 101{
100} 102}
101 103
104static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
105{
106 return true;
107}
108
102#endif /* CONFIG_KMEMCHECK */ 109#endif /* CONFIG_KMEMCHECK */
103 110
104/* 111/*
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 6a63807f714e..3c7497d46ee9 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -23,18 +23,18 @@
23 23
24#ifdef CONFIG_DEBUG_KMEMLEAK 24#ifdef CONFIG_DEBUG_KMEMLEAK
25 25
26extern void kmemleak_init(void); 26extern void kmemleak_init(void) __ref;
27extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, 27extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
28 gfp_t gfp); 28 gfp_t gfp) __ref;
29extern void kmemleak_free(const void *ptr); 29extern void kmemleak_free(const void *ptr) __ref;
30extern void kmemleak_free_part(const void *ptr, size_t size); 30extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
31extern void kmemleak_padding(const void *ptr, unsigned long offset, 31extern void kmemleak_padding(const void *ptr, unsigned long offset,
32 size_t size); 32 size_t size) __ref;
33extern void kmemleak_not_leak(const void *ptr); 33extern void kmemleak_not_leak(const void *ptr) __ref;
34extern void kmemleak_ignore(const void *ptr); 34extern void kmemleak_ignore(const void *ptr) __ref;
35extern void kmemleak_scan_area(const void *ptr, unsigned long offset, 35extern void kmemleak_scan_area(const void *ptr, unsigned long offset,
36 size_t length, gfp_t gfp); 36 size_t length, gfp_t gfp) __ref;
37extern void kmemleak_no_scan(const void *ptr); 37extern void kmemleak_no_scan(const void *ptr) __ref;
38 38
39static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, 39static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
40 int min_count, unsigned long flags, 40 int min_count, unsigned long flags,
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index e461b2c3d711..190c37854870 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -33,6 +33,7 @@ struct common_audit_data {
33#define LSM_AUDIT_DATA_IPC 4 33#define LSM_AUDIT_DATA_IPC 4
34#define LSM_AUDIT_DATA_TASK 5 34#define LSM_AUDIT_DATA_TASK 5
35#define LSM_AUDIT_DATA_KEY 6 35#define LSM_AUDIT_DATA_KEY 6
36#define LSM_AUDIT_NO_AUDIT 7
36 struct task_struct *tsk; 37 struct task_struct *tsk;
37 union { 38 union {
38 struct { 39 struct {
@@ -66,16 +67,19 @@ struct common_audit_data {
66 } key_struct; 67 } key_struct;
67#endif 68#endif
68 } u; 69 } u;
69 const char *function;
70 /* this union contains LSM specific data */ 70 /* this union contains LSM specific data */
71 union { 71 union {
72#ifdef CONFIG_SECURITY_SMACK
72 /* SMACK data */ 73 /* SMACK data */
73 struct smack_audit_data { 74 struct smack_audit_data {
75 const char *function;
74 char *subject; 76 char *subject;
75 char *object; 77 char *object;
76 char *request; 78 char *request;
77 int result; 79 int result;
78 } smack_audit_data; 80 } smack_audit_data;
81#endif
82#ifdef CONFIG_SECURITY_SELINUX
79 /* SELinux data */ 83 /* SELinux data */
80 struct { 84 struct {
81 u32 ssid; 85 u32 ssid;
@@ -83,10 +87,12 @@ struct common_audit_data {
83 u16 tclass; 87 u16 tclass;
84 u32 requested; 88 u32 requested;
85 u32 audited; 89 u32 audited;
90 u32 denied;
86 struct av_decision *avd; 91 struct av_decision *avd;
87 int result; 92 int result;
88 } selinux_audit_data; 93 } selinux_audit_data;
89 } lsm_priv; 94#endif
95 };
90 /* these callback will be implemented by a specific LSM */ 96 /* these callback will be implemented by a specific LSM */
91 void (*lsm_pre_audit)(struct audit_buffer *, void *); 97 void (*lsm_pre_audit)(struct audit_buffer *, void *);
92 void (*lsm_post_audit)(struct audit_buffer *, void *); 98 void (*lsm_post_audit)(struct audit_buffer *, void *);
@@ -104,7 +110,7 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb,
104/* Initialize an LSM audit data structure. */ 110/* Initialize an LSM audit data structure. */
105#define COMMON_AUDIT_DATA_INIT(_d, _t) \ 111#define COMMON_AUDIT_DATA_INIT(_d, _t) \
106 { memset((_d), 0, sizeof(struct common_audit_data)); \ 112 { memset((_d), 0, sizeof(struct common_audit_data)); \
107 (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; } 113 (_d)->type = LSM_AUDIT_DATA_##_t; }
108 114
109void common_lsm_audit(struct common_audit_data *a); 115void common_lsm_audit(struct common_audit_data *a);
110 116
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0f1ea4a66957..9304027673b0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1292,6 +1292,7 @@ struct task_struct {
1292 struct mutex cred_guard_mutex; /* guard against foreign influences on 1292 struct mutex cred_guard_mutex; /* guard against foreign influences on
1293 * credential calculations 1293 * credential calculations
1294 * (notably. ptrace) */ 1294 * (notably. ptrace) */
1295 struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
1295 1296
1296 char comm[TASK_COMM_LEN]; /* executable name excluding path 1297 char comm[TASK_COMM_LEN]; /* executable name excluding path
1297 - access with [gs]et_task_comm (which lock 1298 - access with [gs]et_task_comm (which lock
@@ -2077,7 +2078,7 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
2077#define for_each_process(p) \ 2078#define for_each_process(p) \
2078 for (p = &init_task ; (p = next_task(p)) != &init_task ; ) 2079 for (p = &init_task ; (p = next_task(p)) != &init_task ; )
2079 2080
2080extern bool is_single_threaded(struct task_struct *); 2081extern bool current_is_single_threaded(void);
2081 2082
2082/* 2083/*
2083 * Careful: do_each_thread/while_each_thread is a double loop so 2084 * Careful: do_each_thread/while_each_thread is a double loop so
diff --git a/include/linux/security.h b/include/linux/security.h
index 1f16eea2017b..d050b66ab9ef 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,7 +53,7 @@ struct audit_krule;
53extern int cap_capable(struct task_struct *tsk, const struct cred *cred, 53extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
54 int cap, int audit); 54 int cap, int audit);
55extern int cap_settime(struct timespec *ts, struct timezone *tz); 55extern int cap_settime(struct timespec *ts, struct timezone *tz);
56extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); 56extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
57extern int cap_ptrace_traceme(struct task_struct *parent); 57extern int cap_ptrace_traceme(struct task_struct *parent);
58extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); 58extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
59extern int cap_capset(struct cred *new, const struct cred *old, 59extern int cap_capset(struct cred *new, const struct cred *old,
@@ -653,6 +653,11 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
653 * manual page for definitions of the @clone_flags. 653 * manual page for definitions of the @clone_flags.
654 * @clone_flags contains the flags indicating what should be shared. 654 * @clone_flags contains the flags indicating what should be shared.
655 * Return 0 if permission is granted. 655 * Return 0 if permission is granted.
656 * @cred_alloc_blank:
657 * @cred points to the credentials.
658 * @gfp indicates the atomicity of any memory allocations.
659 * Only allocate sufficient memory and attach to @cred such that
660 * cred_transfer() will not get ENOMEM.
656 * @cred_free: 661 * @cred_free:
657 * @cred points to the credentials. 662 * @cred points to the credentials.
658 * Deallocate and clear the cred->security field in a set of credentials. 663 * Deallocate and clear the cred->security field in a set of credentials.
@@ -665,6 +670,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
665 * @new points to the new credentials. 670 * @new points to the new credentials.
666 * @old points to the original credentials. 671 * @old points to the original credentials.
667 * Install a new set of credentials. 672 * Install a new set of credentials.
673 * @cred_transfer:
674 * @new points to the new credentials.
675 * @old points to the original credentials.
676 * Transfer data from original creds to new creds
668 * @kernel_act_as: 677 * @kernel_act_as:
669 * Set the credentials for a kernel service to act as (subjective context). 678 * Set the credentials for a kernel service to act as (subjective context).
670 * @new points to the credentials to be modified. 679 * @new points to the credentials to be modified.
@@ -678,6 +687,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
678 * @inode points to the inode to use as a reference. 687 * @inode points to the inode to use as a reference.
679 * The current task must be the one that nominated @inode. 688 * The current task must be the one that nominated @inode.
680 * Return 0 if successful. 689 * Return 0 if successful.
690 * @kernel_module_request:
691 * Ability to trigger the kernel to automatically upcall to userspace for
692 * userspace to load a kernel module with the given name.
693 * Return 0 if successful.
681 * @task_setuid: 694 * @task_setuid:
682 * Check permission before setting one or more of the user identity 695 * Check permission before setting one or more of the user identity
683 * attributes of the current process. The @flags parameter indicates 696 * attributes of the current process. The @flags parameter indicates
@@ -994,6 +1007,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
994 * Sets the connection's peersid to the secmark on skb. 1007 * Sets the connection's peersid to the secmark on skb.
995 * @req_classify_flow: 1008 * @req_classify_flow:
996 * Sets the flow's sid to the openreq sid. 1009 * Sets the flow's sid to the openreq sid.
1010 * @tun_dev_create:
1011 * Check permissions prior to creating a new TUN device.
1012 * @tun_dev_post_create:
1013 * This hook allows a module to update or allocate a per-socket security
1014 * structure.
1015 * @sk contains the newly created sock structure.
1016 * @tun_dev_attach:
1017 * Check permissions prior to attaching to a persistent TUN device. This
1018 * hook can also be used by the module to update any security state
1019 * associated with the TUN device's sock structure.
1020 * @sk contains the existing sock structure.
997 * 1021 *
998 * Security hooks for XFRM operations. 1022 * Security hooks for XFRM operations.
999 * 1023 *
@@ -1088,6 +1112,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
1088 * Return the length of the string (including terminating NUL) or -ve if 1112 * Return the length of the string (including terminating NUL) or -ve if
1089 * an error. 1113 * an error.
1090 * May also return 0 (and a NULL buffer pointer) if there is no label. 1114 * May also return 0 (and a NULL buffer pointer) if there is no label.
1115 * @key_session_to_parent:
1116 * Forcibly assign the session keyring from a process to its parent
1117 * process.
1118 * @cred: Pointer to process's credentials
1119 * @parent_cred: Pointer to parent process's credentials
1120 * @keyring: Proposed new session keyring
1121 * Return 0 if permission is granted, -ve error otherwise.
1091 * 1122 *
1092 * Security hooks affecting all System V IPC operations. 1123 * Security hooks affecting all System V IPC operations.
1093 * 1124 *
@@ -1229,7 +1260,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
1229 * @alter contains the flag indicating whether changes are to be made. 1260 * @alter contains the flag indicating whether changes are to be made.
1230 * Return 0 if permission is granted. 1261 * Return 0 if permission is granted.
1231 * 1262 *
1232 * @ptrace_may_access: 1263 * @ptrace_access_check:
1233 * Check permission before allowing the current process to trace the 1264 * Check permission before allowing the current process to trace the
1234 * @child process. 1265 * @child process.
1235 * Security modules may also want to perform a process tracing check 1266 * Security modules may also want to perform a process tracing check
@@ -1244,7 +1275,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
1244 * Check that the @parent process has sufficient permission to trace the 1275 * Check that the @parent process has sufficient permission to trace the
1245 * current process before allowing the current process to present itself 1276 * current process before allowing the current process to present itself
1246 * to the @parent process for tracing. 1277 * to the @parent process for tracing.
1247 * The parent process will still have to undergo the ptrace_may_access 1278 * The parent process will still have to undergo the ptrace_access_check
1248 * checks before it is allowed to trace this one. 1279 * checks before it is allowed to trace this one.
1249 * @parent contains the task_struct structure for debugger process. 1280 * @parent contains the task_struct structure for debugger process.
1250 * Return 0 if permission is granted. 1281 * Return 0 if permission is granted.
@@ -1351,12 +1382,47 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
1351 * audit_rule_init. 1382 * audit_rule_init.
1352 * @rule contains the allocated rule 1383 * @rule contains the allocated rule
1353 * 1384 *
1385 * @inode_notifysecctx:
1386 * Notify the security module of what the security context of an inode
1387 * should be. Initializes the incore security context managed by the
1388 * security module for this inode. Example usage: NFS client invokes
1389 * this hook to initialize the security context in its incore inode to the
1390 * value provided by the server for the file when the server returned the
1391 * file's attributes to the client.
1392 *
1393 * Must be called with inode->i_mutex locked.
1394 *
1395 * @inode we wish to set the security context of.
1396 * @ctx contains the string which we wish to set in the inode.
1397 * @ctxlen contains the length of @ctx.
1398 *
1399 * @inode_setsecctx:
1400 * Change the security context of an inode. Updates the
1401 * incore security context managed by the security module and invokes the
1402 * fs code as needed (via __vfs_setxattr_noperm) to update any backing
1403 * xattrs that represent the context. Example usage: NFS server invokes
1404 * this hook to change the security context in its incore inode and on the
1405 * backing filesystem to a value provided by the client on a SETATTR
1406 * operation.
1407 *
1408 * Must be called with inode->i_mutex locked.
1409 *
1410 * @dentry contains the inode we wish to set the security context of.
1411 * @ctx contains the string which we wish to set in the inode.
1412 * @ctxlen contains the length of @ctx.
1413 *
1414 * @inode_getsecctx:
1415 * Returns a string containing all relevant security context information
1416 *
1417 * @inode we wish to get the security context of.
1418 * @ctx is a pointer in which to place the allocated security context.
1419 * @ctxlen points to the place to put the length of @ctx.
1354 * This is the main security structure. 1420 * This is the main security structure.
1355 */ 1421 */
1356struct security_operations { 1422struct security_operations {
1357 char name[SECURITY_NAME_MAX + 1]; 1423 char name[SECURITY_NAME_MAX + 1];
1358 1424
1359 int (*ptrace_may_access) (struct task_struct *child, unsigned int mode); 1425 int (*ptrace_access_check) (struct task_struct *child, unsigned int mode);
1360 int (*ptrace_traceme) (struct task_struct *parent); 1426 int (*ptrace_traceme) (struct task_struct *parent);
1361 int (*capget) (struct task_struct *target, 1427 int (*capget) (struct task_struct *target,
1362 kernel_cap_t *effective, 1428 kernel_cap_t *effective,
@@ -1483,12 +1549,15 @@ struct security_operations {
1483 int (*dentry_open) (struct file *file, const struct cred *cred); 1549 int (*dentry_open) (struct file *file, const struct cred *cred);
1484 1550
1485 int (*task_create) (unsigned long clone_flags); 1551 int (*task_create) (unsigned long clone_flags);
1552 int (*cred_alloc_blank) (struct cred *cred, gfp_t gfp);
1486 void (*cred_free) (struct cred *cred); 1553 void (*cred_free) (struct cred *cred);
1487 int (*cred_prepare)(struct cred *new, const struct cred *old, 1554 int (*cred_prepare)(struct cred *new, const struct cred *old,
1488 gfp_t gfp); 1555 gfp_t gfp);
1489 void (*cred_commit)(struct cred *new, const struct cred *old); 1556 void (*cred_commit)(struct cred *new, const struct cred *old);
1557 void (*cred_transfer)(struct cred *new, const struct cred *old);
1490 int (*kernel_act_as)(struct cred *new, u32 secid); 1558 int (*kernel_act_as)(struct cred *new, u32 secid);
1491 int (*kernel_create_files_as)(struct cred *new, struct inode *inode); 1559 int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
1560 int (*kernel_module_request)(void);
1492 int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); 1561 int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags);
1493 int (*task_fix_setuid) (struct cred *new, const struct cred *old, 1562 int (*task_fix_setuid) (struct cred *new, const struct cred *old,
1494 int flags); 1563 int flags);
@@ -1556,6 +1625,10 @@ struct security_operations {
1556 int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid); 1625 int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid);
1557 void (*release_secctx) (char *secdata, u32 seclen); 1626 void (*release_secctx) (char *secdata, u32 seclen);
1558 1627
1628 int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen);
1629 int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen);
1630 int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen);
1631
1559#ifdef CONFIG_SECURITY_NETWORK 1632#ifdef CONFIG_SECURITY_NETWORK
1560 int (*unix_stream_connect) (struct socket *sock, 1633 int (*unix_stream_connect) (struct socket *sock,
1561 struct socket *other, struct sock *newsk); 1634 struct socket *other, struct sock *newsk);
@@ -1592,6 +1665,9 @@ struct security_operations {
1592 void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); 1665 void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req);
1593 void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); 1666 void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb);
1594 void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); 1667 void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
1668 int (*tun_dev_create)(void);
1669 void (*tun_dev_post_create)(struct sock *sk);
1670 int (*tun_dev_attach)(struct sock *sk);
1595#endif /* CONFIG_SECURITY_NETWORK */ 1671#endif /* CONFIG_SECURITY_NETWORK */
1596 1672
1597#ifdef CONFIG_SECURITY_NETWORK_XFRM 1673#ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -1620,6 +1696,9 @@ struct security_operations {
1620 const struct cred *cred, 1696 const struct cred *cred,
1621 key_perm_t perm); 1697 key_perm_t perm);
1622 int (*key_getsecurity)(struct key *key, char **_buffer); 1698 int (*key_getsecurity)(struct key *key, char **_buffer);
1699 int (*key_session_to_parent)(const struct cred *cred,
1700 const struct cred *parent_cred,
1701 struct key *key);
1623#endif /* CONFIG_KEYS */ 1702#endif /* CONFIG_KEYS */
1624 1703
1625#ifdef CONFIG_AUDIT 1704#ifdef CONFIG_AUDIT
@@ -1637,7 +1716,7 @@ extern int security_module_enable(struct security_operations *ops);
1637extern int register_security(struct security_operations *ops); 1716extern int register_security(struct security_operations *ops);
1638 1717
1639/* Security operations */ 1718/* Security operations */
1640int security_ptrace_may_access(struct task_struct *child, unsigned int mode); 1719int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
1641int security_ptrace_traceme(struct task_struct *parent); 1720int security_ptrace_traceme(struct task_struct *parent);
1642int security_capget(struct task_struct *target, 1721int security_capget(struct task_struct *target,
1643 kernel_cap_t *effective, 1722 kernel_cap_t *effective,
@@ -1736,11 +1815,14 @@ int security_file_send_sigiotask(struct task_struct *tsk,
1736int security_file_receive(struct file *file); 1815int security_file_receive(struct file *file);
1737int security_dentry_open(struct file *file, const struct cred *cred); 1816int security_dentry_open(struct file *file, const struct cred *cred);
1738int security_task_create(unsigned long clone_flags); 1817int security_task_create(unsigned long clone_flags);
1818int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
1739void security_cred_free(struct cred *cred); 1819void security_cred_free(struct cred *cred);
1740int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); 1820int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
1741void security_commit_creds(struct cred *new, const struct cred *old); 1821void security_commit_creds(struct cred *new, const struct cred *old);
1822void security_transfer_creds(struct cred *new, const struct cred *old);
1742int security_kernel_act_as(struct cred *new, u32 secid); 1823int security_kernel_act_as(struct cred *new, u32 secid);
1743int security_kernel_create_files_as(struct cred *new, struct inode *inode); 1824int security_kernel_create_files_as(struct cred *new, struct inode *inode);
1825int security_kernel_module_request(void);
1744int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); 1826int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags);
1745int security_task_fix_setuid(struct cred *new, const struct cred *old, 1827int security_task_fix_setuid(struct cred *new, const struct cred *old,
1746 int flags); 1828 int flags);
@@ -1796,6 +1878,9 @@ int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
1796int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); 1878int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
1797void security_release_secctx(char *secdata, u32 seclen); 1879void security_release_secctx(char *secdata, u32 seclen);
1798 1880
1881int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
1882int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
1883int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
1799#else /* CONFIG_SECURITY */ 1884#else /* CONFIG_SECURITY */
1800struct security_mnt_opts { 1885struct security_mnt_opts {
1801}; 1886};
@@ -1818,10 +1903,10 @@ static inline int security_init(void)
1818 return 0; 1903 return 0;
1819} 1904}
1820 1905
1821static inline int security_ptrace_may_access(struct task_struct *child, 1906static inline int security_ptrace_access_check(struct task_struct *child,
1822 unsigned int mode) 1907 unsigned int mode)
1823{ 1908{
1824 return cap_ptrace_may_access(child, mode); 1909 return cap_ptrace_access_check(child, mode);
1825} 1910}
1826 1911
1827static inline int security_ptrace_traceme(struct task_struct *parent) 1912static inline int security_ptrace_traceme(struct task_struct *parent)
@@ -2266,6 +2351,11 @@ static inline int security_task_create(unsigned long clone_flags)
2266 return 0; 2351 return 0;
2267} 2352}
2268 2353
2354static inline int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
2355{
2356 return 0;
2357}
2358
2269static inline void security_cred_free(struct cred *cred) 2359static inline void security_cred_free(struct cred *cred)
2270{ } 2360{ }
2271 2361
@@ -2281,6 +2371,11 @@ static inline void security_commit_creds(struct cred *new,
2281{ 2371{
2282} 2372}
2283 2373
2374static inline void security_transfer_creds(struct cred *new,
2375 const struct cred *old)
2376{
2377}
2378
2284static inline int security_kernel_act_as(struct cred *cred, u32 secid) 2379static inline int security_kernel_act_as(struct cred *cred, u32 secid)
2285{ 2380{
2286 return 0; 2381 return 0;
@@ -2292,6 +2387,11 @@ static inline int security_kernel_create_files_as(struct cred *cred,
2292 return 0; 2387 return 0;
2293} 2388}
2294 2389
2390static inline int security_kernel_module_request(void)
2391{
2392 return 0;
2393}
2394
2295static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, 2395static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2,
2296 int flags) 2396 int flags)
2297{ 2397{
@@ -2537,6 +2637,19 @@ static inline int security_secctx_to_secid(const char *secdata,
2537static inline void security_release_secctx(char *secdata, u32 seclen) 2637static inline void security_release_secctx(char *secdata, u32 seclen)
2538{ 2638{
2539} 2639}
2640
2641static inline int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
2642{
2643 return -EOPNOTSUPP;
2644}
2645static inline int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
2646{
2647 return -EOPNOTSUPP;
2648}
2649static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
2650{
2651 return -EOPNOTSUPP;
2652}
2540#endif /* CONFIG_SECURITY */ 2653#endif /* CONFIG_SECURITY */
2541 2654
2542#ifdef CONFIG_SECURITY_NETWORK 2655#ifdef CONFIG_SECURITY_NETWORK
@@ -2575,6 +2688,9 @@ void security_inet_csk_clone(struct sock *newsk,
2575 const struct request_sock *req); 2688 const struct request_sock *req);
2576void security_inet_conn_established(struct sock *sk, 2689void security_inet_conn_established(struct sock *sk,
2577 struct sk_buff *skb); 2690 struct sk_buff *skb);
2691int security_tun_dev_create(void);
2692void security_tun_dev_post_create(struct sock *sk);
2693int security_tun_dev_attach(struct sock *sk);
2578 2694
2579#else /* CONFIG_SECURITY_NETWORK */ 2695#else /* CONFIG_SECURITY_NETWORK */
2580static inline int security_unix_stream_connect(struct socket *sock, 2696static inline int security_unix_stream_connect(struct socket *sock,
@@ -2725,6 +2841,20 @@ static inline void security_inet_conn_established(struct sock *sk,
2725 struct sk_buff *skb) 2841 struct sk_buff *skb)
2726{ 2842{
2727} 2843}
2844
2845static inline int security_tun_dev_create(void)
2846{
2847 return 0;
2848}
2849
2850static inline void security_tun_dev_post_create(struct sock *sk)
2851{
2852}
2853
2854static inline int security_tun_dev_attach(struct sock *sk)
2855{
2856 return 0;
2857}
2728#endif /* CONFIG_SECURITY_NETWORK */ 2858#endif /* CONFIG_SECURITY_NETWORK */
2729 2859
2730#ifdef CONFIG_SECURITY_NETWORK_XFRM 2860#ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -2881,6 +3011,9 @@ void security_key_free(struct key *key);
2881int security_key_permission(key_ref_t key_ref, 3011int security_key_permission(key_ref_t key_ref,
2882 const struct cred *cred, key_perm_t perm); 3012 const struct cred *cred, key_perm_t perm);
2883int security_key_getsecurity(struct key *key, char **_buffer); 3013int security_key_getsecurity(struct key *key, char **_buffer);
3014int security_key_session_to_parent(const struct cred *cred,
3015 const struct cred *parent_cred,
3016 struct key *key);
2884 3017
2885#else 3018#else
2886 3019
@@ -2908,6 +3041,13 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer)
2908 return 0; 3041 return 0;
2909} 3042}
2910 3043
3044static inline int security_key_session_to_parent(const struct cred *cred,
3045 const struct cred *parent_cred,
3046 struct key *key)
3047{
3048 return 0;
3049}
3050
2911#endif 3051#endif
2912#endif /* CONFIG_KEYS */ 3052#endif /* CONFIG_KEYS */
2913 3053
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index abff6c9b413c..6d3f2f449ead 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -39,7 +39,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
39} 39}
40 40
41#ifdef CONFIG_TMPFS_POSIX_ACL 41#ifdef CONFIG_TMPFS_POSIX_ACL
42int shmem_permission(struct inode *, int); 42int shmem_check_acl(struct inode *, int);
43int shmem_acl_init(struct inode *, struct inode *); 43int shmem_acl_init(struct inode *, struct inode *);
44 44
45extern struct xattr_handler shmem_xattr_acl_access_handler; 45extern struct xattr_handler shmem_xattr_acl_access_handler;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 3224820c8514..78b1e4684cc9 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -14,17 +14,6 @@ extern struct list_head inode_in_use;
14extern struct list_head inode_unused; 14extern struct list_head inode_unused;
15 15
16/* 16/*
17 * Yes, writeback.h requires sched.h
18 * No, sched.h is not included from here.
19 */
20static inline int task_is_pdflush(struct task_struct *task)
21{
22 return task->flags & PF_FLUSHER;
23}
24
25#define current_is_pdflush() task_is_pdflush(current)
26
27/*
28 * fs/fs-writeback.c 17 * fs/fs-writeback.c
29 */ 18 */
30enum writeback_sync_modes { 19enum writeback_sync_modes {
@@ -40,6 +29,8 @@ enum writeback_sync_modes {
40struct writeback_control { 29struct writeback_control {
41 struct backing_dev_info *bdi; /* If !NULL, only write back this 30 struct backing_dev_info *bdi; /* If !NULL, only write back this
42 queue */ 31 queue */
32 struct super_block *sb; /* if !NULL, only write inodes from
33 this super_block */
43 enum writeback_sync_modes sync_mode; 34 enum writeback_sync_modes sync_mode;
44 unsigned long *older_than_this; /* If !NULL, only write back inodes 35 unsigned long *older_than_this; /* If !NULL, only write back inodes
45 older than this */ 36 older than this */
@@ -76,9 +67,13 @@ struct writeback_control {
76/* 67/*
77 * fs/fs-writeback.c 68 * fs/fs-writeback.c
78 */ 69 */
79void writeback_inodes(struct writeback_control *wbc); 70struct bdi_writeback;
80int inode_wait(void *); 71int inode_wait(void *);
81void sync_inodes_sb(struct super_block *, int wait); 72long writeback_inodes_sb(struct super_block *);
73long sync_inodes_sb(struct super_block *);
74void writeback_inodes_wbc(struct writeback_control *wbc);
75long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
76void wakeup_flusher_threads(long nr_pages);
82 77
83/* writeback.h requires fs.h; it, too, is not included from here. */ 78/* writeback.h requires fs.h; it, too, is not included from here. */
84static inline void wait_on_inode(struct inode *inode) 79static inline void wait_on_inode(struct inode *inode)
@@ -98,7 +93,6 @@ static inline void inode_sync_wait(struct inode *inode)
98/* 93/*
99 * mm/page-writeback.c 94 * mm/page-writeback.c
100 */ 95 */
101int wakeup_pdflush(long nr_pages);
102void laptop_io_completion(void); 96void laptop_io_completion(void);
103void laptop_sync_completion(void); 97void laptop_sync_completion(void);
104void throttle_vm_writeout(gfp_t gfp_mask); 98void throttle_vm_writeout(gfp_t gfp_mask);
@@ -150,7 +144,6 @@ balance_dirty_pages_ratelimited(struct address_space *mapping)
150typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, 144typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
151 void *data); 145 void *data);
152 146
153int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
154int generic_writepages(struct address_space *mapping, 147int generic_writepages(struct address_space *mapping,
155 struct writeback_control *wbc); 148 struct writeback_control *wbc);
156int write_cache_pages(struct address_space *mapping, 149int write_cache_pages(struct address_space *mapping,
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d131e352cfe1..5c84af8c5f6f 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -49,6 +49,7 @@ struct xattr_handler {
49ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); 49ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
50ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); 50ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t);
51ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); 51ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
52int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int);
52int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); 53int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int);
53int vfs_removexattr(struct dentry *, const char *); 54int vfs_removexattr(struct dentry *, const char *);
54 55
diff --git a/kernel/acct.c b/kernel/acct.c
index 9f3391090b3e..9a4715a2f6bf 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -491,13 +491,17 @@ static void do_acct_process(struct bsd_acct_struct *acct,
491 u64 run_time; 491 u64 run_time;
492 struct timespec uptime; 492 struct timespec uptime;
493 struct tty_struct *tty; 493 struct tty_struct *tty;
494 const struct cred *orig_cred;
495
496 /* Perform file operations on behalf of whoever enabled accounting */
497 orig_cred = override_creds(file->f_cred);
494 498
495 /* 499 /*
496 * First check to see if there is enough free_space to continue 500 * First check to see if there is enough free_space to continue
497 * the process accounting system. 501 * the process accounting system.
498 */ 502 */
499 if (!check_free_space(acct, file)) 503 if (!check_free_space(acct, file))
500 return; 504 goto out;
501 505
502 /* 506 /*
503 * Fill the accounting struct with the needed info as recorded 507 * Fill the accounting struct with the needed info as recorded
@@ -578,6 +582,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
578 sizeof(acct_t), &file->f_pos); 582 sizeof(acct_t), &file->f_pos);
579 current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; 583 current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
580 set_fs(fs); 584 set_fs(fs);
585out:
586 revert_creds(orig_cred);
581} 587}
582 588
583/** 589/**
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b6eadfe30e7b..c7ece8f027f2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -600,6 +600,7 @@ static struct inode_operations cgroup_dir_inode_operations;
600static struct file_operations proc_cgroupstats_operations; 600static struct file_operations proc_cgroupstats_operations;
601 601
602static struct backing_dev_info cgroup_backing_dev_info = { 602static struct backing_dev_info cgroup_backing_dev_info = {
603 .name = "cgroup",
603 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 604 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
604}; 605};
605 606
diff --git a/kernel/cred.c b/kernel/cred.c
index 1bb4d7e5d616..006fcab009d5 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -18,6 +18,18 @@
18#include <linux/cn_proc.h> 18#include <linux/cn_proc.h>
19#include "cred-internals.h" 19#include "cred-internals.h"
20 20
21#if 0
22#define kdebug(FMT, ...) \
23 printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
24#else
25static inline __attribute__((format(printf, 1, 2)))
26void no_printk(const char *fmt, ...)
27{
28}
29#define kdebug(FMT, ...) \
30 no_printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
31#endif
32
21static struct kmem_cache *cred_jar; 33static struct kmem_cache *cred_jar;
22 34
23/* 35/*
@@ -36,6 +48,10 @@ static struct thread_group_cred init_tgcred = {
36 */ 48 */
37struct cred init_cred = { 49struct cred init_cred = {
38 .usage = ATOMIC_INIT(4), 50 .usage = ATOMIC_INIT(4),
51#ifdef CONFIG_DEBUG_CREDENTIALS
52 .subscribers = ATOMIC_INIT(2),
53 .magic = CRED_MAGIC,
54#endif
39 .securebits = SECUREBITS_DEFAULT, 55 .securebits = SECUREBITS_DEFAULT,
40 .cap_inheritable = CAP_INIT_INH_SET, 56 .cap_inheritable = CAP_INIT_INH_SET,
41 .cap_permitted = CAP_FULL_SET, 57 .cap_permitted = CAP_FULL_SET,
@@ -48,6 +64,31 @@ struct cred init_cred = {
48#endif 64#endif
49}; 65};
50 66
67static inline void set_cred_subscribers(struct cred *cred, int n)
68{
69#ifdef CONFIG_DEBUG_CREDENTIALS
70 atomic_set(&cred->subscribers, n);
71#endif
72}
73
74static inline int read_cred_subscribers(const struct cred *cred)
75{
76#ifdef CONFIG_DEBUG_CREDENTIALS
77 return atomic_read(&cred->subscribers);
78#else
79 return 0;
80#endif
81}
82
83static inline void alter_cred_subscribers(const struct cred *_cred, int n)
84{
85#ifdef CONFIG_DEBUG_CREDENTIALS
86 struct cred *cred = (struct cred *) _cred;
87
88 atomic_add(n, &cred->subscribers);
89#endif
90}
91
51/* 92/*
52 * Dispose of the shared task group credentials 93 * Dispose of the shared task group credentials
53 */ 94 */
@@ -85,9 +126,22 @@ static void put_cred_rcu(struct rcu_head *rcu)
85{ 126{
86 struct cred *cred = container_of(rcu, struct cred, rcu); 127 struct cred *cred = container_of(rcu, struct cred, rcu);
87 128
129 kdebug("put_cred_rcu(%p)", cred);
130
131#ifdef CONFIG_DEBUG_CREDENTIALS
132 if (cred->magic != CRED_MAGIC_DEAD ||
133 atomic_read(&cred->usage) != 0 ||
134 read_cred_subscribers(cred) != 0)
135 panic("CRED: put_cred_rcu() sees %p with"
136 " mag %x, put %p, usage %d, subscr %d\n",
137 cred, cred->magic, cred->put_addr,
138 atomic_read(&cred->usage),
139 read_cred_subscribers(cred));
140#else
88 if (atomic_read(&cred->usage) != 0) 141 if (atomic_read(&cred->usage) != 0)
89 panic("CRED: put_cred_rcu() sees %p with usage %d\n", 142 panic("CRED: put_cred_rcu() sees %p with usage %d\n",
90 cred, atomic_read(&cred->usage)); 143 cred, atomic_read(&cred->usage));
144#endif
91 145
92 security_cred_free(cred); 146 security_cred_free(cred);
93 key_put(cred->thread_keyring); 147 key_put(cred->thread_keyring);
@@ -106,12 +160,90 @@ static void put_cred_rcu(struct rcu_head *rcu)
106 */ 160 */
107void __put_cred(struct cred *cred) 161void __put_cred(struct cred *cred)
108{ 162{
163 kdebug("__put_cred(%p{%d,%d})", cred,
164 atomic_read(&cred->usage),
165 read_cred_subscribers(cred));
166
109 BUG_ON(atomic_read(&cred->usage) != 0); 167 BUG_ON(atomic_read(&cred->usage) != 0);
168#ifdef CONFIG_DEBUG_CREDENTIALS
169 BUG_ON(read_cred_subscribers(cred) != 0);
170 cred->magic = CRED_MAGIC_DEAD;
171 cred->put_addr = __builtin_return_address(0);
172#endif
173 BUG_ON(cred == current->cred);
174 BUG_ON(cred == current->real_cred);
110 175
111 call_rcu(&cred->rcu, put_cred_rcu); 176 call_rcu(&cred->rcu, put_cred_rcu);
112} 177}
113EXPORT_SYMBOL(__put_cred); 178EXPORT_SYMBOL(__put_cred);
114 179
180/*
181 * Clean up a task's credentials when it exits
182 */
183void exit_creds(struct task_struct *tsk)
184{
185 struct cred *cred;
186
187 kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred,
188 atomic_read(&tsk->cred->usage),
189 read_cred_subscribers(tsk->cred));
190
191 cred = (struct cred *) tsk->real_cred;
192 tsk->real_cred = NULL;
193 validate_creds(cred);
194 alter_cred_subscribers(cred, -1);
195 put_cred(cred);
196
197 cred = (struct cred *) tsk->cred;
198 tsk->cred = NULL;
199 validate_creds(cred);
200 alter_cred_subscribers(cred, -1);
201 put_cred(cred);
202
203 cred = (struct cred *) tsk->replacement_session_keyring;
204 if (cred) {
205 tsk->replacement_session_keyring = NULL;
206 validate_creds(cred);
207 put_cred(cred);
208 }
209}
210
211/*
212 * Allocate blank credentials, such that the credentials can be filled in at a
213 * later date without risk of ENOMEM.
214 */
215struct cred *cred_alloc_blank(void)
216{
217 struct cred *new;
218
219 new = kmem_cache_zalloc(cred_jar, GFP_KERNEL);
220 if (!new)
221 return NULL;
222
223#ifdef CONFIG_KEYS
224 new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
225 if (!new->tgcred) {
226 kfree(new);
227 return NULL;
228 }
229 atomic_set(&new->tgcred->usage, 1);
230#endif
231
232 atomic_set(&new->usage, 1);
233
234 if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
235 goto error;
236
237#ifdef CONFIG_DEBUG_CREDENTIALS
238 new->magic = CRED_MAGIC;
239#endif
240 return new;
241
242error:
243 abort_creds(new);
244 return NULL;
245}
246
115/** 247/**
116 * prepare_creds - Prepare a new set of credentials for modification 248 * prepare_creds - Prepare a new set of credentials for modification
117 * 249 *
@@ -132,16 +264,19 @@ struct cred *prepare_creds(void)
132 const struct cred *old; 264 const struct cred *old;
133 struct cred *new; 265 struct cred *new;
134 266
135 BUG_ON(atomic_read(&task->real_cred->usage) < 1); 267 validate_process_creds();
136 268
137 new = kmem_cache_alloc(cred_jar, GFP_KERNEL); 269 new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
138 if (!new) 270 if (!new)
139 return NULL; 271 return NULL;
140 272
273 kdebug("prepare_creds() alloc %p", new);
274
141 old = task->cred; 275 old = task->cred;
142 memcpy(new, old, sizeof(struct cred)); 276 memcpy(new, old, sizeof(struct cred));
143 277
144 atomic_set(&new->usage, 1); 278 atomic_set(&new->usage, 1);
279 set_cred_subscribers(new, 0);
145 get_group_info(new->group_info); 280 get_group_info(new->group_info);
146 get_uid(new->user); 281 get_uid(new->user);
147 282
@@ -157,6 +292,7 @@ struct cred *prepare_creds(void)
157 292
158 if (security_prepare_creds(new, old, GFP_KERNEL) < 0) 293 if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
159 goto error; 294 goto error;
295 validate_creds(new);
160 return new; 296 return new;
161 297
162error: 298error:
@@ -229,9 +365,12 @@ struct cred *prepare_usermodehelper_creds(void)
229 if (!new) 365 if (!new)
230 return NULL; 366 return NULL;
231 367
368 kdebug("prepare_usermodehelper_creds() alloc %p", new);
369
232 memcpy(new, &init_cred, sizeof(struct cred)); 370 memcpy(new, &init_cred, sizeof(struct cred));
233 371
234 atomic_set(&new->usage, 1); 372 atomic_set(&new->usage, 1);
373 set_cred_subscribers(new, 0);
235 get_group_info(new->group_info); 374 get_group_info(new->group_info);
236 get_uid(new->user); 375 get_uid(new->user);
237 376
@@ -250,6 +389,7 @@ struct cred *prepare_usermodehelper_creds(void)
250#endif 389#endif
251 if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0) 390 if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0)
252 goto error; 391 goto error;
392 validate_creds(new);
253 393
254 BUG_ON(atomic_read(&new->usage) != 1); 394 BUG_ON(atomic_read(&new->usage) != 1);
255 return new; 395 return new;
@@ -286,6 +426,10 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
286 ) { 426 ) {
287 p->real_cred = get_cred(p->cred); 427 p->real_cred = get_cred(p->cred);
288 get_cred(p->cred); 428 get_cred(p->cred);
429 alter_cred_subscribers(p->cred, 2);
430 kdebug("share_creds(%p{%d,%d})",
431 p->cred, atomic_read(&p->cred->usage),
432 read_cred_subscribers(p->cred));
289 atomic_inc(&p->cred->user->processes); 433 atomic_inc(&p->cred->user->processes);
290 return 0; 434 return 0;
291 } 435 }
@@ -331,6 +475,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
331 475
332 atomic_inc(&new->user->processes); 476 atomic_inc(&new->user->processes);
333 p->cred = p->real_cred = get_cred(new); 477 p->cred = p->real_cred = get_cred(new);
478 alter_cred_subscribers(new, 2);
479 validate_creds(new);
334 return 0; 480 return 0;
335 481
336error_put: 482error_put:
@@ -355,13 +501,20 @@ error_put:
355int commit_creds(struct cred *new) 501int commit_creds(struct cred *new)
356{ 502{
357 struct task_struct *task = current; 503 struct task_struct *task = current;
358 const struct cred *old; 504 const struct cred *old = task->real_cred;
359 505
360 BUG_ON(task->cred != task->real_cred); 506 kdebug("commit_creds(%p{%d,%d})", new,
361 BUG_ON(atomic_read(&task->real_cred->usage) < 2); 507 atomic_read(&new->usage),
508 read_cred_subscribers(new));
509
510 BUG_ON(task->cred != old);
511#ifdef CONFIG_DEBUG_CREDENTIALS
512 BUG_ON(read_cred_subscribers(old) < 2);
513 validate_creds(old);
514 validate_creds(new);
515#endif
362 BUG_ON(atomic_read(&new->usage) < 1); 516 BUG_ON(atomic_read(&new->usage) < 1);
363 517
364 old = task->real_cred;
365 security_commit_creds(new, old); 518 security_commit_creds(new, old);
366 519
367 get_cred(new); /* we will require a ref for the subj creds too */ 520 get_cred(new); /* we will require a ref for the subj creds too */
@@ -390,12 +543,14 @@ int commit_creds(struct cred *new)
390 * cheaply with the new uid cache, so if it matters 543 * cheaply with the new uid cache, so if it matters
391 * we should be checking for it. -DaveM 544 * we should be checking for it. -DaveM
392 */ 545 */
546 alter_cred_subscribers(new, 2);
393 if (new->user != old->user) 547 if (new->user != old->user)
394 atomic_inc(&new->user->processes); 548 atomic_inc(&new->user->processes);
395 rcu_assign_pointer(task->real_cred, new); 549 rcu_assign_pointer(task->real_cred, new);
396 rcu_assign_pointer(task->cred, new); 550 rcu_assign_pointer(task->cred, new);
397 if (new->user != old->user) 551 if (new->user != old->user)
398 atomic_dec(&old->user->processes); 552 atomic_dec(&old->user->processes);
553 alter_cred_subscribers(old, -2);
399 554
400 sched_switch_user(task); 555 sched_switch_user(task);
401 556
@@ -428,6 +583,13 @@ EXPORT_SYMBOL(commit_creds);
428 */ 583 */
429void abort_creds(struct cred *new) 584void abort_creds(struct cred *new)
430{ 585{
586 kdebug("abort_creds(%p{%d,%d})", new,
587 atomic_read(&new->usage),
588 read_cred_subscribers(new));
589
590#ifdef CONFIG_DEBUG_CREDENTIALS
591 BUG_ON(read_cred_subscribers(new) != 0);
592#endif
431 BUG_ON(atomic_read(&new->usage) < 1); 593 BUG_ON(atomic_read(&new->usage) < 1);
432 put_cred(new); 594 put_cred(new);
433} 595}
@@ -444,7 +606,20 @@ const struct cred *override_creds(const struct cred *new)
444{ 606{
445 const struct cred *old = current->cred; 607 const struct cred *old = current->cred;
446 608
447 rcu_assign_pointer(current->cred, get_cred(new)); 609 kdebug("override_creds(%p{%d,%d})", new,
610 atomic_read(&new->usage),
611 read_cred_subscribers(new));
612
613 validate_creds(old);
614 validate_creds(new);
615 get_cred(new);
616 alter_cred_subscribers(new, 1);
617 rcu_assign_pointer(current->cred, new);
618 alter_cred_subscribers(old, -1);
619
620 kdebug("override_creds() = %p{%d,%d}", old,
621 atomic_read(&old->usage),
622 read_cred_subscribers(old));
448 return old; 623 return old;
449} 624}
450EXPORT_SYMBOL(override_creds); 625EXPORT_SYMBOL(override_creds);
@@ -460,7 +635,15 @@ void revert_creds(const struct cred *old)
460{ 635{
461 const struct cred *override = current->cred; 636 const struct cred *override = current->cred;
462 637
638 kdebug("revert_creds(%p{%d,%d})", old,
639 atomic_read(&old->usage),
640 read_cred_subscribers(old));
641
642 validate_creds(old);
643 validate_creds(override);
644 alter_cred_subscribers(old, 1);
463 rcu_assign_pointer(current->cred, old); 645 rcu_assign_pointer(current->cred, old);
646 alter_cred_subscribers(override, -1);
464 put_cred(override); 647 put_cred(override);
465} 648}
466EXPORT_SYMBOL(revert_creds); 649EXPORT_SYMBOL(revert_creds);
@@ -502,11 +685,15 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
502 if (!new) 685 if (!new)
503 return NULL; 686 return NULL;
504 687
688 kdebug("prepare_kernel_cred() alloc %p", new);
689
505 if (daemon) 690 if (daemon)
506 old = get_task_cred(daemon); 691 old = get_task_cred(daemon);
507 else 692 else
508 old = get_cred(&init_cred); 693 old = get_cred(&init_cred);
509 694
695 validate_creds(old);
696
510 *new = *old; 697 *new = *old;
511 get_uid(new->user); 698 get_uid(new->user);
512 get_group_info(new->group_info); 699 get_group_info(new->group_info);
@@ -526,7 +713,9 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
526 goto error; 713 goto error;
527 714
528 atomic_set(&new->usage, 1); 715 atomic_set(&new->usage, 1);
716 set_cred_subscribers(new, 0);
529 put_cred(old); 717 put_cred(old);
718 validate_creds(new);
530 return new; 719 return new;
531 720
532error: 721error:
@@ -589,3 +778,95 @@ int set_create_files_as(struct cred *new, struct inode *inode)
589 return security_kernel_create_files_as(new, inode); 778 return security_kernel_create_files_as(new, inode);
590} 779}
591EXPORT_SYMBOL(set_create_files_as); 780EXPORT_SYMBOL(set_create_files_as);
781
782#ifdef CONFIG_DEBUG_CREDENTIALS
783
784/*
785 * dump invalid credentials
786 */
787static void dump_invalid_creds(const struct cred *cred, const char *label,
788 const struct task_struct *tsk)
789{
790 printk(KERN_ERR "CRED: %s credentials: %p %s%s%s\n",
791 label, cred,
792 cred == &init_cred ? "[init]" : "",
793 cred == tsk->real_cred ? "[real]" : "",
794 cred == tsk->cred ? "[eff]" : "");
795 printk(KERN_ERR "CRED: ->magic=%x, put_addr=%p\n",
796 cred->magic, cred->put_addr);
797 printk(KERN_ERR "CRED: ->usage=%d, subscr=%d\n",
798 atomic_read(&cred->usage),
799 read_cred_subscribers(cred));
800 printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n",
801 cred->uid, cred->euid, cred->suid, cred->fsuid);
802 printk(KERN_ERR "CRED: ->*gid = { %d,%d,%d,%d }\n",
803 cred->gid, cred->egid, cred->sgid, cred->fsgid);
804#ifdef CONFIG_SECURITY
805 printk(KERN_ERR "CRED: ->security is %p\n", cred->security);
806 if ((unsigned long) cred->security >= PAGE_SIZE &&
807 (((unsigned long) cred->security & 0xffffff00) !=
808 (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8)))
809 printk(KERN_ERR "CRED: ->security {%x, %x}\n",
810 ((u32*)cred->security)[0],
811 ((u32*)cred->security)[1]);
812#endif
813}
814
815/*
816 * report use of invalid credentials
817 */
818void __invalid_creds(const struct cred *cred, const char *file, unsigned line)
819{
820 printk(KERN_ERR "CRED: Invalid credentials\n");
821 printk(KERN_ERR "CRED: At %s:%u\n", file, line);
822 dump_invalid_creds(cred, "Specified", current);
823 BUG();
824}
825EXPORT_SYMBOL(__invalid_creds);
826
827/*
828 * check the credentials on a process
829 */
830void __validate_process_creds(struct task_struct *tsk,
831 const char *file, unsigned line)
832{
833 if (tsk->cred == tsk->real_cred) {
834 if (unlikely(read_cred_subscribers(tsk->cred) < 2 ||
835 creds_are_invalid(tsk->cred)))
836 goto invalid_creds;
837 } else {
838 if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 ||
839 read_cred_subscribers(tsk->cred) < 1 ||
840 creds_are_invalid(tsk->real_cred) ||
841 creds_are_invalid(tsk->cred)))
842 goto invalid_creds;
843 }
844 return;
845
846invalid_creds:
847 printk(KERN_ERR "CRED: Invalid process credentials\n");
848 printk(KERN_ERR "CRED: At %s:%u\n", file, line);
849
850 dump_invalid_creds(tsk->real_cred, "Real", tsk);
851 if (tsk->cred != tsk->real_cred)
852 dump_invalid_creds(tsk->cred, "Effective", tsk);
853 else
854 printk(KERN_ERR "CRED: Effective creds == Real creds\n");
855 BUG();
856}
857EXPORT_SYMBOL(__validate_process_creds);
858
859/*
860 * check creds for do_exit()
861 */
862void validate_creds_for_do_exit(struct task_struct *tsk)
863{
864 kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})",
865 tsk->real_cred, tsk->cred,
866 atomic_read(&tsk->cred->usage),
867 read_cred_subscribers(tsk->cred));
868
869 __validate_process_creds(tsk, __FILE__, __LINE__);
870}
871
872#endif /* CONFIG_DEBUG_CREDENTIALS */
diff --git a/kernel/exit.c b/kernel/exit.c
index 869dc221733e..c98ff7a8025f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -901,6 +901,8 @@ NORET_TYPE void do_exit(long code)
901 901
902 tracehook_report_exit(&code); 902 tracehook_report_exit(&code);
903 903
904 validate_creds_for_do_exit(tsk);
905
904 /* 906 /*
905 * We're taking recursive faults here in do_exit. Safest is to just 907 * We're taking recursive faults here in do_exit. Safest is to just
906 * leave this task alone and wait for reboot. 908 * leave this task alone and wait for reboot.
@@ -1009,6 +1011,8 @@ NORET_TYPE void do_exit(long code)
1009 if (tsk->splice_pipe) 1011 if (tsk->splice_pipe)
1010 __free_pipe_info(tsk->splice_pipe); 1012 __free_pipe_info(tsk->splice_pipe);
1011 1013
1014 validate_creds_for_do_exit(tsk);
1015
1012 preempt_disable(); 1016 preempt_disable();
1013 /* causes final put_task_struct in finish_task_switch(). */ 1017 /* causes final put_task_struct in finish_task_switch(). */
1014 tsk->state = TASK_DEAD; 1018 tsk->state = TASK_DEAD;
diff --git a/kernel/fork.c b/kernel/fork.c
index e6c04d462ab2..aab8579c6093 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -152,8 +152,7 @@ void __put_task_struct(struct task_struct *tsk)
152 WARN_ON(atomic_read(&tsk->usage)); 152 WARN_ON(atomic_read(&tsk->usage));
153 WARN_ON(tsk == current); 153 WARN_ON(tsk == current);
154 154
155 put_cred(tsk->real_cred); 155 exit_creds(tsk);
156 put_cred(tsk->cred);
157 delayacct_tsk_free(tsk); 156 delayacct_tsk_free(tsk);
158 157
159 if (!profile_handoff_task(tsk)) 158 if (!profile_handoff_task(tsk))
@@ -1297,8 +1296,7 @@ bad_fork_cleanup_put_domain:
1297 module_put(task_thread_info(p)->exec_domain->module); 1296 module_put(task_thread_info(p)->exec_domain->module);
1298bad_fork_cleanup_count: 1297bad_fork_cleanup_count:
1299 atomic_dec(&p->cred->user->processes); 1298 atomic_dec(&p->cred->user->processes);
1300 put_cred(p->real_cred); 1299 exit_creds(p);
1301 put_cred(p->cred);
1302bad_fork_free: 1300bad_fork_free:
1303 free_task(p); 1301 free_task(p);
1304fork_out: 1302fork_out:
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 385c31a1bdbf..4e8cae2e9148 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -78,6 +78,10 @@ int __request_module(bool wait, const char *fmt, ...)
78#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ 78#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
79 static int kmod_loop_msg; 79 static int kmod_loop_msg;
80 80
81 ret = security_kernel_module_request();
82 if (ret)
83 return ret;
84
81 va_start(args, fmt); 85 va_start(args, fmt);
82 ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); 86 ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
83 va_end(args); 87 va_end(args);
@@ -462,6 +466,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info,
462 int retval = 0; 466 int retval = 0;
463 467
464 BUG_ON(atomic_read(&sub_info->cred->usage) != 1); 468 BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
469 validate_creds(sub_info->cred);
465 470
466 helper_lock(); 471 helper_lock();
467 if (sub_info->path[0] == '\0') 472 if (sub_info->path[0] == '\0')
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 082c320e4dbf..307c285af59e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -152,7 +152,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
152 if (!dumpable && !capable(CAP_SYS_PTRACE)) 152 if (!dumpable && !capable(CAP_SYS_PTRACE))
153 return -EPERM; 153 return -EPERM;
154 154
155 return security_ptrace_may_access(task, mode); 155 return security_ptrace_access_check(task, mode);
156} 156}
157 157
158bool ptrace_may_access(struct task_struct *task, unsigned int mode) 158bool ptrace_may_access(struct task_struct *task, unsigned int mode)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 58be76017fd0..71d8dc7f9920 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -49,7 +49,6 @@
49#include <linux/acpi.h> 49#include <linux/acpi.h>
50#include <linux/reboot.h> 50#include <linux/reboot.h>
51#include <linux/ftrace.h> 51#include <linux/ftrace.h>
52#include <linux/security.h>
53#include <linux/slow-work.h> 52#include <linux/slow-work.h>
54#include <linux/perf_counter.h> 53#include <linux/perf_counter.h>
55 54
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 12327b2bb785..fbb87cf138c5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -653,6 +653,21 @@ config DEBUG_NOTIFIERS
653 This is a relatively cheap check but if you care about maximum 653 This is a relatively cheap check but if you care about maximum
654 performance, say N. 654 performance, say N.
655 655
656config DEBUG_CREDENTIALS
657 bool "Debug credential management"
658 depends on DEBUG_KERNEL
659 help
660 Enable this to turn on some debug checking for credential
661 management. The additional code keeps track of the number of
662 pointers from task_structs to any given cred struct, and checks to
663 see that this number never exceeds the usage count of the cred
664 struct.
665
666 Furthermore, if SELinux is enabled, this also checks that the
667 security pointer in the cred struct is never seen to be invalid.
668
669 If unsure, say N.
670
656# 671#
657# Select this config option from the architecture Kconfig, if it 672# Select this config option from the architecture Kconfig, if it
658# is preferred to always offer frame pointers as a config 673# is preferred to always offer frame pointers as a config
diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c
index f1ed2fe76c65..bd2bea963364 100644
--- a/lib/is_single_threaded.c
+++ b/lib/is_single_threaded.c
@@ -12,34 +12,47 @@
12 12
13#include <linux/sched.h> 13#include <linux/sched.h>
14 14
15/** 15/*
16 * is_single_threaded - Determine if a thread group is single-threaded or not 16 * Returns true if the task does not share ->mm with another thread/process.
17 * @p: A task in the thread group in question
18 *
19 * This returns true if the thread group to which a task belongs is single
20 * threaded, false if it is not.
21 */ 17 */
22bool is_single_threaded(struct task_struct *p) 18bool current_is_single_threaded(void)
23{ 19{
24 struct task_struct *g, *t; 20 struct task_struct *task = current;
25 struct mm_struct *mm = p->mm; 21 struct mm_struct *mm = task->mm;
22 struct task_struct *p, *t;
23 bool ret;
26 24
27 if (atomic_read(&p->signal->count) != 1) 25 if (atomic_read(&task->signal->live) != 1)
28 goto no; 26 return false;
29 27
30 if (atomic_read(&p->mm->mm_users) != 1) { 28 if (atomic_read(&mm->mm_users) == 1)
31 read_lock(&tasklist_lock); 29 return true;
32 do_each_thread(g, t) {
33 if (t->mm == mm && t != p)
34 goto no_unlock;
35 } while_each_thread(g, t);
36 read_unlock(&tasklist_lock);
37 }
38 30
39 return true; 31 ret = false;
32 rcu_read_lock();
33 for_each_process(p) {
34 if (unlikely(p->flags & PF_KTHREAD))
35 continue;
36 if (unlikely(p == task->group_leader))
37 continue;
38
39 t = p;
40 do {
41 if (unlikely(t->mm == mm))
42 goto found;
43 if (likely(t->mm))
44 break;
45 /*
46 * t->mm == NULL. Make sure next_thread/next_task
47 * will see other CLONE_VM tasks which might be
48 * forked before exiting.
49 */
50 smp_rmb();
51 } while_each_thread(p, t);
52 }
53 ret = true;
54found:
55 rcu_read_unlock();
40 56
41no_unlock: 57 return ret;
42 read_unlock(&tasklist_lock);
43no:
44 return false;
45} 58}
diff --git a/mm/Makefile b/mm/Makefile
index 5e0bd6426693..147a7a7873c4 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -8,7 +8,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
8 vmalloc.o 8 vmalloc.o
9 9
10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ 10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
11 maccess.o page_alloc.o page-writeback.o pdflush.o \ 11 maccess.o page_alloc.o page-writeback.o \
12 readahead.o swap.o truncate.o vmscan.o shmem.o \ 12 readahead.o swap.o truncate.o vmscan.o shmem.o \
13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
14 page_isolation.o mm_init.o $(mmu-y) 14 page_isolation.o mm_init.o $(mmu-y)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c86edd244294..d3ca0dac1111 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1,8 +1,11 @@
1 1
2#include <linux/wait.h> 2#include <linux/wait.h>
3#include <linux/backing-dev.h> 3#include <linux/backing-dev.h>
4#include <linux/kthread.h>
5#include <linux/freezer.h>
4#include <linux/fs.h> 6#include <linux/fs.h>
5#include <linux/pagemap.h> 7#include <linux/pagemap.h>
8#include <linux/mm.h>
6#include <linux/sched.h> 9#include <linux/sched.h>
7#include <linux/module.h> 10#include <linux/module.h>
8#include <linux/writeback.h> 11#include <linux/writeback.h>
@@ -14,6 +17,7 @@ void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
14EXPORT_SYMBOL(default_unplug_io_fn); 17EXPORT_SYMBOL(default_unplug_io_fn);
15 18
16struct backing_dev_info default_backing_dev_info = { 19struct backing_dev_info default_backing_dev_info = {
20 .name = "default",
17 .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE, 21 .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
18 .state = 0, 22 .state = 0,
19 .capabilities = BDI_CAP_MAP_COPY, 23 .capabilities = BDI_CAP_MAP_COPY,
@@ -22,6 +26,18 @@ struct backing_dev_info default_backing_dev_info = {
22EXPORT_SYMBOL_GPL(default_backing_dev_info); 26EXPORT_SYMBOL_GPL(default_backing_dev_info);
23 27
24static struct class *bdi_class; 28static struct class *bdi_class;
29DEFINE_SPINLOCK(bdi_lock);
30LIST_HEAD(bdi_list);
31LIST_HEAD(bdi_pending_list);
32
33static struct task_struct *sync_supers_tsk;
34static struct timer_list sync_supers_timer;
35
36static int bdi_sync_supers(void *);
37static void sync_supers_timer_fn(unsigned long);
38static void arm_supers_timer(void);
39
40static void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
25 41
26#ifdef CONFIG_DEBUG_FS 42#ifdef CONFIG_DEBUG_FS
27#include <linux/debugfs.h> 43#include <linux/debugfs.h>
@@ -37,9 +53,29 @@ static void bdi_debug_init(void)
37static int bdi_debug_stats_show(struct seq_file *m, void *v) 53static int bdi_debug_stats_show(struct seq_file *m, void *v)
38{ 54{
39 struct backing_dev_info *bdi = m->private; 55 struct backing_dev_info *bdi = m->private;
56 struct bdi_writeback *wb;
40 unsigned long background_thresh; 57 unsigned long background_thresh;
41 unsigned long dirty_thresh; 58 unsigned long dirty_thresh;
42 unsigned long bdi_thresh; 59 unsigned long bdi_thresh;
60 unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
61 struct inode *inode;
62
63 /*
64 * inode lock is enough here, the bdi->wb_list is protected by
65 * RCU on the reader side
66 */
67 nr_wb = nr_dirty = nr_io = nr_more_io = 0;
68 spin_lock(&inode_lock);
69 list_for_each_entry(wb, &bdi->wb_list, list) {
70 nr_wb++;
71 list_for_each_entry(inode, &wb->b_dirty, i_list)
72 nr_dirty++;
73 list_for_each_entry(inode, &wb->b_io, i_list)
74 nr_io++;
75 list_for_each_entry(inode, &wb->b_more_io, i_list)
76 nr_more_io++;
77 }
78 spin_unlock(&inode_lock);
43 79
44 get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); 80 get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);
45 81
@@ -49,12 +85,22 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
49 "BdiReclaimable: %8lu kB\n" 85 "BdiReclaimable: %8lu kB\n"
50 "BdiDirtyThresh: %8lu kB\n" 86 "BdiDirtyThresh: %8lu kB\n"
51 "DirtyThresh: %8lu kB\n" 87 "DirtyThresh: %8lu kB\n"
52 "BackgroundThresh: %8lu kB\n", 88 "BackgroundThresh: %8lu kB\n"
89 "WriteBack threads:%8lu\n"
90 "b_dirty: %8lu\n"
91 "b_io: %8lu\n"
92 "b_more_io: %8lu\n"
93 "bdi_list: %8u\n"
94 "state: %8lx\n"
95 "wb_mask: %8lx\n"
96 "wb_list: %8u\n"
97 "wb_cnt: %8u\n",
53 (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), 98 (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
54 (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)), 99 (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
55 K(bdi_thresh), 100 K(bdi_thresh), K(dirty_thresh),
56 K(dirty_thresh), 101 K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
57 K(background_thresh)); 102 !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
103 !list_empty(&bdi->wb_list), bdi->wb_cnt);
58#undef K 104#undef K
59 105
60 return 0; 106 return 0;
@@ -185,6 +231,13 @@ static int __init default_bdi_init(void)
185{ 231{
186 int err; 232 int err;
187 233
234 sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
235 BUG_ON(IS_ERR(sync_supers_tsk));
236
237 init_timer(&sync_supers_timer);
238 setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
239 arm_supers_timer();
240
188 err = bdi_init(&default_backing_dev_info); 241 err = bdi_init(&default_backing_dev_info);
189 if (!err) 242 if (!err)
190 bdi_register(&default_backing_dev_info, NULL, "default"); 243 bdi_register(&default_backing_dev_info, NULL, "default");
@@ -193,6 +246,248 @@ static int __init default_bdi_init(void)
193} 246}
194subsys_initcall(default_bdi_init); 247subsys_initcall(default_bdi_init);
195 248
249static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
250{
251 memset(wb, 0, sizeof(*wb));
252
253 wb->bdi = bdi;
254 wb->last_old_flush = jiffies;
255 INIT_LIST_HEAD(&wb->b_dirty);
256 INIT_LIST_HEAD(&wb->b_io);
257 INIT_LIST_HEAD(&wb->b_more_io);
258}
259
260static void bdi_task_init(struct backing_dev_info *bdi,
261 struct bdi_writeback *wb)
262{
263 struct task_struct *tsk = current;
264
265 spin_lock(&bdi->wb_lock);
266 list_add_tail_rcu(&wb->list, &bdi->wb_list);
267 spin_unlock(&bdi->wb_lock);
268
269 tsk->flags |= PF_FLUSHER | PF_SWAPWRITE;
270 set_freezable();
271
272 /*
273 * Our parent may run at a different priority, just set us to normal
274 */
275 set_user_nice(tsk, 0);
276}
277
278static int bdi_start_fn(void *ptr)
279{
280 struct bdi_writeback *wb = ptr;
281 struct backing_dev_info *bdi = wb->bdi;
282 int ret;
283
284 /*
285 * Add us to the active bdi_list
286 */
287 spin_lock(&bdi_lock);
288 list_add(&bdi->bdi_list, &bdi_list);
289 spin_unlock(&bdi_lock);
290
291 bdi_task_init(bdi, wb);
292
293 /*
294 * Clear pending bit and wakeup anybody waiting to tear us down
295 */
296 clear_bit(BDI_pending, &bdi->state);
297 smp_mb__after_clear_bit();
298 wake_up_bit(&bdi->state, BDI_pending);
299
300 ret = bdi_writeback_task(wb);
301
302 /*
303 * Remove us from the list
304 */
305 spin_lock(&bdi->wb_lock);
306 list_del_rcu(&wb->list);
307 spin_unlock(&bdi->wb_lock);
308
309 /*
310 * Flush any work that raced with us exiting. No new work
311 * will be added, since this bdi isn't discoverable anymore.
312 */
313 if (!list_empty(&bdi->work_list))
314 wb_do_writeback(wb, 1);
315
316 wb->task = NULL;
317 return ret;
318}
319
320int bdi_has_dirty_io(struct backing_dev_info *bdi)
321{
322 return wb_has_dirty_io(&bdi->wb);
323}
324
325static void bdi_flush_io(struct backing_dev_info *bdi)
326{
327 struct writeback_control wbc = {
328 .bdi = bdi,
329 .sync_mode = WB_SYNC_NONE,
330 .older_than_this = NULL,
331 .range_cyclic = 1,
332 .nr_to_write = 1024,
333 };
334
335 writeback_inodes_wbc(&wbc);
336}
337
338/*
339 * kupdated() used to do this. We cannot do it from the bdi_forker_task()
340 * or we risk deadlocking on ->s_umount. The longer term solution would be
341 * to implement sync_supers_bdi() or similar and simply do it from the
342 * bdi writeback tasks individually.
343 */
344static int bdi_sync_supers(void *unused)
345{
346 set_user_nice(current, 0);
347
348 while (!kthread_should_stop()) {
349 set_current_state(TASK_INTERRUPTIBLE);
350 schedule();
351
352 /*
353 * Do this periodically, like kupdated() did before.
354 */
355 sync_supers();
356 }
357
358 return 0;
359}
360
361static void arm_supers_timer(void)
362{
363 unsigned long next;
364
365 next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
366 mod_timer(&sync_supers_timer, round_jiffies_up(next));
367}
368
369static void sync_supers_timer_fn(unsigned long unused)
370{
371 wake_up_process(sync_supers_tsk);
372 arm_supers_timer();
373}
374
375static int bdi_forker_task(void *ptr)
376{
377 struct bdi_writeback *me = ptr;
378
379 bdi_task_init(me->bdi, me);
380
381 for (;;) {
382 struct backing_dev_info *bdi, *tmp;
383 struct bdi_writeback *wb;
384
385 /*
386 * Temporary measure, we want to make sure we don't see
387 * dirty data on the default backing_dev_info
388 */
389 if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
390 wb_do_writeback(me, 0);
391
392 spin_lock(&bdi_lock);
393
394 /*
395 * Check if any existing bdi's have dirty data without
396 * a thread registered. If so, set that up.
397 */
398 list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
399 if (bdi->wb.task)
400 continue;
401 if (list_empty(&bdi->work_list) &&
402 !bdi_has_dirty_io(bdi))
403 continue;
404
405 bdi_add_default_flusher_task(bdi);
406 }
407
408 set_current_state(TASK_INTERRUPTIBLE);
409
410 if (list_empty(&bdi_pending_list)) {
411 unsigned long wait;
412
413 spin_unlock(&bdi_lock);
414 wait = msecs_to_jiffies(dirty_writeback_interval * 10);
415 schedule_timeout(wait);
416 try_to_freeze();
417 continue;
418 }
419
420 __set_current_state(TASK_RUNNING);
421
422 /*
423 * This is our real job - check for pending entries in
424 * bdi_pending_list, and create the tasks that got added
425 */
426 bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
427 bdi_list);
428 list_del_init(&bdi->bdi_list);
429 spin_unlock(&bdi_lock);
430
431 wb = &bdi->wb;
432 wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
433 dev_name(bdi->dev));
434 /*
435 * If task creation fails, then re-add the bdi to
436 * the pending list and force writeout of the bdi
437 * from this forker thread. That will free some memory
438 * and we can try again.
439 */
440 if (IS_ERR(wb->task)) {
441 wb->task = NULL;
442
443 /*
444 * Add this 'bdi' to the back, so we get
445 * a chance to flush other bdi's to free
446 * memory.
447 */
448 spin_lock(&bdi_lock);
449 list_add_tail(&bdi->bdi_list, &bdi_pending_list);
450 spin_unlock(&bdi_lock);
451
452 bdi_flush_io(bdi);
453 }
454 }
455
456 return 0;
457}
458
459/*
460 * Add the default flusher task that gets created for any bdi
461 * that has dirty data pending writeout
462 */
463void static bdi_add_default_flusher_task(struct backing_dev_info *bdi)
464{
465 if (!bdi_cap_writeback_dirty(bdi))
466 return;
467
468 if (WARN_ON(!test_bit(BDI_registered, &bdi->state))) {
469 printk(KERN_ERR "bdi %p/%s is not registered!\n",
470 bdi, bdi->name);
471 return;
472 }
473
474 /*
475 * Check with the helper whether to proceed adding a task. Will only
476 * abort if two or more simultaneous calls to
477 * bdi_add_default_flusher_task() occurred; further additions will block
478 * waiting for previous additions to finish.
479 */
480 if (!test_and_set_bit(BDI_pending, &bdi->state)) {
481 list_move_tail(&bdi->bdi_list, &bdi_pending_list);
482
483 /*
484 * We are now on the pending list, wake up bdi_forker_task()
485 * to finish the job and add us back to the active bdi_list
486 */
487 wake_up_process(default_backing_dev_info.wb.task);
488 }
489}
490
196int bdi_register(struct backing_dev_info *bdi, struct device *parent, 491int bdi_register(struct backing_dev_info *bdi, struct device *parent,
197 const char *fmt, ...) 492 const char *fmt, ...)
198{ 493{
@@ -211,9 +506,35 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
211 goto exit; 506 goto exit;
212 } 507 }
213 508
509 spin_lock(&bdi_lock);
510 list_add_tail(&bdi->bdi_list, &bdi_list);
511 spin_unlock(&bdi_lock);
512
214 bdi->dev = dev; 513 bdi->dev = dev;
215 bdi_debug_register(bdi, dev_name(dev));
216 514
515 /*
516 * Just start the forker thread for our default backing_dev_info,
517 * and add other bdi's to the list. They will get a thread created
518 * on-demand when they need it.
519 */
520 if (bdi_cap_flush_forker(bdi)) {
521 struct bdi_writeback *wb = &bdi->wb;
522
523 wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s",
524 dev_name(dev));
525 if (IS_ERR(wb->task)) {
526 wb->task = NULL;
527 ret = -ENOMEM;
528
529 spin_lock(&bdi_lock);
530 list_del(&bdi->bdi_list);
531 spin_unlock(&bdi_lock);
532 goto exit;
533 }
534 }
535
536 bdi_debug_register(bdi, dev_name(dev));
537 set_bit(BDI_registered, &bdi->state);
217exit: 538exit:
218 return ret; 539 return ret;
219} 540}
@@ -225,9 +546,42 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
225} 546}
226EXPORT_SYMBOL(bdi_register_dev); 547EXPORT_SYMBOL(bdi_register_dev);
227 548
549/*
550 * Remove bdi from the global list and shutdown any threads we have running
551 */
552static void bdi_wb_shutdown(struct backing_dev_info *bdi)
553{
554 struct bdi_writeback *wb;
555
556 if (!bdi_cap_writeback_dirty(bdi))
557 return;
558
559 /*
560 * If setup is pending, wait for that to complete first
561 */
562 wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
563 TASK_UNINTERRUPTIBLE);
564
565 /*
566 * Make sure nobody finds us on the bdi_list anymore
567 */
568 spin_lock(&bdi_lock);
569 list_del(&bdi->bdi_list);
570 spin_unlock(&bdi_lock);
571
572 /*
573 * Finally, kill the kernel threads. We don't need to be RCU
574 * safe anymore, since the bdi is gone from visibility.
575 */
576 list_for_each_entry(wb, &bdi->wb_list, list)
577 kthread_stop(wb->task);
578}
579
228void bdi_unregister(struct backing_dev_info *bdi) 580void bdi_unregister(struct backing_dev_info *bdi)
229{ 581{
230 if (bdi->dev) { 582 if (bdi->dev) {
583 if (!bdi_cap_flush_forker(bdi))
584 bdi_wb_shutdown(bdi);
231 bdi_debug_unregister(bdi); 585 bdi_debug_unregister(bdi);
232 device_unregister(bdi->dev); 586 device_unregister(bdi->dev);
233 bdi->dev = NULL; 587 bdi->dev = NULL;
@@ -237,14 +591,25 @@ EXPORT_SYMBOL(bdi_unregister);
237 591
238int bdi_init(struct backing_dev_info *bdi) 592int bdi_init(struct backing_dev_info *bdi)
239{ 593{
240 int i; 594 int i, err;
241 int err;
242 595
243 bdi->dev = NULL; 596 bdi->dev = NULL;
244 597
245 bdi->min_ratio = 0; 598 bdi->min_ratio = 0;
246 bdi->max_ratio = 100; 599 bdi->max_ratio = 100;
247 bdi->max_prop_frac = PROP_FRAC_BASE; 600 bdi->max_prop_frac = PROP_FRAC_BASE;
601 spin_lock_init(&bdi->wb_lock);
602 INIT_LIST_HEAD(&bdi->bdi_list);
603 INIT_LIST_HEAD(&bdi->wb_list);
604 INIT_LIST_HEAD(&bdi->work_list);
605
606 bdi_wb_init(&bdi->wb, bdi);
607
608 /*
609 * Just one thread support for now, hard code mask and count
610 */
611 bdi->wb_mask = 1;
612 bdi->wb_cnt = 1;
248 613
249 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { 614 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
250 err = percpu_counter_init(&bdi->bdi_stat[i], 0); 615 err = percpu_counter_init(&bdi->bdi_stat[i], 0);
@@ -269,6 +634,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
269{ 634{
270 int i; 635 int i;
271 636
637 WARN_ON(bdi_has_dirty_io(bdi));
638
272 bdi_unregister(bdi); 639 bdi_unregister(bdi);
273 640
274 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) 641 for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 701740c9e81b..555d5d2731c6 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -521,7 +521,11 @@ find_block:
521 region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + 521 region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
522 start_off); 522 start_off);
523 memset(region, 0, size); 523 memset(region, 0, size);
524 kmemleak_alloc(region, size, 1, 0); 524 /*
525 * The min_count is set to 0 so that bootmem allocated blocks
526 * are never reported as leaks.
527 */
528 kmemleak_alloc(region, size, 0, 0);
525 return region; 529 return region;
526 } 530 }
527 531
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 487267310a84..4ea4510e2996 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -92,11 +92,13 @@
92#include <linux/string.h> 92#include <linux/string.h>
93#include <linux/nodemask.h> 93#include <linux/nodemask.h>
94#include <linux/mm.h> 94#include <linux/mm.h>
95#include <linux/workqueue.h>
95 96
96#include <asm/sections.h> 97#include <asm/sections.h>
97#include <asm/processor.h> 98#include <asm/processor.h>
98#include <asm/atomic.h> 99#include <asm/atomic.h>
99 100
101#include <linux/kmemcheck.h>
100#include <linux/kmemleak.h> 102#include <linux/kmemleak.h>
101 103
102/* 104/*
@@ -107,6 +109,7 @@
107#define SECS_FIRST_SCAN 60 /* delay before the first scan */ 109#define SECS_FIRST_SCAN 60 /* delay before the first scan */
108#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ 110#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */
109#define GRAY_LIST_PASSES 25 /* maximum number of gray list scans */ 111#define GRAY_LIST_PASSES 25 /* maximum number of gray list scans */
112#define MAX_SCAN_SIZE 4096 /* maximum size of a scanned block */
110 113
111#define BYTES_PER_POINTER sizeof(void *) 114#define BYTES_PER_POINTER sizeof(void *)
112 115
@@ -120,6 +123,9 @@ struct kmemleak_scan_area {
120 size_t length; 123 size_t length;
121}; 124};
122 125
126#define KMEMLEAK_GREY 0
127#define KMEMLEAK_BLACK -1
128
123/* 129/*
124 * Structure holding the metadata for each allocated memory block. 130 * Structure holding the metadata for each allocated memory block.
125 * Modifications to such objects should be made while holding the 131 * Modifications to such objects should be made while holding the
@@ -161,6 +167,15 @@ struct kmemleak_object {
161/* flag set on newly allocated objects */ 167/* flag set on newly allocated objects */
162#define OBJECT_NEW (1 << 3) 168#define OBJECT_NEW (1 << 3)
163 169
170/* number of bytes to print per line; must be 16 or 32 */
171#define HEX_ROW_SIZE 16
172/* number of bytes to print at a time (1, 2, 4, 8) */
173#define HEX_GROUP_SIZE 1
174/* include ASCII after the hex output */
175#define HEX_ASCII 1
176/* max number of lines to be printed */
177#define HEX_MAX_LINES 2
178
164/* the list of all allocated objects */ 179/* the list of all allocated objects */
165static LIST_HEAD(object_list); 180static LIST_HEAD(object_list);
166/* the list of gray-colored objects (see color_gray comment below) */ 181/* the list of gray-colored objects (see color_gray comment below) */
@@ -228,11 +243,14 @@ struct early_log {
228 int min_count; /* minimum reference count */ 243 int min_count; /* minimum reference count */
229 unsigned long offset; /* scan area offset */ 244 unsigned long offset; /* scan area offset */
230 size_t length; /* scan area length */ 245 size_t length; /* scan area length */
246 unsigned long trace[MAX_TRACE]; /* stack trace */
247 unsigned int trace_len; /* stack trace length */
231}; 248};
232 249
233/* early logging buffer and current position */ 250/* early logging buffer and current position */
234static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE]; 251static struct early_log
235static int crt_early_log; 252 early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata;
253static int crt_early_log __initdata;
236 254
237static void kmemleak_disable(void); 255static void kmemleak_disable(void);
238 256
@@ -255,6 +273,35 @@ static void kmemleak_disable(void);
255} while (0) 273} while (0)
256 274
257/* 275/*
276 * Printing of the object's hex dump to the seq file. The number of lines to be
277 * printed is limited to HEX_MAX_LINES to prevent seq file spamming. The
278 * actual number of printed bytes depends on HEX_ROW_SIZE. It must be called
279 * with the object->lock held.
280 */
281static void hex_dump_object(struct seq_file *seq,
282 struct kmemleak_object *object)
283{
284 const u8 *ptr = (const u8 *)object->pointer;
285 int i, len, remaining;
286 unsigned char linebuf[HEX_ROW_SIZE * 5];
287
288 /* limit the number of lines to HEX_MAX_LINES */
289 remaining = len =
290 min(object->size, (size_t)(HEX_MAX_LINES * HEX_ROW_SIZE));
291
292 seq_printf(seq, " hex dump (first %d bytes):\n", len);
293 for (i = 0; i < len; i += HEX_ROW_SIZE) {
294 int linelen = min(remaining, HEX_ROW_SIZE);
295
296 remaining -= HEX_ROW_SIZE;
297 hex_dump_to_buffer(ptr + i, linelen, HEX_ROW_SIZE,
298 HEX_GROUP_SIZE, linebuf, sizeof(linebuf),
299 HEX_ASCII);
300 seq_printf(seq, " %s\n", linebuf);
301 }
302}
303
304/*
258 * Object colors, encoded with count and min_count: 305 * Object colors, encoded with count and min_count:
259 * - white - orphan object, not enough references to it (count < min_count) 306 * - white - orphan object, not enough references to it (count < min_count)
260 * - gray - not orphan, not marked as false positive (min_count == 0) or 307 * - gray - not orphan, not marked as false positive (min_count == 0) or
@@ -264,19 +311,21 @@ static void kmemleak_disable(void);
264 * Newly created objects don't have any color assigned (object->count == -1) 311 * Newly created objects don't have any color assigned (object->count == -1)
265 * before the next memory scan when they become white. 312 * before the next memory scan when they become white.
266 */ 313 */
267static int color_white(const struct kmemleak_object *object) 314static bool color_white(const struct kmemleak_object *object)
268{ 315{
269 return object->count != -1 && object->count < object->min_count; 316 return object->count != KMEMLEAK_BLACK &&
317 object->count < object->min_count;
270} 318}
271 319
272static int color_gray(const struct kmemleak_object *object) 320static bool color_gray(const struct kmemleak_object *object)
273{ 321{
274 return object->min_count != -1 && object->count >= object->min_count; 322 return object->min_count != KMEMLEAK_BLACK &&
323 object->count >= object->min_count;
275} 324}
276 325
277static int color_black(const struct kmemleak_object *object) 326static bool color_black(const struct kmemleak_object *object)
278{ 327{
279 return object->min_count == -1; 328 return object->min_count == KMEMLEAK_BLACK;
280} 329}
281 330
282/* 331/*
@@ -284,7 +333,7 @@ static int color_black(const struct kmemleak_object *object)
284 * not be deleted and have a minimum age to avoid false positives caused by 333 * not be deleted and have a minimum age to avoid false positives caused by
285 * pointers temporarily stored in CPU registers. 334 * pointers temporarily stored in CPU registers.
286 */ 335 */
287static int unreferenced_object(struct kmemleak_object *object) 336static bool unreferenced_object(struct kmemleak_object *object)
288{ 337{
289 return (object->flags & OBJECT_ALLOCATED) && color_white(object) && 338 return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
290 time_before_eq(object->jiffies + jiffies_min_age, 339 time_before_eq(object->jiffies + jiffies_min_age,
@@ -304,6 +353,7 @@ static void print_unreferenced(struct seq_file *seq,
304 object->pointer, object->size); 353 object->pointer, object->size);
305 seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu\n", 354 seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu\n",
306 object->comm, object->pid, object->jiffies); 355 object->comm, object->pid, object->jiffies);
356 hex_dump_object(seq, object);
307 seq_printf(seq, " backtrace:\n"); 357 seq_printf(seq, " backtrace:\n");
308 358
309 for (i = 0; i < object->trace_len; i++) { 359 for (i = 0; i < object->trace_len; i++) {
@@ -330,6 +380,7 @@ static void dump_object_info(struct kmemleak_object *object)
330 object->comm, object->pid, object->jiffies); 380 object->comm, object->pid, object->jiffies);
331 pr_notice(" min_count = %d\n", object->min_count); 381 pr_notice(" min_count = %d\n", object->min_count);
332 pr_notice(" count = %d\n", object->count); 382 pr_notice(" count = %d\n", object->count);
383 pr_notice(" flags = 0x%lx\n", object->flags);
333 pr_notice(" backtrace:\n"); 384 pr_notice(" backtrace:\n");
334 print_stack_trace(&trace, 4); 385 print_stack_trace(&trace, 4);
335} 386}
@@ -434,21 +485,36 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
434} 485}
435 486
436/* 487/*
488 * Save stack trace to the given array of MAX_TRACE size.
489 */
490static int __save_stack_trace(unsigned long *trace)
491{
492 struct stack_trace stack_trace;
493
494 stack_trace.max_entries = MAX_TRACE;
495 stack_trace.nr_entries = 0;
496 stack_trace.entries = trace;
497 stack_trace.skip = 2;
498 save_stack_trace(&stack_trace);
499
500 return stack_trace.nr_entries;
501}
502
503/*
437 * Create the metadata (struct kmemleak_object) corresponding to an allocated 504 * Create the metadata (struct kmemleak_object) corresponding to an allocated
438 * memory block and add it to the object_list and object_tree_root. 505 * memory block and add it to the object_list and object_tree_root.
439 */ 506 */
440static void create_object(unsigned long ptr, size_t size, int min_count, 507static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
441 gfp_t gfp) 508 int min_count, gfp_t gfp)
442{ 509{
443 unsigned long flags; 510 unsigned long flags;
444 struct kmemleak_object *object; 511 struct kmemleak_object *object;
445 struct prio_tree_node *node; 512 struct prio_tree_node *node;
446 struct stack_trace trace;
447 513
448 object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK); 514 object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK);
449 if (!object) { 515 if (!object) {
450 kmemleak_stop("Cannot allocate a kmemleak_object structure\n"); 516 kmemleak_stop("Cannot allocate a kmemleak_object structure\n");
451 return; 517 return NULL;
452 } 518 }
453 519
454 INIT_LIST_HEAD(&object->object_list); 520 INIT_LIST_HEAD(&object->object_list);
@@ -482,18 +548,14 @@ static void create_object(unsigned long ptr, size_t size, int min_count,
482 } 548 }
483 549
484 /* kernel backtrace */ 550 /* kernel backtrace */
485 trace.max_entries = MAX_TRACE; 551 object->trace_len = __save_stack_trace(object->trace);
486 trace.nr_entries = 0;
487 trace.entries = object->trace;
488 trace.skip = 1;
489 save_stack_trace(&trace);
490 object->trace_len = trace.nr_entries;
491 552
492 INIT_PRIO_TREE_NODE(&object->tree_node); 553 INIT_PRIO_TREE_NODE(&object->tree_node);
493 object->tree_node.start = ptr; 554 object->tree_node.start = ptr;
494 object->tree_node.last = ptr + size - 1; 555 object->tree_node.last = ptr + size - 1;
495 556
496 write_lock_irqsave(&kmemleak_lock, flags); 557 write_lock_irqsave(&kmemleak_lock, flags);
558
497 min_addr = min(min_addr, ptr); 559 min_addr = min(min_addr, ptr);
498 max_addr = max(max_addr, ptr + size); 560 max_addr = max(max_addr, ptr + size);
499 node = prio_tree_insert(&object_tree_root, &object->tree_node); 561 node = prio_tree_insert(&object_tree_root, &object->tree_node);
@@ -504,20 +566,19 @@ static void create_object(unsigned long ptr, size_t size, int min_count,
504 * random memory blocks. 566 * random memory blocks.
505 */ 567 */
506 if (node != &object->tree_node) { 568 if (node != &object->tree_node) {
507 unsigned long flags;
508
509 kmemleak_stop("Cannot insert 0x%lx into the object search tree " 569 kmemleak_stop("Cannot insert 0x%lx into the object search tree "
510 "(already existing)\n", ptr); 570 "(already existing)\n", ptr);
511 object = lookup_object(ptr, 1); 571 object = lookup_object(ptr, 1);
512 spin_lock_irqsave(&object->lock, flags); 572 spin_lock(&object->lock);
513 dump_object_info(object); 573 dump_object_info(object);
514 spin_unlock_irqrestore(&object->lock, flags); 574 spin_unlock(&object->lock);
515 575
516 goto out; 576 goto out;
517 } 577 }
518 list_add_tail_rcu(&object->object_list, &object_list); 578 list_add_tail_rcu(&object->object_list, &object_list);
519out: 579out:
520 write_unlock_irqrestore(&kmemleak_lock, flags); 580 write_unlock_irqrestore(&kmemleak_lock, flags);
581 return object;
521} 582}
522 583
523/* 584/*
@@ -604,46 +665,55 @@ static void delete_object_part(unsigned long ptr, size_t size)
604 665
605 put_object(object); 666 put_object(object);
606} 667}
607/* 668
608 * Make a object permanently as gray-colored so that it can no longer be 669static void __paint_it(struct kmemleak_object *object, int color)
609 * reported as a leak. This is used in general to mark a false positive. 670{
610 */ 671 object->min_count = color;
611static void make_gray_object(unsigned long ptr) 672 if (color == KMEMLEAK_BLACK)
673 object->flags |= OBJECT_NO_SCAN;
674}
675
676static void paint_it(struct kmemleak_object *object, int color)
612{ 677{
613 unsigned long flags; 678 unsigned long flags;
679
680 spin_lock_irqsave(&object->lock, flags);
681 __paint_it(object, color);
682 spin_unlock_irqrestore(&object->lock, flags);
683}
684
685static void paint_ptr(unsigned long ptr, int color)
686{
614 struct kmemleak_object *object; 687 struct kmemleak_object *object;
615 688
616 object = find_and_get_object(ptr, 0); 689 object = find_and_get_object(ptr, 0);
617 if (!object) { 690 if (!object) {
618 kmemleak_warn("Graying unknown object at 0x%08lx\n", ptr); 691 kmemleak_warn("Trying to color unknown object "
692 "at 0x%08lx as %s\n", ptr,
693 (color == KMEMLEAK_GREY) ? "Grey" :
694 (color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
619 return; 695 return;
620 } 696 }
621 697 paint_it(object, color);
622 spin_lock_irqsave(&object->lock, flags);
623 object->min_count = 0;
624 spin_unlock_irqrestore(&object->lock, flags);
625 put_object(object); 698 put_object(object);
626} 699}
627 700
628/* 701/*
702 * Make a object permanently as gray-colored so that it can no longer be
703 * reported as a leak. This is used in general to mark a false positive.
704 */
705static void make_gray_object(unsigned long ptr)
706{
707 paint_ptr(ptr, KMEMLEAK_GREY);
708}
709
710/*
629 * Mark the object as black-colored so that it is ignored from scans and 711 * Mark the object as black-colored so that it is ignored from scans and
630 * reporting. 712 * reporting.
631 */ 713 */
632static void make_black_object(unsigned long ptr) 714static void make_black_object(unsigned long ptr)
633{ 715{
634 unsigned long flags; 716 paint_ptr(ptr, KMEMLEAK_BLACK);
635 struct kmemleak_object *object;
636
637 object = find_and_get_object(ptr, 0);
638 if (!object) {
639 kmemleak_warn("Blacking unknown object at 0x%08lx\n", ptr);
640 return;
641 }
642
643 spin_lock_irqsave(&object->lock, flags);
644 object->min_count = -1;
645 spin_unlock_irqrestore(&object->lock, flags);
646 put_object(object);
647} 717}
648 718
649/* 719/*
@@ -715,14 +785,15 @@ static void object_no_scan(unsigned long ptr)
715 * Log an early kmemleak_* call to the early_log buffer. These calls will be 785 * Log an early kmemleak_* call to the early_log buffer. These calls will be
716 * processed later once kmemleak is fully initialized. 786 * processed later once kmemleak is fully initialized.
717 */ 787 */
718static void log_early(int op_type, const void *ptr, size_t size, 788static void __init log_early(int op_type, const void *ptr, size_t size,
719 int min_count, unsigned long offset, size_t length) 789 int min_count, unsigned long offset, size_t length)
720{ 790{
721 unsigned long flags; 791 unsigned long flags;
722 struct early_log *log; 792 struct early_log *log;
723 793
724 if (crt_early_log >= ARRAY_SIZE(early_log)) { 794 if (crt_early_log >= ARRAY_SIZE(early_log)) {
725 pr_warning("Early log buffer exceeded\n"); 795 pr_warning("Early log buffer exceeded, "
796 "please increase DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n");
726 kmemleak_disable(); 797 kmemleak_disable();
727 return; 798 return;
728 } 799 }
@@ -739,16 +810,45 @@ static void log_early(int op_type, const void *ptr, size_t size,
739 log->min_count = min_count; 810 log->min_count = min_count;
740 log->offset = offset; 811 log->offset = offset;
741 log->length = length; 812 log->length = length;
813 if (op_type == KMEMLEAK_ALLOC)
814 log->trace_len = __save_stack_trace(log->trace);
742 crt_early_log++; 815 crt_early_log++;
743 local_irq_restore(flags); 816 local_irq_restore(flags);
744} 817}
745 818
746/* 819/*
820 * Log an early allocated block and populate the stack trace.
821 */
822static void early_alloc(struct early_log *log)
823{
824 struct kmemleak_object *object;
825 unsigned long flags;
826 int i;
827
828 if (!atomic_read(&kmemleak_enabled) || !log->ptr || IS_ERR(log->ptr))
829 return;
830
831 /*
832 * RCU locking needed to ensure object is not freed via put_object().
833 */
834 rcu_read_lock();
835 object = create_object((unsigned long)log->ptr, log->size,
836 log->min_count, GFP_KERNEL);
837 spin_lock_irqsave(&object->lock, flags);
838 for (i = 0; i < log->trace_len; i++)
839 object->trace[i] = log->trace[i];
840 object->trace_len = log->trace_len;
841 spin_unlock_irqrestore(&object->lock, flags);
842 rcu_read_unlock();
843}
844
845/*
747 * Memory allocation function callback. This function is called from the 846 * Memory allocation function callback. This function is called from the
748 * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc, 847 * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc,
749 * vmalloc etc.). 848 * vmalloc etc.).
750 */ 849 */
751void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) 850void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
851 gfp_t gfp)
752{ 852{
753 pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count); 853 pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count);
754 854
@@ -763,7 +863,7 @@ EXPORT_SYMBOL_GPL(kmemleak_alloc);
763 * Memory freeing function callback. This function is called from the kernel 863 * Memory freeing function callback. This function is called from the kernel
764 * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.). 864 * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.).
765 */ 865 */
766void kmemleak_free(const void *ptr) 866void __ref kmemleak_free(const void *ptr)
767{ 867{
768 pr_debug("%s(0x%p)\n", __func__, ptr); 868 pr_debug("%s(0x%p)\n", __func__, ptr);
769 869
@@ -778,7 +878,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free);
778 * Partial memory freeing function callback. This function is usually called 878 * Partial memory freeing function callback. This function is usually called
779 * from bootmem allocator when (part of) a memory block is freed. 879 * from bootmem allocator when (part of) a memory block is freed.
780 */ 880 */
781void kmemleak_free_part(const void *ptr, size_t size) 881void __ref kmemleak_free_part(const void *ptr, size_t size)
782{ 882{
783 pr_debug("%s(0x%p)\n", __func__, ptr); 883 pr_debug("%s(0x%p)\n", __func__, ptr);
784 884
@@ -793,7 +893,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free_part);
793 * Mark an already allocated memory block as a false positive. This will cause 893 * Mark an already allocated memory block as a false positive. This will cause
794 * the block to no longer be reported as leak and always be scanned. 894 * the block to no longer be reported as leak and always be scanned.
795 */ 895 */
796void kmemleak_not_leak(const void *ptr) 896void __ref kmemleak_not_leak(const void *ptr)
797{ 897{
798 pr_debug("%s(0x%p)\n", __func__, ptr); 898 pr_debug("%s(0x%p)\n", __func__, ptr);
799 899
@@ -809,7 +909,7 @@ EXPORT_SYMBOL(kmemleak_not_leak);
809 * corresponding block is not a leak and does not contain any references to 909 * corresponding block is not a leak and does not contain any references to
810 * other allocated memory blocks. 910 * other allocated memory blocks.
811 */ 911 */
812void kmemleak_ignore(const void *ptr) 912void __ref kmemleak_ignore(const void *ptr)
813{ 913{
814 pr_debug("%s(0x%p)\n", __func__, ptr); 914 pr_debug("%s(0x%p)\n", __func__, ptr);
815 915
@@ -823,8 +923,8 @@ EXPORT_SYMBOL(kmemleak_ignore);
823/* 923/*
824 * Limit the range to be scanned in an allocated memory block. 924 * Limit the range to be scanned in an allocated memory block.
825 */ 925 */
826void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length, 926void __ref kmemleak_scan_area(const void *ptr, unsigned long offset,
827 gfp_t gfp) 927 size_t length, gfp_t gfp)
828{ 928{
829 pr_debug("%s(0x%p)\n", __func__, ptr); 929 pr_debug("%s(0x%p)\n", __func__, ptr);
830 930
@@ -838,7 +938,7 @@ EXPORT_SYMBOL(kmemleak_scan_area);
838/* 938/*
839 * Inform kmemleak not to scan the given memory block. 939 * Inform kmemleak not to scan the given memory block.
840 */ 940 */
841void kmemleak_no_scan(const void *ptr) 941void __ref kmemleak_no_scan(const void *ptr)
842{ 942{
843 pr_debug("%s(0x%p)\n", __func__, ptr); 943 pr_debug("%s(0x%p)\n", __func__, ptr);
844 944
@@ -882,15 +982,22 @@ static void scan_block(void *_start, void *_end,
882 unsigned long *end = _end - (BYTES_PER_POINTER - 1); 982 unsigned long *end = _end - (BYTES_PER_POINTER - 1);
883 983
884 for (ptr = start; ptr < end; ptr++) { 984 for (ptr = start; ptr < end; ptr++) {
885 unsigned long flags;
886 unsigned long pointer = *ptr;
887 struct kmemleak_object *object; 985 struct kmemleak_object *object;
986 unsigned long flags;
987 unsigned long pointer;
888 988
889 if (allow_resched) 989 if (allow_resched)
890 cond_resched(); 990 cond_resched();
891 if (scan_should_stop()) 991 if (scan_should_stop())
892 break; 992 break;
893 993
994 /* don't scan uninitialized memory */
995 if (!kmemcheck_is_obj_initialized((unsigned long)ptr,
996 BYTES_PER_POINTER))
997 continue;
998
999 pointer = *ptr;
1000
894 object = find_and_get_object(pointer, 1); 1001 object = find_and_get_object(pointer, 1);
895 if (!object) 1002 if (!object)
896 continue; 1003 continue;
@@ -949,10 +1056,21 @@ static void scan_object(struct kmemleak_object *object)
949 if (!(object->flags & OBJECT_ALLOCATED)) 1056 if (!(object->flags & OBJECT_ALLOCATED))
950 /* already freed object */ 1057 /* already freed object */
951 goto out; 1058 goto out;
952 if (hlist_empty(&object->area_list)) 1059 if (hlist_empty(&object->area_list)) {
953 scan_block((void *)object->pointer, 1060 void *start = (void *)object->pointer;
954 (void *)(object->pointer + object->size), object, 0); 1061 void *end = (void *)(object->pointer + object->size);
955 else 1062
1063 while (start < end && (object->flags & OBJECT_ALLOCATED) &&
1064 !(object->flags & OBJECT_NO_SCAN)) {
1065 scan_block(start, min(start + MAX_SCAN_SIZE, end),
1066 object, 0);
1067 start += MAX_SCAN_SIZE;
1068
1069 spin_unlock_irqrestore(&object->lock, flags);
1070 cond_resched();
1071 spin_lock_irqsave(&object->lock, flags);
1072 }
1073 } else
956 hlist_for_each_entry(area, elem, &object->area_list, node) 1074 hlist_for_each_entry(area, elem, &object->area_list, node)
957 scan_block((void *)(object->pointer + area->offset), 1075 scan_block((void *)(object->pointer + area->offset),
958 (void *)(object->pointer + area->offset 1076 (void *)(object->pointer + area->offset
@@ -970,7 +1088,6 @@ static void kmemleak_scan(void)
970{ 1088{
971 unsigned long flags; 1089 unsigned long flags;
972 struct kmemleak_object *object, *tmp; 1090 struct kmemleak_object *object, *tmp;
973 struct task_struct *task;
974 int i; 1091 int i;
975 int new_leaks = 0; 1092 int new_leaks = 0;
976 int gray_list_pass = 0; 1093 int gray_list_pass = 0;
@@ -1037,15 +1154,16 @@ static void kmemleak_scan(void)
1037 } 1154 }
1038 1155
1039 /* 1156 /*
1040 * Scanning the task stacks may introduce false negatives and it is 1157 * Scanning the task stacks (may introduce false negatives).
1041 * not enabled by default.
1042 */ 1158 */
1043 if (kmemleak_stack_scan) { 1159 if (kmemleak_stack_scan) {
1160 struct task_struct *p, *g;
1161
1044 read_lock(&tasklist_lock); 1162 read_lock(&tasklist_lock);
1045 for_each_process(task) 1163 do_each_thread(g, p) {
1046 scan_block(task_stack_page(task), 1164 scan_block(task_stack_page(p), task_stack_page(p) +
1047 task_stack_page(task) + THREAD_SIZE, 1165 THREAD_SIZE, NULL, 0);
1048 NULL, 0); 1166 } while_each_thread(g, p);
1049 read_unlock(&tasklist_lock); 1167 read_unlock(&tasklist_lock);
1050 } 1168 }
1051 1169
@@ -1170,7 +1288,7 @@ static int kmemleak_scan_thread(void *arg)
1170 * Start the automatic memory scanning thread. This function must be called 1288 * Start the automatic memory scanning thread. This function must be called
1171 * with the scan_mutex held. 1289 * with the scan_mutex held.
1172 */ 1290 */
1173void start_scan_thread(void) 1291static void start_scan_thread(void)
1174{ 1292{
1175 if (scan_thread) 1293 if (scan_thread)
1176 return; 1294 return;
@@ -1185,7 +1303,7 @@ void start_scan_thread(void)
1185 * Stop the automatic memory scanning thread. This function must be called 1303 * Stop the automatic memory scanning thread. This function must be called
1186 * with the scan_mutex held. 1304 * with the scan_mutex held.
1187 */ 1305 */
1188void stop_scan_thread(void) 1306static void stop_scan_thread(void)
1189{ 1307{
1190 if (scan_thread) { 1308 if (scan_thread) {
1191 kthread_stop(scan_thread); 1309 kthread_stop(scan_thread);
@@ -1294,6 +1412,49 @@ static int kmemleak_release(struct inode *inode, struct file *file)
1294 return seq_release(inode, file); 1412 return seq_release(inode, file);
1295} 1413}
1296 1414
1415static int dump_str_object_info(const char *str)
1416{
1417 unsigned long flags;
1418 struct kmemleak_object *object;
1419 unsigned long addr;
1420
1421 addr= simple_strtoul(str, NULL, 0);
1422 object = find_and_get_object(addr, 0);
1423 if (!object) {
1424 pr_info("Unknown object at 0x%08lx\n", addr);
1425 return -EINVAL;
1426 }
1427
1428 spin_lock_irqsave(&object->lock, flags);
1429 dump_object_info(object);
1430 spin_unlock_irqrestore(&object->lock, flags);
1431
1432 put_object(object);
1433 return 0;
1434}
1435
1436/*
1437 * We use grey instead of black to ensure we can do future scans on the same
1438 * objects. If we did not do future scans these black objects could
1439 * potentially contain references to newly allocated objects in the future and
1440 * we'd end up with false positives.
1441 */
1442static void kmemleak_clear(void)
1443{
1444 struct kmemleak_object *object;
1445 unsigned long flags;
1446
1447 rcu_read_lock();
1448 list_for_each_entry_rcu(object, &object_list, object_list) {
1449 spin_lock_irqsave(&object->lock, flags);
1450 if ((object->flags & OBJECT_REPORTED) &&
1451 unreferenced_object(object))
1452 __paint_it(object, KMEMLEAK_GREY);
1453 spin_unlock_irqrestore(&object->lock, flags);
1454 }
1455 rcu_read_unlock();
1456}
1457
1297/* 1458/*
1298 * File write operation to configure kmemleak at run-time. The following 1459 * File write operation to configure kmemleak at run-time. The following
1299 * commands can be written to the /sys/kernel/debug/kmemleak file: 1460 * commands can be written to the /sys/kernel/debug/kmemleak file:
@@ -1305,6 +1466,9 @@ static int kmemleak_release(struct inode *inode, struct file *file)
1305 * scan=... - set the automatic memory scanning period in seconds (0 to 1466 * scan=... - set the automatic memory scanning period in seconds (0 to
1306 * disable it) 1467 * disable it)
1307 * scan - trigger a memory scan 1468 * scan - trigger a memory scan
1469 * clear - mark all current reported unreferenced kmemleak objects as
1470 * grey to ignore printing them
1471 * dump=... - dump information about the object found at the given address
1308 */ 1472 */
1309static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, 1473static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1310 size_t size, loff_t *ppos) 1474 size_t size, loff_t *ppos)
@@ -1345,6 +1509,10 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
1345 } 1509 }
1346 } else if (strncmp(buf, "scan", 4) == 0) 1510 } else if (strncmp(buf, "scan", 4) == 0)
1347 kmemleak_scan(); 1511 kmemleak_scan();
1512 else if (strncmp(buf, "clear", 5) == 0)
1513 kmemleak_clear();
1514 else if (strncmp(buf, "dump=", 5) == 0)
1515 ret = dump_str_object_info(buf + 5);
1348 else 1516 else
1349 ret = -EINVAL; 1517 ret = -EINVAL;
1350 1518
@@ -1371,7 +1539,7 @@ static const struct file_operations kmemleak_fops = {
1371 * Perform the freeing of the kmemleak internal objects after waiting for any 1539 * Perform the freeing of the kmemleak internal objects after waiting for any
1372 * current memory scan to complete. 1540 * current memory scan to complete.
1373 */ 1541 */
1374static int kmemleak_cleanup_thread(void *arg) 1542static void kmemleak_do_cleanup(struct work_struct *work)
1375{ 1543{
1376 struct kmemleak_object *object; 1544 struct kmemleak_object *object;
1377 1545
@@ -1383,22 +1551,9 @@ static int kmemleak_cleanup_thread(void *arg)
1383 delete_object_full(object->pointer); 1551 delete_object_full(object->pointer);
1384 rcu_read_unlock(); 1552 rcu_read_unlock();
1385 mutex_unlock(&scan_mutex); 1553 mutex_unlock(&scan_mutex);
1386
1387 return 0;
1388} 1554}
1389 1555
1390/* 1556static DECLARE_WORK(cleanup_work, kmemleak_do_cleanup);
1391 * Start the clean-up thread.
1392 */
1393static void kmemleak_cleanup(void)
1394{
1395 struct task_struct *cleanup_thread;
1396
1397 cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL,
1398 "kmemleak-clean");
1399 if (IS_ERR(cleanup_thread))
1400 pr_warning("Failed to create the clean-up thread\n");
1401}
1402 1557
1403/* 1558/*
1404 * Disable kmemleak. No memory allocation/freeing will be traced once this 1559 * Disable kmemleak. No memory allocation/freeing will be traced once this
@@ -1416,7 +1571,7 @@ static void kmemleak_disable(void)
1416 1571
1417 /* check whether it is too early for a kernel thread */ 1572 /* check whether it is too early for a kernel thread */
1418 if (atomic_read(&kmemleak_initialized)) 1573 if (atomic_read(&kmemleak_initialized))
1419 kmemleak_cleanup(); 1574 schedule_work(&cleanup_work);
1420 1575
1421 pr_info("Kernel memory leak detector disabled\n"); 1576 pr_info("Kernel memory leak detector disabled\n");
1422} 1577}
@@ -1469,8 +1624,7 @@ void __init kmemleak_init(void)
1469 1624
1470 switch (log->op_type) { 1625 switch (log->op_type) {
1471 case KMEMLEAK_ALLOC: 1626 case KMEMLEAK_ALLOC:
1472 kmemleak_alloc(log->ptr, log->size, log->min_count, 1627 early_alloc(log);
1473 GFP_KERNEL);
1474 break; 1628 break;
1475 case KMEMLEAK_FREE: 1629 case KMEMLEAK_FREE:
1476 kmemleak_free(log->ptr); 1630 kmemleak_free(log->ptr);
@@ -1513,7 +1667,7 @@ static int __init kmemleak_late_init(void)
1513 * after setting kmemleak_initialized and we may end up with 1667 * after setting kmemleak_initialized and we may end up with
1514 * two clean-up threads but serialized by scan_mutex. 1668 * two clean-up threads but serialized by scan_mutex.
1515 */ 1669 */
1516 kmemleak_cleanup(); 1670 schedule_work(&cleanup_work);
1517 return -ENOMEM; 1671 return -ENOMEM;
1518 } 1672 }
1519 1673
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 81627ebcd313..25e7770309b8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -36,15 +36,6 @@
36#include <linux/pagevec.h> 36#include <linux/pagevec.h>
37 37
38/* 38/*
39 * The maximum number of pages to writeout in a single bdflush/kupdate
40 * operation. We do this so we don't hold I_SYNC against an inode for
41 * enormous amounts of time, which would block a userspace task which has
42 * been forced to throttle against that inode. Also, the code reevaluates
43 * the dirty each time it has written this many pages.
44 */
45#define MAX_WRITEBACK_PAGES 1024
46
47/*
48 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited 39 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
49 * will look to see if it needs to force writeback or throttling. 40 * will look to see if it needs to force writeback or throttling.
50 */ 41 */
@@ -117,8 +108,6 @@ EXPORT_SYMBOL(laptop_mode);
117/* End of sysctl-exported parameters */ 108/* End of sysctl-exported parameters */
118 109
119 110
120static void background_writeout(unsigned long _min_pages);
121
122/* 111/*
123 * Scale the writeback cache size proportional to the relative writeout speeds. 112 * Scale the writeback cache size proportional to the relative writeout speeds.
124 * 113 *
@@ -320,15 +309,13 @@ static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty)
320/* 309/*
321 * 310 *
322 */ 311 */
323static DEFINE_SPINLOCK(bdi_lock);
324static unsigned int bdi_min_ratio; 312static unsigned int bdi_min_ratio;
325 313
326int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) 314int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
327{ 315{
328 int ret = 0; 316 int ret = 0;
329 unsigned long flags;
330 317
331 spin_lock_irqsave(&bdi_lock, flags); 318 spin_lock(&bdi_lock);
332 if (min_ratio > bdi->max_ratio) { 319 if (min_ratio > bdi->max_ratio) {
333 ret = -EINVAL; 320 ret = -EINVAL;
334 } else { 321 } else {
@@ -340,27 +327,26 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
340 ret = -EINVAL; 327 ret = -EINVAL;
341 } 328 }
342 } 329 }
343 spin_unlock_irqrestore(&bdi_lock, flags); 330 spin_unlock(&bdi_lock);
344 331
345 return ret; 332 return ret;
346} 333}
347 334
348int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) 335int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
349{ 336{
350 unsigned long flags;
351 int ret = 0; 337 int ret = 0;
352 338
353 if (max_ratio > 100) 339 if (max_ratio > 100)
354 return -EINVAL; 340 return -EINVAL;
355 341
356 spin_lock_irqsave(&bdi_lock, flags); 342 spin_lock(&bdi_lock);
357 if (bdi->min_ratio > max_ratio) { 343 if (bdi->min_ratio > max_ratio) {
358 ret = -EINVAL; 344 ret = -EINVAL;
359 } else { 345 } else {
360 bdi->max_ratio = max_ratio; 346 bdi->max_ratio = max_ratio;
361 bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; 347 bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
362 } 348 }
363 spin_unlock_irqrestore(&bdi_lock, flags); 349 spin_unlock(&bdi_lock);
364 350
365 return ret; 351 return ret;
366} 352}
@@ -546,7 +532,7 @@ static void balance_dirty_pages(struct address_space *mapping)
546 * up. 532 * up.
547 */ 533 */
548 if (bdi_nr_reclaimable > bdi_thresh) { 534 if (bdi_nr_reclaimable > bdi_thresh) {
549 writeback_inodes(&wbc); 535 writeback_inodes_wbc(&wbc);
550 pages_written += write_chunk - wbc.nr_to_write; 536 pages_written += write_chunk - wbc.nr_to_write;
551 get_dirty_limits(&background_thresh, &dirty_thresh, 537 get_dirty_limits(&background_thresh, &dirty_thresh,
552 &bdi_thresh, bdi); 538 &bdi_thresh, bdi);
@@ -575,7 +561,7 @@ static void balance_dirty_pages(struct address_space *mapping)
575 if (pages_written >= write_chunk) 561 if (pages_written >= write_chunk)
576 break; /* We've done our duty */ 562 break; /* We've done our duty */
577 563
578 congestion_wait(BLK_RW_ASYNC, HZ/10); 564 schedule_timeout(1);
579 } 565 }
580 566
581 if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && 567 if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
@@ -594,10 +580,18 @@ static void balance_dirty_pages(struct address_space *mapping)
594 * background_thresh, to keep the amount of dirty memory low. 580 * background_thresh, to keep the amount of dirty memory low.
595 */ 581 */
596 if ((laptop_mode && pages_written) || 582 if ((laptop_mode && pages_written) ||
597 (!laptop_mode && (global_page_state(NR_FILE_DIRTY) 583 (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
598 + global_page_state(NR_UNSTABLE_NFS) 584 + global_page_state(NR_UNSTABLE_NFS))
599 > background_thresh))) 585 > background_thresh))) {
600 pdflush_operation(background_writeout, 0); 586 struct writeback_control wbc = {
587 .bdi = bdi,
588 .sync_mode = WB_SYNC_NONE,
589 .nr_to_write = nr_writeback,
590 };
591
592
593 bdi_start_writeback(&wbc);
594 }
601} 595}
602 596
603void set_page_dirty_balance(struct page *page, int page_mkwrite) 597void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -681,153 +675,35 @@ void throttle_vm_writeout(gfp_t gfp_mask)
681 } 675 }
682} 676}
683 677
684/*
685 * writeback at least _min_pages, and keep writing until the amount of dirty
686 * memory is less than the background threshold, or until we're all clean.
687 */
688static void background_writeout(unsigned long _min_pages)
689{
690 long min_pages = _min_pages;
691 struct writeback_control wbc = {
692 .bdi = NULL,
693 .sync_mode = WB_SYNC_NONE,
694 .older_than_this = NULL,
695 .nr_to_write = 0,
696 .nonblocking = 1,
697 .range_cyclic = 1,
698 };
699
700 for ( ; ; ) {
701 unsigned long background_thresh;
702 unsigned long dirty_thresh;
703
704 get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
705 if (global_page_state(NR_FILE_DIRTY) +
706 global_page_state(NR_UNSTABLE_NFS) < background_thresh
707 && min_pages <= 0)
708 break;
709 wbc.more_io = 0;
710 wbc.encountered_congestion = 0;
711 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
712 wbc.pages_skipped = 0;
713 writeback_inodes(&wbc);
714 min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
715 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
716 /* Wrote less than expected */
717 if (wbc.encountered_congestion || wbc.more_io)
718 congestion_wait(BLK_RW_ASYNC, HZ/10);
719 else
720 break;
721 }
722 }
723}
724
725/*
726 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
727 * the whole world. Returns 0 if a pdflush thread was dispatched. Returns
728 * -1 if all pdflush threads were busy.
729 */
730int wakeup_pdflush(long nr_pages)
731{
732 if (nr_pages == 0)
733 nr_pages = global_page_state(NR_FILE_DIRTY) +
734 global_page_state(NR_UNSTABLE_NFS);
735 return pdflush_operation(background_writeout, nr_pages);
736}
737
738static void wb_timer_fn(unsigned long unused);
739static void laptop_timer_fn(unsigned long unused); 678static void laptop_timer_fn(unsigned long unused);
740 679
741static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
742static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); 680static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
743 681
744/* 682/*
745 * Periodic writeback of "old" data.
746 *
747 * Define "old": the first time one of an inode's pages is dirtied, we mark the
748 * dirtying-time in the inode's address_space. So this periodic writeback code
749 * just walks the superblock inode list, writing back any inodes which are
750 * older than a specific point in time.
751 *
752 * Try to run once per dirty_writeback_interval. But if a writeback event
753 * takes longer than a dirty_writeback_interval interval, then leave a
754 * one-second gap.
755 *
756 * older_than_this takes precedence over nr_to_write. So we'll only write back
757 * all dirty pages if they are all attached to "old" mappings.
758 */
759static void wb_kupdate(unsigned long arg)
760{
761 unsigned long oldest_jif;
762 unsigned long start_jif;
763 unsigned long next_jif;
764 long nr_to_write;
765 struct writeback_control wbc = {
766 .bdi = NULL,
767 .sync_mode = WB_SYNC_NONE,
768 .older_than_this = &oldest_jif,
769 .nr_to_write = 0,
770 .nonblocking = 1,
771 .for_kupdate = 1,
772 .range_cyclic = 1,
773 };
774
775 sync_supers();
776
777 oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
778 start_jif = jiffies;
779 next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10);
780 nr_to_write = global_page_state(NR_FILE_DIRTY) +
781 global_page_state(NR_UNSTABLE_NFS) +
782 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
783 while (nr_to_write > 0) {
784 wbc.more_io = 0;
785 wbc.encountered_congestion = 0;
786 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
787 writeback_inodes(&wbc);
788 if (wbc.nr_to_write > 0) {
789 if (wbc.encountered_congestion || wbc.more_io)
790 congestion_wait(BLK_RW_ASYNC, HZ/10);
791 else
792 break; /* All the old data is written */
793 }
794 nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
795 }
796 if (time_before(next_jif, jiffies + HZ))
797 next_jif = jiffies + HZ;
798 if (dirty_writeback_interval)
799 mod_timer(&wb_timer, next_jif);
800}
801
802/*
803 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs 683 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
804 */ 684 */
805int dirty_writeback_centisecs_handler(ctl_table *table, int write, 685int dirty_writeback_centisecs_handler(ctl_table *table, int write,
806 struct file *file, void __user *buffer, size_t *length, loff_t *ppos) 686 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
807{ 687{
808 proc_dointvec(table, write, file, buffer, length, ppos); 688 proc_dointvec(table, write, file, buffer, length, ppos);
809 if (dirty_writeback_interval)
810 mod_timer(&wb_timer, jiffies +
811 msecs_to_jiffies(dirty_writeback_interval * 10));
812 else
813 del_timer(&wb_timer);
814 return 0; 689 return 0;
815} 690}
816 691
817static void wb_timer_fn(unsigned long unused) 692static void do_laptop_sync(struct work_struct *work)
818{
819 if (pdflush_operation(wb_kupdate, 0) < 0)
820 mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */
821}
822
823static void laptop_flush(unsigned long unused)
824{ 693{
825 sys_sync(); 694 wakeup_flusher_threads(0);
695 kfree(work);
826} 696}
827 697
828static void laptop_timer_fn(unsigned long unused) 698static void laptop_timer_fn(unsigned long unused)
829{ 699{
830 pdflush_operation(laptop_flush, 0); 700 struct work_struct *work;
701
702 work = kmalloc(sizeof(*work), GFP_ATOMIC);
703 if (work) {
704 INIT_WORK(work, do_laptop_sync);
705 schedule_work(work);
706 }
831} 707}
832 708
833/* 709/*
@@ -910,8 +786,6 @@ void __init page_writeback_init(void)
910{ 786{
911 int shift; 787 int shift;
912 788
913 mod_timer(&wb_timer,
914 jiffies + msecs_to_jiffies(dirty_writeback_interval * 10));
915 writeback_set_ratelimit(); 789 writeback_set_ratelimit();
916 register_cpu_notifier(&ratelimit_nb); 790 register_cpu_notifier(&ratelimit_nb);
917 791
diff --git a/mm/pdflush.c b/mm/pdflush.c
deleted file mode 100644
index 235ac440c44e..000000000000
--- a/mm/pdflush.c
+++ /dev/null
@@ -1,269 +0,0 @@
1/*
2 * mm/pdflush.c - worker threads for writing back filesystem data
3 *
4 * Copyright (C) 2002, Linus Torvalds.
5 *
6 * 09Apr2002 Andrew Morton
7 * Initial version
8 * 29Feb2004 kaos@sgi.com
9 * Move worker thread creation to kthread to avoid chewing
10 * up stack space with nested calls to kernel_thread.
11 */
12
13#include <linux/sched.h>
14#include <linux/list.h>
15#include <linux/signal.h>
16#include <linux/spinlock.h>
17#include <linux/gfp.h>
18#include <linux/init.h>
19#include <linux/module.h>
20#include <linux/fs.h> /* Needed by writeback.h */
21#include <linux/writeback.h> /* Prototypes pdflush_operation() */
22#include <linux/kthread.h>
23#include <linux/cpuset.h>
24#include <linux/freezer.h>
25
26
27/*
28 * Minimum and maximum number of pdflush instances
29 */
30#define MIN_PDFLUSH_THREADS 2
31#define MAX_PDFLUSH_THREADS 8
32
33static void start_one_pdflush_thread(void);
34
35
36/*
37 * The pdflush threads are worker threads for writing back dirty data.
38 * Ideally, we'd like one thread per active disk spindle. But the disk
39 * topology is very hard to divine at this level. Instead, we take
40 * care in various places to prevent more than one pdflush thread from
41 * performing writeback against a single filesystem. pdflush threads
42 * have the PF_FLUSHER flag set in current->flags to aid in this.
43 */
44
45/*
46 * All the pdflush threads. Protected by pdflush_lock
47 */
48static LIST_HEAD(pdflush_list);
49static DEFINE_SPINLOCK(pdflush_lock);
50
51/*
52 * The count of currently-running pdflush threads. Protected
53 * by pdflush_lock.
54 *
55 * Readable by sysctl, but not writable. Published to userspace at
56 * /proc/sys/vm/nr_pdflush_threads.
57 */
58int nr_pdflush_threads = 0;
59
60/*
61 * The time at which the pdflush thread pool last went empty
62 */
63static unsigned long last_empty_jifs;
64
65/*
66 * The pdflush thread.
67 *
68 * Thread pool management algorithm:
69 *
70 * - The minimum and maximum number of pdflush instances are bound
71 * by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS.
72 *
73 * - If there have been no idle pdflush instances for 1 second, create
74 * a new one.
75 *
76 * - If the least-recently-went-to-sleep pdflush thread has been asleep
77 * for more than one second, terminate a thread.
78 */
79
80/*
81 * A structure for passing work to a pdflush thread. Also for passing
82 * state information between pdflush threads. Protected by pdflush_lock.
83 */
84struct pdflush_work {
85 struct task_struct *who; /* The thread */
86 void (*fn)(unsigned long); /* A callback function */
87 unsigned long arg0; /* An argument to the callback */
88 struct list_head list; /* On pdflush_list, when idle */
89 unsigned long when_i_went_to_sleep;
90};
91
92static int __pdflush(struct pdflush_work *my_work)
93{
94 current->flags |= PF_FLUSHER | PF_SWAPWRITE;
95 set_freezable();
96 my_work->fn = NULL;
97 my_work->who = current;
98 INIT_LIST_HEAD(&my_work->list);
99
100 spin_lock_irq(&pdflush_lock);
101 for ( ; ; ) {
102 struct pdflush_work *pdf;
103
104 set_current_state(TASK_INTERRUPTIBLE);
105 list_move(&my_work->list, &pdflush_list);
106 my_work->when_i_went_to_sleep = jiffies;
107 spin_unlock_irq(&pdflush_lock);
108 schedule();
109 try_to_freeze();
110 spin_lock_irq(&pdflush_lock);
111 if (!list_empty(&my_work->list)) {
112 /*
113 * Someone woke us up, but without removing our control
114 * structure from the global list. swsusp will do this
115 * in try_to_freeze()->refrigerator(). Handle it.
116 */
117 my_work->fn = NULL;
118 continue;
119 }
120 if (my_work->fn == NULL) {
121 printk("pdflush: bogus wakeup\n");
122 continue;
123 }
124 spin_unlock_irq(&pdflush_lock);
125
126 (*my_work->fn)(my_work->arg0);
127
128 spin_lock_irq(&pdflush_lock);
129
130 /*
131 * Thread creation: For how long have there been zero
132 * available threads?
133 *
134 * To throttle creation, we reset last_empty_jifs.
135 */
136 if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
137 if (list_empty(&pdflush_list)) {
138 if (nr_pdflush_threads < MAX_PDFLUSH_THREADS) {
139 last_empty_jifs = jiffies;
140 nr_pdflush_threads++;
141 spin_unlock_irq(&pdflush_lock);
142 start_one_pdflush_thread();
143 spin_lock_irq(&pdflush_lock);
144 }
145 }
146 }
147
148 my_work->fn = NULL;
149
150 /*
151 * Thread destruction: For how long has the sleepiest
152 * thread slept?
153 */
154 if (list_empty(&pdflush_list))
155 continue;
156 if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
157 continue;
158 pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
159 if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
160 /* Limit exit rate */
161 pdf->when_i_went_to_sleep = jiffies;
162 break; /* exeunt */
163 }
164 }
165 nr_pdflush_threads--;
166 spin_unlock_irq(&pdflush_lock);
167 return 0;
168}
169
170/*
171 * Of course, my_work wants to be just a local in __pdflush(). It is
172 * separated out in this manner to hopefully prevent the compiler from
173 * performing unfortunate optimisations against the auto variables. Because
174 * these are visible to other tasks and CPUs. (No problem has actually
175 * been observed. This is just paranoia).
176 */
177static int pdflush(void *dummy)
178{
179 struct pdflush_work my_work;
180 cpumask_var_t cpus_allowed;
181
182 /*
183 * Since the caller doesn't even check kthread_run() worked, let's not
184 * freak out too much if this fails.
185 */
186 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
187 printk(KERN_WARNING "pdflush failed to allocate cpumask\n");
188 return 0;
189 }
190
191 /*
192 * pdflush can spend a lot of time doing encryption via dm-crypt. We
193 * don't want to do that at keventd's priority.
194 */
195 set_user_nice(current, 0);
196
197 /*
198 * Some configs put our parent kthread in a limited cpuset,
199 * which kthread() overrides, forcing cpus_allowed == cpu_all_mask.
200 * Our needs are more modest - cut back to our cpusets cpus_allowed.
201 * This is needed as pdflush's are dynamically created and destroyed.
202 * The boottime pdflush's are easily placed w/o these 2 lines.
203 */
204 cpuset_cpus_allowed(current, cpus_allowed);
205 set_cpus_allowed_ptr(current, cpus_allowed);
206 free_cpumask_var(cpus_allowed);
207
208 return __pdflush(&my_work);
209}
210
211/*
212 * Attempt to wake up a pdflush thread, and get it to do some work for you.
213 * Returns zero if it indeed managed to find a worker thread, and passed your
214 * payload to it.
215 */
216int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
217{
218 unsigned long flags;
219 int ret = 0;
220
221 BUG_ON(fn == NULL); /* Hard to diagnose if it's deferred */
222
223 spin_lock_irqsave(&pdflush_lock, flags);
224 if (list_empty(&pdflush_list)) {
225 ret = -1;
226 } else {
227 struct pdflush_work *pdf;
228
229 pdf = list_entry(pdflush_list.next, struct pdflush_work, list);
230 list_del_init(&pdf->list);
231 if (list_empty(&pdflush_list))
232 last_empty_jifs = jiffies;
233 pdf->fn = fn;
234 pdf->arg0 = arg0;
235 wake_up_process(pdf->who);
236 }
237 spin_unlock_irqrestore(&pdflush_lock, flags);
238
239 return ret;
240}
241
242static void start_one_pdflush_thread(void)
243{
244 struct task_struct *k;
245
246 k = kthread_run(pdflush, NULL, "pdflush");
247 if (unlikely(IS_ERR(k))) {
248 spin_lock_irq(&pdflush_lock);
249 nr_pdflush_threads--;
250 spin_unlock_irq(&pdflush_lock);
251 }
252}
253
254static int __init pdflush_init(void)
255{
256 int i;
257
258 /*
259 * Pre-set nr_pdflush_threads... If we fail to create,
260 * the count will be decremented.
261 */
262 nr_pdflush_threads = MIN_PDFLUSH_THREADS;
263
264 for (i = 0; i < MIN_PDFLUSH_THREADS; i++)
265 start_one_pdflush_thread();
266 return 0;
267}
268
269module_init(pdflush_init);
diff --git a/mm/shmem.c b/mm/shmem.c
index d713239ce2ce..5a0b3d4055f3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2446,7 +2446,7 @@ static const struct inode_operations shmem_inode_operations = {
2446 .getxattr = generic_getxattr, 2446 .getxattr = generic_getxattr,
2447 .listxattr = generic_listxattr, 2447 .listxattr = generic_listxattr,
2448 .removexattr = generic_removexattr, 2448 .removexattr = generic_removexattr,
2449 .permission = shmem_permission, 2449 .check_acl = shmem_check_acl,
2450#endif 2450#endif
2451 2451
2452}; 2452};
@@ -2469,7 +2469,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
2469 .getxattr = generic_getxattr, 2469 .getxattr = generic_getxattr,
2470 .listxattr = generic_listxattr, 2470 .listxattr = generic_listxattr,
2471 .removexattr = generic_removexattr, 2471 .removexattr = generic_removexattr,
2472 .permission = shmem_permission, 2472 .check_acl = shmem_check_acl,
2473#endif 2473#endif
2474}; 2474};
2475 2475
@@ -2480,7 +2480,7 @@ static const struct inode_operations shmem_special_inode_operations = {
2480 .getxattr = generic_getxattr, 2480 .getxattr = generic_getxattr,
2481 .listxattr = generic_listxattr, 2481 .listxattr = generic_listxattr,
2482 .removexattr = generic_removexattr, 2482 .removexattr = generic_removexattr,
2483 .permission = shmem_permission, 2483 .check_acl = shmem_check_acl,
2484#endif 2484#endif
2485}; 2485};
2486 2486
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
index 606a8e757a42..df2c87fdae50 100644
--- a/mm/shmem_acl.c
+++ b/mm/shmem_acl.c
@@ -157,7 +157,7 @@ shmem_acl_init(struct inode *inode, struct inode *dir)
157/** 157/**
158 * shmem_check_acl - check_acl() callback for generic_permission() 158 * shmem_check_acl - check_acl() callback for generic_permission()
159 */ 159 */
160static int 160int
161shmem_check_acl(struct inode *inode, int mask) 161shmem_check_acl(struct inode *inode, int mask)
162{ 162{
163 struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS); 163 struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS);
@@ -169,12 +169,3 @@ shmem_check_acl(struct inode *inode, int mask)
169 } 169 }
170 return -EAGAIN; 170 return -EAGAIN;
171} 171}
172
173/**
174 * shmem_permission - permission() inode operation
175 */
176int
177shmem_permission(struct inode *inode, int mask)
178{
179 return generic_permission(inode, mask, shmem_check_acl);
180}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 42cd38eba79f..5ae6b8b78c80 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -34,6 +34,7 @@ static const struct address_space_operations swap_aops = {
34}; 34};
35 35
36static struct backing_dev_info swap_backing_dev_info = { 36static struct backing_dev_info swap_backing_dev_info = {
37 .name = "swap",
37 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, 38 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
38 .unplug_io_fn = swap_unplug_io_fn, 39 .unplug_io_fn = swap_unplug_io_fn,
39}; 40};
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 94e86dd6954c..ba8228e0a806 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1720,7 +1720,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1720 */ 1720 */
1721 if (total_scanned > sc->swap_cluster_max + 1721 if (total_scanned > sc->swap_cluster_max +
1722 sc->swap_cluster_max / 2) { 1722 sc->swap_cluster_max / 2) {
1723 wakeup_pdflush(laptop_mode ? 0 : total_scanned); 1723 wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
1724 sc->may_writepage = 1; 1724 sc->may_writepage = 1;
1725 } 1725 }
1726 1726
diff --git a/net/core/dev.c b/net/core/dev.c
index 6a94475aee85..278d489aad3b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1031,7 +1031,7 @@ void dev_load(struct net *net, const char *name)
1031 dev = __dev_get_by_name(net, name); 1031 dev = __dev_get_by_name(net, name);
1032 read_unlock(&dev_base_lock); 1032 read_unlock(&dev_base_lock);
1033 1033
1034 if (!dev && capable(CAP_SYS_MODULE)) 1034 if (!dev && capable(CAP_NET_ADMIN))
1035 request_module("%s", name); 1035 request_module("%s", name);
1036} 1036}
1037 1037
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index e92beb9e55e0..6428b342b164 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -116,7 +116,7 @@ int tcp_set_default_congestion_control(const char *name)
116 spin_lock(&tcp_cong_list_lock); 116 spin_lock(&tcp_cong_list_lock);
117 ca = tcp_ca_find(name); 117 ca = tcp_ca_find(name);
118#ifdef CONFIG_MODULES 118#ifdef CONFIG_MODULES
119 if (!ca && capable(CAP_SYS_MODULE)) { 119 if (!ca && capable(CAP_NET_ADMIN)) {
120 spin_unlock(&tcp_cong_list_lock); 120 spin_unlock(&tcp_cong_list_lock);
121 121
122 request_module("tcp_%s", name); 122 request_module("tcp_%s", name);
@@ -246,7 +246,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
246 246
247#ifdef CONFIG_MODULES 247#ifdef CONFIG_MODULES
248 /* not found attempt to autoload module */ 248 /* not found attempt to autoload module */
249 if (!ca && capable(CAP_SYS_MODULE)) { 249 if (!ca && capable(CAP_NET_ADMIN)) {
250 rcu_read_unlock(); 250 rcu_read_unlock();
251 request_module("tcp_%s", name); 251 request_module("tcp_%s", name);
252 rcu_read_lock(); 252 rcu_read_lock();
diff --git a/security/Makefile b/security/Makefile
index b56e7f9ecbc2..95ecc06392d7 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -16,9 +16,7 @@ obj-$(CONFIG_SECURITYFS) += inode.o
16# Must precede capability.o in order to stack properly. 16# Must precede capability.o in order to stack properly.
17obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o 17obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o
18obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o 18obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o
19ifeq ($(CONFIG_AUDIT),y) 19obj-$(CONFIG_AUDIT) += lsm_audit.o
20obj-$(CONFIG_SECURITY_SMACK) += lsm_audit.o
21endif
22obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o 20obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o
23obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o 21obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o
24obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o 22obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o
diff --git a/security/capability.c b/security/capability.c
index 88f752e8152c..fce07a7bc825 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -373,6 +373,11 @@ static int cap_task_create(unsigned long clone_flags)
373 return 0; 373 return 0;
374} 374}
375 375
376static int cap_cred_alloc_blank(struct cred *cred, gfp_t gfp)
377{
378 return 0;
379}
380
376static void cap_cred_free(struct cred *cred) 381static void cap_cred_free(struct cred *cred)
377{ 382{
378} 383}
@@ -386,6 +391,10 @@ static void cap_cred_commit(struct cred *new, const struct cred *old)
386{ 391{
387} 392}
388 393
394static void cap_cred_transfer(struct cred *new, const struct cred *old)
395{
396}
397
389static int cap_kernel_act_as(struct cred *new, u32 secid) 398static int cap_kernel_act_as(struct cred *new, u32 secid)
390{ 399{
391 return 0; 400 return 0;
@@ -396,6 +405,11 @@ static int cap_kernel_create_files_as(struct cred *new, struct inode *inode)
396 return 0; 405 return 0;
397} 406}
398 407
408static int cap_kernel_module_request(void)
409{
410 return 0;
411}
412
399static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) 413static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
400{ 414{
401 return 0; 415 return 0;
@@ -701,10 +715,26 @@ static void cap_inet_conn_established(struct sock *sk, struct sk_buff *skb)
701{ 715{
702} 716}
703 717
718
719
704static void cap_req_classify_flow(const struct request_sock *req, 720static void cap_req_classify_flow(const struct request_sock *req,
705 struct flowi *fl) 721 struct flowi *fl)
706{ 722{
707} 723}
724
725static int cap_tun_dev_create(void)
726{
727 return 0;
728}
729
730static void cap_tun_dev_post_create(struct sock *sk)
731{
732}
733
734static int cap_tun_dev_attach(struct sock *sk)
735{
736 return 0;
737}
708#endif /* CONFIG_SECURITY_NETWORK */ 738#endif /* CONFIG_SECURITY_NETWORK */
709 739
710#ifdef CONFIG_SECURITY_NETWORK_XFRM 740#ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -792,6 +822,20 @@ static void cap_release_secctx(char *secdata, u32 seclen)
792{ 822{
793} 823}
794 824
825static int cap_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
826{
827 return 0;
828}
829
830static int cap_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
831{
832 return 0;
833}
834
835static int cap_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
836{
837 return 0;
838}
795#ifdef CONFIG_KEYS 839#ifdef CONFIG_KEYS
796static int cap_key_alloc(struct key *key, const struct cred *cred, 840static int cap_key_alloc(struct key *key, const struct cred *cred,
797 unsigned long flags) 841 unsigned long flags)
@@ -815,6 +859,13 @@ static int cap_key_getsecurity(struct key *key, char **_buffer)
815 return 0; 859 return 0;
816} 860}
817 861
862static int cap_key_session_to_parent(const struct cred *cred,
863 const struct cred *parent_cred,
864 struct key *key)
865{
866 return 0;
867}
868
818#endif /* CONFIG_KEYS */ 869#endif /* CONFIG_KEYS */
819 870
820#ifdef CONFIG_AUDIT 871#ifdef CONFIG_AUDIT
@@ -854,7 +905,7 @@ struct security_operations default_security_ops = {
854 905
855void security_fixup_ops(struct security_operations *ops) 906void security_fixup_ops(struct security_operations *ops)
856{ 907{
857 set_to_cap_if_null(ops, ptrace_may_access); 908 set_to_cap_if_null(ops, ptrace_access_check);
858 set_to_cap_if_null(ops, ptrace_traceme); 909 set_to_cap_if_null(ops, ptrace_traceme);
859 set_to_cap_if_null(ops, capget); 910 set_to_cap_if_null(ops, capget);
860 set_to_cap_if_null(ops, capset); 911 set_to_cap_if_null(ops, capset);
@@ -940,11 +991,14 @@ void security_fixup_ops(struct security_operations *ops)
940 set_to_cap_if_null(ops, file_receive); 991 set_to_cap_if_null(ops, file_receive);
941 set_to_cap_if_null(ops, dentry_open); 992 set_to_cap_if_null(ops, dentry_open);
942 set_to_cap_if_null(ops, task_create); 993 set_to_cap_if_null(ops, task_create);
994 set_to_cap_if_null(ops, cred_alloc_blank);
943 set_to_cap_if_null(ops, cred_free); 995 set_to_cap_if_null(ops, cred_free);
944 set_to_cap_if_null(ops, cred_prepare); 996 set_to_cap_if_null(ops, cred_prepare);
945 set_to_cap_if_null(ops, cred_commit); 997 set_to_cap_if_null(ops, cred_commit);
998 set_to_cap_if_null(ops, cred_transfer);
946 set_to_cap_if_null(ops, kernel_act_as); 999 set_to_cap_if_null(ops, kernel_act_as);
947 set_to_cap_if_null(ops, kernel_create_files_as); 1000 set_to_cap_if_null(ops, kernel_create_files_as);
1001 set_to_cap_if_null(ops, kernel_module_request);
948 set_to_cap_if_null(ops, task_setuid); 1002 set_to_cap_if_null(ops, task_setuid);
949 set_to_cap_if_null(ops, task_fix_setuid); 1003 set_to_cap_if_null(ops, task_fix_setuid);
950 set_to_cap_if_null(ops, task_setgid); 1004 set_to_cap_if_null(ops, task_setgid);
@@ -992,6 +1046,9 @@ void security_fixup_ops(struct security_operations *ops)
992 set_to_cap_if_null(ops, secid_to_secctx); 1046 set_to_cap_if_null(ops, secid_to_secctx);
993 set_to_cap_if_null(ops, secctx_to_secid); 1047 set_to_cap_if_null(ops, secctx_to_secid);
994 set_to_cap_if_null(ops, release_secctx); 1048 set_to_cap_if_null(ops, release_secctx);
1049 set_to_cap_if_null(ops, inode_notifysecctx);
1050 set_to_cap_if_null(ops, inode_setsecctx);
1051 set_to_cap_if_null(ops, inode_getsecctx);
995#ifdef CONFIG_SECURITY_NETWORK 1052#ifdef CONFIG_SECURITY_NETWORK
996 set_to_cap_if_null(ops, unix_stream_connect); 1053 set_to_cap_if_null(ops, unix_stream_connect);
997 set_to_cap_if_null(ops, unix_may_send); 1054 set_to_cap_if_null(ops, unix_may_send);
@@ -1020,6 +1077,9 @@ void security_fixup_ops(struct security_operations *ops)
1020 set_to_cap_if_null(ops, inet_csk_clone); 1077 set_to_cap_if_null(ops, inet_csk_clone);
1021 set_to_cap_if_null(ops, inet_conn_established); 1078 set_to_cap_if_null(ops, inet_conn_established);
1022 set_to_cap_if_null(ops, req_classify_flow); 1079 set_to_cap_if_null(ops, req_classify_flow);
1080 set_to_cap_if_null(ops, tun_dev_create);
1081 set_to_cap_if_null(ops, tun_dev_post_create);
1082 set_to_cap_if_null(ops, tun_dev_attach);
1023#endif /* CONFIG_SECURITY_NETWORK */ 1083#endif /* CONFIG_SECURITY_NETWORK */
1024#ifdef CONFIG_SECURITY_NETWORK_XFRM 1084#ifdef CONFIG_SECURITY_NETWORK_XFRM
1025 set_to_cap_if_null(ops, xfrm_policy_alloc_security); 1085 set_to_cap_if_null(ops, xfrm_policy_alloc_security);
@@ -1038,6 +1098,7 @@ void security_fixup_ops(struct security_operations *ops)
1038 set_to_cap_if_null(ops, key_free); 1098 set_to_cap_if_null(ops, key_free);
1039 set_to_cap_if_null(ops, key_permission); 1099 set_to_cap_if_null(ops, key_permission);
1040 set_to_cap_if_null(ops, key_getsecurity); 1100 set_to_cap_if_null(ops, key_getsecurity);
1101 set_to_cap_if_null(ops, key_session_to_parent);
1041#endif /* CONFIG_KEYS */ 1102#endif /* CONFIG_KEYS */
1042#ifdef CONFIG_AUDIT 1103#ifdef CONFIG_AUDIT
1043 set_to_cap_if_null(ops, audit_rule_init); 1104 set_to_cap_if_null(ops, audit_rule_init);
diff --git a/security/commoncap.c b/security/commoncap.c
index e3097c0a1311..fe30751a6cd9 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -101,7 +101,7 @@ int cap_settime(struct timespec *ts, struct timezone *tz)
101} 101}
102 102
103/** 103/**
104 * cap_ptrace_may_access - Determine whether the current process may access 104 * cap_ptrace_access_check - Determine whether the current process may access
105 * another 105 * another
106 * @child: The process to be accessed 106 * @child: The process to be accessed
107 * @mode: The mode of attachment. 107 * @mode: The mode of attachment.
@@ -109,7 +109,7 @@ int cap_settime(struct timespec *ts, struct timezone *tz)
109 * Determine whether a process may access another, returning 0 if permission 109 * Determine whether a process may access another, returning 0 if permission
110 * granted, -ve if denied. 110 * granted, -ve if denied.
111 */ 111 */
112int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) 112int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
113{ 113{
114 int ret = 0; 114 int ret = 0;
115 115
diff --git a/security/keys/Makefile b/security/keys/Makefile
index 747a464943af..74d5447d7df7 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -3,6 +3,7 @@
3# 3#
4 4
5obj-y := \ 5obj-y := \
6 gc.o \
6 key.o \ 7 key.o \
7 keyring.o \ 8 keyring.o \
8 keyctl.o \ 9 keyctl.o \
diff --git a/security/keys/compat.c b/security/keys/compat.c
index c766c68a63bc..792c0a611a6d 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -82,6 +82,9 @@ asmlinkage long compat_sys_keyctl(u32 option,
82 case KEYCTL_GET_SECURITY: 82 case KEYCTL_GET_SECURITY:
83 return keyctl_get_security(arg2, compat_ptr(arg3), arg4); 83 return keyctl_get_security(arg2, compat_ptr(arg3), arg4);
84 84
85 case KEYCTL_SESSION_TO_PARENT:
86 return keyctl_session_to_parent();
87
85 default: 88 default:
86 return -EOPNOTSUPP; 89 return -EOPNOTSUPP;
87 } 90 }
diff --git a/security/keys/gc.c b/security/keys/gc.c
new file mode 100644
index 000000000000..1e616aef55fd
--- /dev/null
+++ b/security/keys/gc.c
@@ -0,0 +1,194 @@
1/* Key garbage collector
2 *
3 * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <keys/keyring-type.h>
14#include "internal.h"
15
16/*
17 * Delay between key revocation/expiry in seconds
18 */
19unsigned key_gc_delay = 5 * 60;
20
21/*
22 * Reaper
23 */
24static void key_gc_timer_func(unsigned long);
25static void key_garbage_collector(struct work_struct *);
26static DEFINE_TIMER(key_gc_timer, key_gc_timer_func, 0, 0);
27static DECLARE_WORK(key_gc_work, key_garbage_collector);
28static key_serial_t key_gc_cursor; /* the last key the gc considered */
29static unsigned long key_gc_executing;
30static time_t key_gc_next_run = LONG_MAX;
31
32/*
33 * Schedule a garbage collection run
34 * - precision isn't particularly important
35 */
36void key_schedule_gc(time_t gc_at)
37{
38 unsigned long expires;
39 time_t now = current_kernel_time().tv_sec;
40
41 kenter("%ld", gc_at - now);
42
43 gc_at += key_gc_delay;
44
45 if (now >= gc_at) {
46 schedule_work(&key_gc_work);
47 } else if (gc_at < key_gc_next_run) {
48 expires = jiffies + (gc_at - now) * HZ;
49 mod_timer(&key_gc_timer, expires);
50 }
51}
52
53/*
54 * The garbage collector timer kicked off
55 */
56static void key_gc_timer_func(unsigned long data)
57{
58 kenter("");
59 key_gc_next_run = LONG_MAX;
60 schedule_work(&key_gc_work);
61}
62
63/*
64 * Garbage collect pointers from a keyring
65 * - return true if we altered the keyring
66 */
67static bool key_gc_keyring(struct key *keyring, time_t limit)
68 __releases(key_serial_lock)
69{
70 struct keyring_list *klist;
71 struct key *key;
72 int loop;
73
74 kenter("%x", key_serial(keyring));
75
76 if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
77 goto dont_gc;
78
79 /* scan the keyring looking for dead keys */
80 klist = rcu_dereference(keyring->payload.subscriptions);
81 if (!klist)
82 goto dont_gc;
83
84 for (loop = klist->nkeys - 1; loop >= 0; loop--) {
85 key = klist->keys[loop];
86 if (test_bit(KEY_FLAG_DEAD, &key->flags) ||
87 (key->expiry > 0 && key->expiry <= limit))
88 goto do_gc;
89 }
90
91dont_gc:
92 kleave(" = false");
93 return false;
94
95do_gc:
96 key_gc_cursor = keyring->serial;
97 key_get(keyring);
98 spin_unlock(&key_serial_lock);
99 keyring_gc(keyring, limit);
100 key_put(keyring);
101 kleave(" = true");
102 return true;
103}
104
105/*
106 * Garbage collector for keys
107 * - this involves scanning the keyrings for dead, expired and revoked keys
108 * that have overstayed their welcome
109 */
110static void key_garbage_collector(struct work_struct *work)
111{
112 struct rb_node *rb;
113 key_serial_t cursor;
114 struct key *key, *xkey;
115 time_t new_timer = LONG_MAX, limit;
116
117 kenter("");
118
119 if (test_and_set_bit(0, &key_gc_executing)) {
120 key_schedule_gc(current_kernel_time().tv_sec);
121 return;
122 }
123
124 limit = current_kernel_time().tv_sec;
125 if (limit > key_gc_delay)
126 limit -= key_gc_delay;
127 else
128 limit = key_gc_delay;
129
130 spin_lock(&key_serial_lock);
131
132 if (RB_EMPTY_ROOT(&key_serial_tree))
133 goto reached_the_end;
134
135 cursor = key_gc_cursor;
136 if (cursor < 0)
137 cursor = 0;
138
139 /* find the first key above the cursor */
140 key = NULL;
141 rb = key_serial_tree.rb_node;
142 while (rb) {
143 xkey = rb_entry(rb, struct key, serial_node);
144 if (cursor < xkey->serial) {
145 key = xkey;
146 rb = rb->rb_left;
147 } else if (cursor > xkey->serial) {
148 rb = rb->rb_right;
149 } else {
150 rb = rb_next(rb);
151 if (!rb)
152 goto reached_the_end;
153 key = rb_entry(rb, struct key, serial_node);
154 break;
155 }
156 }
157
158 if (!key)
159 goto reached_the_end;
160
161 /* trawl through the keys looking for keyrings */
162 for (;;) {
163 if (key->expiry > 0 && key->expiry < new_timer)
164 new_timer = key->expiry;
165
166 if (key->type == &key_type_keyring &&
167 key_gc_keyring(key, limit)) {
168 /* the gc ate our lock */
169 schedule_work(&key_gc_work);
170 goto no_unlock;
171 }
172
173 rb = rb_next(&key->serial_node);
174 if (!rb) {
175 key_gc_cursor = 0;
176 break;
177 }
178 key = rb_entry(rb, struct key, serial_node);
179 }
180
181out:
182 spin_unlock(&key_serial_lock);
183no_unlock:
184 clear_bit(0, &key_gc_executing);
185 if (new_timer < LONG_MAX)
186 key_schedule_gc(new_timer);
187
188 kleave("");
189 return;
190
191reached_the_end:
192 key_gc_cursor = 0;
193 goto out;
194}
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 9fb679c66b8a..24ba0307b7ad 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -124,11 +124,18 @@ extern struct key *request_key_and_link(struct key_type *type,
124 struct key *dest_keyring, 124 struct key *dest_keyring,
125 unsigned long flags); 125 unsigned long flags);
126 126
127extern key_ref_t lookup_user_key(key_serial_t id, int create, int partial, 127extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
128 key_perm_t perm); 128 key_perm_t perm);
129#define KEY_LOOKUP_CREATE 0x01
130#define KEY_LOOKUP_PARTIAL 0x02
131#define KEY_LOOKUP_FOR_UNLINK 0x04
129 132
130extern long join_session_keyring(const char *name); 133extern long join_session_keyring(const char *name);
131 134
135extern unsigned key_gc_delay;
136extern void keyring_gc(struct key *keyring, time_t limit);
137extern void key_schedule_gc(time_t expiry_at);
138
132/* 139/*
133 * check to see whether permission is granted to use a key in the desired way 140 * check to see whether permission is granted to use a key in the desired way
134 */ 141 */
@@ -194,6 +201,7 @@ extern long keyctl_set_timeout(key_serial_t, unsigned);
194extern long keyctl_assume_authority(key_serial_t); 201extern long keyctl_assume_authority(key_serial_t);
195extern long keyctl_get_security(key_serial_t keyid, char __user *buffer, 202extern long keyctl_get_security(key_serial_t keyid, char __user *buffer,
196 size_t buflen); 203 size_t buflen);
204extern long keyctl_session_to_parent(void);
197 205
198/* 206/*
199 * debugging key validation 207 * debugging key validation
diff --git a/security/keys/key.c b/security/keys/key.c
index 4a1297d1ada4..08531ad0f252 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -500,6 +500,7 @@ int key_negate_and_link(struct key *key,
500 set_bit(KEY_FLAG_INSTANTIATED, &key->flags); 500 set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
501 now = current_kernel_time(); 501 now = current_kernel_time();
502 key->expiry = now.tv_sec + timeout; 502 key->expiry = now.tv_sec + timeout;
503 key_schedule_gc(key->expiry);
503 504
504 if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) 505 if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags))
505 awaken = 1; 506 awaken = 1;
@@ -642,10 +643,8 @@ struct key *key_lookup(key_serial_t id)
642 goto error; 643 goto error;
643 644
644 found: 645 found:
645 /* pretend it doesn't exist if it's dead */ 646 /* pretend it doesn't exist if it is awaiting deletion */
646 if (atomic_read(&key->usage) == 0 || 647 if (atomic_read(&key->usage) == 0)
647 test_bit(KEY_FLAG_DEAD, &key->flags) ||
648 key->type == &key_type_dead)
649 goto not_found; 648 goto not_found;
650 649
651 /* this races with key_put(), but that doesn't matter since key_put() 650 /* this races with key_put(), but that doesn't matter since key_put()
@@ -890,6 +889,9 @@ EXPORT_SYMBOL(key_update);
890 */ 889 */
891void key_revoke(struct key *key) 890void key_revoke(struct key *key)
892{ 891{
892 struct timespec now;
893 time_t time;
894
893 key_check(key); 895 key_check(key);
894 896
895 /* make sure no one's trying to change or use the key when we mark it 897 /* make sure no one's trying to change or use the key when we mark it
@@ -902,6 +904,14 @@ void key_revoke(struct key *key)
902 key->type->revoke) 904 key->type->revoke)
903 key->type->revoke(key); 905 key->type->revoke(key);
904 906
907 /* set the death time to no more than the expiry time */
908 now = current_kernel_time();
909 time = now.tv_sec;
910 if (key->revoked_at == 0 || key->revoked_at > time) {
911 key->revoked_at = time;
912 key_schedule_gc(key->revoked_at);
913 }
914
905 up_write(&key->sem); 915 up_write(&key->sem);
906 916
907} /* end key_revoke() */ 917} /* end key_revoke() */
@@ -958,8 +968,10 @@ void unregister_key_type(struct key_type *ktype)
958 for (_n = rb_first(&key_serial_tree); _n; _n = rb_next(_n)) { 968 for (_n = rb_first(&key_serial_tree); _n; _n = rb_next(_n)) {
959 key = rb_entry(_n, struct key, serial_node); 969 key = rb_entry(_n, struct key, serial_node);
960 970
961 if (key->type == ktype) 971 if (key->type == ktype) {
962 key->type = &key_type_dead; 972 key->type = &key_type_dead;
973 set_bit(KEY_FLAG_DEAD, &key->flags);
974 }
963 } 975 }
964 976
965 spin_unlock(&key_serial_lock); 977 spin_unlock(&key_serial_lock);
@@ -984,6 +996,8 @@ void unregister_key_type(struct key_type *ktype)
984 spin_unlock(&key_serial_lock); 996 spin_unlock(&key_serial_lock);
985 up_write(&key_types_sem); 997 up_write(&key_types_sem);
986 998
999 key_schedule_gc(0);
1000
987} /* end unregister_key_type() */ 1001} /* end unregister_key_type() */
988 1002
989EXPORT_SYMBOL(unregister_key_type); 1003EXPORT_SYMBOL(unregister_key_type);
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 7f09fb897d2b..74c968524592 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -103,7 +103,7 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
103 } 103 }
104 104
105 /* find the target keyring (which must be writable) */ 105 /* find the target keyring (which must be writable) */
106 keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE); 106 keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
107 if (IS_ERR(keyring_ref)) { 107 if (IS_ERR(keyring_ref)) {
108 ret = PTR_ERR(keyring_ref); 108 ret = PTR_ERR(keyring_ref);
109 goto error3; 109 goto error3;
@@ -185,7 +185,8 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type,
185 /* get the destination keyring if specified */ 185 /* get the destination keyring if specified */
186 dest_ref = NULL; 186 dest_ref = NULL;
187 if (destringid) { 187 if (destringid) {
188 dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE); 188 dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
189 KEY_WRITE);
189 if (IS_ERR(dest_ref)) { 190 if (IS_ERR(dest_ref)) {
190 ret = PTR_ERR(dest_ref); 191 ret = PTR_ERR(dest_ref);
191 goto error3; 192 goto error3;
@@ -233,9 +234,11 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type,
233long keyctl_get_keyring_ID(key_serial_t id, int create) 234long keyctl_get_keyring_ID(key_serial_t id, int create)
234{ 235{
235 key_ref_t key_ref; 236 key_ref_t key_ref;
237 unsigned long lflags;
236 long ret; 238 long ret;
237 239
238 key_ref = lookup_user_key(id, create, 0, KEY_SEARCH); 240 lflags = create ? KEY_LOOKUP_CREATE : 0;
241 key_ref = lookup_user_key(id, lflags, KEY_SEARCH);
239 if (IS_ERR(key_ref)) { 242 if (IS_ERR(key_ref)) {
240 ret = PTR_ERR(key_ref); 243 ret = PTR_ERR(key_ref);
241 goto error; 244 goto error;
@@ -309,7 +312,7 @@ long keyctl_update_key(key_serial_t id,
309 } 312 }
310 313
311 /* find the target key (which must be writable) */ 314 /* find the target key (which must be writable) */
312 key_ref = lookup_user_key(id, 0, 0, KEY_WRITE); 315 key_ref = lookup_user_key(id, 0, KEY_WRITE);
313 if (IS_ERR(key_ref)) { 316 if (IS_ERR(key_ref)) {
314 ret = PTR_ERR(key_ref); 317 ret = PTR_ERR(key_ref);
315 goto error2; 318 goto error2;
@@ -337,10 +340,16 @@ long keyctl_revoke_key(key_serial_t id)
337 key_ref_t key_ref; 340 key_ref_t key_ref;
338 long ret; 341 long ret;
339 342
340 key_ref = lookup_user_key(id, 0, 0, KEY_WRITE); 343 key_ref = lookup_user_key(id, 0, KEY_WRITE);
341 if (IS_ERR(key_ref)) { 344 if (IS_ERR(key_ref)) {
342 ret = PTR_ERR(key_ref); 345 ret = PTR_ERR(key_ref);
343 goto error; 346 if (ret != -EACCES)
347 goto error;
348 key_ref = lookup_user_key(id, 0, KEY_SETATTR);
349 if (IS_ERR(key_ref)) {
350 ret = PTR_ERR(key_ref);
351 goto error;
352 }
344 } 353 }
345 354
346 key_revoke(key_ref_to_ptr(key_ref)); 355 key_revoke(key_ref_to_ptr(key_ref));
@@ -363,7 +372,7 @@ long keyctl_keyring_clear(key_serial_t ringid)
363 key_ref_t keyring_ref; 372 key_ref_t keyring_ref;
364 long ret; 373 long ret;
365 374
366 keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE); 375 keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
367 if (IS_ERR(keyring_ref)) { 376 if (IS_ERR(keyring_ref)) {
368 ret = PTR_ERR(keyring_ref); 377 ret = PTR_ERR(keyring_ref);
369 goto error; 378 goto error;
@@ -389,13 +398,13 @@ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid)
389 key_ref_t keyring_ref, key_ref; 398 key_ref_t keyring_ref, key_ref;
390 long ret; 399 long ret;
391 400
392 keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE); 401 keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
393 if (IS_ERR(keyring_ref)) { 402 if (IS_ERR(keyring_ref)) {
394 ret = PTR_ERR(keyring_ref); 403 ret = PTR_ERR(keyring_ref);
395 goto error; 404 goto error;
396 } 405 }
397 406
398 key_ref = lookup_user_key(id, 1, 0, KEY_LINK); 407 key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_LINK);
399 if (IS_ERR(key_ref)) { 408 if (IS_ERR(key_ref)) {
400 ret = PTR_ERR(key_ref); 409 ret = PTR_ERR(key_ref);
401 goto error2; 410 goto error2;
@@ -423,13 +432,13 @@ long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid)
423 key_ref_t keyring_ref, key_ref; 432 key_ref_t keyring_ref, key_ref;
424 long ret; 433 long ret;
425 434
426 keyring_ref = lookup_user_key(ringid, 0, 0, KEY_WRITE); 435 keyring_ref = lookup_user_key(ringid, 0, KEY_WRITE);
427 if (IS_ERR(keyring_ref)) { 436 if (IS_ERR(keyring_ref)) {
428 ret = PTR_ERR(keyring_ref); 437 ret = PTR_ERR(keyring_ref);
429 goto error; 438 goto error;
430 } 439 }
431 440
432 key_ref = lookup_user_key(id, 0, 0, 0); 441 key_ref = lookup_user_key(id, KEY_LOOKUP_FOR_UNLINK, 0);
433 if (IS_ERR(key_ref)) { 442 if (IS_ERR(key_ref)) {
434 ret = PTR_ERR(key_ref); 443 ret = PTR_ERR(key_ref);
435 goto error2; 444 goto error2;
@@ -465,7 +474,7 @@ long keyctl_describe_key(key_serial_t keyid,
465 char *tmpbuf; 474 char *tmpbuf;
466 long ret; 475 long ret;
467 476
468 key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW); 477 key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_VIEW);
469 if (IS_ERR(key_ref)) { 478 if (IS_ERR(key_ref)) {
470 /* viewing a key under construction is permitted if we have the 479 /* viewing a key under construction is permitted if we have the
471 * authorisation token handy */ 480 * authorisation token handy */
@@ -474,7 +483,8 @@ long keyctl_describe_key(key_serial_t keyid,
474 if (!IS_ERR(instkey)) { 483 if (!IS_ERR(instkey)) {
475 key_put(instkey); 484 key_put(instkey);
476 key_ref = lookup_user_key(keyid, 485 key_ref = lookup_user_key(keyid,
477 0, 1, 0); 486 KEY_LOOKUP_PARTIAL,
487 0);
478 if (!IS_ERR(key_ref)) 488 if (!IS_ERR(key_ref))
479 goto okay; 489 goto okay;
480 } 490 }
@@ -558,7 +568,7 @@ long keyctl_keyring_search(key_serial_t ringid,
558 } 568 }
559 569
560 /* get the keyring at which to begin the search */ 570 /* get the keyring at which to begin the search */
561 keyring_ref = lookup_user_key(ringid, 0, 0, KEY_SEARCH); 571 keyring_ref = lookup_user_key(ringid, 0, KEY_SEARCH);
562 if (IS_ERR(keyring_ref)) { 572 if (IS_ERR(keyring_ref)) {
563 ret = PTR_ERR(keyring_ref); 573 ret = PTR_ERR(keyring_ref);
564 goto error2; 574 goto error2;
@@ -567,7 +577,8 @@ long keyctl_keyring_search(key_serial_t ringid,
567 /* get the destination keyring if specified */ 577 /* get the destination keyring if specified */
568 dest_ref = NULL; 578 dest_ref = NULL;
569 if (destringid) { 579 if (destringid) {
570 dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE); 580 dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
581 KEY_WRITE);
571 if (IS_ERR(dest_ref)) { 582 if (IS_ERR(dest_ref)) {
572 ret = PTR_ERR(dest_ref); 583 ret = PTR_ERR(dest_ref);
573 goto error3; 584 goto error3;
@@ -637,7 +648,7 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
637 long ret; 648 long ret;
638 649
639 /* find the key first */ 650 /* find the key first */
640 key_ref = lookup_user_key(keyid, 0, 0, 0); 651 key_ref = lookup_user_key(keyid, 0, 0);
641 if (IS_ERR(key_ref)) { 652 if (IS_ERR(key_ref)) {
642 ret = -ENOKEY; 653 ret = -ENOKEY;
643 goto error; 654 goto error;
@@ -700,7 +711,8 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid)
700 if (uid == (uid_t) -1 && gid == (gid_t) -1) 711 if (uid == (uid_t) -1 && gid == (gid_t) -1)
701 goto error; 712 goto error;
702 713
703 key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR); 714 key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
715 KEY_SETATTR);
704 if (IS_ERR(key_ref)) { 716 if (IS_ERR(key_ref)) {
705 ret = PTR_ERR(key_ref); 717 ret = PTR_ERR(key_ref);
706 goto error; 718 goto error;
@@ -805,7 +817,8 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
805 if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL)) 817 if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL))
806 goto error; 818 goto error;
807 819
808 key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR); 820 key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
821 KEY_SETATTR);
809 if (IS_ERR(key_ref)) { 822 if (IS_ERR(key_ref)) {
810 ret = PTR_ERR(key_ref); 823 ret = PTR_ERR(key_ref);
811 goto error; 824 goto error;
@@ -847,7 +860,7 @@ static long get_instantiation_keyring(key_serial_t ringid,
847 860
848 /* if a specific keyring is nominated by ID, then use that */ 861 /* if a specific keyring is nominated by ID, then use that */
849 if (ringid > 0) { 862 if (ringid > 0) {
850 dkref = lookup_user_key(ringid, 1, 0, KEY_WRITE); 863 dkref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
851 if (IS_ERR(dkref)) 864 if (IS_ERR(dkref))
852 return PTR_ERR(dkref); 865 return PTR_ERR(dkref);
853 *_dest_keyring = key_ref_to_ptr(dkref); 866 *_dest_keyring = key_ref_to_ptr(dkref);
@@ -1083,7 +1096,8 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout)
1083 time_t expiry; 1096 time_t expiry;
1084 long ret; 1097 long ret;
1085 1098
1086 key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR); 1099 key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
1100 KEY_SETATTR);
1087 if (IS_ERR(key_ref)) { 1101 if (IS_ERR(key_ref)) {
1088 ret = PTR_ERR(key_ref); 1102 ret = PTR_ERR(key_ref);
1089 goto error; 1103 goto error;
@@ -1101,6 +1115,7 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout)
1101 } 1115 }
1102 1116
1103 key->expiry = expiry; 1117 key->expiry = expiry;
1118 key_schedule_gc(key->expiry);
1104 1119
1105 up_write(&key->sem); 1120 up_write(&key->sem);
1106 key_put(key); 1121 key_put(key);
@@ -1170,7 +1185,7 @@ long keyctl_get_security(key_serial_t keyid,
1170 char *context; 1185 char *context;
1171 long ret; 1186 long ret;
1172 1187
1173 key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW); 1188 key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_VIEW);
1174 if (IS_ERR(key_ref)) { 1189 if (IS_ERR(key_ref)) {
1175 if (PTR_ERR(key_ref) != -EACCES) 1190 if (PTR_ERR(key_ref) != -EACCES)
1176 return PTR_ERR(key_ref); 1191 return PTR_ERR(key_ref);
@@ -1182,7 +1197,7 @@ long keyctl_get_security(key_serial_t keyid,
1182 return PTR_ERR(key_ref); 1197 return PTR_ERR(key_ref);
1183 key_put(instkey); 1198 key_put(instkey);
1184 1199
1185 key_ref = lookup_user_key(keyid, 0, 1, 0); 1200 key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, 0);
1186 if (IS_ERR(key_ref)) 1201 if (IS_ERR(key_ref))
1187 return PTR_ERR(key_ref); 1202 return PTR_ERR(key_ref);
1188 } 1203 }
@@ -1213,6 +1228,105 @@ long keyctl_get_security(key_serial_t keyid,
1213 return ret; 1228 return ret;
1214} 1229}
1215 1230
1231/*
1232 * attempt to install the calling process's session keyring on the process's
1233 * parent process
1234 * - the keyring must exist and must grant us LINK permission
1235 * - implements keyctl(KEYCTL_SESSION_TO_PARENT)
1236 */
1237long keyctl_session_to_parent(void)
1238{
1239 struct task_struct *me, *parent;
1240 const struct cred *mycred, *pcred;
1241 struct cred *cred, *oldcred;
1242 key_ref_t keyring_r;
1243 int ret;
1244
1245 keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_LINK);
1246 if (IS_ERR(keyring_r))
1247 return PTR_ERR(keyring_r);
1248
1249 /* our parent is going to need a new cred struct, a new tgcred struct
1250 * and new security data, so we allocate them here to prevent ENOMEM in
1251 * our parent */
1252 ret = -ENOMEM;
1253 cred = cred_alloc_blank();
1254 if (!cred)
1255 goto error_keyring;
1256
1257 cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r);
1258 keyring_r = NULL;
1259
1260 me = current;
1261 write_lock_irq(&tasklist_lock);
1262
1263 parent = me->real_parent;
1264 ret = -EPERM;
1265
1266 /* the parent mustn't be init and mustn't be a kernel thread */
1267 if (parent->pid <= 1 || !parent->mm)
1268 goto not_permitted;
1269
1270 /* the parent must be single threaded */
1271 if (atomic_read(&parent->signal->count) != 1)
1272 goto not_permitted;
1273
1274 /* the parent and the child must have different session keyrings or
1275 * there's no point */
1276 mycred = current_cred();
1277 pcred = __task_cred(parent);
1278 if (mycred == pcred ||
1279 mycred->tgcred->session_keyring == pcred->tgcred->session_keyring)
1280 goto already_same;
1281
1282 /* the parent must have the same effective ownership and mustn't be
1283 * SUID/SGID */
1284 if (pcred-> uid != mycred->euid ||
1285 pcred->euid != mycred->euid ||
1286 pcred->suid != mycred->euid ||
1287 pcred-> gid != mycred->egid ||
1288 pcred->egid != mycred->egid ||
1289 pcred->sgid != mycred->egid)
1290 goto not_permitted;
1291
1292 /* the keyrings must have the same UID */
1293 if (pcred ->tgcred->session_keyring->uid != mycred->euid ||
1294 mycred->tgcred->session_keyring->uid != mycred->euid)
1295 goto not_permitted;
1296
1297 /* the LSM must permit the replacement of the parent's keyring with the
1298 * keyring from this process */
1299 ret = security_key_session_to_parent(mycred, pcred,
1300 key_ref_to_ptr(keyring_r));
1301 if (ret < 0)
1302 goto not_permitted;
1303
1304 /* if there's an already pending keyring replacement, then we replace
1305 * that */
1306 oldcred = parent->replacement_session_keyring;
1307
1308 /* the replacement session keyring is applied just prior to userspace
1309 * restarting */
1310 parent->replacement_session_keyring = cred;
1311 cred = NULL;
1312 set_ti_thread_flag(task_thread_info(parent), TIF_NOTIFY_RESUME);
1313
1314 write_unlock_irq(&tasklist_lock);
1315 if (oldcred)
1316 put_cred(oldcred);
1317 return 0;
1318
1319already_same:
1320 ret = 0;
1321not_permitted:
1322 put_cred(cred);
1323 return ret;
1324
1325error_keyring:
1326 key_ref_put(keyring_r);
1327 return ret;
1328}
1329
1216/*****************************************************************************/ 1330/*****************************************************************************/
1217/* 1331/*
1218 * the key control system call 1332 * the key control system call
@@ -1298,6 +1412,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
1298 (char __user *) arg3, 1412 (char __user *) arg3,
1299 (size_t) arg4); 1413 (size_t) arg4);
1300 1414
1415 case KEYCTL_SESSION_TO_PARENT:
1416 return keyctl_session_to_parent();
1417
1301 default: 1418 default:
1302 return -EOPNOTSUPP; 1419 return -EOPNOTSUPP;
1303 } 1420 }
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 3dba81c2eba3..ac977f661a79 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -1000,3 +1000,88 @@ static void keyring_revoke(struct key *keyring)
1000 } 1000 }
1001 1001
1002} /* end keyring_revoke() */ 1002} /* end keyring_revoke() */
1003
1004/*
1005 * Determine whether a key is dead
1006 */
1007static bool key_is_dead(struct key *key, time_t limit)
1008{
1009 return test_bit(KEY_FLAG_DEAD, &key->flags) ||
1010 (key->expiry > 0 && key->expiry <= limit);
1011}
1012
1013/*
1014 * Collect garbage from the contents of a keyring
1015 */
1016void keyring_gc(struct key *keyring, time_t limit)
1017{
1018 struct keyring_list *klist, *new;
1019 struct key *key;
1020 int loop, keep, max;
1021
1022 kenter("%x", key_serial(keyring));
1023
1024 down_write(&keyring->sem);
1025
1026 klist = keyring->payload.subscriptions;
1027 if (!klist)
1028 goto just_return;
1029
1030 /* work out how many subscriptions we're keeping */
1031 keep = 0;
1032 for (loop = klist->nkeys - 1; loop >= 0; loop--)
1033 if (!key_is_dead(klist->keys[loop], limit));
1034 keep++;
1035
1036 if (keep == klist->nkeys)
1037 goto just_return;
1038
1039 /* allocate a new keyring payload */
1040 max = roundup(keep, 4);
1041 new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
1042 GFP_KERNEL);
1043 if (!new)
1044 goto just_return;
1045 new->maxkeys = max;
1046 new->nkeys = 0;
1047 new->delkey = 0;
1048
1049 /* install the live keys
1050 * - must take care as expired keys may be updated back to life
1051 */
1052 keep = 0;
1053 for (loop = klist->nkeys - 1; loop >= 0; loop--) {
1054 key = klist->keys[loop];
1055 if (!key_is_dead(key, limit)) {
1056 if (keep >= max)
1057 goto discard_new;
1058 new->keys[keep++] = key_get(key);
1059 }
1060 }
1061 new->nkeys = keep;
1062
1063 /* adjust the quota */
1064 key_payload_reserve(keyring,
1065 sizeof(struct keyring_list) +
1066 KEYQUOTA_LINK_BYTES * keep);
1067
1068 if (keep == 0) {
1069 rcu_assign_pointer(keyring->payload.subscriptions, NULL);
1070 kfree(new);
1071 } else {
1072 rcu_assign_pointer(keyring->payload.subscriptions, new);
1073 }
1074
1075 up_write(&keyring->sem);
1076
1077 call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
1078 kleave(" [yes]");
1079 return;
1080
1081discard_new:
1082 new->nkeys = keep;
1083 keyring_clear_rcu_disposal(&new->rcu);
1084just_return:
1085 up_write(&keyring->sem);
1086 kleave(" [no]");
1087}
diff --git a/security/keys/proc.c b/security/keys/proc.c
index 769f9bdfd2b3..9d01021ca0c8 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -91,59 +91,94 @@ __initcall(key_proc_init);
91 */ 91 */
92#ifdef CONFIG_KEYS_DEBUG_PROC_KEYS 92#ifdef CONFIG_KEYS_DEBUG_PROC_KEYS
93 93
94static struct rb_node *__key_serial_next(struct rb_node *n) 94static struct rb_node *key_serial_next(struct rb_node *n)
95{ 95{
96 struct user_namespace *user_ns = current_user_ns();
97
98 n = rb_next(n);
96 while (n) { 99 while (n) {
97 struct key *key = rb_entry(n, struct key, serial_node); 100 struct key *key = rb_entry(n, struct key, serial_node);
98 if (key->user->user_ns == current_user_ns()) 101 if (key->user->user_ns == user_ns)
99 break; 102 break;
100 n = rb_next(n); 103 n = rb_next(n);
101 } 104 }
102 return n; 105 return n;
103} 106}
104 107
105static struct rb_node *key_serial_next(struct rb_node *n) 108static int proc_keys_open(struct inode *inode, struct file *file)
106{ 109{
107 return __key_serial_next(rb_next(n)); 110 return seq_open(file, &proc_keys_ops);
108} 111}
109 112
110static struct rb_node *key_serial_first(struct rb_root *r) 113static struct key *find_ge_key(key_serial_t id)
111{ 114{
112 struct rb_node *n = rb_first(r); 115 struct user_namespace *user_ns = current_user_ns();
113 return __key_serial_next(n); 116 struct rb_node *n = key_serial_tree.rb_node;
114} 117 struct key *minkey = NULL;
115 118
116static int proc_keys_open(struct inode *inode, struct file *file) 119 while (n) {
117{ 120 struct key *key = rb_entry(n, struct key, serial_node);
118 return seq_open(file, &proc_keys_ops); 121 if (id < key->serial) {
122 if (!minkey || minkey->serial > key->serial)
123 minkey = key;
124 n = n->rb_left;
125 } else if (id > key->serial) {
126 n = n->rb_right;
127 } else {
128 minkey = key;
129 break;
130 }
131 key = NULL;
132 }
119 133
134 if (!minkey)
135 return NULL;
136
137 for (;;) {
138 if (minkey->user->user_ns == user_ns)
139 return minkey;
140 n = rb_next(&minkey->serial_node);
141 if (!n)
142 return NULL;
143 minkey = rb_entry(n, struct key, serial_node);
144 }
120} 145}
121 146
122static void *proc_keys_start(struct seq_file *p, loff_t *_pos) 147static void *proc_keys_start(struct seq_file *p, loff_t *_pos)
148 __acquires(key_serial_lock)
123{ 149{
124 struct rb_node *_p; 150 key_serial_t pos = *_pos;
125 loff_t pos = *_pos; 151 struct key *key;
126 152
127 spin_lock(&key_serial_lock); 153 spin_lock(&key_serial_lock);
128 154
129 _p = key_serial_first(&key_serial_tree); 155 if (*_pos > INT_MAX)
130 while (pos > 0 && _p) { 156 return NULL;
131 pos--; 157 key = find_ge_key(pos);
132 _p = key_serial_next(_p); 158 if (!key)
133 } 159 return NULL;
134 160 *_pos = key->serial;
135 return _p; 161 return &key->serial_node;
162}
136 163
164static inline key_serial_t key_node_serial(struct rb_node *n)
165{
166 struct key *key = rb_entry(n, struct key, serial_node);
167 return key->serial;
137} 168}
138 169
139static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos) 170static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos)
140{ 171{
141 (*_pos)++; 172 struct rb_node *n;
142 return key_serial_next((struct rb_node *) v);
143 173
174 n = key_serial_next(v);
175 if (n)
176 *_pos = key_node_serial(n);
177 return n;
144} 178}
145 179
146static void proc_keys_stop(struct seq_file *p, void *v) 180static void proc_keys_stop(struct seq_file *p, void *v)
181 __releases(key_serial_lock)
147{ 182{
148 spin_unlock(&key_serial_lock); 183 spin_unlock(&key_serial_lock);
149} 184}
@@ -174,11 +209,9 @@ static int proc_keys_show(struct seq_file *m, void *v)
174 /* come up with a suitable timeout value */ 209 /* come up with a suitable timeout value */
175 if (key->expiry == 0) { 210 if (key->expiry == 0) {
176 memcpy(xbuf, "perm", 5); 211 memcpy(xbuf, "perm", 5);
177 } 212 } else if (now.tv_sec >= key->expiry) {
178 else if (now.tv_sec >= key->expiry) {
179 memcpy(xbuf, "expd", 5); 213 memcpy(xbuf, "expd", 5);
180 } 214 } else {
181 else {
182 timo = key->expiry - now.tv_sec; 215 timo = key->expiry - now.tv_sec;
183 216
184 if (timo < 60) 217 if (timo < 60)
@@ -218,9 +251,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
218 seq_putc(m, '\n'); 251 seq_putc(m, '\n');
219 252
220 rcu_read_unlock(); 253 rcu_read_unlock();
221
222 return 0; 254 return 0;
223
224} 255}
225 256
226#endif /* CONFIG_KEYS_DEBUG_PROC_KEYS */ 257#endif /* CONFIG_KEYS_DEBUG_PROC_KEYS */
@@ -246,6 +277,7 @@ static struct rb_node *key_user_first(struct rb_root *r)
246 struct rb_node *n = rb_first(r); 277 struct rb_node *n = rb_first(r);
247 return __key_user_next(n); 278 return __key_user_next(n);
248} 279}
280
249/*****************************************************************************/ 281/*****************************************************************************/
250/* 282/*
251 * implement "/proc/key-users" to provide a list of the key users 283 * implement "/proc/key-users" to provide a list of the key users
@@ -253,10 +285,10 @@ static struct rb_node *key_user_first(struct rb_root *r)
253static int proc_key_users_open(struct inode *inode, struct file *file) 285static int proc_key_users_open(struct inode *inode, struct file *file)
254{ 286{
255 return seq_open(file, &proc_key_users_ops); 287 return seq_open(file, &proc_key_users_ops);
256
257} 288}
258 289
259static void *proc_key_users_start(struct seq_file *p, loff_t *_pos) 290static void *proc_key_users_start(struct seq_file *p, loff_t *_pos)
291 __acquires(key_user_lock)
260{ 292{
261 struct rb_node *_p; 293 struct rb_node *_p;
262 loff_t pos = *_pos; 294 loff_t pos = *_pos;
@@ -270,17 +302,16 @@ static void *proc_key_users_start(struct seq_file *p, loff_t *_pos)
270 } 302 }
271 303
272 return _p; 304 return _p;
273
274} 305}
275 306
276static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos) 307static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos)
277{ 308{
278 (*_pos)++; 309 (*_pos)++;
279 return key_user_next((struct rb_node *) v); 310 return key_user_next((struct rb_node *) v);
280
281} 311}
282 312
283static void proc_key_users_stop(struct seq_file *p, void *v) 313static void proc_key_users_stop(struct seq_file *p, void *v)
314 __releases(key_user_lock)
284{ 315{
285 spin_unlock(&key_user_lock); 316 spin_unlock(&key_user_lock);
286} 317}
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 276d27882ce8..5c23afb31ece 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -17,6 +17,7 @@
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/err.h> 18#include <linux/err.h>
19#include <linux/mutex.h> 19#include <linux/mutex.h>
20#include <linux/security.h>
20#include <linux/user_namespace.h> 21#include <linux/user_namespace.h>
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
22#include "internal.h" 23#include "internal.h"
@@ -487,7 +488,7 @@ static int lookup_user_key_possessed(const struct key *key, const void *target)
487 * - don't create special keyrings unless so requested 488 * - don't create special keyrings unless so requested
488 * - partially constructed keys aren't found unless requested 489 * - partially constructed keys aren't found unless requested
489 */ 490 */
490key_ref_t lookup_user_key(key_serial_t id, int create, int partial, 491key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
491 key_perm_t perm) 492 key_perm_t perm)
492{ 493{
493 struct request_key_auth *rka; 494 struct request_key_auth *rka;
@@ -503,7 +504,7 @@ try_again:
503 switch (id) { 504 switch (id) {
504 case KEY_SPEC_THREAD_KEYRING: 505 case KEY_SPEC_THREAD_KEYRING:
505 if (!cred->thread_keyring) { 506 if (!cred->thread_keyring) {
506 if (!create) 507 if (!(lflags & KEY_LOOKUP_CREATE))
507 goto error; 508 goto error;
508 509
509 ret = install_thread_keyring(); 510 ret = install_thread_keyring();
@@ -521,7 +522,7 @@ try_again:
521 522
522 case KEY_SPEC_PROCESS_KEYRING: 523 case KEY_SPEC_PROCESS_KEYRING:
523 if (!cred->tgcred->process_keyring) { 524 if (!cred->tgcred->process_keyring) {
524 if (!create) 525 if (!(lflags & KEY_LOOKUP_CREATE))
525 goto error; 526 goto error;
526 527
527 ret = install_process_keyring(); 528 ret = install_process_keyring();
@@ -642,7 +643,14 @@ try_again:
642 break; 643 break;
643 } 644 }
644 645
645 if (!partial) { 646 /* unlink does not use the nominated key in any way, so can skip all
647 * the permission checks as it is only concerned with the keyring */
648 if (lflags & KEY_LOOKUP_FOR_UNLINK) {
649 ret = 0;
650 goto error;
651 }
652
653 if (!(lflags & KEY_LOOKUP_PARTIAL)) {
646 ret = wait_for_key_construction(key, true); 654 ret = wait_for_key_construction(key, true);
647 switch (ret) { 655 switch (ret) {
648 case -ERESTARTSYS: 656 case -ERESTARTSYS:
@@ -660,7 +668,8 @@ try_again:
660 } 668 }
661 669
662 ret = -EIO; 670 ret = -EIO;
663 if (!partial && !test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) 671 if (!(lflags & KEY_LOOKUP_PARTIAL) &&
672 !test_bit(KEY_FLAG_INSTANTIATED, &key->flags))
664 goto invalid_key; 673 goto invalid_key;
665 674
666 /* check the permissions */ 675 /* check the permissions */
@@ -702,7 +711,7 @@ long join_session_keyring(const char *name)
702 /* only permit this if there's a single thread in the thread group - 711 /* only permit this if there's a single thread in the thread group -
703 * this avoids us having to adjust the creds on all threads and risking 712 * this avoids us having to adjust the creds on all threads and risking
704 * ENOMEM */ 713 * ENOMEM */
705 if (!is_single_threaded(current)) 714 if (!current_is_single_threaded())
706 return -EMLINK; 715 return -EMLINK;
707 716
708 new = prepare_creds(); 717 new = prepare_creds();
@@ -760,3 +769,51 @@ error:
760 abort_creds(new); 769 abort_creds(new);
761 return ret; 770 return ret;
762} 771}
772
773/*
774 * Replace a process's session keyring when that process resumes userspace on
775 * behalf of one of its children
776 */
777void key_replace_session_keyring(void)
778{
779 const struct cred *old;
780 struct cred *new;
781
782 if (!current->replacement_session_keyring)
783 return;
784
785 write_lock_irq(&tasklist_lock);
786 new = current->replacement_session_keyring;
787 current->replacement_session_keyring = NULL;
788 write_unlock_irq(&tasklist_lock);
789
790 if (!new)
791 return;
792
793 old = current_cred();
794 new-> uid = old-> uid;
795 new-> euid = old-> euid;
796 new-> suid = old-> suid;
797 new->fsuid = old->fsuid;
798 new-> gid = old-> gid;
799 new-> egid = old-> egid;
800 new-> sgid = old-> sgid;
801 new->fsgid = old->fsgid;
802 new->user = get_uid(old->user);
803 new->group_info = get_group_info(old->group_info);
804
805 new->securebits = old->securebits;
806 new->cap_inheritable = old->cap_inheritable;
807 new->cap_permitted = old->cap_permitted;
808 new->cap_effective = old->cap_effective;
809 new->cap_bset = old->cap_bset;
810
811 new->jit_keyring = old->jit_keyring;
812 new->thread_keyring = key_get(old->thread_keyring);
813 new->tgcred->tgid = old->tgcred->tgid;
814 new->tgcred->process_keyring = key_get(old->tgcred->process_keyring);
815
816 security_transfer_creds(new, old);
817
818 commit_creds(new);
819}
diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c
index b611d493c2d8..5e05dc09e2db 100644
--- a/security/keys/sysctl.c
+++ b/security/keys/sysctl.c
@@ -13,6 +13,8 @@
13#include <linux/sysctl.h> 13#include <linux/sysctl.h>
14#include "internal.h" 14#include "internal.h"
15 15
16static const int zero, one = 1, max = INT_MAX;
17
16ctl_table key_sysctls[] = { 18ctl_table key_sysctls[] = {
17 { 19 {
18 .ctl_name = CTL_UNNUMBERED, 20 .ctl_name = CTL_UNNUMBERED,
@@ -20,7 +22,9 @@ ctl_table key_sysctls[] = {
20 .data = &key_quota_maxkeys, 22 .data = &key_quota_maxkeys,
21 .maxlen = sizeof(unsigned), 23 .maxlen = sizeof(unsigned),
22 .mode = 0644, 24 .mode = 0644,
23 .proc_handler = &proc_dointvec, 25 .proc_handler = &proc_dointvec_minmax,
26 .extra1 = (void *) &one,
27 .extra2 = (void *) &max,
24 }, 28 },
25 { 29 {
26 .ctl_name = CTL_UNNUMBERED, 30 .ctl_name = CTL_UNNUMBERED,
@@ -28,7 +32,9 @@ ctl_table key_sysctls[] = {
28 .data = &key_quota_maxbytes, 32 .data = &key_quota_maxbytes,
29 .maxlen = sizeof(unsigned), 33 .maxlen = sizeof(unsigned),
30 .mode = 0644, 34 .mode = 0644,
31 .proc_handler = &proc_dointvec, 35 .proc_handler = &proc_dointvec_minmax,
36 .extra1 = (void *) &one,
37 .extra2 = (void *) &max,
32 }, 38 },
33 { 39 {
34 .ctl_name = CTL_UNNUMBERED, 40 .ctl_name = CTL_UNNUMBERED,
@@ -36,7 +42,9 @@ ctl_table key_sysctls[] = {
36 .data = &key_quota_root_maxkeys, 42 .data = &key_quota_root_maxkeys,
37 .maxlen = sizeof(unsigned), 43 .maxlen = sizeof(unsigned),
38 .mode = 0644, 44 .mode = 0644,
39 .proc_handler = &proc_dointvec, 45 .proc_handler = &proc_dointvec_minmax,
46 .extra1 = (void *) &one,
47 .extra2 = (void *) &max,
40 }, 48 },
41 { 49 {
42 .ctl_name = CTL_UNNUMBERED, 50 .ctl_name = CTL_UNNUMBERED,
@@ -44,7 +52,19 @@ ctl_table key_sysctls[] = {
44 .data = &key_quota_root_maxbytes, 52 .data = &key_quota_root_maxbytes,
45 .maxlen = sizeof(unsigned), 53 .maxlen = sizeof(unsigned),
46 .mode = 0644, 54 .mode = 0644,
47 .proc_handler = &proc_dointvec, 55 .proc_handler = &proc_dointvec_minmax,
56 .extra1 = (void *) &one,
57 .extra2 = (void *) &max,
58 },
59 {
60 .ctl_name = CTL_UNNUMBERED,
61 .procname = "gc_delay",
62 .data = &key_gc_delay,
63 .maxlen = sizeof(unsigned),
64 .mode = 0644,
65 .proc_handler = &proc_dointvec_minmax,
66 .extra1 = (void *) &zero,
67 .extra2 = (void *) &max,
48 }, 68 },
49 { .ctl_name = 0 } 69 { .ctl_name = 0 }
50}; 70};
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 94b868494b31..500aad0ebd6a 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -220,6 +220,8 @@ static void dump_common_audit_data(struct audit_buffer *ab,
220 } 220 }
221 221
222 switch (a->type) { 222 switch (a->type) {
223 case LSM_AUDIT_NO_AUDIT:
224 return;
223 case LSM_AUDIT_DATA_IPC: 225 case LSM_AUDIT_DATA_IPC:
224 audit_log_format(ab, " key=%d ", a->u.ipc_id); 226 audit_log_format(ab, " key=%d ", a->u.ipc_id);
225 break; 227 break;
diff --git a/security/security.c b/security/security.c
index dc7674fbfc7a..c4c673240c1c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -124,9 +124,9 @@ int register_security(struct security_operations *ops)
124 124
125/* Security operations */ 125/* Security operations */
126 126
127int security_ptrace_may_access(struct task_struct *child, unsigned int mode) 127int security_ptrace_access_check(struct task_struct *child, unsigned int mode)
128{ 128{
129 return security_ops->ptrace_may_access(child, mode); 129 return security_ops->ptrace_access_check(child, mode);
130} 130}
131 131
132int security_ptrace_traceme(struct task_struct *parent) 132int security_ptrace_traceme(struct task_struct *parent)
@@ -684,6 +684,11 @@ int security_task_create(unsigned long clone_flags)
684 return security_ops->task_create(clone_flags); 684 return security_ops->task_create(clone_flags);
685} 685}
686 686
687int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
688{
689 return security_ops->cred_alloc_blank(cred, gfp);
690}
691
687void security_cred_free(struct cred *cred) 692void security_cred_free(struct cred *cred)
688{ 693{
689 security_ops->cred_free(cred); 694 security_ops->cred_free(cred);
@@ -699,6 +704,11 @@ void security_commit_creds(struct cred *new, const struct cred *old)
699 security_ops->cred_commit(new, old); 704 security_ops->cred_commit(new, old);
700} 705}
701 706
707void security_transfer_creds(struct cred *new, const struct cred *old)
708{
709 security_ops->cred_transfer(new, old);
710}
711
702int security_kernel_act_as(struct cred *new, u32 secid) 712int security_kernel_act_as(struct cred *new, u32 secid)
703{ 713{
704 return security_ops->kernel_act_as(new, secid); 714 return security_ops->kernel_act_as(new, secid);
@@ -709,6 +719,11 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode)
709 return security_ops->kernel_create_files_as(new, inode); 719 return security_ops->kernel_create_files_as(new, inode);
710} 720}
711 721
722int security_kernel_module_request(void)
723{
724 return security_ops->kernel_module_request();
725}
726
712int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) 727int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
713{ 728{
714 return security_ops->task_setuid(id0, id1, id2, flags); 729 return security_ops->task_setuid(id0, id1, id2, flags);
@@ -959,6 +974,24 @@ void security_release_secctx(char *secdata, u32 seclen)
959} 974}
960EXPORT_SYMBOL(security_release_secctx); 975EXPORT_SYMBOL(security_release_secctx);
961 976
977int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
978{
979 return security_ops->inode_notifysecctx(inode, ctx, ctxlen);
980}
981EXPORT_SYMBOL(security_inode_notifysecctx);
982
983int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
984{
985 return security_ops->inode_setsecctx(dentry, ctx, ctxlen);
986}
987EXPORT_SYMBOL(security_inode_setsecctx);
988
989int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
990{
991 return security_ops->inode_getsecctx(inode, ctx, ctxlen);
992}
993EXPORT_SYMBOL(security_inode_getsecctx);
994
962#ifdef CONFIG_SECURITY_NETWORK 995#ifdef CONFIG_SECURITY_NETWORK
963 996
964int security_unix_stream_connect(struct socket *sock, struct socket *other, 997int security_unix_stream_connect(struct socket *sock, struct socket *other,
@@ -1112,6 +1145,24 @@ void security_inet_conn_established(struct sock *sk,
1112 security_ops->inet_conn_established(sk, skb); 1145 security_ops->inet_conn_established(sk, skb);
1113} 1146}
1114 1147
1148int security_tun_dev_create(void)
1149{
1150 return security_ops->tun_dev_create();
1151}
1152EXPORT_SYMBOL(security_tun_dev_create);
1153
1154void security_tun_dev_post_create(struct sock *sk)
1155{
1156 return security_ops->tun_dev_post_create(sk);
1157}
1158EXPORT_SYMBOL(security_tun_dev_post_create);
1159
1160int security_tun_dev_attach(struct sock *sk)
1161{
1162 return security_ops->tun_dev_attach(sk);
1163}
1164EXPORT_SYMBOL(security_tun_dev_attach);
1165
1115#endif /* CONFIG_SECURITY_NETWORK */ 1166#endif /* CONFIG_SECURITY_NETWORK */
1116 1167
1117#ifdef CONFIG_SECURITY_NETWORK_XFRM 1168#ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -1218,6 +1269,13 @@ int security_key_getsecurity(struct key *key, char **_buffer)
1218 return security_ops->key_getsecurity(key, _buffer); 1269 return security_ops->key_getsecurity(key, _buffer);
1219} 1270}
1220 1271
1272int security_key_session_to_parent(const struct cred *cred,
1273 const struct cred *parent_cred,
1274 struct key *key)
1275{
1276 return security_ops->key_session_to_parent(cred, parent_cred, key);
1277}
1278
1221#endif /* CONFIG_KEYS */ 1279#endif /* CONFIG_KEYS */
1222 1280
1223#ifdef CONFIG_AUDIT 1281#ifdef CONFIG_AUDIT
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index b2ab60859832..e3d19014259b 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -137,7 +137,7 @@ static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass)
137 * @tclass: target security class 137 * @tclass: target security class
138 * @av: access vector 138 * @av: access vector
139 */ 139 */
140void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) 140static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
141{ 141{
142 const char **common_pts = NULL; 142 const char **common_pts = NULL;
143 u32 common_base = 0; 143 u32 common_base = 0;
@@ -492,23 +492,35 @@ out:
492 return node; 492 return node;
493} 493}
494 494
495static inline void avc_print_ipv6_addr(struct audit_buffer *ab, 495/**
496 struct in6_addr *addr, __be16 port, 496 * avc_audit_pre_callback - SELinux specific information
497 char *name1, char *name2) 497 * will be called by generic audit code
498 * @ab: the audit buffer
499 * @a: audit_data
500 */
501static void avc_audit_pre_callback(struct audit_buffer *ab, void *a)
498{ 502{
499 if (!ipv6_addr_any(addr)) 503 struct common_audit_data *ad = a;
500 audit_log_format(ab, " %s=%pI6", name1, addr); 504 audit_log_format(ab, "avc: %s ",
501 if (port) 505 ad->selinux_audit_data.denied ? "denied" : "granted");
502 audit_log_format(ab, " %s=%d", name2, ntohs(port)); 506 avc_dump_av(ab, ad->selinux_audit_data.tclass,
507 ad->selinux_audit_data.audited);
508 audit_log_format(ab, " for ");
503} 509}
504 510
505static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr, 511/**
506 __be16 port, char *name1, char *name2) 512 * avc_audit_post_callback - SELinux specific information
513 * will be called by generic audit code
514 * @ab: the audit buffer
515 * @a: audit_data
516 */
517static void avc_audit_post_callback(struct audit_buffer *ab, void *a)
507{ 518{
508 if (addr) 519 struct common_audit_data *ad = a;
509 audit_log_format(ab, " %s=%pI4", name1, &addr); 520 audit_log_format(ab, " ");
510 if (port) 521 avc_dump_query(ab, ad->selinux_audit_data.ssid,
511 audit_log_format(ab, " %s=%d", name2, ntohs(port)); 522 ad->selinux_audit_data.tsid,
523 ad->selinux_audit_data.tclass);
512} 524}
513 525
514/** 526/**
@@ -532,13 +544,10 @@ static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr,
532 */ 544 */
533void avc_audit(u32 ssid, u32 tsid, 545void avc_audit(u32 ssid, u32 tsid,
534 u16 tclass, u32 requested, 546 u16 tclass, u32 requested,
535 struct av_decision *avd, int result, struct avc_audit_data *a) 547 struct av_decision *avd, int result, struct common_audit_data *a)
536{ 548{
537 struct task_struct *tsk = current; 549 struct common_audit_data stack_data;
538 struct inode *inode = NULL;
539 u32 denied, audited; 550 u32 denied, audited;
540 struct audit_buffer *ab;
541
542 denied = requested & ~avd->allowed; 551 denied = requested & ~avd->allowed;
543 if (denied) { 552 if (denied) {
544 audited = denied; 553 audited = denied;
@@ -551,144 +560,20 @@ void avc_audit(u32 ssid, u32 tsid,
551 if (!(audited & avd->auditallow)) 560 if (!(audited & avd->auditallow))
552 return; 561 return;
553 } 562 }
554 563 if (!a) {
555 ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC); 564 a = &stack_data;
556 if (!ab) 565 memset(a, 0, sizeof(*a));
557 return; /* audit_panic has been called */ 566 a->type = LSM_AUDIT_NO_AUDIT;
558 audit_log_format(ab, "avc: %s ", denied ? "denied" : "granted");
559 avc_dump_av(ab, tclass, audited);
560 audit_log_format(ab, " for ");
561 if (a && a->tsk)
562 tsk = a->tsk;
563 if (tsk && tsk->pid) {
564 audit_log_format(ab, " pid=%d comm=", tsk->pid);
565 audit_log_untrustedstring(ab, tsk->comm);
566 } 567 }
567 if (a) { 568 a->selinux_audit_data.tclass = tclass;
568 switch (a->type) { 569 a->selinux_audit_data.requested = requested;
569 case AVC_AUDIT_DATA_IPC: 570 a->selinux_audit_data.ssid = ssid;
570 audit_log_format(ab, " key=%d", a->u.ipc_id); 571 a->selinux_audit_data.tsid = tsid;
571 break; 572 a->selinux_audit_data.audited = audited;
572 case AVC_AUDIT_DATA_CAP: 573 a->selinux_audit_data.denied = denied;
573 audit_log_format(ab, " capability=%d", a->u.cap); 574 a->lsm_pre_audit = avc_audit_pre_callback;
574 break; 575 a->lsm_post_audit = avc_audit_post_callback;
575 case AVC_AUDIT_DATA_FS: 576 common_lsm_audit(a);
576 if (a->u.fs.path.dentry) {
577 struct dentry *dentry = a->u.fs.path.dentry;
578 if (a->u.fs.path.mnt) {
579 audit_log_d_path(ab, "path=",
580 &a->u.fs.path);
581 } else {
582 audit_log_format(ab, " name=");
583 audit_log_untrustedstring(ab, dentry->d_name.name);
584 }
585 inode = dentry->d_inode;
586 } else if (a->u.fs.inode) {
587 struct dentry *dentry;
588 inode = a->u.fs.inode;
589 dentry = d_find_alias(inode);
590 if (dentry) {
591 audit_log_format(ab, " name=");
592 audit_log_untrustedstring(ab, dentry->d_name.name);
593 dput(dentry);
594 }
595 }
596 if (inode)
597 audit_log_format(ab, " dev=%s ino=%lu",
598 inode->i_sb->s_id,
599 inode->i_ino);
600 break;
601 case AVC_AUDIT_DATA_NET:
602 if (a->u.net.sk) {
603 struct sock *sk = a->u.net.sk;
604 struct unix_sock *u;
605 int len = 0;
606 char *p = NULL;
607
608 switch (sk->sk_family) {
609 case AF_INET: {
610 struct inet_sock *inet = inet_sk(sk);
611
612 avc_print_ipv4_addr(ab, inet->rcv_saddr,
613 inet->sport,
614 "laddr", "lport");
615 avc_print_ipv4_addr(ab, inet->daddr,
616 inet->dport,
617 "faddr", "fport");
618 break;
619 }
620 case AF_INET6: {
621 struct inet_sock *inet = inet_sk(sk);
622 struct ipv6_pinfo *inet6 = inet6_sk(sk);
623
624 avc_print_ipv6_addr(ab, &inet6->rcv_saddr,
625 inet->sport,
626 "laddr", "lport");
627 avc_print_ipv6_addr(ab, &inet6->daddr,
628 inet->dport,
629 "faddr", "fport");
630 break;
631 }
632 case AF_UNIX:
633 u = unix_sk(sk);
634 if (u->dentry) {
635 struct path path = {
636 .dentry = u->dentry,
637 .mnt = u->mnt
638 };
639 audit_log_d_path(ab, "path=",
640 &path);
641 break;
642 }
643 if (!u->addr)
644 break;
645 len = u->addr->len-sizeof(short);
646 p = &u->addr->name->sun_path[0];
647 audit_log_format(ab, " path=");
648 if (*p)
649 audit_log_untrustedstring(ab, p);
650 else
651 audit_log_n_hex(ab, p, len);
652 break;
653 }
654 }
655
656 switch (a->u.net.family) {
657 case AF_INET:
658 avc_print_ipv4_addr(ab, a->u.net.v4info.saddr,
659 a->u.net.sport,
660 "saddr", "src");
661 avc_print_ipv4_addr(ab, a->u.net.v4info.daddr,
662 a->u.net.dport,
663 "daddr", "dest");
664 break;
665 case AF_INET6:
666 avc_print_ipv6_addr(ab, &a->u.net.v6info.saddr,
667 a->u.net.sport,
668 "saddr", "src");
669 avc_print_ipv6_addr(ab, &a->u.net.v6info.daddr,
670 a->u.net.dport,
671 "daddr", "dest");
672 break;
673 }
674 if (a->u.net.netif > 0) {
675 struct net_device *dev;
676
677 /* NOTE: we always use init's namespace */
678 dev = dev_get_by_index(&init_net,
679 a->u.net.netif);
680 if (dev) {
681 audit_log_format(ab, " netif=%s",
682 dev->name);
683 dev_put(dev);
684 }
685 }
686 break;
687 }
688 }
689 audit_log_format(ab, " ");
690 avc_dump_query(ab, ssid, tsid, tclass);
691 audit_log_end(ab);
692} 577}
693 578
694/** 579/**
@@ -956,7 +841,7 @@ out:
956 * another -errno upon other errors. 841 * another -errno upon other errors.
957 */ 842 */
958int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, 843int avc_has_perm(u32 ssid, u32 tsid, u16 tclass,
959 u32 requested, struct avc_audit_data *auditdata) 844 u32 requested, struct common_audit_data *auditdata)
960{ 845{
961 struct av_decision avd; 846 struct av_decision avd;
962 int rc; 847 int rc;
@@ -970,3 +855,9 @@ u32 avc_policy_seqno(void)
970{ 855{
971 return avc_cache.latest_notif; 856 return avc_cache.latest_notif;
972} 857}
858
859void avc_disable(void)
860{
861 if (avc_node_cachep)
862 kmem_cache_destroy(avc_node_cachep);
863}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 8d8b69c5664e..417f7c994522 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -13,8 +13,8 @@
13 * Eric Paris <eparis@redhat.com> 13 * Eric Paris <eparis@redhat.com>
14 * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. 14 * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
15 * <dgoeddel@trustedcs.com> 15 * <dgoeddel@trustedcs.com>
16 * Copyright (C) 2006, 2007 Hewlett-Packard Development Company, L.P. 16 * Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P.
17 * Paul Moore <paul.moore@hp.com> 17 * Paul Moore <paul.moore@hp.com>
18 * Copyright (C) 2007 Hitachi Software Engineering Co., Ltd. 18 * Copyright (C) 2007 Hitachi Software Engineering Co., Ltd.
19 * Yuichi Nakamura <ynakam@hitachisoft.jp> 19 * Yuichi Nakamura <ynakam@hitachisoft.jp>
20 * 20 *
@@ -448,6 +448,10 @@ static int sb_finish_set_opts(struct super_block *sb)
448 sbsec->behavior > ARRAY_SIZE(labeling_behaviors)) 448 sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
449 sbsec->flags &= ~SE_SBLABELSUPP; 449 sbsec->flags &= ~SE_SBLABELSUPP;
450 450
451 /* Special handling for sysfs. Is genfs but also has setxattr handler*/
452 if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
453 sbsec->flags |= SE_SBLABELSUPP;
454
451 /* Initialize the root inode. */ 455 /* Initialize the root inode. */
452 rc = inode_doinit_with_dentry(root_inode, root); 456 rc = inode_doinit_with_dentry(root_inode, root);
453 457
@@ -1479,14 +1483,14 @@ static int task_has_capability(struct task_struct *tsk,
1479 const struct cred *cred, 1483 const struct cred *cred,
1480 int cap, int audit) 1484 int cap, int audit)
1481{ 1485{
1482 struct avc_audit_data ad; 1486 struct common_audit_data ad;
1483 struct av_decision avd; 1487 struct av_decision avd;
1484 u16 sclass; 1488 u16 sclass;
1485 u32 sid = cred_sid(cred); 1489 u32 sid = cred_sid(cred);
1486 u32 av = CAP_TO_MASK(cap); 1490 u32 av = CAP_TO_MASK(cap);
1487 int rc; 1491 int rc;
1488 1492
1489 AVC_AUDIT_DATA_INIT(&ad, CAP); 1493 COMMON_AUDIT_DATA_INIT(&ad, CAP);
1490 ad.tsk = tsk; 1494 ad.tsk = tsk;
1491 ad.u.cap = cap; 1495 ad.u.cap = cap;
1492 1496
@@ -1525,12 +1529,14 @@ static int task_has_system(struct task_struct *tsk,
1525static int inode_has_perm(const struct cred *cred, 1529static int inode_has_perm(const struct cred *cred,
1526 struct inode *inode, 1530 struct inode *inode,
1527 u32 perms, 1531 u32 perms,
1528 struct avc_audit_data *adp) 1532 struct common_audit_data *adp)
1529{ 1533{
1530 struct inode_security_struct *isec; 1534 struct inode_security_struct *isec;
1531 struct avc_audit_data ad; 1535 struct common_audit_data ad;
1532 u32 sid; 1536 u32 sid;
1533 1537
1538 validate_creds(cred);
1539
1534 if (unlikely(IS_PRIVATE(inode))) 1540 if (unlikely(IS_PRIVATE(inode)))
1535 return 0; 1541 return 0;
1536 1542
@@ -1539,7 +1545,7 @@ static int inode_has_perm(const struct cred *cred,
1539 1545
1540 if (!adp) { 1546 if (!adp) {
1541 adp = &ad; 1547 adp = &ad;
1542 AVC_AUDIT_DATA_INIT(&ad, FS); 1548 COMMON_AUDIT_DATA_INIT(&ad, FS);
1543 ad.u.fs.inode = inode; 1549 ad.u.fs.inode = inode;
1544 } 1550 }
1545 1551
@@ -1555,9 +1561,9 @@ static inline int dentry_has_perm(const struct cred *cred,
1555 u32 av) 1561 u32 av)
1556{ 1562{
1557 struct inode *inode = dentry->d_inode; 1563 struct inode *inode = dentry->d_inode;
1558 struct avc_audit_data ad; 1564 struct common_audit_data ad;
1559 1565
1560 AVC_AUDIT_DATA_INIT(&ad, FS); 1566 COMMON_AUDIT_DATA_INIT(&ad, FS);
1561 ad.u.fs.path.mnt = mnt; 1567 ad.u.fs.path.mnt = mnt;
1562 ad.u.fs.path.dentry = dentry; 1568 ad.u.fs.path.dentry = dentry;
1563 return inode_has_perm(cred, inode, av, &ad); 1569 return inode_has_perm(cred, inode, av, &ad);
@@ -1577,11 +1583,11 @@ static int file_has_perm(const struct cred *cred,
1577{ 1583{
1578 struct file_security_struct *fsec = file->f_security; 1584 struct file_security_struct *fsec = file->f_security;
1579 struct inode *inode = file->f_path.dentry->d_inode; 1585 struct inode *inode = file->f_path.dentry->d_inode;
1580 struct avc_audit_data ad; 1586 struct common_audit_data ad;
1581 u32 sid = cred_sid(cred); 1587 u32 sid = cred_sid(cred);
1582 int rc; 1588 int rc;
1583 1589
1584 AVC_AUDIT_DATA_INIT(&ad, FS); 1590 COMMON_AUDIT_DATA_INIT(&ad, FS);
1585 ad.u.fs.path = file->f_path; 1591 ad.u.fs.path = file->f_path;
1586 1592
1587 if (sid != fsec->sid) { 1593 if (sid != fsec->sid) {
@@ -1612,7 +1618,7 @@ static int may_create(struct inode *dir,
1612 struct inode_security_struct *dsec; 1618 struct inode_security_struct *dsec;
1613 struct superblock_security_struct *sbsec; 1619 struct superblock_security_struct *sbsec;
1614 u32 sid, newsid; 1620 u32 sid, newsid;
1615 struct avc_audit_data ad; 1621 struct common_audit_data ad;
1616 int rc; 1622 int rc;
1617 1623
1618 dsec = dir->i_security; 1624 dsec = dir->i_security;
@@ -1621,7 +1627,7 @@ static int may_create(struct inode *dir,
1621 sid = tsec->sid; 1627 sid = tsec->sid;
1622 newsid = tsec->create_sid; 1628 newsid = tsec->create_sid;
1623 1629
1624 AVC_AUDIT_DATA_INIT(&ad, FS); 1630 COMMON_AUDIT_DATA_INIT(&ad, FS);
1625 ad.u.fs.path.dentry = dentry; 1631 ad.u.fs.path.dentry = dentry;
1626 1632
1627 rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, 1633 rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR,
@@ -1665,7 +1671,7 @@ static int may_link(struct inode *dir,
1665 1671
1666{ 1672{
1667 struct inode_security_struct *dsec, *isec; 1673 struct inode_security_struct *dsec, *isec;
1668 struct avc_audit_data ad; 1674 struct common_audit_data ad;
1669 u32 sid = current_sid(); 1675 u32 sid = current_sid();
1670 u32 av; 1676 u32 av;
1671 int rc; 1677 int rc;
@@ -1673,7 +1679,7 @@ static int may_link(struct inode *dir,
1673 dsec = dir->i_security; 1679 dsec = dir->i_security;
1674 isec = dentry->d_inode->i_security; 1680 isec = dentry->d_inode->i_security;
1675 1681
1676 AVC_AUDIT_DATA_INIT(&ad, FS); 1682 COMMON_AUDIT_DATA_INIT(&ad, FS);
1677 ad.u.fs.path.dentry = dentry; 1683 ad.u.fs.path.dentry = dentry;
1678 1684
1679 av = DIR__SEARCH; 1685 av = DIR__SEARCH;
@@ -1708,7 +1714,7 @@ static inline int may_rename(struct inode *old_dir,
1708 struct dentry *new_dentry) 1714 struct dentry *new_dentry)
1709{ 1715{
1710 struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec; 1716 struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec;
1711 struct avc_audit_data ad; 1717 struct common_audit_data ad;
1712 u32 sid = current_sid(); 1718 u32 sid = current_sid();
1713 u32 av; 1719 u32 av;
1714 int old_is_dir, new_is_dir; 1720 int old_is_dir, new_is_dir;
@@ -1719,7 +1725,7 @@ static inline int may_rename(struct inode *old_dir,
1719 old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 1725 old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
1720 new_dsec = new_dir->i_security; 1726 new_dsec = new_dir->i_security;
1721 1727
1722 AVC_AUDIT_DATA_INIT(&ad, FS); 1728 COMMON_AUDIT_DATA_INIT(&ad, FS);
1723 1729
1724 ad.u.fs.path.dentry = old_dentry; 1730 ad.u.fs.path.dentry = old_dentry;
1725 rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR, 1731 rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR,
@@ -1761,7 +1767,7 @@ static inline int may_rename(struct inode *old_dir,
1761static int superblock_has_perm(const struct cred *cred, 1767static int superblock_has_perm(const struct cred *cred,
1762 struct super_block *sb, 1768 struct super_block *sb,
1763 u32 perms, 1769 u32 perms,
1764 struct avc_audit_data *ad) 1770 struct common_audit_data *ad)
1765{ 1771{
1766 struct superblock_security_struct *sbsec; 1772 struct superblock_security_struct *sbsec;
1767 u32 sid = cred_sid(cred); 1773 u32 sid = cred_sid(cred);
@@ -1855,12 +1861,12 @@ static inline u32 open_file_to_av(struct file *file)
1855 1861
1856/* Hook functions begin here. */ 1862/* Hook functions begin here. */
1857 1863
1858static int selinux_ptrace_may_access(struct task_struct *child, 1864static int selinux_ptrace_access_check(struct task_struct *child,
1859 unsigned int mode) 1865 unsigned int mode)
1860{ 1866{
1861 int rc; 1867 int rc;
1862 1868
1863 rc = cap_ptrace_may_access(child, mode); 1869 rc = cap_ptrace_access_check(child, mode);
1864 if (rc) 1870 if (rc)
1865 return rc; 1871 return rc;
1866 1872
@@ -2101,7 +2107,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm)
2101 const struct task_security_struct *old_tsec; 2107 const struct task_security_struct *old_tsec;
2102 struct task_security_struct *new_tsec; 2108 struct task_security_struct *new_tsec;
2103 struct inode_security_struct *isec; 2109 struct inode_security_struct *isec;
2104 struct avc_audit_data ad; 2110 struct common_audit_data ad;
2105 struct inode *inode = bprm->file->f_path.dentry->d_inode; 2111 struct inode *inode = bprm->file->f_path.dentry->d_inode;
2106 int rc; 2112 int rc;
2107 2113
@@ -2139,7 +2145,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm)
2139 return rc; 2145 return rc;
2140 } 2146 }
2141 2147
2142 AVC_AUDIT_DATA_INIT(&ad, FS); 2148 COMMON_AUDIT_DATA_INIT(&ad, FS);
2143 ad.u.fs.path = bprm->file->f_path; 2149 ad.u.fs.path = bprm->file->f_path;
2144 2150
2145 if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) 2151 if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
@@ -2232,7 +2238,7 @@ extern struct dentry *selinux_null;
2232static inline void flush_unauthorized_files(const struct cred *cred, 2238static inline void flush_unauthorized_files(const struct cred *cred,
2233 struct files_struct *files) 2239 struct files_struct *files)
2234{ 2240{
2235 struct avc_audit_data ad; 2241 struct common_audit_data ad;
2236 struct file *file, *devnull = NULL; 2242 struct file *file, *devnull = NULL;
2237 struct tty_struct *tty; 2243 struct tty_struct *tty;
2238 struct fdtable *fdt; 2244 struct fdtable *fdt;
@@ -2266,7 +2272,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
2266 2272
2267 /* Revalidate access to inherited open files. */ 2273 /* Revalidate access to inherited open files. */
2268 2274
2269 AVC_AUDIT_DATA_INIT(&ad, FS); 2275 COMMON_AUDIT_DATA_INIT(&ad, FS);
2270 2276
2271 spin_lock(&files->file_lock); 2277 spin_lock(&files->file_lock);
2272 for (;;) { 2278 for (;;) {
@@ -2515,7 +2521,7 @@ out:
2515static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) 2521static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
2516{ 2522{
2517 const struct cred *cred = current_cred(); 2523 const struct cred *cred = current_cred();
2518 struct avc_audit_data ad; 2524 struct common_audit_data ad;
2519 int rc; 2525 int rc;
2520 2526
2521 rc = superblock_doinit(sb, data); 2527 rc = superblock_doinit(sb, data);
@@ -2526,7 +2532,7 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
2526 if (flags & MS_KERNMOUNT) 2532 if (flags & MS_KERNMOUNT)
2527 return 0; 2533 return 0;
2528 2534
2529 AVC_AUDIT_DATA_INIT(&ad, FS); 2535 COMMON_AUDIT_DATA_INIT(&ad, FS);
2530 ad.u.fs.path.dentry = sb->s_root; 2536 ad.u.fs.path.dentry = sb->s_root;
2531 return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad); 2537 return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad);
2532} 2538}
@@ -2534,9 +2540,9 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
2534static int selinux_sb_statfs(struct dentry *dentry) 2540static int selinux_sb_statfs(struct dentry *dentry)
2535{ 2541{
2536 const struct cred *cred = current_cred(); 2542 const struct cred *cred = current_cred();
2537 struct avc_audit_data ad; 2543 struct common_audit_data ad;
2538 2544
2539 AVC_AUDIT_DATA_INIT(&ad, FS); 2545 COMMON_AUDIT_DATA_INIT(&ad, FS);
2540 ad.u.fs.path.dentry = dentry->d_sb->s_root; 2546 ad.u.fs.path.dentry = dentry->d_sb->s_root;
2541 return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad); 2547 return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad);
2542} 2548}
@@ -2711,12 +2717,18 @@ static int selinux_inode_permission(struct inode *inode, int mask)
2711static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) 2717static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
2712{ 2718{
2713 const struct cred *cred = current_cred(); 2719 const struct cred *cred = current_cred();
2720 unsigned int ia_valid = iattr->ia_valid;
2721
2722 /* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */
2723 if (ia_valid & ATTR_FORCE) {
2724 ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE |
2725 ATTR_FORCE);
2726 if (!ia_valid)
2727 return 0;
2728 }
2714 2729
2715 if (iattr->ia_valid & ATTR_FORCE) 2730 if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
2716 return 0; 2731 ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
2717
2718 if (iattr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
2719 ATTR_ATIME_SET | ATTR_MTIME_SET))
2720 return dentry_has_perm(cred, NULL, dentry, FILE__SETATTR); 2732 return dentry_has_perm(cred, NULL, dentry, FILE__SETATTR);
2721 2733
2722 return dentry_has_perm(cred, NULL, dentry, FILE__WRITE); 2734 return dentry_has_perm(cred, NULL, dentry, FILE__WRITE);
@@ -2756,7 +2768,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
2756 struct inode *inode = dentry->d_inode; 2768 struct inode *inode = dentry->d_inode;
2757 struct inode_security_struct *isec = inode->i_security; 2769 struct inode_security_struct *isec = inode->i_security;
2758 struct superblock_security_struct *sbsec; 2770 struct superblock_security_struct *sbsec;
2759 struct avc_audit_data ad; 2771 struct common_audit_data ad;
2760 u32 newsid, sid = current_sid(); 2772 u32 newsid, sid = current_sid();
2761 int rc = 0; 2773 int rc = 0;
2762 2774
@@ -2770,7 +2782,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
2770 if (!is_owner_or_cap(inode)) 2782 if (!is_owner_or_cap(inode))
2771 return -EPERM; 2783 return -EPERM;
2772 2784
2773 AVC_AUDIT_DATA_INIT(&ad, FS); 2785 COMMON_AUDIT_DATA_INIT(&ad, FS);
2774 ad.u.fs.path.dentry = dentry; 2786 ad.u.fs.path.dentry = dentry;
2775 2787
2776 rc = avc_has_perm(sid, isec->sid, isec->sclass, 2788 rc = avc_has_perm(sid, isec->sid, isec->sclass,
@@ -2915,6 +2927,7 @@ static int selinux_inode_setsecurity(struct inode *inode, const char *name,
2915 return rc; 2927 return rc;
2916 2928
2917 isec->sid = newsid; 2929 isec->sid = newsid;
2930 isec->initialized = 1;
2918 return 0; 2931 return 0;
2919} 2932}
2920 2933
@@ -2939,11 +2952,6 @@ static int selinux_revalidate_file_permission(struct file *file, int mask)
2939 const struct cred *cred = current_cred(); 2952 const struct cred *cred = current_cred();
2940 struct inode *inode = file->f_path.dentry->d_inode; 2953 struct inode *inode = file->f_path.dentry->d_inode;
2941 2954
2942 if (!mask) {
2943 /* No permission to check. Existence test. */
2944 return 0;
2945 }
2946
2947 /* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */ 2955 /* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */
2948 if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE)) 2956 if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE))
2949 mask |= MAY_APPEND; 2957 mask |= MAY_APPEND;
@@ -2954,10 +2962,20 @@ static int selinux_revalidate_file_permission(struct file *file, int mask)
2954 2962
2955static int selinux_file_permission(struct file *file, int mask) 2963static int selinux_file_permission(struct file *file, int mask)
2956{ 2964{
2965 struct inode *inode = file->f_path.dentry->d_inode;
2966 struct file_security_struct *fsec = file->f_security;
2967 struct inode_security_struct *isec = inode->i_security;
2968 u32 sid = current_sid();
2969
2957 if (!mask) 2970 if (!mask)
2958 /* No permission to check. Existence test. */ 2971 /* No permission to check. Existence test. */
2959 return 0; 2972 return 0;
2960 2973
2974 if (sid == fsec->sid && fsec->isid == isec->sid &&
2975 fsec->pseqno == avc_policy_seqno())
2976 /* No change since dentry_open check. */
2977 return 0;
2978
2961 return selinux_revalidate_file_permission(file, mask); 2979 return selinux_revalidate_file_permission(file, mask);
2962} 2980}
2963 2981
@@ -3220,12 +3238,29 @@ static int selinux_task_create(unsigned long clone_flags)
3220} 3238}
3221 3239
3222/* 3240/*
3241 * allocate the SELinux part of blank credentials
3242 */
3243static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp)
3244{
3245 struct task_security_struct *tsec;
3246
3247 tsec = kzalloc(sizeof(struct task_security_struct), gfp);
3248 if (!tsec)
3249 return -ENOMEM;
3250
3251 cred->security = tsec;
3252 return 0;
3253}
3254
3255/*
3223 * detach and free the LSM part of a set of credentials 3256 * detach and free the LSM part of a set of credentials
3224 */ 3257 */
3225static void selinux_cred_free(struct cred *cred) 3258static void selinux_cred_free(struct cred *cred)
3226{ 3259{
3227 struct task_security_struct *tsec = cred->security; 3260 struct task_security_struct *tsec = cred->security;
3228 cred->security = NULL; 3261
3262 BUG_ON((unsigned long) cred->security < PAGE_SIZE);
3263 cred->security = (void *) 0x7UL;
3229 kfree(tsec); 3264 kfree(tsec);
3230} 3265}
3231 3266
@@ -3249,6 +3284,17 @@ static int selinux_cred_prepare(struct cred *new, const struct cred *old,
3249} 3284}
3250 3285
3251/* 3286/*
3287 * transfer the SELinux data to a blank set of creds
3288 */
3289static void selinux_cred_transfer(struct cred *new, const struct cred *old)
3290{
3291 const struct task_security_struct *old_tsec = old->security;
3292 struct task_security_struct *tsec = new->security;
3293
3294 *tsec = *old_tsec;
3295}
3296
3297/*
3252 * set the security data for a kernel service 3298 * set the security data for a kernel service
3253 * - all the creation contexts are set to unlabelled 3299 * - all the creation contexts are set to unlabelled
3254 */ 3300 */
@@ -3292,6 +3338,11 @@ static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode)
3292 return 0; 3338 return 0;
3293} 3339}
3294 3340
3341static int selinux_kernel_module_request(void)
3342{
3343 return task_has_system(current, SYSTEM__MODULE_REQUEST);
3344}
3345
3295static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) 3346static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
3296{ 3347{
3297 return current_has_perm(p, PROCESS__SETPGID); 3348 return current_has_perm(p, PROCESS__SETPGID);
@@ -3409,7 +3460,7 @@ static void selinux_task_to_inode(struct task_struct *p,
3409 3460
3410/* Returns error only if unable to parse addresses */ 3461/* Returns error only if unable to parse addresses */
3411static int selinux_parse_skb_ipv4(struct sk_buff *skb, 3462static int selinux_parse_skb_ipv4(struct sk_buff *skb,
3412 struct avc_audit_data *ad, u8 *proto) 3463 struct common_audit_data *ad, u8 *proto)
3413{ 3464{
3414 int offset, ihlen, ret = -EINVAL; 3465 int offset, ihlen, ret = -EINVAL;
3415 struct iphdr _iph, *ih; 3466 struct iphdr _iph, *ih;
@@ -3490,7 +3541,7 @@ out:
3490 3541
3491/* Returns error only if unable to parse addresses */ 3542/* Returns error only if unable to parse addresses */
3492static int selinux_parse_skb_ipv6(struct sk_buff *skb, 3543static int selinux_parse_skb_ipv6(struct sk_buff *skb,
3493 struct avc_audit_data *ad, u8 *proto) 3544 struct common_audit_data *ad, u8 *proto)
3494{ 3545{
3495 u8 nexthdr; 3546 u8 nexthdr;
3496 int ret = -EINVAL, offset; 3547 int ret = -EINVAL, offset;
@@ -3561,7 +3612,7 @@ out:
3561 3612
3562#endif /* IPV6 */ 3613#endif /* IPV6 */
3563 3614
3564static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad, 3615static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad,
3565 char **_addrp, int src, u8 *proto) 3616 char **_addrp, int src, u8 *proto)
3566{ 3617{
3567 char *addrp; 3618 char *addrp;
@@ -3643,7 +3694,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock,
3643 u32 perms) 3694 u32 perms)
3644{ 3695{
3645 struct inode_security_struct *isec; 3696 struct inode_security_struct *isec;
3646 struct avc_audit_data ad; 3697 struct common_audit_data ad;
3647 u32 sid; 3698 u32 sid;
3648 int err = 0; 3699 int err = 0;
3649 3700
@@ -3653,7 +3704,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock,
3653 goto out; 3704 goto out;
3654 sid = task_sid(task); 3705 sid = task_sid(task);
3655 3706
3656 AVC_AUDIT_DATA_INIT(&ad, NET); 3707 COMMON_AUDIT_DATA_INIT(&ad, NET);
3657 ad.u.net.sk = sock->sk; 3708 ad.u.net.sk = sock->sk;
3658 err = avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); 3709 err = avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad);
3659 3710
@@ -3740,7 +3791,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
3740 if (family == PF_INET || family == PF_INET6) { 3791 if (family == PF_INET || family == PF_INET6) {
3741 char *addrp; 3792 char *addrp;
3742 struct inode_security_struct *isec; 3793 struct inode_security_struct *isec;
3743 struct avc_audit_data ad; 3794 struct common_audit_data ad;
3744 struct sockaddr_in *addr4 = NULL; 3795 struct sockaddr_in *addr4 = NULL;
3745 struct sockaddr_in6 *addr6 = NULL; 3796 struct sockaddr_in6 *addr6 = NULL;
3746 unsigned short snum; 3797 unsigned short snum;
@@ -3769,7 +3820,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
3769 snum, &sid); 3820 snum, &sid);
3770 if (err) 3821 if (err)
3771 goto out; 3822 goto out;
3772 AVC_AUDIT_DATA_INIT(&ad, NET); 3823 COMMON_AUDIT_DATA_INIT(&ad, NET);
3773 ad.u.net.sport = htons(snum); 3824 ad.u.net.sport = htons(snum);
3774 ad.u.net.family = family; 3825 ad.u.net.family = family;
3775 err = avc_has_perm(isec->sid, sid, 3826 err = avc_has_perm(isec->sid, sid,
@@ -3802,7 +3853,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
3802 if (err) 3853 if (err)
3803 goto out; 3854 goto out;
3804 3855
3805 AVC_AUDIT_DATA_INIT(&ad, NET); 3856 COMMON_AUDIT_DATA_INIT(&ad, NET);
3806 ad.u.net.sport = htons(snum); 3857 ad.u.net.sport = htons(snum);
3807 ad.u.net.family = family; 3858 ad.u.net.family = family;
3808 3859
@@ -3836,7 +3887,7 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address,
3836 isec = SOCK_INODE(sock)->i_security; 3887 isec = SOCK_INODE(sock)->i_security;
3837 if (isec->sclass == SECCLASS_TCP_SOCKET || 3888 if (isec->sclass == SECCLASS_TCP_SOCKET ||
3838 isec->sclass == SECCLASS_DCCP_SOCKET) { 3889 isec->sclass == SECCLASS_DCCP_SOCKET) {
3839 struct avc_audit_data ad; 3890 struct common_audit_data ad;
3840 struct sockaddr_in *addr4 = NULL; 3891 struct sockaddr_in *addr4 = NULL;
3841 struct sockaddr_in6 *addr6 = NULL; 3892 struct sockaddr_in6 *addr6 = NULL;
3842 unsigned short snum; 3893 unsigned short snum;
@@ -3861,7 +3912,7 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address,
3861 perm = (isec->sclass == SECCLASS_TCP_SOCKET) ? 3912 perm = (isec->sclass == SECCLASS_TCP_SOCKET) ?
3862 TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT; 3913 TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT;
3863 3914
3864 AVC_AUDIT_DATA_INIT(&ad, NET); 3915 COMMON_AUDIT_DATA_INIT(&ad, NET);
3865 ad.u.net.dport = htons(snum); 3916 ad.u.net.dport = htons(snum);
3866 ad.u.net.family = sk->sk_family; 3917 ad.u.net.family = sk->sk_family;
3867 err = avc_has_perm(isec->sid, sid, isec->sclass, perm, &ad); 3918 err = avc_has_perm(isec->sid, sid, isec->sclass, perm, &ad);
@@ -3951,13 +4002,13 @@ static int selinux_socket_unix_stream_connect(struct socket *sock,
3951 struct sk_security_struct *ssec; 4002 struct sk_security_struct *ssec;
3952 struct inode_security_struct *isec; 4003 struct inode_security_struct *isec;
3953 struct inode_security_struct *other_isec; 4004 struct inode_security_struct *other_isec;
3954 struct avc_audit_data ad; 4005 struct common_audit_data ad;
3955 int err; 4006 int err;
3956 4007
3957 isec = SOCK_INODE(sock)->i_security; 4008 isec = SOCK_INODE(sock)->i_security;
3958 other_isec = SOCK_INODE(other)->i_security; 4009 other_isec = SOCK_INODE(other)->i_security;
3959 4010
3960 AVC_AUDIT_DATA_INIT(&ad, NET); 4011 COMMON_AUDIT_DATA_INIT(&ad, NET);
3961 ad.u.net.sk = other->sk; 4012 ad.u.net.sk = other->sk;
3962 4013
3963 err = avc_has_perm(isec->sid, other_isec->sid, 4014 err = avc_has_perm(isec->sid, other_isec->sid,
@@ -3983,13 +4034,13 @@ static int selinux_socket_unix_may_send(struct socket *sock,
3983{ 4034{
3984 struct inode_security_struct *isec; 4035 struct inode_security_struct *isec;
3985 struct inode_security_struct *other_isec; 4036 struct inode_security_struct *other_isec;
3986 struct avc_audit_data ad; 4037 struct common_audit_data ad;
3987 int err; 4038 int err;
3988 4039
3989 isec = SOCK_INODE(sock)->i_security; 4040 isec = SOCK_INODE(sock)->i_security;
3990 other_isec = SOCK_INODE(other)->i_security; 4041 other_isec = SOCK_INODE(other)->i_security;
3991 4042
3992 AVC_AUDIT_DATA_INIT(&ad, NET); 4043 COMMON_AUDIT_DATA_INIT(&ad, NET);
3993 ad.u.net.sk = other->sk; 4044 ad.u.net.sk = other->sk;
3994 4045
3995 err = avc_has_perm(isec->sid, other_isec->sid, 4046 err = avc_has_perm(isec->sid, other_isec->sid,
@@ -4002,7 +4053,7 @@ static int selinux_socket_unix_may_send(struct socket *sock,
4002 4053
4003static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family, 4054static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family,
4004 u32 peer_sid, 4055 u32 peer_sid,
4005 struct avc_audit_data *ad) 4056 struct common_audit_data *ad)
4006{ 4057{
4007 int err; 4058 int err;
4008 u32 if_sid; 4059 u32 if_sid;
@@ -4030,10 +4081,10 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
4030 struct sk_security_struct *sksec = sk->sk_security; 4081 struct sk_security_struct *sksec = sk->sk_security;
4031 u32 peer_sid; 4082 u32 peer_sid;
4032 u32 sk_sid = sksec->sid; 4083 u32 sk_sid = sksec->sid;
4033 struct avc_audit_data ad; 4084 struct common_audit_data ad;
4034 char *addrp; 4085 char *addrp;
4035 4086
4036 AVC_AUDIT_DATA_INIT(&ad, NET); 4087 COMMON_AUDIT_DATA_INIT(&ad, NET);
4037 ad.u.net.netif = skb->iif; 4088 ad.u.net.netif = skb->iif;
4038 ad.u.net.family = family; 4089 ad.u.net.family = family;
4039 err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); 4090 err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
@@ -4071,7 +4122,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
4071 struct sk_security_struct *sksec = sk->sk_security; 4122 struct sk_security_struct *sksec = sk->sk_security;
4072 u16 family = sk->sk_family; 4123 u16 family = sk->sk_family;
4073 u32 sk_sid = sksec->sid; 4124 u32 sk_sid = sksec->sid;
4074 struct avc_audit_data ad; 4125 struct common_audit_data ad;
4075 char *addrp; 4126 char *addrp;
4076 u8 secmark_active; 4127 u8 secmark_active;
4077 u8 peerlbl_active; 4128 u8 peerlbl_active;
@@ -4095,7 +4146,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
4095 if (!secmark_active && !peerlbl_active) 4146 if (!secmark_active && !peerlbl_active)
4096 return 0; 4147 return 0;
4097 4148
4098 AVC_AUDIT_DATA_INIT(&ad, NET); 4149 COMMON_AUDIT_DATA_INIT(&ad, NET);
4099 ad.u.net.netif = skb->iif; 4150 ad.u.net.netif = skb->iif;
4100 ad.u.net.family = family; 4151 ad.u.net.family = family;
4101 err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); 4152 err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
@@ -4309,6 +4360,59 @@ static void selinux_req_classify_flow(const struct request_sock *req,
4309 fl->secid = req->secid; 4360 fl->secid = req->secid;
4310} 4361}
4311 4362
4363static int selinux_tun_dev_create(void)
4364{
4365 u32 sid = current_sid();
4366
4367 /* we aren't taking into account the "sockcreate" SID since the socket
4368 * that is being created here is not a socket in the traditional sense,
4369 * instead it is a private sock, accessible only to the kernel, and
4370 * representing a wide range of network traffic spanning multiple
4371 * connections unlike traditional sockets - check the TUN driver to
4372 * get a better understanding of why this socket is special */
4373
4374 return avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE,
4375 NULL);
4376}
4377
4378static void selinux_tun_dev_post_create(struct sock *sk)
4379{
4380 struct sk_security_struct *sksec = sk->sk_security;
4381
4382 /* we don't currently perform any NetLabel based labeling here and it
4383 * isn't clear that we would want to do so anyway; while we could apply
4384 * labeling without the support of the TUN user the resulting labeled
4385 * traffic from the other end of the connection would almost certainly
4386 * cause confusion to the TUN user that had no idea network labeling
4387 * protocols were being used */
4388
4389 /* see the comments in selinux_tun_dev_create() about why we don't use
4390 * the sockcreate SID here */
4391
4392 sksec->sid = current_sid();
4393 sksec->sclass = SECCLASS_TUN_SOCKET;
4394}
4395
4396static int selinux_tun_dev_attach(struct sock *sk)
4397{
4398 struct sk_security_struct *sksec = sk->sk_security;
4399 u32 sid = current_sid();
4400 int err;
4401
4402 err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
4403 TUN_SOCKET__RELABELFROM, NULL);
4404 if (err)
4405 return err;
4406 err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET,
4407 TUN_SOCKET__RELABELTO, NULL);
4408 if (err)
4409 return err;
4410
4411 sksec->sid = sid;
4412
4413 return 0;
4414}
4415
4312static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) 4416static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
4313{ 4417{
4314 int err = 0; 4418 int err = 0;
@@ -4353,7 +4457,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
4353 int err; 4457 int err;
4354 char *addrp; 4458 char *addrp;
4355 u32 peer_sid; 4459 u32 peer_sid;
4356 struct avc_audit_data ad; 4460 struct common_audit_data ad;
4357 u8 secmark_active; 4461 u8 secmark_active;
4358 u8 netlbl_active; 4462 u8 netlbl_active;
4359 u8 peerlbl_active; 4463 u8 peerlbl_active;
@@ -4370,7 +4474,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
4370 if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0) 4474 if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0)
4371 return NF_DROP; 4475 return NF_DROP;
4372 4476
4373 AVC_AUDIT_DATA_INIT(&ad, NET); 4477 COMMON_AUDIT_DATA_INIT(&ad, NET);
4374 ad.u.net.netif = ifindex; 4478 ad.u.net.netif = ifindex;
4375 ad.u.net.family = family; 4479 ad.u.net.family = family;
4376 if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) 4480 if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0)
@@ -4458,7 +4562,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
4458{ 4562{
4459 struct sock *sk = skb->sk; 4563 struct sock *sk = skb->sk;
4460 struct sk_security_struct *sksec; 4564 struct sk_security_struct *sksec;
4461 struct avc_audit_data ad; 4565 struct common_audit_data ad;
4462 char *addrp; 4566 char *addrp;
4463 u8 proto; 4567 u8 proto;
4464 4568
@@ -4466,7 +4570,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
4466 return NF_ACCEPT; 4570 return NF_ACCEPT;
4467 sksec = sk->sk_security; 4571 sksec = sk->sk_security;
4468 4572
4469 AVC_AUDIT_DATA_INIT(&ad, NET); 4573 COMMON_AUDIT_DATA_INIT(&ad, NET);
4470 ad.u.net.netif = ifindex; 4574 ad.u.net.netif = ifindex;
4471 ad.u.net.family = family; 4575 ad.u.net.family = family;
4472 if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto)) 4576 if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto))
@@ -4490,7 +4594,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
4490 u32 secmark_perm; 4594 u32 secmark_perm;
4491 u32 peer_sid; 4595 u32 peer_sid;
4492 struct sock *sk; 4596 struct sock *sk;
4493 struct avc_audit_data ad; 4597 struct common_audit_data ad;
4494 char *addrp; 4598 char *addrp;
4495 u8 secmark_active; 4599 u8 secmark_active;
4496 u8 peerlbl_active; 4600 u8 peerlbl_active;
@@ -4549,7 +4653,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
4549 secmark_perm = PACKET__SEND; 4653 secmark_perm = PACKET__SEND;
4550 } 4654 }
4551 4655
4552 AVC_AUDIT_DATA_INIT(&ad, NET); 4656 COMMON_AUDIT_DATA_INIT(&ad, NET);
4553 ad.u.net.netif = ifindex; 4657 ad.u.net.netif = ifindex;
4554 ad.u.net.family = family; 4658 ad.u.net.family = family;
4555 if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL)) 4659 if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL))
@@ -4619,13 +4723,13 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb)
4619static int selinux_netlink_recv(struct sk_buff *skb, int capability) 4723static int selinux_netlink_recv(struct sk_buff *skb, int capability)
4620{ 4724{
4621 int err; 4725 int err;
4622 struct avc_audit_data ad; 4726 struct common_audit_data ad;
4623 4727
4624 err = cap_netlink_recv(skb, capability); 4728 err = cap_netlink_recv(skb, capability);
4625 if (err) 4729 if (err)
4626 return err; 4730 return err;
4627 4731
4628 AVC_AUDIT_DATA_INIT(&ad, CAP); 4732 COMMON_AUDIT_DATA_INIT(&ad, CAP);
4629 ad.u.cap = capability; 4733 ad.u.cap = capability;
4630 4734
4631 return avc_has_perm(NETLINK_CB(skb).sid, NETLINK_CB(skb).sid, 4735 return avc_has_perm(NETLINK_CB(skb).sid, NETLINK_CB(skb).sid,
@@ -4684,12 +4788,12 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms,
4684 u32 perms) 4788 u32 perms)
4685{ 4789{
4686 struct ipc_security_struct *isec; 4790 struct ipc_security_struct *isec;
4687 struct avc_audit_data ad; 4791 struct common_audit_data ad;
4688 u32 sid = current_sid(); 4792 u32 sid = current_sid();
4689 4793
4690 isec = ipc_perms->security; 4794 isec = ipc_perms->security;
4691 4795
4692 AVC_AUDIT_DATA_INIT(&ad, IPC); 4796 COMMON_AUDIT_DATA_INIT(&ad, IPC);
4693 ad.u.ipc_id = ipc_perms->key; 4797 ad.u.ipc_id = ipc_perms->key;
4694 4798
4695 return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); 4799 return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad);
@@ -4709,7 +4813,7 @@ static void selinux_msg_msg_free_security(struct msg_msg *msg)
4709static int selinux_msg_queue_alloc_security(struct msg_queue *msq) 4813static int selinux_msg_queue_alloc_security(struct msg_queue *msq)
4710{ 4814{
4711 struct ipc_security_struct *isec; 4815 struct ipc_security_struct *isec;
4712 struct avc_audit_data ad; 4816 struct common_audit_data ad;
4713 u32 sid = current_sid(); 4817 u32 sid = current_sid();
4714 int rc; 4818 int rc;
4715 4819
@@ -4719,7 +4823,7 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq)
4719 4823
4720 isec = msq->q_perm.security; 4824 isec = msq->q_perm.security;
4721 4825
4722 AVC_AUDIT_DATA_INIT(&ad, IPC); 4826 COMMON_AUDIT_DATA_INIT(&ad, IPC);
4723 ad.u.ipc_id = msq->q_perm.key; 4827 ad.u.ipc_id = msq->q_perm.key;
4724 4828
4725 rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, 4829 rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
@@ -4739,12 +4843,12 @@ static void selinux_msg_queue_free_security(struct msg_queue *msq)
4739static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) 4843static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg)
4740{ 4844{
4741 struct ipc_security_struct *isec; 4845 struct ipc_security_struct *isec;
4742 struct avc_audit_data ad; 4846 struct common_audit_data ad;
4743 u32 sid = current_sid(); 4847 u32 sid = current_sid();
4744 4848
4745 isec = msq->q_perm.security; 4849 isec = msq->q_perm.security;
4746 4850
4747 AVC_AUDIT_DATA_INIT(&ad, IPC); 4851 COMMON_AUDIT_DATA_INIT(&ad, IPC);
4748 ad.u.ipc_id = msq->q_perm.key; 4852 ad.u.ipc_id = msq->q_perm.key;
4749 4853
4750 return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, 4854 return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
@@ -4783,7 +4887,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
4783{ 4887{
4784 struct ipc_security_struct *isec; 4888 struct ipc_security_struct *isec;
4785 struct msg_security_struct *msec; 4889 struct msg_security_struct *msec;
4786 struct avc_audit_data ad; 4890 struct common_audit_data ad;
4787 u32 sid = current_sid(); 4891 u32 sid = current_sid();
4788 int rc; 4892 int rc;
4789 4893
@@ -4804,7 +4908,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
4804 return rc; 4908 return rc;
4805 } 4909 }
4806 4910
4807 AVC_AUDIT_DATA_INIT(&ad, IPC); 4911 COMMON_AUDIT_DATA_INIT(&ad, IPC);
4808 ad.u.ipc_id = msq->q_perm.key; 4912 ad.u.ipc_id = msq->q_perm.key;
4809 4913
4810 /* Can this process write to the queue? */ 4914 /* Can this process write to the queue? */
@@ -4828,14 +4932,14 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
4828{ 4932{
4829 struct ipc_security_struct *isec; 4933 struct ipc_security_struct *isec;
4830 struct msg_security_struct *msec; 4934 struct msg_security_struct *msec;
4831 struct avc_audit_data ad; 4935 struct common_audit_data ad;
4832 u32 sid = task_sid(target); 4936 u32 sid = task_sid(target);
4833 int rc; 4937 int rc;
4834 4938
4835 isec = msq->q_perm.security; 4939 isec = msq->q_perm.security;
4836 msec = msg->security; 4940 msec = msg->security;
4837 4941
4838 AVC_AUDIT_DATA_INIT(&ad, IPC); 4942 COMMON_AUDIT_DATA_INIT(&ad, IPC);
4839 ad.u.ipc_id = msq->q_perm.key; 4943 ad.u.ipc_id = msq->q_perm.key;
4840 4944
4841 rc = avc_has_perm(sid, isec->sid, 4945 rc = avc_has_perm(sid, isec->sid,
@@ -4850,7 +4954,7 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
4850static int selinux_shm_alloc_security(struct shmid_kernel *shp) 4954static int selinux_shm_alloc_security(struct shmid_kernel *shp)
4851{ 4955{
4852 struct ipc_security_struct *isec; 4956 struct ipc_security_struct *isec;
4853 struct avc_audit_data ad; 4957 struct common_audit_data ad;
4854 u32 sid = current_sid(); 4958 u32 sid = current_sid();
4855 int rc; 4959 int rc;
4856 4960
@@ -4860,7 +4964,7 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp)
4860 4964
4861 isec = shp->shm_perm.security; 4965 isec = shp->shm_perm.security;
4862 4966
4863 AVC_AUDIT_DATA_INIT(&ad, IPC); 4967 COMMON_AUDIT_DATA_INIT(&ad, IPC);
4864 ad.u.ipc_id = shp->shm_perm.key; 4968 ad.u.ipc_id = shp->shm_perm.key;
4865 4969
4866 rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM, 4970 rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM,
@@ -4880,12 +4984,12 @@ static void selinux_shm_free_security(struct shmid_kernel *shp)
4880static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) 4984static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg)
4881{ 4985{
4882 struct ipc_security_struct *isec; 4986 struct ipc_security_struct *isec;
4883 struct avc_audit_data ad; 4987 struct common_audit_data ad;
4884 u32 sid = current_sid(); 4988 u32 sid = current_sid();
4885 4989
4886 isec = shp->shm_perm.security; 4990 isec = shp->shm_perm.security;
4887 4991
4888 AVC_AUDIT_DATA_INIT(&ad, IPC); 4992 COMMON_AUDIT_DATA_INIT(&ad, IPC);
4889 ad.u.ipc_id = shp->shm_perm.key; 4993 ad.u.ipc_id = shp->shm_perm.key;
4890 4994
4891 return avc_has_perm(sid, isec->sid, SECCLASS_SHM, 4995 return avc_has_perm(sid, isec->sid, SECCLASS_SHM,
@@ -4942,7 +5046,7 @@ static int selinux_shm_shmat(struct shmid_kernel *shp,
4942static int selinux_sem_alloc_security(struct sem_array *sma) 5046static int selinux_sem_alloc_security(struct sem_array *sma)
4943{ 5047{
4944 struct ipc_security_struct *isec; 5048 struct ipc_security_struct *isec;
4945 struct avc_audit_data ad; 5049 struct common_audit_data ad;
4946 u32 sid = current_sid(); 5050 u32 sid = current_sid();
4947 int rc; 5051 int rc;
4948 5052
@@ -4952,7 +5056,7 @@ static int selinux_sem_alloc_security(struct sem_array *sma)
4952 5056
4953 isec = sma->sem_perm.security; 5057 isec = sma->sem_perm.security;
4954 5058
4955 AVC_AUDIT_DATA_INIT(&ad, IPC); 5059 COMMON_AUDIT_DATA_INIT(&ad, IPC);
4956 ad.u.ipc_id = sma->sem_perm.key; 5060 ad.u.ipc_id = sma->sem_perm.key;
4957 5061
4958 rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM, 5062 rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM,
@@ -4972,12 +5076,12 @@ static void selinux_sem_free_security(struct sem_array *sma)
4972static int selinux_sem_associate(struct sem_array *sma, int semflg) 5076static int selinux_sem_associate(struct sem_array *sma, int semflg)
4973{ 5077{
4974 struct ipc_security_struct *isec; 5078 struct ipc_security_struct *isec;
4975 struct avc_audit_data ad; 5079 struct common_audit_data ad;
4976 u32 sid = current_sid(); 5080 u32 sid = current_sid();
4977 5081
4978 isec = sma->sem_perm.security; 5082 isec = sma->sem_perm.security;
4979 5083
4980 AVC_AUDIT_DATA_INIT(&ad, IPC); 5084 COMMON_AUDIT_DATA_INIT(&ad, IPC);
4981 ad.u.ipc_id = sma->sem_perm.key; 5085 ad.u.ipc_id = sma->sem_perm.key;
4982 5086
4983 return avc_has_perm(sid, isec->sid, SECCLASS_SEM, 5087 return avc_has_perm(sid, isec->sid, SECCLASS_SEM,
@@ -5195,7 +5299,7 @@ static int selinux_setprocattr(struct task_struct *p,
5195 5299
5196 /* Only allow single threaded processes to change context */ 5300 /* Only allow single threaded processes to change context */
5197 error = -EPERM; 5301 error = -EPERM;
5198 if (!is_single_threaded(p)) { 5302 if (!current_is_single_threaded()) {
5199 error = security_bounded_transition(tsec->sid, sid); 5303 error = security_bounded_transition(tsec->sid, sid);
5200 if (error) 5304 if (error)
5201 goto abort_change; 5305 goto abort_change;
@@ -5252,6 +5356,32 @@ static void selinux_release_secctx(char *secdata, u32 seclen)
5252 kfree(secdata); 5356 kfree(secdata);
5253} 5357}
5254 5358
5359/*
5360 * called with inode->i_mutex locked
5361 */
5362static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
5363{
5364 return selinux_inode_setsecurity(inode, XATTR_SELINUX_SUFFIX, ctx, ctxlen, 0);
5365}
5366
5367/*
5368 * called with inode->i_mutex locked
5369 */
5370static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
5371{
5372 return __vfs_setxattr_noperm(dentry, XATTR_NAME_SELINUX, ctx, ctxlen, 0);
5373}
5374
5375static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
5376{
5377 int len = 0;
5378 len = selinux_inode_getsecurity(inode, XATTR_SELINUX_SUFFIX,
5379 ctx, true);
5380 if (len < 0)
5381 return len;
5382 *ctxlen = len;
5383 return 0;
5384}
5255#ifdef CONFIG_KEYS 5385#ifdef CONFIG_KEYS
5256 5386
5257static int selinux_key_alloc(struct key *k, const struct cred *cred, 5387static int selinux_key_alloc(struct key *k, const struct cred *cred,
@@ -5323,7 +5453,7 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer)
5323static struct security_operations selinux_ops = { 5453static struct security_operations selinux_ops = {
5324 .name = "selinux", 5454 .name = "selinux",
5325 5455
5326 .ptrace_may_access = selinux_ptrace_may_access, 5456 .ptrace_access_check = selinux_ptrace_access_check,
5327 .ptrace_traceme = selinux_ptrace_traceme, 5457 .ptrace_traceme = selinux_ptrace_traceme,
5328 .capget = selinux_capget, 5458 .capget = selinux_capget,
5329 .capset = selinux_capset, 5459 .capset = selinux_capset,
@@ -5396,10 +5526,13 @@ static struct security_operations selinux_ops = {
5396 .dentry_open = selinux_dentry_open, 5526 .dentry_open = selinux_dentry_open,
5397 5527
5398 .task_create = selinux_task_create, 5528 .task_create = selinux_task_create,
5529 .cred_alloc_blank = selinux_cred_alloc_blank,
5399 .cred_free = selinux_cred_free, 5530 .cred_free = selinux_cred_free,
5400 .cred_prepare = selinux_cred_prepare, 5531 .cred_prepare = selinux_cred_prepare,
5532 .cred_transfer = selinux_cred_transfer,
5401 .kernel_act_as = selinux_kernel_act_as, 5533 .kernel_act_as = selinux_kernel_act_as,
5402 .kernel_create_files_as = selinux_kernel_create_files_as, 5534 .kernel_create_files_as = selinux_kernel_create_files_as,
5535 .kernel_module_request = selinux_kernel_module_request,
5403 .task_setpgid = selinux_task_setpgid, 5536 .task_setpgid = selinux_task_setpgid,
5404 .task_getpgid = selinux_task_getpgid, 5537 .task_getpgid = selinux_task_getpgid,
5405 .task_getsid = selinux_task_getsid, 5538 .task_getsid = selinux_task_getsid,
@@ -5448,6 +5581,9 @@ static struct security_operations selinux_ops = {
5448 .secid_to_secctx = selinux_secid_to_secctx, 5581 .secid_to_secctx = selinux_secid_to_secctx,
5449 .secctx_to_secid = selinux_secctx_to_secid, 5582 .secctx_to_secid = selinux_secctx_to_secid,
5450 .release_secctx = selinux_release_secctx, 5583 .release_secctx = selinux_release_secctx,
5584 .inode_notifysecctx = selinux_inode_notifysecctx,
5585 .inode_setsecctx = selinux_inode_setsecctx,
5586 .inode_getsecctx = selinux_inode_getsecctx,
5451 5587
5452 .unix_stream_connect = selinux_socket_unix_stream_connect, 5588 .unix_stream_connect = selinux_socket_unix_stream_connect,
5453 .unix_may_send = selinux_socket_unix_may_send, 5589 .unix_may_send = selinux_socket_unix_may_send,
@@ -5477,6 +5613,9 @@ static struct security_operations selinux_ops = {
5477 .inet_csk_clone = selinux_inet_csk_clone, 5613 .inet_csk_clone = selinux_inet_csk_clone,
5478 .inet_conn_established = selinux_inet_conn_established, 5614 .inet_conn_established = selinux_inet_conn_established,
5479 .req_classify_flow = selinux_req_classify_flow, 5615 .req_classify_flow = selinux_req_classify_flow,
5616 .tun_dev_create = selinux_tun_dev_create,
5617 .tun_dev_post_create = selinux_tun_dev_post_create,
5618 .tun_dev_attach = selinux_tun_dev_attach,
5480 5619
5481#ifdef CONFIG_SECURITY_NETWORK_XFRM 5620#ifdef CONFIG_SECURITY_NETWORK_XFRM
5482 .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, 5621 .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc,
@@ -5691,6 +5830,9 @@ int selinux_disable(void)
5691 selinux_disabled = 1; 5830 selinux_disabled = 1;
5692 selinux_enabled = 0; 5831 selinux_enabled = 0;
5693 5832
5833 /* Try to destroy the avc node cache */
5834 avc_disable();
5835
5694 /* Reset security_ops to the secondary module, dummy or capability. */ 5836 /* Reset security_ops to the secondary module, dummy or capability. */
5695 security_ops = secondary_ops; 5837 security_ops = secondary_ops;
5696 5838
diff --git a/security/selinux/include/av_inherit.h b/security/selinux/include/av_inherit.h
index 8377a4ba3b95..abedcd704dae 100644
--- a/security/selinux/include/av_inherit.h
+++ b/security/selinux/include/av_inherit.h
@@ -15,6 +15,7 @@
15 S_(SECCLASS_KEY_SOCKET, socket, 0x00400000UL) 15 S_(SECCLASS_KEY_SOCKET, socket, 0x00400000UL)
16 S_(SECCLASS_UNIX_STREAM_SOCKET, socket, 0x00400000UL) 16 S_(SECCLASS_UNIX_STREAM_SOCKET, socket, 0x00400000UL)
17 S_(SECCLASS_UNIX_DGRAM_SOCKET, socket, 0x00400000UL) 17 S_(SECCLASS_UNIX_DGRAM_SOCKET, socket, 0x00400000UL)
18 S_(SECCLASS_TUN_SOCKET, socket, 0x00400000UL)
18 S_(SECCLASS_IPC, ipc, 0x00000200UL) 19 S_(SECCLASS_IPC, ipc, 0x00000200UL)
19 S_(SECCLASS_SEM, ipc, 0x00000200UL) 20 S_(SECCLASS_SEM, ipc, 0x00000200UL)
20 S_(SECCLASS_MSGQ, ipc, 0x00000200UL) 21 S_(SECCLASS_MSGQ, ipc, 0x00000200UL)
diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h
index 31df1d7c1aee..2b683ad83d21 100644
--- a/security/selinux/include/av_perm_to_string.h
+++ b/security/selinux/include/av_perm_to_string.h
@@ -107,6 +107,7 @@
107 S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, "syslog_read") 107 S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, "syslog_read")
108 S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, "syslog_mod") 108 S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, "syslog_mod")
109 S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE, "syslog_console") 109 S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE, "syslog_console")
110 S_(SECCLASS_SYSTEM, SYSTEM__MODULE_REQUEST, "module_request")
110 S_(SECCLASS_CAPABILITY, CAPABILITY__CHOWN, "chown") 111 S_(SECCLASS_CAPABILITY, CAPABILITY__CHOWN, "chown")
111 S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_OVERRIDE, "dac_override") 112 S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_OVERRIDE, "dac_override")
112 S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_READ_SEARCH, "dac_read_search") 113 S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_READ_SEARCH, "dac_read_search")
diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h
index d645192ee950..0546d616ccac 100644
--- a/security/selinux/include/av_permissions.h
+++ b/security/selinux/include/av_permissions.h
@@ -423,6 +423,28 @@
423#define UNIX_DGRAM_SOCKET__RECV_MSG 0x00080000UL 423#define UNIX_DGRAM_SOCKET__RECV_MSG 0x00080000UL
424#define UNIX_DGRAM_SOCKET__SEND_MSG 0x00100000UL 424#define UNIX_DGRAM_SOCKET__SEND_MSG 0x00100000UL
425#define UNIX_DGRAM_SOCKET__NAME_BIND 0x00200000UL 425#define UNIX_DGRAM_SOCKET__NAME_BIND 0x00200000UL
426#define TUN_SOCKET__IOCTL 0x00000001UL
427#define TUN_SOCKET__READ 0x00000002UL
428#define TUN_SOCKET__WRITE 0x00000004UL
429#define TUN_SOCKET__CREATE 0x00000008UL
430#define TUN_SOCKET__GETATTR 0x00000010UL
431#define TUN_SOCKET__SETATTR 0x00000020UL
432#define TUN_SOCKET__LOCK 0x00000040UL
433#define TUN_SOCKET__RELABELFROM 0x00000080UL
434#define TUN_SOCKET__RELABELTO 0x00000100UL
435#define TUN_SOCKET__APPEND 0x00000200UL
436#define TUN_SOCKET__BIND 0x00000400UL
437#define TUN_SOCKET__CONNECT 0x00000800UL
438#define TUN_SOCKET__LISTEN 0x00001000UL
439#define TUN_SOCKET__ACCEPT 0x00002000UL
440#define TUN_SOCKET__GETOPT 0x00004000UL
441#define TUN_SOCKET__SETOPT 0x00008000UL
442#define TUN_SOCKET__SHUTDOWN 0x00010000UL
443#define TUN_SOCKET__RECVFROM 0x00020000UL
444#define TUN_SOCKET__SENDTO 0x00040000UL
445#define TUN_SOCKET__RECV_MSG 0x00080000UL
446#define TUN_SOCKET__SEND_MSG 0x00100000UL
447#define TUN_SOCKET__NAME_BIND 0x00200000UL
426#define PROCESS__FORK 0x00000001UL 448#define PROCESS__FORK 0x00000001UL
427#define PROCESS__TRANSITION 0x00000002UL 449#define PROCESS__TRANSITION 0x00000002UL
428#define PROCESS__SIGCHLD 0x00000004UL 450#define PROCESS__SIGCHLD 0x00000004UL
@@ -508,6 +530,7 @@
508#define SYSTEM__SYSLOG_READ 0x00000002UL 530#define SYSTEM__SYSLOG_READ 0x00000002UL
509#define SYSTEM__SYSLOG_MOD 0x00000004UL 531#define SYSTEM__SYSLOG_MOD 0x00000004UL
510#define SYSTEM__SYSLOG_CONSOLE 0x00000008UL 532#define SYSTEM__SYSLOG_CONSOLE 0x00000008UL
533#define SYSTEM__MODULE_REQUEST 0x00000010UL
511#define CAPABILITY__CHOWN 0x00000001UL 534#define CAPABILITY__CHOWN 0x00000001UL
512#define CAPABILITY__DAC_OVERRIDE 0x00000002UL 535#define CAPABILITY__DAC_OVERRIDE 0x00000002UL
513#define CAPABILITY__DAC_READ_SEARCH 0x00000004UL 536#define CAPABILITY__DAC_READ_SEARCH 0x00000004UL
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index d12ff1a9c0aa..e94e82f73818 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -13,6 +13,7 @@
13#include <linux/spinlock.h> 13#include <linux/spinlock.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/audit.h> 15#include <linux/audit.h>
16#include <linux/lsm_audit.h>
16#include <linux/in6.h> 17#include <linux/in6.h>
17#include <linux/path.h> 18#include <linux/path.h>
18#include <asm/system.h> 19#include <asm/system.h>
@@ -36,48 +37,6 @@ struct inode;
36struct sock; 37struct sock;
37struct sk_buff; 38struct sk_buff;
38 39
39/* Auxiliary data to use in generating the audit record. */
40struct avc_audit_data {
41 char type;
42#define AVC_AUDIT_DATA_FS 1
43#define AVC_AUDIT_DATA_NET 2
44#define AVC_AUDIT_DATA_CAP 3
45#define AVC_AUDIT_DATA_IPC 4
46 struct task_struct *tsk;
47 union {
48 struct {
49 struct path path;
50 struct inode *inode;
51 } fs;
52 struct {
53 int netif;
54 struct sock *sk;
55 u16 family;
56 __be16 dport;
57 __be16 sport;
58 union {
59 struct {
60 __be32 daddr;
61 __be32 saddr;
62 } v4;
63 struct {
64 struct in6_addr daddr;
65 struct in6_addr saddr;
66 } v6;
67 } fam;
68 } net;
69 int cap;
70 int ipc_id;
71 } u;
72};
73
74#define v4info fam.v4
75#define v6info fam.v6
76
77/* Initialize an AVC audit data structure. */
78#define AVC_AUDIT_DATA_INIT(_d,_t) \
79 { memset((_d), 0, sizeof(struct avc_audit_data)); (_d)->type = AVC_AUDIT_DATA_##_t; }
80
81/* 40/*
82 * AVC statistics 41 * AVC statistics
83 */ 42 */
@@ -98,7 +57,9 @@ void __init avc_init(void);
98 57
99void avc_audit(u32 ssid, u32 tsid, 58void avc_audit(u32 ssid, u32 tsid,
100 u16 tclass, u32 requested, 59 u16 tclass, u32 requested,
101 struct av_decision *avd, int result, struct avc_audit_data *auditdata); 60 struct av_decision *avd,
61 int result,
62 struct common_audit_data *a);
102 63
103#define AVC_STRICT 1 /* Ignore permissive mode. */ 64#define AVC_STRICT 1 /* Ignore permissive mode. */
104int avc_has_perm_noaudit(u32 ssid, u32 tsid, 65int avc_has_perm_noaudit(u32 ssid, u32 tsid,
@@ -108,7 +69,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
108 69
109int avc_has_perm(u32 ssid, u32 tsid, 70int avc_has_perm(u32 ssid, u32 tsid,
110 u16 tclass, u32 requested, 71 u16 tclass, u32 requested,
111 struct avc_audit_data *auditdata); 72 struct common_audit_data *auditdata);
112 73
113u32 avc_policy_seqno(void); 74u32 avc_policy_seqno(void);
114 75
@@ -127,13 +88,13 @@ int avc_add_callback(int (*callback)(u32 event, u32 ssid, u32 tsid,
127 u32 events, u32 ssid, u32 tsid, 88 u32 events, u32 ssid, u32 tsid,
128 u16 tclass, u32 perms); 89 u16 tclass, u32 perms);
129 90
130/* Shows permission in human readable form */
131void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av);
132
133/* Exported to selinuxfs */ 91/* Exported to selinuxfs */
134int avc_get_hash_stats(char *page); 92int avc_get_hash_stats(char *page);
135extern unsigned int avc_cache_threshold; 93extern unsigned int avc_cache_threshold;
136 94
95/* Attempt to free avc node cache */
96void avc_disable(void);
97
137#ifdef CONFIG_SECURITY_SELINUX_AVC_STATS 98#ifdef CONFIG_SECURITY_SELINUX_AVC_STATS
138DECLARE_PER_CPU(struct avc_cache_stats, avc_cache_stats); 99DECLARE_PER_CPU(struct avc_cache_stats, avc_cache_stats);
139#endif 100#endif
diff --git a/security/selinux/include/class_to_string.h b/security/selinux/include/class_to_string.h
index 21ec786611d4..7ab9299bfb6b 100644
--- a/security/selinux/include/class_to_string.h
+++ b/security/selinux/include/class_to_string.h
@@ -77,3 +77,4 @@
77 S_(NULL) 77 S_(NULL)
78 S_(NULL) 78 S_(NULL)
79 S_("kernel_service") 79 S_("kernel_service")
80 S_("tun_socket")
diff --git a/security/selinux/include/flask.h b/security/selinux/include/flask.h
index 882f27d66fac..f248500a1e3c 100644
--- a/security/selinux/include/flask.h
+++ b/security/selinux/include/flask.h
@@ -53,6 +53,7 @@
53#define SECCLASS_PEER 68 53#define SECCLASS_PEER 68
54#define SECCLASS_CAPABILITY2 69 54#define SECCLASS_CAPABILITY2 69
55#define SECCLASS_KERNEL_SERVICE 74 55#define SECCLASS_KERNEL_SERVICE 74
56#define SECCLASS_TUN_SOCKET 75
56 57
57/* 58/*
58 * Security identifier indices for initial entities 59 * Security identifier indices for initial entities
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index b4b5b9b2f0be..8d7384280a7a 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -59,7 +59,7 @@ int selinux_netlbl_socket_post_create(struct sock *sk, u16 family);
59int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, 59int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
60 struct sk_buff *skb, 60 struct sk_buff *skb,
61 u16 family, 61 u16 family,
62 struct avc_audit_data *ad); 62 struct common_audit_data *ad);
63int selinux_netlbl_socket_setsockopt(struct socket *sock, 63int selinux_netlbl_socket_setsockopt(struct socket *sock,
64 int level, 64 int level,
65 int optname); 65 int optname);
@@ -129,7 +129,7 @@ static inline int selinux_netlbl_socket_post_create(struct sock *sk,
129static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, 129static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
130 struct sk_buff *skb, 130 struct sk_buff *skb,
131 u16 family, 131 u16 family,
132 struct avc_audit_data *ad) 132 struct common_audit_data *ad)
133{ 133{
134 return 0; 134 return 0;
135} 135}
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 289e24b39e3e..13128f9a3e5a 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -41,9 +41,9 @@ static inline int selinux_xfrm_enabled(void)
41} 41}
42 42
43int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, 43int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
44 struct avc_audit_data *ad); 44 struct common_audit_data *ad);
45int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, 45int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
46 struct avc_audit_data *ad, u8 proto); 46 struct common_audit_data *ad, u8 proto);
47int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall); 47int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
48 48
49static inline void selinux_xfrm_notify_policyload(void) 49static inline void selinux_xfrm_notify_policyload(void)
@@ -57,13 +57,13 @@ static inline int selinux_xfrm_enabled(void)
57} 57}
58 58
59static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, 59static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
60 struct avc_audit_data *ad) 60 struct common_audit_data *ad)
61{ 61{
62 return 0; 62 return 0;
63} 63}
64 64
65static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, 65static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
66 struct avc_audit_data *ad, u8 proto) 66 struct common_audit_data *ad, u8 proto)
67{ 67{
68 return 0; 68 return 0;
69} 69}
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index 2e984413c7b2..e68823741ad5 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -342,7 +342,7 @@ int selinux_netlbl_socket_post_create(struct sock *sk, u16 family)
342int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, 342int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
343 struct sk_buff *skb, 343 struct sk_buff *skb,
344 u16 family, 344 u16 family,
345 struct avc_audit_data *ad) 345 struct common_audit_data *ad)
346{ 346{
347 int rc; 347 int rc;
348 u32 nlbl_sid; 348 u32 nlbl_sid;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 500e6f78e115..ff17820d35ec 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -22,6 +22,11 @@
22 * 22 *
23 * Added validation of kernel classes and permissions 23 * Added validation of kernel classes and permissions
24 * 24 *
25 * Updated: KaiGai Kohei <kaigai@ak.jp.nec.com>
26 *
27 * Added support for bounds domain and audit messaged on masked permissions
28 *
29 * Copyright (C) 2008, 2009 NEC Corporation
25 * Copyright (C) 2006, 2007 Hewlett-Packard Development Company, L.P. 30 * Copyright (C) 2006, 2007 Hewlett-Packard Development Company, L.P.
26 * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. 31 * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
27 * Copyright (C) 2003 - 2004, 2006 Tresys Technology, LLC 32 * Copyright (C) 2003 - 2004, 2006 Tresys Technology, LLC
@@ -279,6 +284,95 @@ mls_ops:
279} 284}
280 285
281/* 286/*
287 * security_dump_masked_av - dumps masked permissions during
288 * security_compute_av due to RBAC, MLS/Constraint and Type bounds.
289 */
290static int dump_masked_av_helper(void *k, void *d, void *args)
291{
292 struct perm_datum *pdatum = d;
293 char **permission_names = args;
294
295 BUG_ON(pdatum->value < 1 || pdatum->value > 32);
296
297 permission_names[pdatum->value - 1] = (char *)k;
298
299 return 0;
300}
301
302static void security_dump_masked_av(struct context *scontext,
303 struct context *tcontext,
304 u16 tclass,
305 u32 permissions,
306 const char *reason)
307{
308 struct common_datum *common_dat;
309 struct class_datum *tclass_dat;
310 struct audit_buffer *ab;
311 char *tclass_name;
312 char *scontext_name = NULL;
313 char *tcontext_name = NULL;
314 char *permission_names[32];
315 int index, length;
316 bool need_comma = false;
317
318 if (!permissions)
319 return;
320
321 tclass_name = policydb.p_class_val_to_name[tclass - 1];
322 tclass_dat = policydb.class_val_to_struct[tclass - 1];
323 common_dat = tclass_dat->comdatum;
324
325 /* init permission_names */
326 if (common_dat &&
327 hashtab_map(common_dat->permissions.table,
328 dump_masked_av_helper, permission_names) < 0)
329 goto out;
330
331 if (hashtab_map(tclass_dat->permissions.table,
332 dump_masked_av_helper, permission_names) < 0)
333 goto out;
334
335 /* get scontext/tcontext in text form */
336 if (context_struct_to_string(scontext,
337 &scontext_name, &length) < 0)
338 goto out;
339
340 if (context_struct_to_string(tcontext,
341 &tcontext_name, &length) < 0)
342 goto out;
343
344 /* audit a message */
345 ab = audit_log_start(current->audit_context,
346 GFP_ATOMIC, AUDIT_SELINUX_ERR);
347 if (!ab)
348 goto out;
349
350 audit_log_format(ab, "op=security_compute_av reason=%s "
351 "scontext=%s tcontext=%s tclass=%s perms=",
352 reason, scontext_name, tcontext_name, tclass_name);
353
354 for (index = 0; index < 32; index++) {
355 u32 mask = (1 << index);
356
357 if ((mask & permissions) == 0)
358 continue;
359
360 audit_log_format(ab, "%s%s",
361 need_comma ? "," : "",
362 permission_names[index]
363 ? permission_names[index] : "????");
364 need_comma = true;
365 }
366 audit_log_end(ab);
367out:
368 /* release scontext/tcontext */
369 kfree(tcontext_name);
370 kfree(scontext_name);
371
372 return;
373}
374
375/*
282 * security_boundary_permission - drops violated permissions 376 * security_boundary_permission - drops violated permissions
283 * on boundary constraint. 377 * on boundary constraint.
284 */ 378 */
@@ -347,28 +441,12 @@ static void type_attribute_bounds_av(struct context *scontext,
347 } 441 }
348 442
349 if (masked) { 443 if (masked) {
350 struct audit_buffer *ab;
351 char *stype_name
352 = policydb.p_type_val_to_name[source->value - 1];
353 char *ttype_name
354 = policydb.p_type_val_to_name[target->value - 1];
355 char *tclass_name
356 = policydb.p_class_val_to_name[tclass - 1];
357
358 /* mask violated permissions */ 444 /* mask violated permissions */
359 avd->allowed &= ~masked; 445 avd->allowed &= ~masked;
360 446
361 /* notice to userspace via audit message */ 447 /* audit masked permissions */
362 ab = audit_log_start(current->audit_context, 448 security_dump_masked_av(scontext, tcontext,
363 GFP_ATOMIC, AUDIT_SELINUX_ERR); 449 tclass, masked, "bounds");
364 if (!ab)
365 return;
366
367 audit_log_format(ab, "av boundary violation: "
368 "source=%s target=%s tclass=%s",
369 stype_name, ttype_name, tclass_name);
370 avc_dump_av(ab, tclass, masked);
371 audit_log_end(ab);
372 } 450 }
373} 451}
374 452
@@ -480,7 +558,7 @@ static int context_struct_compute_av(struct context *scontext,
480 if ((constraint->permissions & (avd->allowed)) && 558 if ((constraint->permissions & (avd->allowed)) &&
481 !constraint_expr_eval(scontext, tcontext, NULL, 559 !constraint_expr_eval(scontext, tcontext, NULL,
482 constraint->expr)) { 560 constraint->expr)) {
483 avd->allowed = (avd->allowed) & ~(constraint->permissions); 561 avd->allowed &= ~(constraint->permissions);
484 } 562 }
485 constraint = constraint->next; 563 constraint = constraint->next;
486 } 564 }
@@ -499,8 +577,8 @@ static int context_struct_compute_av(struct context *scontext,
499 break; 577 break;
500 } 578 }
501 if (!ra) 579 if (!ra)
502 avd->allowed = (avd->allowed) & ~(PROCESS__TRANSITION | 580 avd->allowed &= ~(PROCESS__TRANSITION |
503 PROCESS__DYNTRANSITION); 581 PROCESS__DYNTRANSITION);
504 } 582 }
505 583
506 /* 584 /*
@@ -687,6 +765,26 @@ int security_bounded_transition(u32 old_sid, u32 new_sid)
687 } 765 }
688 index = type->bounds; 766 index = type->bounds;
689 } 767 }
768
769 if (rc) {
770 char *old_name = NULL;
771 char *new_name = NULL;
772 int length;
773
774 if (!context_struct_to_string(old_context,
775 &old_name, &length) &&
776 !context_struct_to_string(new_context,
777 &new_name, &length)) {
778 audit_log(current->audit_context,
779 GFP_ATOMIC, AUDIT_SELINUX_ERR,
780 "op=security_bounded_transition "
781 "result=denied "
782 "oldcontext=%s newcontext=%s",
783 old_name, new_name);
784 }
785 kfree(new_name);
786 kfree(old_name);
787 }
690out: 788out:
691 read_unlock(&policy_rwlock); 789 read_unlock(&policy_rwlock);
692 790
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 72b18452e1a1..f3cb9ed731a9 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -401,7 +401,7 @@ int selinux_xfrm_state_delete(struct xfrm_state *x)
401 * gone thru the IPSec process. 401 * gone thru the IPSec process.
402 */ 402 */
403int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, 403int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
404 struct avc_audit_data *ad) 404 struct common_audit_data *ad)
405{ 405{
406 int i, rc = 0; 406 int i, rc = 0;
407 struct sec_path *sp; 407 struct sec_path *sp;
@@ -442,7 +442,7 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
442 * checked in the selinux_xfrm_state_pol_flow_match hook above. 442 * checked in the selinux_xfrm_state_pol_flow_match hook above.
443 */ 443 */
444int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, 444int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
445 struct avc_audit_data *ad, u8 proto) 445 struct common_audit_data *ad, u8 proto)
446{ 446{
447 struct dst_entry *dst; 447 struct dst_entry *dst;
448 int rc = 0; 448 int rc = 0;
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 243bec175be0..c6e9acae72e4 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -275,7 +275,7 @@ static inline void smk_ad_init(struct smk_audit_info *a, const char *func,
275{ 275{
276 memset(a, 0, sizeof(*a)); 276 memset(a, 0, sizeof(*a));
277 a->a.type = type; 277 a->a.type = type;
278 a->a.function = func; 278 a->a.smack_audit_data.function = func;
279} 279}
280 280
281static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, 281static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a,
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 513dc1aa16dd..0f9ac8146900 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -240,8 +240,9 @@ static inline void smack_str_from_perm(char *string, int access)
240static void smack_log_callback(struct audit_buffer *ab, void *a) 240static void smack_log_callback(struct audit_buffer *ab, void *a)
241{ 241{
242 struct common_audit_data *ad = a; 242 struct common_audit_data *ad = a;
243 struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data; 243 struct smack_audit_data *sad = &ad->smack_audit_data;
244 audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function, 244 audit_log_format(ab, "lsm=SMACK fn=%s action=%s",
245 ad->smack_audit_data.function,
245 sad->result ? "denied" : "granted"); 246 sad->result ? "denied" : "granted");
246 audit_log_format(ab, " subject="); 247 audit_log_format(ab, " subject=");
247 audit_log_untrustedstring(ab, sad->subject); 248 audit_log_untrustedstring(ab, sad->subject);
@@ -274,11 +275,11 @@ void smack_log(char *subject_label, char *object_label, int request,
274 if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0) 275 if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0)
275 return; 276 return;
276 277
277 if (a->function == NULL) 278 if (a->smack_audit_data.function == NULL)
278 a->function = "unknown"; 279 a->smack_audit_data.function = "unknown";
279 280
280 /* end preparing the audit data */ 281 /* end preparing the audit data */
281 sad = &a->lsm_priv.smack_audit_data; 282 sad = &a->smack_audit_data;
282 smack_str_from_perm(request_buffer, request); 283 smack_str_from_perm(request_buffer, request);
283 sad->subject = subject_label; 284 sad->subject = subject_label;
284 sad->object = object_label; 285 sad->object = object_label;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 0023182078c7..acae7ef4092d 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -91,7 +91,7 @@ struct inode_smack *new_inode_smack(char *smack)
91 */ 91 */
92 92
93/** 93/**
94 * smack_ptrace_may_access - Smack approval on PTRACE_ATTACH 94 * smack_ptrace_access_check - Smack approval on PTRACE_ATTACH
95 * @ctp: child task pointer 95 * @ctp: child task pointer
96 * @mode: ptrace attachment mode 96 * @mode: ptrace attachment mode
97 * 97 *
@@ -99,13 +99,13 @@ struct inode_smack *new_inode_smack(char *smack)
99 * 99 *
100 * Do the capability checks, and require read and write. 100 * Do the capability checks, and require read and write.
101 */ 101 */
102static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) 102static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
103{ 103{
104 int rc; 104 int rc;
105 struct smk_audit_info ad; 105 struct smk_audit_info ad;
106 char *sp, *tsp; 106 char *sp, *tsp;
107 107
108 rc = cap_ptrace_may_access(ctp, mode); 108 rc = cap_ptrace_access_check(ctp, mode);
109 if (rc != 0) 109 if (rc != 0)
110 return rc; 110 return rc;
111 111
@@ -1080,6 +1080,22 @@ static int smack_file_receive(struct file *file)
1080 */ 1080 */
1081 1081
1082/** 1082/**
1083 * smack_cred_alloc_blank - "allocate" blank task-level security credentials
1084 * @new: the new credentials
1085 * @gfp: the atomicity of any memory allocations
1086 *
1087 * Prepare a blank set of credentials for modification. This must allocate all
1088 * the memory the LSM module might require such that cred_transfer() can
1089 * complete without error.
1090 */
1091static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
1092{
1093 cred->security = NULL;
1094 return 0;
1095}
1096
1097
1098/**
1083 * smack_cred_free - "free" task-level security credentials 1099 * smack_cred_free - "free" task-level security credentials
1084 * @cred: the credentials in question 1100 * @cred: the credentials in question
1085 * 1101 *
@@ -1117,6 +1133,18 @@ static void smack_cred_commit(struct cred *new, const struct cred *old)
1117} 1133}
1118 1134
1119/** 1135/**
1136 * smack_cred_transfer - Transfer the old credentials to the new credentials
1137 * @new: the new credentials
1138 * @old: the original credentials
1139 *
1140 * Fill in a set of blank credentials from another set of credentials.
1141 */
1142static void smack_cred_transfer(struct cred *new, const struct cred *old)
1143{
1144 new->security = old->security;
1145}
1146
1147/**
1120 * smack_kernel_act_as - Set the subjective context in a set of credentials 1148 * smack_kernel_act_as - Set the subjective context in a set of credentials
1121 * @new: points to the set of credentials to be modified. 1149 * @new: points to the set of credentials to be modified.
1122 * @secid: specifies the security ID to be set 1150 * @secid: specifies the security ID to be set
@@ -1638,6 +1666,7 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name,
1638 1666
1639 if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) { 1667 if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) {
1640 nsp->smk_inode = sp; 1668 nsp->smk_inode = sp;
1669 nsp->smk_flags |= SMK_INODE_INSTANT;
1641 return 0; 1670 return 0;
1642 } 1671 }
1643 /* 1672 /*
@@ -2464,7 +2493,7 @@ static int smack_socket_sendmsg(struct socket *sock, struct msghdr *msg,
2464 /* 2493 /*
2465 * Perfectly reasonable for this to be NULL 2494 * Perfectly reasonable for this to be NULL
2466 */ 2495 */
2467 if (sip == NULL || sip->sin_family != PF_INET) 2496 if (sip == NULL || sip->sin_family != AF_INET)
2468 return 0; 2497 return 0;
2469 2498
2470 return smack_netlabel_send(sock->sk, sip); 2499 return smack_netlabel_send(sock->sk, sip);
@@ -3029,10 +3058,31 @@ static void smack_release_secctx(char *secdata, u32 seclen)
3029{ 3058{
3030} 3059}
3031 3060
3061static int smack_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
3062{
3063 return smack_inode_setsecurity(inode, XATTR_SMACK_SUFFIX, ctx, ctxlen, 0);
3064}
3065
3066static int smack_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
3067{
3068 return __vfs_setxattr_noperm(dentry, XATTR_NAME_SMACK, ctx, ctxlen, 0);
3069}
3070
3071static int smack_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
3072{
3073 int len = 0;
3074 len = smack_inode_getsecurity(inode, XATTR_SMACK_SUFFIX, ctx, true);
3075
3076 if (len < 0)
3077 return len;
3078 *ctxlen = len;
3079 return 0;
3080}
3081
3032struct security_operations smack_ops = { 3082struct security_operations smack_ops = {
3033 .name = "smack", 3083 .name = "smack",
3034 3084
3035 .ptrace_may_access = smack_ptrace_may_access, 3085 .ptrace_access_check = smack_ptrace_access_check,
3036 .ptrace_traceme = smack_ptrace_traceme, 3086 .ptrace_traceme = smack_ptrace_traceme,
3037 .syslog = smack_syslog, 3087 .syslog = smack_syslog,
3038 3088
@@ -3073,9 +3123,11 @@ struct security_operations smack_ops = {
3073 .file_send_sigiotask = smack_file_send_sigiotask, 3123 .file_send_sigiotask = smack_file_send_sigiotask,
3074 .file_receive = smack_file_receive, 3124 .file_receive = smack_file_receive,
3075 3125
3126 .cred_alloc_blank = smack_cred_alloc_blank,
3076 .cred_free = smack_cred_free, 3127 .cred_free = smack_cred_free,
3077 .cred_prepare = smack_cred_prepare, 3128 .cred_prepare = smack_cred_prepare,
3078 .cred_commit = smack_cred_commit, 3129 .cred_commit = smack_cred_commit,
3130 .cred_transfer = smack_cred_transfer,
3079 .kernel_act_as = smack_kernel_act_as, 3131 .kernel_act_as = smack_kernel_act_as,
3080 .kernel_create_files_as = smack_kernel_create_files_as, 3132 .kernel_create_files_as = smack_kernel_create_files_as,
3081 .task_setpgid = smack_task_setpgid, 3133 .task_setpgid = smack_task_setpgid,
@@ -3155,6 +3207,9 @@ struct security_operations smack_ops = {
3155 .secid_to_secctx = smack_secid_to_secctx, 3207 .secid_to_secctx = smack_secid_to_secctx,
3156 .secctx_to_secid = smack_secctx_to_secid, 3208 .secctx_to_secid = smack_secctx_to_secid,
3157 .release_secctx = smack_release_secctx, 3209 .release_secctx = smack_release_secctx,
3210 .inode_notifysecctx = smack_inode_notifysecctx,
3211 .inode_setsecctx = smack_inode_setsecctx,
3212 .inode_getsecctx = smack_inode_getsecctx,
3158}; 3213};
3159 3214
3160 3215
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index fdd1f4b8c448..3c8bd8ee0b95 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -1285,6 +1285,36 @@ static bool tomoyo_is_select_one(struct tomoyo_io_buffer *head,
1285} 1285}
1286 1286
1287/** 1287/**
1288 * tomoyo_delete_domain - Delete a domain.
1289 *
1290 * @domainname: The name of domain.
1291 *
1292 * Returns 0.
1293 */
1294static int tomoyo_delete_domain(char *domainname)
1295{
1296 struct tomoyo_domain_info *domain;
1297 struct tomoyo_path_info name;
1298
1299 name.name = domainname;
1300 tomoyo_fill_path_info(&name);
1301 down_write(&tomoyo_domain_list_lock);
1302 /* Is there an active domain? */
1303 list_for_each_entry(domain, &tomoyo_domain_list, list) {
1304 /* Never delete tomoyo_kernel_domain */
1305 if (domain == &tomoyo_kernel_domain)
1306 continue;
1307 if (domain->is_deleted ||
1308 tomoyo_pathcmp(domain->domainname, &name))
1309 continue;
1310 domain->is_deleted = true;
1311 break;
1312 }
1313 up_write(&tomoyo_domain_list_lock);
1314 return 0;
1315}
1316
1317/**
1288 * tomoyo_write_domain_policy - Write domain policy. 1318 * tomoyo_write_domain_policy - Write domain policy.
1289 * 1319 *
1290 * @head: Pointer to "struct tomoyo_io_buffer". 1320 * @head: Pointer to "struct tomoyo_io_buffer".
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index 6d6ba09af457..31df541911f7 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -339,8 +339,6 @@ const char *tomoyo_get_last_name(const struct tomoyo_domain_info *domain);
339const char *tomoyo_get_msg(const bool is_enforce); 339const char *tomoyo_get_msg(const bool is_enforce);
340/* Convert single path operation to operation name. */ 340/* Convert single path operation to operation name. */
341const char *tomoyo_sp2keyword(const u8 operation); 341const char *tomoyo_sp2keyword(const u8 operation);
342/* Delete a domain. */
343int tomoyo_delete_domain(char *data);
344/* Create "alias" entry in exception policy. */ 342/* Create "alias" entry in exception policy. */
345int tomoyo_write_alias_policy(char *data, const bool is_delete); 343int tomoyo_write_alias_policy(char *data, const bool is_delete);
346/* 344/*
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 1d8b16960576..fcf52accce2b 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -717,38 +717,6 @@ int tomoyo_write_alias_policy(char *data, const bool is_delete)
717 return tomoyo_update_alias_entry(data, cp, is_delete); 717 return tomoyo_update_alias_entry(data, cp, is_delete);
718} 718}
719 719
720/* Domain create/delete handler. */
721
722/**
723 * tomoyo_delete_domain - Delete a domain.
724 *
725 * @domainname: The name of domain.
726 *
727 * Returns 0.
728 */
729int tomoyo_delete_domain(char *domainname)
730{
731 struct tomoyo_domain_info *domain;
732 struct tomoyo_path_info name;
733
734 name.name = domainname;
735 tomoyo_fill_path_info(&name);
736 down_write(&tomoyo_domain_list_lock);
737 /* Is there an active domain? */
738 list_for_each_entry(domain, &tomoyo_domain_list, list) {
739 /* Never delete tomoyo_kernel_domain */
740 if (domain == &tomoyo_kernel_domain)
741 continue;
742 if (domain->is_deleted ||
743 tomoyo_pathcmp(domain->domainname, &name))
744 continue;
745 domain->is_deleted = true;
746 break;
747 }
748 up_write(&tomoyo_domain_list_lock);
749 return 0;
750}
751
752/** 720/**
753 * tomoyo_find_or_assign_new_domain - Create a domain. 721 * tomoyo_find_or_assign_new_domain - Create a domain.
754 * 722 *
@@ -818,13 +786,11 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
818/** 786/**
819 * tomoyo_find_next_domain - Find a domain. 787 * tomoyo_find_next_domain - Find a domain.
820 * 788 *
821 * @bprm: Pointer to "struct linux_binprm". 789 * @bprm: Pointer to "struct linux_binprm".
822 * @next_domain: Pointer to pointer to "struct tomoyo_domain_info".
823 * 790 *
824 * Returns 0 on success, negative value otherwise. 791 * Returns 0 on success, negative value otherwise.
825 */ 792 */
826int tomoyo_find_next_domain(struct linux_binprm *bprm, 793int tomoyo_find_next_domain(struct linux_binprm *bprm)
827 struct tomoyo_domain_info **next_domain)
828{ 794{
829 /* 795 /*
830 * This function assumes that the size of buffer returned by 796 * This function assumes that the size of buffer returned by
@@ -946,9 +912,11 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm,
946 tomoyo_set_domain_flag(old_domain, false, 912 tomoyo_set_domain_flag(old_domain, false,
947 TOMOYO_DOMAIN_FLAGS_TRANSITION_FAILED); 913 TOMOYO_DOMAIN_FLAGS_TRANSITION_FAILED);
948 out: 914 out:
915 if (!domain)
916 domain = old_domain;
917 bprm->cred->security = domain;
949 tomoyo_free(real_program_name); 918 tomoyo_free(real_program_name);
950 tomoyo_free(symlink_program_name); 919 tomoyo_free(symlink_program_name);
951 *next_domain = domain ? domain : old_domain;
952 tomoyo_free(tmp); 920 tomoyo_free(tmp);
953 return retval; 921 return retval;
954} 922}
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 3194d09fe0f4..9548a0984cc4 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -14,6 +14,12 @@
14#include "tomoyo.h" 14#include "tomoyo.h"
15#include "realpath.h" 15#include "realpath.h"
16 16
17static int tomoyo_cred_alloc_blank(struct cred *new, gfp_t gfp)
18{
19 new->security = NULL;
20 return 0;
21}
22
17static int tomoyo_cred_prepare(struct cred *new, const struct cred *old, 23static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
18 gfp_t gfp) 24 gfp_t gfp)
19{ 25{
@@ -25,6 +31,15 @@ static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
25 return 0; 31 return 0;
26} 32}
27 33
34static void tomoyo_cred_transfer(struct cred *new, const struct cred *old)
35{
36 /*
37 * Since "struct tomoyo_domain_info *" is a sharable pointer,
38 * we don't need to duplicate.
39 */
40 new->security = old->security;
41}
42
28static int tomoyo_bprm_set_creds(struct linux_binprm *bprm) 43static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
29{ 44{
30 int rc; 45 int rc;
@@ -61,14 +76,8 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
61 * Execute permission is checked against pathname passed to do_execve() 76 * Execute permission is checked against pathname passed to do_execve()
62 * using current domain. 77 * using current domain.
63 */ 78 */
64 if (!domain) { 79 if (!domain)
65 struct tomoyo_domain_info *next_domain = NULL; 80 return tomoyo_find_next_domain(bprm);
66 int retval = tomoyo_find_next_domain(bprm, &next_domain);
67
68 if (!retval)
69 bprm->cred->security = next_domain;
70 return retval;
71 }
72 /* 81 /*
73 * Read permission is checked against interpreters using next domain. 82 * Read permission is checked against interpreters using next domain.
74 * '1' is the result of open_to_namei_flags(O_RDONLY). 83 * '1' is the result of open_to_namei_flags(O_RDONLY).
@@ -268,7 +277,9 @@ static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
268 */ 277 */
269static struct security_operations tomoyo_security_ops = { 278static struct security_operations tomoyo_security_ops = {
270 .name = "tomoyo", 279 .name = "tomoyo",
280 .cred_alloc_blank = tomoyo_cred_alloc_blank,
271 .cred_prepare = tomoyo_cred_prepare, 281 .cred_prepare = tomoyo_cred_prepare,
282 .cred_transfer = tomoyo_cred_transfer,
272 .bprm_set_creds = tomoyo_bprm_set_creds, 283 .bprm_set_creds = tomoyo_bprm_set_creds,
273 .bprm_check_security = tomoyo_bprm_check_security, 284 .bprm_check_security = tomoyo_bprm_check_security,
274#ifdef CONFIG_SYSCTL 285#ifdef CONFIG_SYSCTL
diff --git a/security/tomoyo/tomoyo.h b/security/tomoyo/tomoyo.h
index 0fd588a629cf..cd6ba0bf7069 100644
--- a/security/tomoyo/tomoyo.h
+++ b/security/tomoyo/tomoyo.h
@@ -31,8 +31,7 @@ int tomoyo_check_2path_perm(struct tomoyo_domain_info *domain,
31 struct path *path2); 31 struct path *path2);
32int tomoyo_check_rewrite_permission(struct tomoyo_domain_info *domain, 32int tomoyo_check_rewrite_permission(struct tomoyo_domain_info *domain,
33 struct file *filp); 33 struct file *filp);
34int tomoyo_find_next_domain(struct linux_binprm *bprm, 34int tomoyo_find_next_domain(struct linux_binprm *bprm);
35 struct tomoyo_domain_info **next_domain);
36 35
37/* Index numbers for Access Controls. */ 36/* Index numbers for Access Controls. */
38 37