-rw-r--r--  Documentation/device-mapper/dm-log.txt | 54
-rw-r--r--  Documentation/device-mapper/dm-queue-length.txt | 39
-rw-r--r--  Documentation/device-mapper/dm-service-time.txt | 91
-rw-r--r--  Documentation/filesystems/Locking | 43
-rw-r--r--  Documentation/kernel-parameters.txt | 10
-rw-r--r--  Documentation/laptops/thinkpad-acpi.txt | 47
-rw-r--r--  MAINTAINERS | 17
-rw-r--r--  Makefile | 4
-rw-r--r--  arch/ia64/kernel/acpi-processor.c | 12
-rw-r--r--  arch/mips/Kconfig | 17
-rw-r--r--  arch/mips/cavium-octeon/octeon-irq.c | 61
-rw-r--r--  arch/mips/cavium-octeon/octeon_boot.h | 70
-rw-r--r--  arch/mips/cavium-octeon/setup.c | 1
-rw-r--r--  arch/mips/cavium-octeon/smp.c | 234
-rw-r--r--  arch/mips/include/asm/bug.h | 1
-rw-r--r--  arch/mips/include/asm/bugs.h | 1
-rw-r--r--  arch/mips/include/asm/irq.h | 1
-rw-r--r--  arch/mips/include/asm/mmu_context.h | 1
-rw-r--r--  arch/mips/include/asm/smp-ops.h | 4
-rw-r--r--  arch/mips/include/asm/smp.h | 20
-rw-r--r--  arch/mips/include/asm/sn/addrs.h | 1
-rw-r--r--  arch/mips/jazz/irq.c | 1
-rw-r--r--  arch/mips/kernel/cevt-bcm1480.c | 1
-rw-r--r--  arch/mips/kernel/cevt-r4k.c | 1
-rw-r--r--  arch/mips/kernel/cevt-sb1250.c | 1
-rw-r--r--  arch/mips/kernel/cevt-smtc.c | 1
-rw-r--r--  arch/mips/kernel/cpu-probe.c | 1
-rw-r--r--  arch/mips/kernel/i8253.c | 1
-rw-r--r--  arch/mips/kernel/irq-gic.c | 1
-rw-r--r--  arch/mips/kernel/kgdb.c | 1
-rw-r--r--  arch/mips/kernel/process.c | 13
-rw-r--r--  arch/mips/kernel/smp-cmp.c | 1
-rw-r--r--  arch/mips/kernel/smp-up.c | 16
-rw-r--r--  arch/mips/kernel/smp.c | 18
-rw-r--r--  arch/mips/kernel/smtc.c | 1
-rw-r--r--  arch/mips/kernel/topology.c | 5
-rw-r--r--  arch/mips/mipssim/sim_time.c | 1
-rw-r--r--  arch/mips/mm/c-octeon.c | 1
-rw-r--r--  arch/mips/mm/c-r3k.c | 1
-rw-r--r--  arch/mips/mm/c-r4k.c | 1
-rw-r--r--  arch/mips/mm/c-tx39.c | 1
-rw-r--r--  arch/mips/mm/highmem.c | 1
-rw-r--r--  arch/mips/mm/init.c | 1
-rw-r--r--  arch/mips/mm/page.c | 1
-rw-r--r--  arch/mips/mm/tlb-r3k.c | 1
-rw-r--r--  arch/mips/mm/tlb-r4k.c | 1
-rw-r--r--  arch/mips/mm/tlb-r8k.c | 1
-rw-r--r--  arch/mips/mm/tlbex.c | 1
-rw-r--r--  arch/mips/mti-malta/malta-int.c | 1
-rw-r--r--  arch/mips/pci/pci-ip27.c | 1
-rw-r--r--  arch/mips/pmc-sierra/yosemite/smp.c | 1
-rw-r--r--  arch/mips/power/hibernate.S | 9
-rw-r--r--  arch/mips/sgi-ip27/ip27-init.c | 1
-rw-r--r--  arch/mips/sgi-ip27/ip27-irq.c | 1
-rw-r--r--  arch/mips/sgi-ip27/ip27-timer.c | 1
-rw-r--r--  arch/mips/sgi-ip27/ip27-xtalk.c | 1
-rw-r--r--  arch/mips/sibyte/bcm1480/irq.c | 1
-rw-r--r--  arch/mips/sibyte/common/cfe_console.c | 7
-rw-r--r--  arch/mips/sni/time.c | 1
-rw-r--r--  arch/um/drivers/slip_kern.c | 1
-rw-r--r--  arch/um/drivers/slirp_kern.c | 1
-rw-r--r--  arch/um/include/asm/dma-mapping.h | 4
-rw-r--r--  arch/x86/include/asm/acpi.h | 1
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 5
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 80
-rw-r--r--  arch/x86/kernel/acpi/cstate.c | 16
-rw-r--r--  arch/x86/kernel/acpi/processor.c | 13
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 6
-rw-r--r--  arch/x86/pci/acpi.c | 2
-rw-r--r--  arch/x86/pci/amd_bus.c | 2
-rw-r--r--  arch/x86/pci/common.c | 4
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 65
-rw-r--r--  drivers/acpi/ac.c | 20
-rw-r--r--  drivers/acpi/battery.c | 34
-rw-r--r--  drivers/acpi/blacklist.c | 16
-rw-r--r--  drivers/acpi/bus.c | 91
-rw-r--r--  drivers/acpi/glue.c | 40
-rw-r--r--  drivers/acpi/osl.c | 25
-rw-r--r--  drivers/acpi/pci_bind.c | 313
-rw-r--r--  drivers/acpi/pci_irq.c | 17
-rw-r--r--  drivers/acpi/pci_root.c | 297
-rw-r--r--  drivers/acpi/power.c | 28
-rw-r--r--  drivers/acpi/processor_core.c | 45
-rw-r--r--  drivers/acpi/processor_idle.c | 47
-rw-r--r--  drivers/acpi/scan.c | 69
-rw-r--r--  drivers/acpi/video.c | 61
-rw-r--r--  drivers/acpi/video_detect.c | 9
-rw-r--r--  drivers/char/mxser.c | 2
-rw-r--r--  drivers/char/nozomi.c | 2
-rw-r--r--  drivers/char/synclink_gt.c | 72
-rw-r--r--  drivers/char/tty_port.c | 2
-rw-r--r--  drivers/dma/txx9dmac.c | 20
-rw-r--r--  drivers/gpu/drm/i915/i915_opregion.c | 2
-rw-r--r--  drivers/i2c/busses/Kconfig | 10
-rw-r--r--  drivers/i2c/busses/Makefile | 1
-rw-r--r--  drivers/i2c/busses/i2c-designware.c | 624
-rw-r--r--  drivers/ide/cs5520.c | 1
-rw-r--r--  drivers/ide/ide-cd.c | 10
-rw-r--r--  drivers/ide/ide-dma.c | 21
-rw-r--r--  drivers/ide/ide-io.c | 54
-rw-r--r--  drivers/ide/ide-iops.c | 4
-rw-r--r--  drivers/ide/ide-probe.c | 23
-rw-r--r--  drivers/md/Kconfig | 30
-rw-r--r--  drivers/md/Makefile | 5
-rw-r--r--  drivers/md/dm-crypt.c | 19
-rw-r--r--  drivers/md/dm-delay.c | 26
-rw-r--r--  drivers/md/dm-exception-store.c | 2
-rw-r--r--  drivers/md/dm-exception-store.h | 2
-rw-r--r--  drivers/md/dm-io.c | 14
-rw-r--r--  drivers/md/dm-ioctl.c | 27
-rw-r--r--  drivers/md/dm-linear.c | 15
-rw-r--r--  drivers/md/dm-log-userspace-base.c | 696
-rw-r--r--  drivers/md/dm-log-userspace-transfer.c | 276
-rw-r--r--  drivers/md/dm-log-userspace-transfer.h | 18
-rw-r--r--  drivers/md/dm-log.c | 9
-rw-r--r--  drivers/md/dm-mpath.c | 334
-rw-r--r--  drivers/md/dm-path-selector.h | 8
-rw-r--r--  drivers/md/dm-queue-length.c | 263
-rw-r--r--  drivers/md/dm-raid1.c | 17
-rw-r--r--  drivers/md/dm-region-hash.c | 2
-rw-r--r--  drivers/md/dm-round-robin.c | 2
-rw-r--r--  drivers/md/dm-service-time.c | 339
-rw-r--r--  drivers/md/dm-snap-persistent.c | 2
-rw-r--r--  drivers/md/dm-snap.c | 11
-rw-r--r--  drivers/md/dm-stripe.c | 33
-rw-r--r--  drivers/md/dm-sysfs.c | 9
-rw-r--r--  drivers/md/dm-table.c | 465
-rw-r--r--  drivers/md/dm.c | 1133
-rw-r--r--  drivers/md/dm.h | 35
-rw-r--r--  drivers/net/Kconfig | 1
-rw-r--r--  drivers/net/bnx2.c | 10
-rw-r--r--  drivers/net/can/Kconfig | 2
-rw-r--r--  drivers/net/netxen/netxen_nic_init.c | 37
-rw-r--r--  drivers/net/netxen/netxen_nic_main.c | 7
-rw-r--r--  drivers/net/qla3xxx.c | 6
-rw-r--r--  drivers/pci/hotplug/acpi_pcihp.c | 40
-rw-r--r--  drivers/pci/hotplug/acpiphp_glue.c | 27
-rw-r--r--  drivers/pci/intel-iommu.c | 314
-rw-r--r--  drivers/pci/intr_remapping.c | 160
-rw-r--r--  drivers/pci/intr_remapping.h | 2
-rw-r--r--  drivers/platform/x86/Kconfig | 34
-rw-r--r--  drivers/platform/x86/Makefile | 1
-rw-r--r--  drivers/platform/x86/acerhdf.c | 602
-rw-r--r--  drivers/platform/x86/asus-laptop.c | 111
-rw-r--r--  drivers/platform/x86/asus_acpi.c | 30
-rw-r--r--  drivers/platform/x86/dell-wmi.c | 56
-rw-r--r--  drivers/platform/x86/eeepc-laptop.c | 126
-rw-r--r--  drivers/platform/x86/hp-wmi.c | 87
-rw-r--r--  drivers/platform/x86/thinkpad_acpi.c | 411
-rw-r--r--  drivers/pnp/pnpacpi/rsparser.c | 46
-rw-r--r--  drivers/power/Kconfig | 8
-rw-r--r--  drivers/power/Makefile | 3
-rw-r--r--  drivers/power/da9030_battery.c | 19
-rw-r--r--  drivers/power/ds2760_battery.c | 42
-rw-r--r--  drivers/power/max17040_battery.c | 309
-rw-r--r--  drivers/serial/8250_pci.c | 6
-rw-r--r--  drivers/serial/icom.c | 3
-rw-r--r--  drivers/serial/jsm/jsm_tty.c | 4
-rw-r--r--  drivers/serial/serial_txx9.c | 113
-rw-r--r--  drivers/staging/octeon/Makefile | 1
-rw-r--r--  drivers/staging/octeon/ethernet-common.c | 328
-rw-r--r--  drivers/staging/octeon/ethernet-common.h | 29
-rw-r--r--  drivers/staging/octeon/ethernet-defines.h | 2
-rw-r--r--  drivers/staging/octeon/ethernet-rgmii.c | 9
-rw-r--r--  drivers/staging/octeon/ethernet-sgmii.c | 9
-rw-r--r--  drivers/staging/octeon/ethernet-spi.c | 1
-rw-r--r--  drivers/staging/octeon/ethernet-tx.c | 62
-rw-r--r--  drivers/staging/octeon/ethernet-tx.h | 25
-rw-r--r--  drivers/staging/octeon/ethernet-xaui.c | 9
-rw-r--r--  drivers/staging/octeon/ethernet.c | 470
-rw-r--r--  drivers/staging/octeon/octeon-ethernet.h | 11
-rw-r--r--  drivers/usb/class/cdc-acm.c | 4
-rw-r--r--  drivers/usb/serial/usb-serial.c | 3
-rw-r--r--  drivers/uwb/hwa-rc.c | 2
-rw-r--r--  drivers/uwb/wlp/txrx.c | 2
-rw-r--r--  drivers/w1/slaves/w1_ds2760.c | 30
-rw-r--r--  drivers/w1/slaves/w1_ds2760.h | 7
-rw-r--r--  fs/btrfs/acl.c | 44
-rw-r--r--  fs/btrfs/btrfs_inode.h | 4
-rw-r--r--  fs/btrfs/ctree.h | 2
-rw-r--r--  fs/btrfs/inode.c | 18
-rw-r--r--  fs/compat_ioctl.c | 48
-rw-r--r--  fs/devpts/inode.c | 10
-rw-r--r--  fs/ext2/acl.c | 81
-rw-r--r--  fs/ext2/acl.h | 4
-rw-r--r--  fs/ext2/ext2.h | 4
-rw-r--r--  fs/ext2/inode.c | 4
-rw-r--r--  fs/ext2/super.c | 16
-rw-r--r--  fs/ext3/acl.c | 85
-rw-r--r--  fs/ext3/acl.h | 4
-rw-r--r--  fs/ext3/inode.c | 4
-rw-r--r--  fs/ext3/super.c | 16
-rw-r--r--  fs/ext4/acl.c | 67
-rw-r--r--  fs/ext4/acl.h | 4
-rw-r--r--  fs/ext4/ext4.h | 4
-rw-r--r--  fs/ext4/inode.c | 4
-rw-r--r--  fs/ext4/super.c | 16
-rw-r--r--  fs/fs-writeback.c | 100
-rw-r--r--  fs/inode.c | 27
-rw-r--r--  fs/ioctl.c | 35
-rw-r--r--  fs/jffs2/acl.c | 87
-rw-r--r--  fs/jffs2/acl.h | 4
-rw-r--r--  fs/jffs2/jffs2_fs_i.h | 4
-rw-r--r--  fs/jffs2/os-linux.h | 4
-rw-r--r--  fs/jffs2/readinode.c | 1
-rw-r--r--  fs/jfs/acl.c | 47
-rw-r--r--  fs/jfs/jfs_incore.h | 6
-rw-r--r--  fs/jfs/super.c | 16
-rw-r--r--  fs/jfs/xattr.c | 10
-rw-r--r--  fs/namei.c | 11
-rw-r--r--  fs/namespace.c | 37
-rw-r--r--  fs/nilfs2/inode.c | 8
-rw-r--r--  fs/nilfs2/nilfs.h | 4
-rw-r--r--  fs/nilfs2/super.c | 10
-rw-r--r--  fs/ocfs2/dlmglue.c | 123
-rw-r--r--  fs/ocfs2/dlmglue.h | 24
-rw-r--r--  fs/ocfs2/file.c | 6
-rw-r--r--  fs/ocfs2/inode.c | 11
-rw-r--r--  fs/ocfs2/journal.c | 43
-rw-r--r--  fs/ocfs2/journal.h | 2
-rw-r--r--  fs/ocfs2/namei.c | 15
-rw-r--r--  fs/ocfs2/ocfs2.h | 10
-rw-r--r--  fs/ocfs2/stack_o2cb.c | 11
-rw-r--r--  fs/ocfs2/stack_user.c | 8
-rw-r--r--  fs/ocfs2/stackglue.c | 13
-rw-r--r--  fs/ocfs2/stackglue.h | 6
-rw-r--r--  fs/ocfs2/suballoc.c | 28
-rw-r--r--  fs/ocfs2/super.c | 69
-rw-r--r--  fs/ocfs2/sysfile.c | 19
-rw-r--r--  fs/open.c | 58
-rw-r--r--  fs/reiserfs/inode.c | 4
-rw-r--r--  fs/reiserfs/resize.c | 1
-rw-r--r--  fs/reiserfs/super.c | 24
-rw-r--r--  fs/reiserfs/xattr_acl.c | 58
-rw-r--r--  fs/super.c | 9
-rw-r--r--  fs/ubifs/xattr.c | 2
-rw-r--r--  fs/udf/balloc.c | 9
-rw-r--r--  fs/udf/lowlevel.c | 7
-rw-r--r--  fs/xfs/linux-2.6/xfs_acl.c | 73
-rw-r--r--  fs/xfs/xfs_acl.h | 4
-rw-r--r--  fs/xfs/xfs_iget.c | 2
-rw-r--r--  fs/xfs/xfs_inode.h | 5
-rw-r--r--  include/acpi/acpi_bus.h | 7
-rw-r--r--  include/acpi/acpi_drivers.h | 13
-rw-r--r--  include/acpi/processor.h | 1
-rw-r--r--  include/acpi/video.h | 4
-rw-r--r--  include/linux/Kbuild | 1
-rw-r--r--  include/linux/acpi.h | 6
-rw-r--r--  include/linux/audit.h | 3
-rw-r--r--  include/linux/connector.h | 4
-rw-r--r--  include/linux/device-mapper.h | 47
-rw-r--r--  include/linux/dm-ioctl.h | 14
-rw-r--r--  include/linux/dm-log-userspace.h | 386
-rw-r--r--  include/linux/dmar.h | 11
-rw-r--r--  include/linux/ext3_fs_i.h | 4
-rw-r--r--  include/linux/falloc.h | 21
-rw-r--r--  include/linux/fs.h | 13
-rw-r--r--  include/linux/icmpv6.h | 6
-rw-r--r--  include/linux/ide.h | 2
-rw-r--r--  include/linux/lockdep.h | 15
-rw-r--r--  include/linux/max17040_battery.h | 19
-rw-r--r--  include/linux/mm.h | 2
-rw-r--r--  include/linux/pci_hotplug.h | 1
-rw-r--r--  include/linux/posix_acl.h | 74
-rw-r--r--  include/linux/reiserfs_acl.h | 17
-rw-r--r--  include/linux/reiserfs_fs_i.h | 4
-rw-r--r--  include/linux/shmem_fs.h | 8
-rw-r--r--  include/net/protocol.h | 2
-rw-r--r--  include/net/rawv6.h | 2
-rw-r--r--  include/net/sctp/sctp.h | 1
-rw-r--r--  include/net/sock.h | 2
-rw-r--r--  include/net/xfrm.h | 2
-rw-r--r--  init/main.c | 6
-rw-r--r--  kernel/Makefile | 2
-rw-r--r--  kernel/audit.c | 146
-rw-r--r--  kernel/audit.h | 43
-rw-r--r--  kernel/audit_tree.c | 66
-rw-r--r--  kernel/audit_watch.c | 543
-rw-r--r--  kernel/auditfilter.c | 518
-rw-r--r--  kernel/auditsc.c | 33
-rw-r--r--  kernel/futex.c | 45
-rw-r--r--  mm/memory.c | 26
-rw-r--r--  mm/nommu.c | 12
-rw-r--r--  mm/page_alloc.c | 4
-rw-r--r--  mm/shmem.c | 6
-rw-r--r--  mm/shmem_acl.c | 29
-rw-r--r--  mm/slub.c | 10
-rw-r--r--  net/ax25/ax25_in.c | 3
-rw-r--r--  net/core/dev.c | 2
-rw-r--r--  net/dccp/ipv6.c | 2
-rw-r--r--  net/ipv4/route.c | 26
-rw-r--r--  net/ipv6/ah6.c | 2
-rw-r--r--  net/ipv6/esp6.c | 2
-rw-r--r--  net/ipv6/icmp.c | 12
-rw-r--r--  net/ipv6/ip6_tunnel.c | 18
-rw-r--r--  net/ipv6/ipcomp6.c | 2
-rw-r--r--  net/ipv6/mip6.c | 2
-rw-r--r--  net/ipv6/raw.c | 4
-rw-r--r--  net/ipv6/route.c | 2
-rw-r--r--  net/ipv6/tcp_ipv6.c | 2
-rw-r--r--  net/ipv6/tunnel6.c | 2
-rw-r--r--  net/ipv6/udp.c | 6
-rw-r--r--  net/ipv6/udp_impl.h | 2
-rw-r--r--  net/ipv6/udplite.c | 2
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 2
-rw-r--r--  net/irda/af_irda.c | 3
-rw-r--r--  net/irda/ircomm/ircomm_lmp.c | 1
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 25
-rw-r--r--  net/netfilter/nf_log.c | 16
-rw-r--r--  net/netfilter/xt_NFQUEUE.c | 8
-rw-r--r--  net/netfilter/xt_cluster.c | 8
-rw-r--r--  net/netfilter/xt_quota.c | 1
-rw-r--r--  net/netfilter/xt_rateest.c | 2
-rw-r--r--  net/sctp/ipv6.c | 2
314 files changed, 10300 insertions(+), 3936 deletions(-)
diff --git a/Documentation/device-mapper/dm-log.txt b/Documentation/device-mapper/dm-log.txt
new file mode 100644
index 000000000000..994dd75475a6
--- /dev/null
+++ b/Documentation/device-mapper/dm-log.txt
@@ -0,0 +1,54 @@
+Device-Mapper Logging
+=====================
+The device-mapper logging code is used by some of the device-mapper
+RAID targets to track regions of the disk that are not consistent.
+A region (or portion of the address space) of the disk may be
+inconsistent because a RAID stripe is currently being operated on or
+a machine died while the region was being altered.  In the case of
+mirrors, a region would be considered dirty/inconsistent while you
+are writing to it because the writes need to be replicated for all
+the legs of the mirror and may not reach the legs at the same time.
+Once all writes are complete, the region is considered clean again.
+
+There is a generic logging interface that the device-mapper RAID
+implementations use to perform logging operations (see
+dm_dirty_log_type in include/linux/dm-dirty-log.h).  Various different
+logging implementations are available and provide different
+capabilities.  The list includes:
+
+Type		Files
+====		=====
+disk		drivers/md/dm-log.c
+core		drivers/md/dm-log.c
+userspace	drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
+
+The "disk" log type
+-------------------
+This log implementation commits the log state to disk.  This way, the
+logging state survives reboots/crashes.
+
+The "core" log type
+-------------------
+This log implementation keeps the log state in memory.  The log state
+will not survive a reboot or crash, but there may be a small boost in
+performance.  This method can also be used if no storage device is
+available for storing log state.
+
+The "userspace" log type
+------------------------
+This log type simply provides a way to export the log API to userspace,
+so log implementations can be done there.  This is done by forwarding most
+logging requests to userspace, where a daemon receives and processes the
+requests.
+
+The structures used for communication between kernel and userspace are
+located in include/linux/dm-log-userspace.h.  Due to the frequency,
+diversity, and two-way communication nature of the exchanges between
+kernel and userspace, 'connector' is used as the interface for
+communication.
+
+There are currently two userspace log implementations that leverage this
+framework - "clustered_disk" and "clustered_core".  These implementations
+provide a cluster-coherent log for shared-storage.  Device-mapper mirroring
+can be used in a shared-storage environment when the cluster log implementations
+are employed.
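
The dirty-region bookkeeping described above can be pictured as a small
counter per region.  The user-space C sketch below is illustrative only
and is not the kernel interface (the real hooks live in struct
dm_dirty_log_type, e.g. mark_region/clear_region/in_sync; the helper
names here are invented):

	#include <stdio.h>

	#define NR_REGIONS 8

	/* One pending-write count per region; a region is dirty while any
	 * replicated write to it is still outstanding on some mirror leg. */
	static unsigned int pending[NR_REGIONS];

	static void region_write_begin(unsigned int r) { pending[r]++; }
	static void region_write_end(unsigned int r)   { pending[r]--; }
	static int region_in_sync(unsigned int r)      { return pending[r] == 0; }

	int main(void)
	{
		region_write_begin(3);	/* write dispatched to all mirror legs */
		printf("region 3 in sync: %d\n", region_in_sync(3));	/* 0 */
		region_write_end(3);	/* last leg completed */
		printf("region 3 in sync: %d\n", region_in_sync(3));	/* 1 */
		return 0;
	}
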
diff --git a/Documentation/device-mapper/dm-queue-length.txt b/Documentation/device-mapper/dm-queue-length.txt
new file mode 100644
index 000000000000..f4db2562175c
--- /dev/null
+++ b/Documentation/device-mapper/dm-queue-length.txt
@@ -0,0 +1,39 @@
+dm-queue-length
+===============
+
+dm-queue-length is a path selector module for device-mapper targets,
+which selects a path with the least number of in-flight I/Os.
+The path selector name is 'queue-length'.
+
+Table parameters for each path: [<repeat_count>]
+	<repeat_count>: The number of I/Os to dispatch using the selected
+			path before switching to the next path.
+			If not given, the internal default is used.  To check
+			the default value, see the activated table.
+
+Status for each path: <status> <fail-count> <in-flight>
+	<status>: 'A' if the path is active, 'F' if the path is failed.
+	<fail-count>: The number of path failures.
+	<in-flight>: The number of in-flight I/Os on the path.
+
+
+Algorithm
+=========
+
+dm-queue-length increments/decrements 'in-flight' when an I/O is
+dispatched/completed respectively.
+dm-queue-length selects a path with the minimum 'in-flight'.
+
+
+Examples
+========
+In the case that two paths (sda and sdb) are used with repeat_count == 128:
+
+# echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \
+  | dmsetup create test
+#
+# dmsetup table
+test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128
+#
+# dmsetup status
+test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0
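
The 'Algorithm' section above amounts to a linear scan for the smallest
'in-flight' counter.  A minimal user-space C sketch of just that rule
(names are invented; the in-kernel selector additionally honors
<repeat_count> and skips failed paths):

	#include <stdio.h>
	#include <stddef.h>

	struct path {
		const char *name;
		unsigned int in_flight;	/* ++ on dispatch, -- on completion */
	};

	/* Return the path with the fewest in-flight I/Os (first on a tie). */
	static struct path *select_path(struct path *p, size_t n)
	{
		struct path *best = NULL;
		size_t i;

		for (i = 0; i < n; i++)
			if (!best || p[i].in_flight < best->in_flight)
				best = &p[i];
		return best;
	}

	int main(void)
	{
		struct path paths[] = { { "8:0", 4 }, { "8:16", 2 } };

		printf("selected: %s\n", select_path(paths, 2)->name); /* 8:16 */
		return 0;
	}
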
diff --git a/Documentation/device-mapper/dm-service-time.txt b/Documentation/device-mapper/dm-service-time.txt
new file mode 100644
index 000000000000..7d00668e97bb
--- /dev/null
+++ b/Documentation/device-mapper/dm-service-time.txt
@@ -0,0 +1,91 @@
+dm-service-time
+===============
+
+dm-service-time is a path selector module for device-mapper targets,
+which selects a path with the shortest estimated service time for
+the incoming I/O.
+
+The service time for each path is estimated by dividing the total size
+of in-flight I/Os on a path by the performance value of the path.
+The performance value is a relative throughput value among all paths
+in a path-group, and it can be specified as a table argument.
+
+The path selector name is 'service-time'.
+
+Table parameters for each path: [<repeat_count> [<relative_throughput>]]
+	<repeat_count>: The number of I/Os to dispatch using the selected
+			path before switching to the next path.
+			If not given, the internal default is used.  To check
+			the default value, see the activated table.
+	<relative_throughput>: The relative throughput value of the path
+			among all paths in the path-group.
+			The valid range is 0-100.
+			If not given, the minimum value '1' is used.
+			If '0' is given, the path isn't selected while
+			other paths having a positive value are available.
+
+Status for each path: <status> <fail-count> <in-flight-size> \
+		      <relative_throughput>
+	<status>: 'A' if the path is active, 'F' if the path is failed.
+	<fail-count>: The number of path failures.
+	<in-flight-size>: The size of in-flight I/Os on the path.
+	<relative_throughput>: The relative throughput value of the path
+			among all paths in the path-group.
+
+
+Algorithm
+=========
+
+dm-service-time adds the I/O size to 'in-flight-size' when the I/O is
+dispatched and subtracts it when the I/O completes.
+Basically, dm-service-time selects the path having the minimum service
+time, which is calculated by:
+
+	('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput'
+
+However, the optimizations below are used to avoid the calculation
+as much as possible.
+
+	1. If the paths have the same 'relative_throughput', skip
+	   the division and just compare the 'in-flight-size'.
+
+	2. If the paths have the same 'in-flight-size', skip the division
+	   and just compare the 'relative_throughput'.
+
+	3. If some paths have non-zero 'relative_throughput' and others
+	   have zero 'relative_throughput', ignore those paths with zero
+	   'relative_throughput'.
+
+If none of these optimizations applies, the service times are
+calculated and compared.
+If the calculated service times are equal, the path having the maximum
+'relative_throughput' may be better, so 'relative_throughput' is
+compared in that case.
+
+
+Examples
+========
+In the case that two paths (sda and sdb) are used with repeat_count == 128,
+and sda has an average throughput of 1GB/s and sdb of 4GB/s, the
+'relative_throughput' value may be '1' for sda and '4' for sdb.
+
+# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \
+  | dmsetup create test
+#
+# dmsetup table
+test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4
+#
+# dmsetup status
+test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4
+
+
+Equivalently, '2' for sda and '8' for sdb would also work.
+
+# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \
+  | dmsetup create test
+#
+# dmsetup table
+test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8
+#
+# dmsetup status
+test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8
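
The selection rules above condense into one comparison function.  The
user-space C sketch below is illustrative only: the names are invented,
overflow is ignored, and the general-case division is replaced by an
equivalent integer cross-multiplication (sz1/rt1 < sz2/rt2 iff
sz1*rt2 < sz2*rt1), which is one way to skip the division:

	#include <stdio.h>

	struct st_path {
		const char *name;
		unsigned long long sz;	/* 'in-flight-size' + size of incoming I/O */
		unsigned int rt;	/* 'relative_throughput', 0-100 */
	};

	/* Return the preferred path according to the rules above. */
	static struct st_path *st_prefer(struct st_path *a, struct st_path *b)
	{
		if (a->rt == 0)				/* rule 3 */
			return b->rt ? b : a;
		if (b->rt == 0)
			return a;
		if (a->rt == b->rt)			/* rule 1 */
			return a->sz <= b->sz ? a : b;
		if (a->sz == b->sz)			/* rule 2 */
			return a->rt >= b->rt ? a : b;
		/* General case: compare sz/rt by cross-multiplication. */
		if (a->sz * b->rt != b->sz * a->rt)
			return a->sz * b->rt < b->sz * a->rt ? a : b;
		return a->rt >= b->rt ? a : b;		/* tie-break */
	}

	int main(void)
	{
		struct st_path sda = { "8:0", 1024, 1 }, sdb = { "8:16", 2048, 4 };

		printf("preferred: %s\n", st_prefer(&sda, &sdb)->name); /* 8:16 */
		return 0;
	}
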
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 229d7b7c50a3..18b9d0ca0630 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -109,27 +109,28 @@ prototypes:
 
 locking rules:
 	All may block.
-	BKL	s_lock	s_umount
-alloc_inode:	no	no	no
-destroy_inode:	no
-dirty_inode:	no	(must not sleep)
-write_inode:	no
-drop_inode:	no	!!!inode_lock!!!
-delete_inode:	no
-put_super:	yes	yes	no
-write_super:	no	yes	read
-sync_fs:	no	no	read
-freeze_fs:	?
-unfreeze_fs:	?
-statfs:	no	no	no
-remount_fs:	yes	yes	maybe		(see below)
-clear_inode:	no
-umount_begin:	yes	no	no
-show_options:	no	(vfsmount->sem)
-quota_read:	no	no	no (see below)
-quota_write:	no	no	no (see below)
+			None have BKL
+			s_umount
+alloc_inode:
+destroy_inode:
+dirty_inode:				(must not sleep)
+write_inode:
+drop_inode:				!!!inode_lock!!!
+delete_inode:
+put_super:		write
+write_super:		read
+sync_fs:		read
+freeze_fs:		read
+unfreeze_fs:		read
+statfs:			no
+remount_fs:		maybe		(see below)
+clear_inode:
+umount_begin:		no
+show_options:		no		(namespace_sem)
+quota_read:		no		(see below)
+quota_write:		no		(see below)
 
--&gt;remount_fs() will have the s_umount lock if it's already mounted.
+->remount_fs() will have the s_umount exclusive lock if it's already mounted.
 When called from get_sb_single, it does NOT have the s_umount lock.
 ->quota_read() and ->quota_write() functions are both guaranteed to
 be the only ones operating on the quota file by the quota code (via
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 92e1ab8178a8..d08759aa0903 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -229,14 +229,6 @@ and is between 256 and 4096 characters. It is defined in the file
 			to assume that this machine's pmtimer latches its value
 			and always returns good values.
 
-	acpi.power_nocheck=	[HW,ACPI]
-			Format: 1/0 enable/disable the check of power state.
-			On some bogus BIOS the _PSC object/_STA object of
-			power resource can't return the correct device power
-			state. In such case it is unneccessary to check its
-			power state again in power transition.
-			1 : disable the power state check
-
 	acpi_sci=	[HW,ACPI] ACPI System Control Interrupt trigger mode
 			Format: { level | edge | high | low }
 
@@ -1863,7 +1855,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			IRQ routing is enabled.
 		noacpi		[X86] Do not use ACPI for IRQ routing
 				or for PCI scanning.
-		nocrs		[X86] Don't use _CRS for PCI resource
+		use_crs		[X86] Use _CRS for PCI resource
 				allocation.
 		routeirq	Do IRQ routing for all PCI devices.
 				This is normally done in pci_enable_device(),
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 78e354b42f67..f2296ecedb89 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -920,7 +920,7 @@ The available commands are:
 	echo '<LED number> off' >/proc/acpi/ibm/led
 	echo '<LED number> blink' >/proc/acpi/ibm/led
 
-The <LED number> range is 0 to 7.  The set of LEDs that can be
+The <LED number> range is 0 to 15.  The set of LEDs that can be
 controlled varies from model to model.  Here is the common ThinkPad
 mapping:
 
@@ -932,6 +932,11 @@ mapping:
 	5 - UltraBase battery slot
 	6 - (unknown)
 	7 - standby
+	8 - dock status 1
+	9 - dock status 2
+	10, 11 - (unknown)
+	12 - thinkvantage
+	13, 14, 15 - (unknown)
 
 All of the above can be turned on and off and can be made to blink.
 
@@ -940,10 +945,12 @@ sysfs notes:
 The ThinkPad LED sysfs interface is described in detail by the LED class
 documentation, in Documentation/leds-class.txt.
 
-The leds are named (in LED ID order, from 0 to 7):
+The LEDs are named (in LED ID order, from 0 to 12):
 "tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
 "tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt",
-"tpacpi::unknown_led", "tpacpi::standby".
+"tpacpi::unknown_led", "tpacpi::standby", "tpacpi::dock_status1",
+"tpacpi::dock_status2", "tpacpi::unknown_led2", "tpacpi::unknown_led3",
+"tpacpi::thinkvantage".
 
 Due to limitations in the sysfs LED class, if the status of the LED
 indicators cannot be read due to an error, thinkpad-acpi will report it as
@@ -958,6 +965,12 @@ ThinkPad indicator LED should blink in hardware accelerated mode, use the
 "timer" trigger, and leave the delay_on and delay_off parameters set to
 zero (to request hardware acceleration autodetection).
 
+LEDs that are known not to exist in a given ThinkPad model are not
+made available through the sysfs interface.  If you have a dock and you
+notice there are LEDs listed for your ThinkPad that do not exist (and
+are not in the dock), or if you notice that there are missing LEDs,
+a report to ibm-acpi-devel@lists.sourceforge.net is appreciated.
+
 
 ACPI sounds -- /proc/acpi/ibm/beep
 ----------------------------------
@@ -1156,17 +1169,19 @@ may not be distinct. Later Lenovo models that implement the ACPI
 display backlight brightness control methods have 16 levels, ranging
 from 0 to 15.
 
-There are two interfaces to the firmware for direct brightness control,
-EC and UCMS (or CMOS). To select which one should be used, use the
-brightness_mode module parameter: brightness_mode=1 selects EC mode,
-brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
-mode with NVRAM backing (so that brightness changes are remembered
-across shutdown/reboot).
+For IBM ThinkPads, there are two interfaces to the firmware for direct
+brightness control, EC and UCMS (or CMOS). To select which one should be
+used, use the brightness_mode module parameter: brightness_mode=1 selects
+EC mode, brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
+mode with NVRAM backing (so that brightness changes are remembered across
+shutdown/reboot).
 
 The driver tries to select which interface to use from a table of
 defaults for each ThinkPad model. If it makes a wrong choice, please
 report this as a bug, so that we can fix it.
 
+Lenovo ThinkPads only support brightness_mode=2 (UCMS).
+
 When display backlight brightness controls are available through the
 standard ACPI interface, it is best to use it instead of this direct
 ThinkPad-specific interface. The driver will disable its native
@@ -1254,7 +1269,7 @@ Fan control and monitoring: fan speed, fan enable/disable
 
 procfs: /proc/acpi/ibm/fan
 sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1,
-			  pwm1_enable
+			  pwm1_enable, fan2_input
 sysfs hwmon driver attributes: fan_watchdog
 
 NOTE NOTE NOTE: fan control operations are disabled by default for
@@ -1267,6 +1282,9 @@ from the hardware registers of the embedded controller. This is known
 to work on later R, T, X and Z series ThinkPads but may show a bogus
 value on other models.
 
+Some Lenovo ThinkPads support a secondary fan.  This fan cannot be
+controlled separately; it shares the main fan control.
+
 Fan levels:
 
 Most ThinkPad fans work in "levels" at the firmware interface. Level 0
@@ -1397,6 +1415,11 @@ hwmon device attribute fan1_input:
 	which can take up to two minutes.  May return rubbish on older
 	ThinkPads.
 
+hwmon device attribute fan2_input:
+	Fan tachometer reading, in RPM, for the secondary fan.
+	Available only on some ThinkPads.  If the secondary fan is
+	not installed, will always read 0.
+
 hwmon driver attribute fan_watchdog:
 	Fan safety watchdog timer interval, in seconds. Minimum is
 	1 second, maximum is 120 seconds. 0 disables the watchdog.
@@ -1555,3 +1578,7 @@ Sysfs interface changelog:
 0x020300:	hotkey enable/disable support removed, attributes
 		hotkey_bios_enabled and hotkey_enable deprecated and
 		marked for removal.
+
+0x020400:	Marker for 16 LEDs support.  Also, LEDs that are known
+		to not exist in a given model are not registered with
+		the LED sysfs class anymore.
diff --git a/MAINTAINERS b/MAINTAINERS
index 303129ab4b75..fa2a16def17a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -230,6 +230,13 @@ L: linux-acenic@sunsite.dk
 S:	Maintained
 F:	drivers/net/acenic*
 
+ACER ASPIRE ONE TEMPERATURE AND FAN DRIVER
+P:	Peter Feuerer
+M:	peter@piie.net
+W:	http://piie.net/?section=acerhdf
+S:	Maintained
+F:	drivers/platform/x86/acerhdf.c
+
 ACER WMI LAPTOP EXTRAS
 P:	Carlos Corbacho
 M:	carlos@strangeworlds.co.uk
@@ -913,8 +920,7 @@ M: corentincj@iksaif.net
 P:	Karol Kozimor
 M:	sziwan@users.sourceforge.net
 L:	acpi4asus-user@lists.sourceforge.net
-W:	http://sourceforge.net/projects/acpi4asus
-W:	http://xf.iksaif.net/acpi4asus
+W:	http://acpi4asus.sf.net
 S:	Maintained
 F:	arch/x86/kernel/acpi/boot.c
 F:	drivers/platform/x86/asus_acpi.c
@@ -930,8 +936,7 @@ ASUS LAPTOP EXTRAS DRIVER
 P:	Corentin Chary
 M:	corentincj@iksaif.net
 L:	acpi4asus-user@lists.sourceforge.net
-W:	http://sourceforge.net/projects/acpi4asus
-W:	http://xf.iksaif.net/acpi4asus
+W:	http://acpi4asus.sf.net
 S:	Maintained
 F:	drivers/platform/x86/asus-laptop.c
 
@@ -1636,7 +1641,7 @@ P: Mikael Starvik
 M:	starvik@axis.com
 P:	Jesper Nilsson
 M:	jesper.nilsson@axis.com
-L:	dev-etrax@axis.com
+L:	linux-cris-kernel@axis.com
 W:	http://developer.axis.com
 S:	Maintained
 F:	arch/cris/
@@ -2110,7 +2115,7 @@ EEEPC LAPTOP EXTRAS DRIVER
 P:	Corentin Chary
 M:	corentincj@iksaif.net
 L:	acpi4asus-user@lists.sourceforge.net
-W:	http://sourceforge.net/projects/acpi4asus
+W:	http://acpi4asus.sf.net
 S:	Maintained
 F:	drivers/platform/x86/eeepc-laptop.c
 
diff --git a/Makefile b/Makefile
index 46e1c9d03d51..d1216fea0c92 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
-SUBLEVEL = 30
-EXTRAVERSION =
+SUBLEVEL = 31
+EXTRAVERSION = -rc1
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c
index cbe6cee5a550..dbda7bde6112 100644
--- a/arch/ia64/kernel/acpi-processor.c
+++ b/arch/ia64/kernel/acpi-processor.c
@@ -71,3 +71,15 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 }
 
 EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
+
+void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr)
+{
+	if (pr->pdc) {
+		kfree(pr->pdc->pointer->buffer.pointer);
+		kfree(pr->pdc->pointer);
+		kfree(pr->pdc);
+		pr->pdc = NULL;
+	}
+}
+
+EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b29f0280d712..8c4be1f301cf 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -601,6 +601,7 @@ config CAVIUM_OCTEON_SIMULATOR
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_HIGHMEM
+	select SYS_SUPPORTS_HOTPLUG_CPU
 	select SYS_HAS_CPU_CAVIUM_OCTEON
 	help
 	  The Octeon simulator is software performance model of the Cavium
@@ -615,6 +616,7 @@ config CAVIUM_OCTEON_REFERENCE_BOARD
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_HIGHMEM
+	select SYS_SUPPORTS_HOTPLUG_CPU
 	select SYS_HAS_EARLY_PRINTK
 	select SYS_HAS_CPU_CAVIUM_OCTEON
 	select SWAP_IO_SPACE
@@ -784,8 +786,17 @@ config SYS_HAS_EARLY_PRINTK
 	bool
 
 config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP && HOTPLUG && SYS_SUPPORTS_HOTPLUG_CPU
+	help
+	  Say Y here to allow turning CPUs off and on.  CPUs can be
+	  controlled through /sys/devices/system/cpu.
+	  (Note: power management support will enable this option
+	  automatically on SMP systems.)
+	  Say N if you want to disable CPU hotplug.
+
+config SYS_SUPPORTS_HOTPLUG_CPU
 	bool
-	default n
 
 config I8259
 	bool
@@ -2136,11 +2147,11 @@ menu "Power management options"
 
 config ARCH_HIBERNATION_POSSIBLE
 	def_bool y
-	depends on !SMP
+	depends on SYS_SUPPORTS_HOTPLUG_CPU
 
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
-	depends on !SMP
+	depends on SYS_SUPPORTS_HOTPLUG_CPU
 
 source "kernel/power/Kconfig"
 
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 8dfa009e0070..384f1842bfb1 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -7,7 +7,7 @@
  */
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <linux/hardirq.h>
+#include <linux/smp.h>
 
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/cvmx-pexp-defs.h>
@@ -501,3 +501,62 @@ asmlinkage void plat_irq_dispatch(void)
 		}
 	}
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int is_irq_enabled_on_cpu(unsigned int irq, unsigned int cpu)
+{
+	unsigned int isset;
+#ifdef CONFIG_SMP
+	int coreid = cpu_logical_map(cpu);
+#else
+	int coreid = cvmx_get_core_num();
+#endif
+	int bit = (irq < OCTEON_IRQ_WDOG0) ?
+		irq - OCTEON_IRQ_WORKQ0 : irq - OCTEON_IRQ_WDOG0;
+	if (irq < 64) {
+		isset = (cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2)) &
+			(1ull << bit)) >> bit;
+	} else {
+		isset = (cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1)) &
+			(1ull << bit)) >> bit;
+	}
+	return isset;
+}
+
+void fixup_irqs(void)
+{
+	int irq;
+
+	for (irq = OCTEON_IRQ_SW0; irq <= OCTEON_IRQ_TIMER; irq++)
+		octeon_irq_core_disable_local(irq);
+
+	for (irq = OCTEON_IRQ_WORKQ0; irq <= OCTEON_IRQ_GPIO15; irq++) {
+		if (is_irq_enabled_on_cpu(irq, smp_processor_id())) {
+			/* ciu irq migrates to next cpu */
+			octeon_irq_chip_ciu0.disable(irq);
+			octeon_irq_ciu0_set_affinity(irq, &cpu_online_map);
+		}
+	}
+
+#if 0
+	for (irq = OCTEON_IRQ_MBOX0; irq <= OCTEON_IRQ_MBOX1; irq++)
+		octeon_irq_mailbox_mask(irq);
+#endif
+	for (irq = OCTEON_IRQ_UART0; irq <= OCTEON_IRQ_BOOTDMA; irq++) {
+		if (is_irq_enabled_on_cpu(irq, smp_processor_id())) {
+			/* ciu irq migrates to next cpu */
+			octeon_irq_chip_ciu0.disable(irq);
+			octeon_irq_ciu0_set_affinity(irq, &cpu_online_map);
+		}
+	}
+
+	for (irq = OCTEON_IRQ_UART2; irq <= OCTEON_IRQ_RESERVED135; irq++) {
+		if (is_irq_enabled_on_cpu(irq, smp_processor_id())) {
+			/* ciu irq migrates to next cpu */
+			octeon_irq_chip_ciu1.disable(irq);
+			octeon_irq_ciu1_set_affinity(irq, &cpu_online_map);
+		}
+	}
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/mips/cavium-octeon/octeon_boot.h b/arch/mips/cavium-octeon/octeon_boot.h
new file mode 100644
index 000000000000..0f7f84accf9a
--- /dev/null
+++ b/arch/mips/cavium-octeon/octeon_boot.h
@@ -0,0 +1,70 @@
+/*
+ * (C) Copyright 2004, 2005 Cavium Networks
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef __OCTEON_BOOT_H__
+#define __OCTEON_BOOT_H__
+
+#include <linux/types.h>
+
+struct boot_init_vector {
+	uint32_t stack_addr;
+	uint32_t code_addr;
+	uint32_t app_start_func_addr;
+	uint32_t k0_val;
+	uint32_t flags;
+	uint32_t boot_info_addr;
+	uint32_t pad;
+	uint32_t pad2;
+};
+
+/* similar to the bootloader's linux_app_boot_info, but without global data */
+struct linux_app_boot_info {
+	uint32_t labi_signature;
+	uint32_t start_core0_addr;
+	uint32_t avail_coremask;
+	uint32_t pci_console_active;
+	uint32_t icache_prefetch_disable;
+	uint32_t InitTLBStart_addr;
+	uint32_t start_app_addr;
+	uint32_t cur_exception_base;
+	uint32_t no_mark_private_data;
+	uint32_t compact_flash_common_base_addr;
+	uint32_t compact_flash_attribute_base_addr;
+	uint32_t led_display_base_addr;
+};
+
+/* Rather than copying the bootloader's structures wholesale, only the
+   offset of the member we need is recorded here. */
+#define AVAIL_COREMASK_OFFSET_IN_LINUX_APP_BOOT_BLOCK	0x765c
+
+/* hardcoded in the bootloader */
+#define LABI_ADDR_IN_BOOTLOADER		0x700
+
+#define LINUX_APP_BOOT_BLOCK_NAME "linux-app-boot"
+
+#define LABI_SIGNATURE 0xAABBCCDD
+
+/* from uboot-headers/octeon_mem_map.h */
+#define EXCEPTION_BASE_INCR	(4 * 1024)
+	/* Increment size for exception base addresses (4k minimum) */
+#define EXCEPTION_BASE_BASE	0
+#define BOOTLOADER_PRIV_DATA_BASE	(EXCEPTION_BASE_BASE + 0x800)
+#define BOOTLOADER_BOOT_VECTOR		(BOOTLOADER_PRIV_DATA_BASE)
+
+#endif /* __OCTEON_BOOT_H__ */
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 5f4e49ba4713..da559249cc2f 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -13,6 +13,7 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/serial.h>
+#include <linux/smp.h>
 #include <linux/types.h>
 #include <linux/string.h>	/* for memset */
 #include <linux/tty.h>
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 24e0ad63980a..0b891a9c6253 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 2004-2008 Cavium Networks
  */
+#include <linux/cpu.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/smp.h>
@@ -19,10 +20,16 @@
 
 #include <asm/octeon/octeon.h>
 
+#include "octeon_boot.h"
+
 volatile unsigned long octeon_processor_boot = 0xff;
 volatile unsigned long octeon_processor_sp;
 volatile unsigned long octeon_processor_gp;
 
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned int InitTLBStart_addr;
+#endif
+
 static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 {
 	const int coreid = cvmx_get_core_num();
@@ -67,8 +74,28 @@ static inline void octeon_send_ipi_mask(cpumask_t mask, unsigned int action)
 }
 
 /**
- * Detect available CPUs, populate phys_cpu_present_map
+ * Detect available CPUs, populate cpu_possible_map
  */
+static void octeon_smp_hotplug_setup(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	uint32_t labi_signature;
+
+	labi_signature =
+		cvmx_read64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+					LABI_ADDR_IN_BOOTLOADER +
+					offsetof(struct linux_app_boot_info,
+						 labi_signature)));
+	if (labi_signature != LABI_SIGNATURE)
+		pr_err("The bootloader version on this board is incorrect\n");
+	InitTLBStart_addr =
+		cvmx_read64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+					LABI_ADDR_IN_BOOTLOADER +
+					offsetof(struct linux_app_boot_info,
+						 InitTLBStart_addr)));
+#endif
+}
+
 static void octeon_smp_setup(void)
 {
 	const int coreid = cvmx_get_core_num();
@@ -91,6 +118,9 @@ static void octeon_smp_setup(void)
 			cpus++;
 		}
 	}
+	cpu_present_map = cpu_possible_map;
+
+	octeon_smp_hotplug_setup();
 }
 
 /**
@@ -128,6 +158,17 @@ static void octeon_init_secondary(void)
 	const int coreid = cvmx_get_core_num();
 	union cvmx_ciu_intx_sum0 interrupt_enable;
 
+#ifdef CONFIG_HOTPLUG_CPU
+	unsigned int cur_exception_base;
+
+	cur_exception_base = cvmx_read64_uint32(
+		CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+			     LABI_ADDR_IN_BOOTLOADER +
+			     offsetof(struct linux_app_boot_info,
+				      cur_exception_base)));
+	/* cur_exception_base is incremented in the bootloader after it is set */
+	write_c0_ebase((unsigned int)(cur_exception_base - EXCEPTION_BASE_INCR));
+#endif
 	octeon_check_cpu_bist();
 	octeon_init_cvmcount();
 	/*
@@ -199,6 +240,193 @@ static void octeon_cpus_done(void)
 #endif
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state);
+
+extern void fixup_irqs(void);
+
+static DEFINE_SPINLOCK(smp_reserve_lock);
+
+static int octeon_cpu_disable(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	if (cpu == 0)
+		return -EBUSY;
+
+	spin_lock(&smp_reserve_lock);
+
+	cpu_clear(cpu, cpu_online_map);
+	cpu_clear(cpu, cpu_callin_map);
+	local_irq_disable();
+	fixup_irqs();
+	local_irq_enable();
+
+	flush_cache_all();
+	local_flush_tlb_all();
+
+	spin_unlock(&smp_reserve_lock);
+
+	return 0;
+}
+
+static void octeon_cpu_die(unsigned int cpu)
+{
+	int coreid = cpu_logical_map(cpu);
+	uint32_t avail_coremask;
+	struct cvmx_bootmem_named_block_desc *block_desc;
+
+#ifdef CONFIG_CAVIUM_OCTEON_WATCHDOG
+	/* Disable the watchdog */
+	cvmx_ciu_wdogx_t ciu_wdog;
+	ciu_wdog.u64 = cvmx_read_csr(CVMX_CIU_WDOGX(cpu));
+	ciu_wdog.s.mode = 0;
+	cvmx_write_csr(CVMX_CIU_WDOGX(cpu), ciu_wdog.u64);
+#endif
+
+	while (per_cpu(cpu_state, cpu) != CPU_DEAD)
+		cpu_relax();
+
+	/*
+	 * Getting and setting the available coremask is a bit involved;
+	 * the scheme is copied from the bootloader.
+	 */
+	/* LINUX_APP_BOOT_BLOCK is initialized in the bootoct binary */
+	block_desc = cvmx_bootmem_find_named_block(LINUX_APP_BOOT_BLOCK_NAME);
+
+	if (!block_desc) {
+		avail_coremask =
+			cvmx_read64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+					   LABI_ADDR_IN_BOOTLOADER +
+					   offsetof
+					   (struct linux_app_boot_info,
+					    avail_coremask)));
+	} else {	/* alternative, already initialized */
+		avail_coremask =
+			cvmx_read64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+					   block_desc->base_addr +
+					   AVAIL_COREMASK_OFFSET_IN_LINUX_APP_BOOT_BLOCK));
+	}
+
+	avail_coremask |= 1 << coreid;
+
+	/* Set avail_coremask for the bootoct binary */
+	if (!block_desc) {
+		cvmx_write64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+				    LABI_ADDR_IN_BOOTLOADER +
+				    offsetof(struct linux_app_boot_info,
+					     avail_coremask)),
+				    avail_coremask);
+	} else {
+		cvmx_write64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+				    block_desc->base_addr +
+				    AVAIL_COREMASK_OFFSET_IN_LINUX_APP_BOOT_BLOCK),
+				    avail_coremask);
+	}
+
+	pr_info("Reset core %d. Available Coremask = %x\n", coreid,
+		avail_coremask);
+	cvmx_write_csr(CVMX_CIU_PP_RST, 1 << coreid);
+	cvmx_write_csr(CVMX_CIU_PP_RST, 0);
+}
+
+void play_dead(void)
+{
+	int coreid = cvmx_get_core_num();
+
+	idle_task_exit();
+	octeon_processor_boot = 0xff;
+	per_cpu(cpu_state, coreid) = CPU_DEAD;
+
+	while (1)	/* core will be reset here */
+		;
+}
+
+extern void kernel_entry(unsigned long arg1, ...);
+
+static void start_after_reset(void)
+{
+	kernel_entry(0, 0, 0);	/* set a2 = 0 for secondary core */
+}
+
+int octeon_update_boot_vector(unsigned int cpu)
+{
+
+	int coreid = cpu_logical_map(cpu);
+	unsigned int avail_coremask;
+	struct cvmx_bootmem_named_block_desc *block_desc;
+	struct boot_init_vector *boot_vect =
+		(struct boot_init_vector *) cvmx_phys_to_ptr(0x0 +
+						  BOOTLOADER_BOOT_VECTOR);
+
+	block_desc = cvmx_bootmem_find_named_block(LINUX_APP_BOOT_BLOCK_NAME);
+
+	if (!block_desc) {
+		avail_coremask =
+			cvmx_read64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+					   LABI_ADDR_IN_BOOTLOADER +
+					   offsetof(struct linux_app_boot_info,
+						    avail_coremask)));
+	} else {	/* alternative, already initialized */
+		avail_coremask =
+			cvmx_read64_uint32(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+					   block_desc->base_addr +
+					   AVAIL_COREMASK_OFFSET_IN_LINUX_APP_BOOT_BLOCK));
+	}
+
+	if (!(avail_coremask & (1 << coreid))) {
+		/* core not available; assume it was caught by the simple-executive */
+		cvmx_write_csr(CVMX_CIU_PP_RST, 1 << coreid);
+		cvmx_write_csr(CVMX_CIU_PP_RST, 0);
+	}
+
+	boot_vect[coreid].app_start_func_addr =
+		(uint32_t) (unsigned long) start_after_reset;
+	boot_vect[coreid].code_addr = InitTLBStart_addr;
+
+	CVMX_SYNC;
+
+	cvmx_write_csr(CVMX_CIU_NMI, (1 << coreid) & avail_coremask);
+
+	return 0;
+}
+
+static int __cpuinit octeon_cpu_callback(struct notifier_block *nfb,
+	unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		octeon_update_boot_vector(cpu);
+		break;
+	case CPU_ONLINE:
+		pr_info("Cpu %d online\n", cpu);
+		break;
+	case CPU_DEAD:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata octeon_cpu_notifier = {
+	.notifier_call = octeon_cpu_callback,
+};
+
+static int __cpuinit register_cavium_notifier(void)
+{
+	register_hotcpu_notifier(&octeon_cpu_notifier);
+
+	return 0;
+}
+
+late_initcall(register_cavium_notifier);
+
+#endif	/* CONFIG_HOTPLUG_CPU */
+
 struct plat_smp_ops octeon_smp_ops = {
 	.send_ipi_single	= octeon_send_ipi_single,
 	.send_ipi_mask		= octeon_send_ipi_mask,
@@ -208,4 +436,8 @@ struct plat_smp_ops octeon_smp_ops = {
 	.boot_secondary		= octeon_boot_secondary,
 	.smp_setup		= octeon_smp_setup,
 	.prepare_cpus		= octeon_prepare_cpus,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable		= octeon_cpu_disable,
+	.cpu_die		= octeon_cpu_die,
+#endif
 };
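
A usage note (an observation, not part of the patch): with these hooks
filled in, plat_smp_ops plugs into the generic CPU hotplug path, in
which __cpu_disable() runs on the departing CPU, __cpu_die() is called
from a surviving CPU to wait for and reset the dead core, and the
departing CPU parks itself in play_dead() from the idle loop.  From
userspace the sequence is normally driven by writing 0 (offline) or
1 (online) to /sys/devices/system/cpu/cpuN/online, as the new Kconfig
help text notes.
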
diff --git a/arch/mips/include/asm/bug.h b/arch/mips/include/asm/bug.h
index 08ea46863fe5..6cf29c26e873 100644
--- a/arch/mips/include/asm/bug.h
+++ b/arch/mips/include/asm/bug.h
@@ -1,6 +1,7 @@
 #ifndef __ASM_BUG_H
 #define __ASM_BUG_H
 
+#include <linux/compiler.h>
 #include <asm/sgidefs.h>
 
 #ifdef CONFIG_BUG
diff --git a/arch/mips/include/asm/bugs.h b/arch/mips/include/asm/bugs.h
index 9dc10df32078..b160a706795d 100644
--- a/arch/mips/include/asm/bugs.h
+++ b/arch/mips/include/asm/bugs.h
@@ -11,6 +11,7 @@
 
 #include <linux/bug.h>
 #include <linux/delay.h>
+#include <linux/smp.h>
 
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 4f1eed107b08..09b08d05ff72 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -10,6 +10,7 @@
 #define _ASM_IRQ_H
 
 #include <linux/linkage.h>
+#include <linux/smp.h>
 
 #include <asm/mipsmtregs.h>
 
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index d7f3eb03ad12..d3bea88d8744 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
 
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h
index 64ffc0290b84..fd545547b8aa 100644
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h
@@ -26,6 +26,10 @@ struct plat_smp_ops {
 	void (*boot_secondary)(int cpu, struct task_struct *idle);
 	void (*smp_setup)(void);
 	void (*prepare_cpus)(unsigned int max_cpus);
+#ifdef CONFIG_HOTPLUG_CPU
+	int (*cpu_disable)(void);
+	void (*cpu_die)(unsigned int cpu);
+#endif
 };
 
 extern void register_smp_ops(struct plat_smp_ops *ops);
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 40e5ef1d4d26..aaa2d4ab26dc 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -13,6 +13,7 @@
 
 #include <linux/bitops.h>
 #include <linux/linkage.h>
+#include <linux/smp.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 
@@ -40,6 +41,7 @@ extern int __cpu_logical_map[NR_CPUS];
 /* Octeon - Tell another core to flush its icache */
 #define SMP_ICACHE_FLUSH	0x4
 
+extern volatile cpumask_t cpu_callin_map;
 
 extern void asmlinkage smp_bootstrap(void);
 
@@ -55,6 +57,24 @@ static inline void smp_send_reschedule(int cpu)
 	mp_ops->send_ipi_single(cpu, SMP_RESCHEDULE_YOURSELF);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static inline int __cpu_disable(void)
+{
+	extern struct plat_smp_ops *mp_ops;	/* private */
+
+	return mp_ops->cpu_disable();
+}
+
+static inline void __cpu_die(unsigned int cpu)
+{
+	extern struct plat_smp_ops *mp_ops;	/* private */
+
+	mp_ops->cpu_die(cpu);
+}
+
+extern void play_dead(void);
+#endif
+
 extern asmlinkage void smp_call_function_interrupt(void);
 
 extern void arch_send_call_function_single_ipi(int cpu);
diff --git a/arch/mips/include/asm/sn/addrs.h b/arch/mips/include/asm/sn/addrs.h
index 3a56d90abfa6..2367b56dcdef 100644
--- a/arch/mips/include/asm/sn/addrs.h
+++ b/arch/mips/include/asm/sn/addrs.h
@@ -11,6 +11,7 @@
 
 
 #ifndef __ASSEMBLY__
+#include <linux/smp.h>
 #include <linux/types.h>
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/mips/jazz/irq.c b/arch/mips/jazz/irq.c
index d9b6a5b5399d..7fd170d007e7 100644
--- a/arch/mips/jazz/irq.c
+++ b/arch/mips/jazz/irq.c
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/smp.h>
 #include <linux/spinlock.h>
 
 #include <asm/irq_cpu.h>
diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c
index a5182a207696..e02f79b1eb51 100644
--- a/arch/mips/kernel/cevt-bcm1480.c
+++ b/arch/mips/kernel/cevt-bcm1480.c
@@ -18,6 +18,7 @@
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
+#include <linux/smp.h>
 
 #include <asm/addrspace.h>
 #include <asm/io.h>
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 0015e442572b..2652362ce047 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -9,6 +9,7 @@
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
+#include <linux/smp.h>
 
 #include <asm/smtc_ipi.h>
 #include <asm/time.h>
diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c
index 340f53e5c6b1..ac5903d1b20e 100644
--- a/arch/mips/kernel/cevt-sb1250.c
+++ b/arch/mips/kernel/cevt-sb1250.c
@@ -18,6 +18,7 @@
18#include <linux/clockchips.h> 18#include <linux/clockchips.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/percpu.h> 20#include <linux/percpu.h>
21#include <linux/smp.h>
21 22
22#include <asm/addrspace.h> 23#include <asm/addrspace.h>
23#include <asm/io.h> 24#include <asm/io.h>
diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
index df6f5bc60572..98bd7de75778 100644
--- a/arch/mips/kernel/cevt-smtc.c
+++ b/arch/mips/kernel/cevt-smtc.c
@@ -10,6 +10,7 @@
10#include <linux/clockchips.h> 10#include <linux/clockchips.h>
11#include <linux/interrupt.h> 11#include <linux/interrupt.h>
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/smp.h>
13 14
14#include <asm/smtc_ipi.h> 15#include <asm/smtc_ipi.h>
15#include <asm/time.h> 16#include <asm/time.h>
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index b13b8eb30596..1abe9905c9c1 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -14,6 +14,7 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/ptrace.h> 16#include <linux/ptrace.h>
17#include <linux/smp.h>
17#include <linux/stddef.h> 18#include <linux/stddef.h>
18 19
19#include <asm/bugs.h> 20#include <asm/bugs.h>
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index ed20e7fe65e3..f7d8d5d0ddbf 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -7,6 +7,7 @@
7#include <linux/interrupt.h> 7#include <linux/interrupt.h>
8#include <linux/jiffies.h> 8#include <linux/jiffies.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/smp.h>
10#include <linux/spinlock.h> 11#include <linux/spinlock.h>
11 12
12#include <asm/delay.h> 13#include <asm/delay.h>
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index 3f43c2e3aa5a..39000f103f2c 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -2,6 +2,7 @@
2 2
3#include <linux/bitmap.h> 3#include <linux/bitmap.h>
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/smp.h>
5 6
6#include <asm/io.h> 7#include <asm/io.h>
7#include <asm/gic.h> 8#include <asm/gic.h>
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index 6e152c80cd4a..50c9bb880667 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -26,6 +26,7 @@
26#include <linux/kgdb.h> 26#include <linux/kgdb.h>
27#include <linux/kdebug.h> 27#include <linux/kdebug.h>
28#include <linux/sched.h> 28#include <linux/sched.h>
29#include <linux/smp.h>
29#include <asm/inst.h> 30#include <asm/inst.h>
30#include <asm/fpu.h> 31#include <asm/fpu.h>
31#include <asm/cacheflush.h> 32#include <asm/cacheflush.h>
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 1eaaa450e20c..c09d681b7181 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -50,10 +50,15 @@
  */
 void __noreturn cpu_idle(void)
 {
+	int cpu;
+
+	/* CPU is going idle. */
+	cpu = smp_processor_id();
+
 	/* endless idle loop with no priority at all */
 	while (1) {
 		tick_nohz_stop_sched_tick(1);
-		while (!need_resched()) {
+		while (!need_resched() && cpu_online(cpu)) {
 #ifdef CONFIG_MIPS_MT_SMTC
 			extern void smtc_idle_loop_hook(void);
 
@@ -62,6 +67,12 @@ void __noreturn cpu_idle(void)
 			if (cpu_wait)
 				(*cpu_wait)();
 		}
+#ifdef CONFIG_HOTPLUG_CPU
+		if (!cpu_online(cpu) && !cpu_isset(cpu, cpu_callin_map) &&
+		    (system_state == SYSTEM_RUNNING ||
+		     system_state == SYSTEM_BOOTING))
+			play_dead();
+#endif
 		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
 		schedule();
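The reworked idle loop drops out of its inner wait once the CPU is marked offline, then parks the core. A hedged userspace model of that gate, using plain booleans in place of cpu_online(), cpu_callin_map and system_state:

#include <stdio.h>
#include <stdbool.h>

/* Illustrative flags only; none of these names are the kernel's. */
static bool cpu_online_flag = true;
static bool cpu_called_in;
static bool system_running = true;

static void play_dead(void)
{
	/* In the kernel this never returns; here we just report it. */
	printf("cpu: offline path taken, parking core\n");
}

static void idle_iteration(void)
{
	/* The idle loop exits its inner wait when the CPU goes offline,
	 * then checks whether it should park itself. */
	if (!cpu_online_flag && !cpu_called_in && system_running)
		play_dead();
}

int main(void)
{
	idle_iteration();		/* online: nothing happens */
	cpu_online_flag = false;	/* simulate __cpu_disable() */
	idle_iteration();		/* now the core parks itself */
	return 0;
}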
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index f27beca4b26d..653be061b9ec 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -20,6 +20,7 @@
 
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
 #include <linux/compiler.h>
diff --git a/arch/mips/kernel/smp-up.c b/arch/mips/kernel/smp-up.c
index 878e3733bbb2..2508d55d68fd 100644
--- a/arch/mips/kernel/smp-up.c
+++ b/arch/mips/kernel/smp-up.c
@@ -55,6 +55,18 @@ static void __init up_prepare_cpus(unsigned int max_cpus)
 {
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static int up_cpu_disable(void)
+{
+	return -ENOSYS;
+}
+
+static void up_cpu_die(unsigned int cpu)
+{
+	BUG();
+}
+#endif
+
 struct plat_smp_ops up_smp_ops = {
 	.send_ipi_single	= up_send_ipi_single,
 	.send_ipi_mask		= up_send_ipi_mask,
@@ -64,4 +76,8 @@ struct plat_smp_ops up_smp_ops = {
 	.boot_secondary		= up_boot_secondary,
 	.smp_setup		= up_smp_setup,
 	.prepare_cpus		= up_prepare_cpus,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable		= up_cpu_disable,
+	.cpu_die		= up_cpu_die,
+#endif
 };
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index c937506a03aa..bc7d9b05e2f4 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -22,6 +22,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/threads.h>
 #include <linux/module.h>
@@ -44,7 +45,7 @@
 #include <asm/mipsmtregs.h>
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-static volatile cpumask_t cpu_callin_map;	/* Bitmask of started secondaries */
+volatile cpumask_t cpu_callin_map;	/* Bitmask of started secondaries */
 int __cpu_number_map[NR_CPUS];		/* Map physical to logical */
 int __cpu_logical_map[NR_CPUS];		/* Map logical to physical */
 
@@ -200,6 +201,8 @@ void __devinit smp_prepare_boot_cpu(void)
  * and keep control until "cpu_online(cpu)" is set.  Note: cpu is
  * physical, not logical.
  */
+static struct task_struct *cpu_idle_thread[NR_CPUS];
+
 int __cpuinit __cpu_up(unsigned int cpu)
 {
 	struct task_struct *idle;
@@ -209,9 +212,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	 * The following code is purely to make sure
 	 * Linux can schedule processes on this slave.
 	 */
-	idle = fork_idle(cpu);
-	if (IS_ERR(idle))
-		panic(KERN_ERR "Fork failed for CPU %d", cpu);
+	if (!cpu_idle_thread[cpu]) {
+		idle = fork_idle(cpu);
+		cpu_idle_thread[cpu] = idle;
+
+		if (IS_ERR(idle))
+			panic(KERN_ERR "Fork failed for CPU %d", cpu);
+	} else {
+		idle = cpu_idle_thread[cpu];
+		init_idle(idle, cpu);
+	}
 
 	mp_ops->boot_secondary(cpu, idle);
 
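Caching the idle task per CPU means a later re-online reuses the existing thread instead of forking a fresh one each hotplug cycle. A small stand-alone sketch of the same create-once/reuse pattern (the struct is a stand-in, not the kernel's task_struct):

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4

struct task {
	int cpu;
	int generation;
};

static struct task *cpu_idle_thread[NR_CPUS];

static struct task *bring_up(int cpu)
{
	struct task *idle = cpu_idle_thread[cpu];

	if (!idle) {
		idle = calloc(1, sizeof(*idle));	/* fork_idle() analogue */
		if (!idle)
			abort();
		idle->cpu = cpu;
		cpu_idle_thread[cpu] = idle;
	} else {
		idle->generation++;			/* init_idle() analogue */
	}
	return idle;
}

int main(void)
{
	struct task *a = bring_up(1);
	struct task *b = bring_up(1);	/* offline/online cycle reuses it */

	printf("same object: %s, generation %d\n",
	       a == b ? "yes" : "no", b->generation);
	return 0;
}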
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 37d51cd124e9..8a0626cbb108 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -20,6 +20,7 @@
 #include <linux/clockchips.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
diff --git a/arch/mips/kernel/topology.c b/arch/mips/kernel/topology.c
index 660e44ed44d7..cf3eb61fad12 100644
--- a/arch/mips/kernel/topology.c
+++ b/arch/mips/kernel/topology.c
@@ -17,7 +17,10 @@ static int __init topology_init(void)
 #endif /* CONFIG_NUMA */
 
 	for_each_present_cpu(i) {
-		ret = register_cpu(&per_cpu(cpu_devices, i), i);
+		struct cpu *c = &per_cpu(cpu_devices, i);
+
+		c->hotpluggable = 1;
+		ret = register_cpu(c, i);
 		if (ret)
 			printk(KERN_WARNING "topology_init: register_cpu %d "
 			       "failed (%d)\n", i, ret);
diff --git a/arch/mips/mipssim/sim_time.c b/arch/mips/mipssim/sim_time.c
index 881ecbc1fa23..0cea932f1241 100644
--- a/arch/mips/mipssim/sim_time.c
+++ b/arch/mips/mipssim/sim_time.c
@@ -91,6 +91,7 @@ unsigned __cpuinit get_c0_compare_int(void)
 		mips_cpu_timer_irq = MSC01E_INT_BASE + MSC01E_INT_CPUCTR;
-	} else {
+	} else
 #endif
+	{
 	if (cpu_has_vint)
 		set_vi_handler(cp0_compare_irq, mips_timer_dispatch);
 	mips_cpu_timer_irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq;
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index 44d01a0a8490..b165cdcb2818 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/bitops.h>
 #include <linux/cpu.h>
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
index 5500c20c79ae..54e5f7b9f440 100644
--- a/arch/mips/mm/c-r3k.c
+++ b/arch/mips/mm/c-r3k.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 
 #include <asm/page.h>
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 71fe4cb778cd..6721ee2b1e8b 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/linkage.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/bitops.h>
diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c
index f7c8f9ce39c1..6515b4418714 100644
--- a/arch/mips/mm/c-tx39.c
+++ b/arch/mips/mm/c-tx39.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 
 #include <asm/cacheops.h>
diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c
index 2b1309b2580a..e274fda329f4 100644
--- a/arch/mips/mm/highmem.c
+++ b/arch/mips/mm/highmem.c
@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <linux/highmem.h>
+#include <linux/smp.h>
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
 
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index c5511294a9ee..0e820508ff23 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/string.h>
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 48060c635acd..f5c73754d664 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/proc_fs.h>
diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c
index 1c0048a6f5cf..0f5ab236ab69 100644
--- a/arch/mips/mm/tlb-r3k.c
+++ b/arch/mips/mm/tlb-r3k.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 
 #include <asm/page.h>
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index f60fe513eb60..cee502caf398 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -10,6 +10,7 @@
  */
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 
diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c
index 4ec95cc2df2f..2b82f23df1a1 100644
--- a/arch/mips/mm/tlb-r8k.c
+++ b/arch/mips/mm/tlb-r8k.c
@@ -10,6 +10,7 @@
  */
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 
 #include <asm/cpu.h>
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 8f606ead826e..9a17bf8395df 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -23,6 +23,7 @@
 #include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
+#include <linux/smp.h>
 #include <linux/string.h>
 #include <linux/init.h>
 
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index ea176113fea9..b4eaf137e4a7 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c
index dda6f2058665..a0e726eb039a 100644
--- a/arch/mips/pci/pci-ip27.c
+++ b/arch/mips/pci/pci-ip27.c
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/smp.h>
 #include <asm/sn/arch.h>
 #include <asm/pci/bridge.h>
 #include <asm/paccess.h>
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index f78c29b68d77..8ace27716232 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -1,5 +1,6 @@
 #include <linux/linkage.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 
 #include <asm/pmon.h>
 #include <asm/titan_dep.h>
diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S
index 486bd3fd01a1..4b8174b382d7 100644
--- a/arch/mips/power/hibernate.S
+++ b/arch/mips/power/hibernate.S
@@ -43,15 +43,6 @@ LEAF(swsusp_arch_resume)
	bne t1, t3, 1b
	PTR_L t0, PBE_NEXT(t0)
	bnez t0, 0b
-	/* flush caches to make sure context is in memory */
-	PTR_L t0, __flush_cache_all
-	jalr t0
-	/* flush tlb entries */
-#ifdef CONFIG_SMP
-	jal	flush_tlb_all
-#else
-	jal	local_flush_tlb_all
-#endif
	PTR_LA t0, saved_regs
	PTR_L ra, PT_R31(t0)
	PTR_L sp, PT_R29(t0)
diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c
index 4a500e8cd3cc..51d3a4f2d7e1 100644
--- a/arch/mips/sgi-ip27/ip27-init.c
+++ b/arch/mips/sgi-ip27/ip27-init.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/cpumask.h>
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 1bb692a3b319..c1c8e40d65d6 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -18,6 +18,7 @@
 #include <linux/ioport.h>
 #include <linux/timex.h>
 #include <linux/slab.h>
+#include <linux/smp.h>
 #include <linux/random.h>
 #include <linux/kernel.h>
 #include <linux/kernel_stat.h>
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index f10a7cd64f7e..6d0e59ffba2e 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -10,6 +10,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/param.h>
+#include <linux/smp.h>
 #include <linux/time.h>
 #include <linux/timex.h>
 #include <linux/mm.h>
diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c
index 6ae64e8dfc40..5e871e75a8d9 100644
--- a/arch/mips/sgi-ip27/ip27-xtalk.c
+++ b/arch/mips/sgi-ip27/ip27-xtalk.c
@@ -9,6 +9,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/smp.h>
 #include <asm/sn/types.h>
 #include <asm/sn/klconfig.h>
 #include <asm/sn/hub.h>
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index 690de06bde90..ba59839a021e 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -19,6 +19,7 @@
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <linux/interrupt.h>
+#include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
diff --git a/arch/mips/sibyte/common/cfe_console.c b/arch/mips/sibyte/common/cfe_console.c
index 81e3d54376e9..1ad2da103fe9 100644
--- a/arch/mips/sibyte/common/cfe_console.c
+++ b/arch/mips/sibyte/common/cfe_console.c
@@ -51,12 +51,13 @@ static int cfe_console_setup(struct console *cons, char *str)
 			setleds("u0cn");
 		} else if (!strcmp(consdev, "uart1")) {
 			setleds("u1cn");
+		} else
 #endif
 #ifdef CONFIG_VGA_CONSOLE
-		} else if (!strcmp(consdev, "pcconsole0")) {
+		if (!strcmp(consdev, "pcconsole0")) {
 			setleds("pccn");
-#endif
 		} else
+#endif
 			return -ENODEV;
 	}
 	return 0;
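The fix keeps the if/else chain well formed no matter which console options are configured in: each #ifdef'd section now closes with a dangling "} else" inside its own block, so the trailing statement always has a partner. A self-contained sketch of the idiom, with made-up CONFIG macros, that compiles with any combination of the two options enabled or disabled:

#include <stdio.h>
#include <string.h>
#include <errno.h>

#define CONFIG_UART_CONSOLE	/* illustrative config switches, not kconfig */
#define CONFIG_VGA_CONSOLE

static int pick_console(const char *consdev)
{
#ifdef CONFIG_UART_CONSOLE
	if (!strcmp(consdev, "uart0")) {
		puts("u0cn");
	} else if (!strcmp(consdev, "uart1")) {
		puts("u1cn");
	} else			/* dangling else stays inside this #ifdef */
#endif
#ifdef CONFIG_VGA_CONSOLE
	if (!strcmp(consdev, "pcconsole0")) {
		puts("pccn");
	} else
#endif
		return -ENODEV;	/* binds to whichever branch is compiled in */
	return 0;
}

int main(void)
{
	printf("uart1 -> %d\n", pick_console("uart1"));
	printf("bogus -> %d\n", pick_console("bogus"));
	return 0;
}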
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index 69f5f88711cc..0d9ec1a5c24a 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -1,5 +1,6 @@
 #include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/smp.h>
 #include <linux/time.h>
 #include <linux/clockchips.h>
 
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
index 5ec17563142e..dd2aadc14af0 100644
--- a/arch/um/drivers/slip_kern.c
+++ b/arch/um/drivers/slip_kern.c
@@ -30,7 +30,6 @@ static void slip_init(struct net_device *dev, void *data)
 
	slip_proto_init(&spri->slip);
 
-	dev->init = NULL;
	dev->hard_header_len = 0;
	dev->header_ops = NULL;
	dev->addr_len = 0;
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
index f15a6e7654f3..e376284f0fb7 100644
--- a/arch/um/drivers/slirp_kern.c
+++ b/arch/um/drivers/slirp_kern.c
@@ -32,7 +32,6 @@ void slirp_init(struct net_device *dev, void *data)
 
	slip_proto_init(&spri->slip);
 
-	dev->init = NULL;
	dev->hard_header_len = 0;
	dev->header_ops = NULL;
	dev->addr_len = 0;
diff --git a/arch/um/include/asm/dma-mapping.h b/arch/um/include/asm/dma-mapping.h
index 90fc708b320e..378de4bbf49f 100644
--- a/arch/um/include/asm/dma-mapping.h
+++ b/arch/um/include/asm/dma-mapping.h
@@ -79,14 +79,14 @@ dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
 }
 
 static inline void
-dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
		enum dma_data_direction direction)
 {
	BUG();
 }
 
 static inline void
-dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
	    enum dma_data_direction direction)
 {
	BUG();
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 4518dc500903..20d1465a2ab0 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -144,6 +144,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
 
 #else /* !CONFIG_ACPI */
 
+#define acpi_disabled 1
 #define acpi_lapic 0
 #define acpi_ioapic 0
 static inline void acpi_noirq_set(void) { }
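Defining acpi_disabled as a literal 1 in the !CONFIG_ACPI case (instead of the runtime variable it replaces in boot.c below) lets the compiler fold away every guarded branch. A tiny illustration of the same trick; CONFIG_ACPI here is only an illustrative macro, not real kconfig:

#include <stdio.h>

#ifdef CONFIG_ACPI
extern int acpi_disabled;	/* runtime decision, lives in boot.c */
#else
#define acpi_disabled 1		/* compile-time constant: dead code drops out */
#endif

int main(void)
{
	if (acpi_disabled)
		puts("ACPI paths compiled out or disabled");
	else
		puts("ACPI available");
	return 0;
}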
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index cb739cc0a080..b399988eee3a 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -25,7 +25,7 @@
 #define PCI_BIOS_IRQ_SCAN	0x2000
 #define PCI_ASSIGN_ALL_BUSSES	0x4000
 #define PCI_CAN_SKIP_ISA_ALIGN	0x8000
-#define PCI_NO_ROOT_CRS		0x10000
+#define PCI_USE__CRS		0x10000
 #define PCI_CHECK_ENABLE_AMD_MMCONF	0x20000
 #define PCI_HAS_IO_ECS		0x40000
 #define PCI_NOASSIGN_ROMS	0x80000
@@ -121,6 +121,9 @@ extern int __init pcibios_init(void);
 extern int __init pci_mmcfg_arch_init(void);
 extern void __init pci_mmcfg_arch_free(void);
 
+extern struct acpi_mcfg_allocation *pci_mmcfg_config;
+extern int pci_mmcfg_config_num;
+
 /*
  * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
  * on their northbrige except through the * %eax register. As such, you MUST
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 631086159c53..6b8ca3a0285d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -44,11 +44,7 @@
 
 static int __initdata acpi_force = 0;
 u32 acpi_rsdt_forced;
-#ifdef CONFIG_ACPI
-int acpi_disabled = 0;
-#else
-int acpi_disabled = 1;
-#endif
+int acpi_disabled;
 EXPORT_SYMBOL(acpi_disabled);
 
 #ifdef CONFIG_X86_64
@@ -122,72 +118,6 @@ void __init __acpi_unmap_table(char *map, unsigned long size)
	early_iounmap(map, size);
 }
 
-#ifdef CONFIG_PCI_MMCONFIG
-
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
-
-/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
-struct acpi_mcfg_allocation *pci_mmcfg_config;
-int pci_mmcfg_config_num;
-
-static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
-{
-	if (!strcmp(mcfg->header.oem_id, "SGI"))
-		acpi_mcfg_64bit_base_addr = TRUE;
-
-	return 0;
-}
-
-int __init acpi_parse_mcfg(struct acpi_table_header *header)
-{
-	struct acpi_table_mcfg *mcfg;
-	unsigned long i;
-	int config_size;
-
-	if (!header)
-		return -EINVAL;
-
-	mcfg = (struct acpi_table_mcfg *)header;
-
-	/* how many config structures do we have */
-	pci_mmcfg_config_num = 0;
-	i = header->length - sizeof(struct acpi_table_mcfg);
-	while (i >= sizeof(struct acpi_mcfg_allocation)) {
-		++pci_mmcfg_config_num;
-		i -= sizeof(struct acpi_mcfg_allocation);
-	};
-	if (pci_mmcfg_config_num == 0) {
-		printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
-		return -ENODEV;
-	}
-
-	config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
-	pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
-	if (!pci_mmcfg_config) {
-		printk(KERN_WARNING PREFIX
-		       "No memory for MCFG config tables\n");
-		return -ENOMEM;
-	}
-
-	memcpy(pci_mmcfg_config, &mcfg[1], config_size);
-
-	acpi_mcfg_oem_check(mcfg);
-
-	for (i = 0; i < pci_mmcfg_config_num; ++i) {
-		if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
-		    !acpi_mcfg_64bit_base_addr) {
-			printk(KERN_ERR PREFIX
-			       "MMCONFIG not in low 4GB of memory\n");
-			kfree(pci_mmcfg_config);
-			pci_mmcfg_config_num = 0;
-			return -ENODEV;
-		}
-	}
-
-	return 0;
-}
-#endif	/* CONFIG_PCI_MMCONFIG */
-
 #ifdef CONFIG_X86_LOCAL_APIC
 static int __init acpi_parse_madt(struct acpi_table_header *table)
 {
@@ -1519,14 +1449,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
	 },
	{
	 .callback = force_acpi_ht,
-	 .ident = "ASUS P4B266",
-	 .matches = {
-		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
-		     DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
-		     },
-	 },
-	{
-	 .callback = force_acpi_ht,
	 .ident = "ASUS P2B-DS",
	 .matches = {
		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index bbbe4bbb6f34..8c44c232efcb 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -34,12 +34,22 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
		flags->bm_check = 1;
	else if (c->x86_vendor == X86_VENDOR_INTEL) {
		/*
-		 * Today all CPUs that support C3 share cache.
-		 * TBD: This needs to look at cache shared map, once
-		 * multi-core detection patch makes to the base.
+		 * Today all MP CPUs that support C3 share cache.
+		 * And caches should not be flushed by software while
+		 * entering C3 type state.
		 */
		flags->bm_check = 1;
	}
+
+	/*
+	 * On all recent Intel platforms, ARB_DISABLE is a nop.
+	 * So, set bm_control to zero to indicate that ARB_DISABLE
+	 * is not required while entering C3 type state on
+	 * P4, Core and beyond CPUs
+	 */
+	if (c->x86_vendor == X86_VENDOR_INTEL &&
+	    (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14)))
+		flags->bm_control = 0;
 }
 EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
 
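The new bm_control test keys off Intel family/model: family above 6, or family 6 with model 14 (Core) and later, no longer needs ARB_DISABLE before entering C3. A small sketch of that predicate with illustrative CPU values:

#include <stdio.h>
#include <stdbool.h>

struct cpu_id {
	int family;
	int model;
};

/* Same condition as the hunk above: family > 6, or family 6 model >= 14. */
static bool arb_disable_is_nop(const struct cpu_id *c)
{
	return c->family > 0x6 || (c->family == 6 && c->model >= 14);
}

int main(void)
{
	struct cpu_id p3   = { .family = 6,  .model = 8 };	/* older P6 */
	struct cpu_id core = { .family = 6,  .model = 15 };	/* Core 2 */
	struct cpu_id p4   = { .family = 15, .model = 2 };

	printf("p3:   %d\n", arb_disable_is_nop(&p3));		/* 0 */
	printf("core: %d\n", arb_disable_is_nop(&core));	/* 1 */
	printf("p4:   %d\n", arb_disable_is_nop(&p4));		/* 1 */
	return 0;
}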
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index 7c074eec39fb..d296f4a195c9 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -72,6 +72,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
	return;
 }
 
+
 /* Initialize _PDC data based on the CPU vendor */
 void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 {
@@ -85,3 +86,15 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 }
 
 EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
+
+void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr)
+{
+	if (pr->pdc) {
+		kfree(pr->pdc->pointer->buffer.pointer);
+		kfree(pr->pdc->pointer);
+		kfree(pr->pdc);
+		pr->pdc = NULL;
+	}
+}
+
+EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc);
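The cleanup hook releases the nested _PDC buffers innermost-first and then clears the head pointer, so a second call is harmless. A hedged userspace model of that ordering with simplified stand-in structs (not the real ACPICA types):

#include <stdio.h>
#include <stdlib.h>

struct buffer { char *pointer; };
struct object { struct buffer buffer; };
struct pdc_in { struct object *pointer; };

static void cleanup_pdc(struct pdc_in **pdc)
{
	if (*pdc) {
		free((*pdc)->pointer->buffer.pointer);	/* innermost first */
		free((*pdc)->pointer);
		free(*pdc);
		*pdc = NULL;				/* makes it idempotent */
	}
}

int main(void)
{
	struct pdc_in *pdc = malloc(sizeof(*pdc));

	pdc->pointer = malloc(sizeof(*pdc->pointer));
	pdc->pointer->buffer.pointer = malloc(16);

	cleanup_pdc(&pdc);
	cleanup_pdc(&pdc);	/* safe: head pointer already NULL */
	printf("pdc is %s\n", pdc ? "set" : "NULL");
	return 0;
}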
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b7a79207295e..4d0216fcb36c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1414,6 +1414,9 @@ int setup_ioapic_entry(int apic_id, int irq,
		irte.vector = vector;
		irte.dest_id = IRTE_DEST(destination);
 
+		/* Set source-id of interrupt request */
+		set_ioapic_sid(&irte, apic_id);
+
		modify_irte(irq, &irte);
 
		ir_entry->index2 = (index >> 15) & 0x1;
@@ -3290,6 +3293,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
		irte.vector = cfg->vector;
		irte.dest_id = IRTE_DEST(dest);
 
+		/* Set source-id of interrupt request */
+		set_msi_sid(&irte, pdev);
+
		modify_irte(irq, &irte);
 
		msg->address_hi = MSI_ADDR_BASE_HI;
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 16c3fda85bba..b26626dc517c 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -238,7 +238,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
 #endif
	}
 
-	if (bus && !(pci_probe & PCI_NO_ROOT_CRS))
+	if (bus && (pci_probe & PCI_USE__CRS))
		get_current_resources(device, busnum, domain, bus);
	return bus;
 }
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 2255f880678b..f893d6a6e803 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -101,7 +101,7 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
	struct pci_root_info *info;
 
	/* don't go for it if _CRS is used */
-	if (!(pci_probe & PCI_NO_ROOT_CRS))
+	if (pci_probe & PCI_USE__CRS)
		return;
 
	/* if only one root bus, don't need to anything */
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 4740119e4bb7..2202b6257b82 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -515,8 +515,8 @@ char * __devinit pcibios_setup(char *str)
	} else if (!strcmp(str, "assign-busses")) {
		pci_probe |= PCI_ASSIGN_ALL_BUSSES;
		return NULL;
-	} else if (!strcmp(str, "nocrs")) {
-		pci_probe |= PCI_NO_ROOT_CRS;
+	} else if (!strcmp(str, "use_crs")) {
+		pci_probe |= PCI_USE__CRS;
		return NULL;
	} else if (!strcmp(str, "earlydump")) {
		pci_early_dump_regs = 1;
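Note the default flips here: _CRS data was previously used unless the user passed "nocrs", and is now ignored unless "use_crs" is given. A minimal sketch of the pcibios_setup-style keyword matching that sets the probe bit (constants mirror the header change above; the function is a simplified stand-in):

#include <stdio.h>
#include <string.h>

#define PCI_ASSIGN_ALL_BUSSES	0x4000
#define PCI_USE__CRS		0x10000

static unsigned int pci_probe;

static const char *pcibios_setup(const char *str)
{
	if (!strcmp(str, "assign-busses")) {
		pci_probe |= PCI_ASSIGN_ALL_BUSSES;
		return NULL;			/* option consumed */
	} else if (!strcmp(str, "use_crs")) {
		pci_probe |= PCI_USE__CRS;
		return NULL;
	}
	return str;				/* not ours, pass along */
}

int main(void)
{
	pcibios_setup("use_crs");
	printf("_CRS opted in: %s\n",
	       (pci_probe & PCI_USE__CRS) ? "yes" : "no");
	return 0;
}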
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 8766b0e216c5..712443ec6d43 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -523,6 +523,69 @@ reject:
 
 static int __initdata known_bridge;
 
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+struct acpi_mcfg_allocation *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
+{
+	if (!strcmp(mcfg->header.oem_id, "SGI"))
+		acpi_mcfg_64bit_base_addr = TRUE;
+
+	return 0;
+}
+
+static int __init pci_parse_mcfg(struct acpi_table_header *header)
+{
+	struct acpi_table_mcfg *mcfg;
+	unsigned long i;
+	int config_size;
+
+	if (!header)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *)header;
+
+	/* how many config structures do we have */
+	pci_mmcfg_config_num = 0;
+	i = header->length - sizeof(struct acpi_table_mcfg);
+	while (i >= sizeof(struct acpi_mcfg_allocation)) {
+		++pci_mmcfg_config_num;
+		i -= sizeof(struct acpi_mcfg_allocation);
+	};
+	if (pci_mmcfg_config_num == 0) {
+		printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
+		return -ENODEV;
+	}
+
+	config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+	pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+	if (!pci_mmcfg_config) {
+		printk(KERN_WARNING PREFIX
+		       "No memory for MCFG config tables\n");
+		return -ENOMEM;
+	}
+
+	memcpy(pci_mmcfg_config, &mcfg[1], config_size);
+
+	acpi_mcfg_oem_check(mcfg);
+
+	for (i = 0; i < pci_mmcfg_config_num; ++i) {
+		if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
+		    !acpi_mcfg_64bit_base_addr) {
+			printk(KERN_ERR PREFIX
+			       "MMCONFIG not in low 4GB of memory\n");
+			kfree(pci_mmcfg_config);
+			pci_mmcfg_config_num = 0;
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
 static void __init __pci_mmcfg_init(int early)
 {
	/* MMCONFIG disabled */
@@ -543,7 +606,7 @@ static void __init __pci_mmcfg_init(int early)
	}
 
	if (!known_bridge)
-		acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
+		acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
 
	pci_mmcfg_reject_broken(early);
 
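The entry count in pci_parse_mcfg() is pure arithmetic: everything after the fixed MCFG header is an array of allocation records, so the count is (table length - header size) / record size. A stand-alone sketch with simplified stand-in layouts, not the real ACPICA structs:

#include <stdio.h>
#include <stdint.h>

struct acpi_table_mcfg_hdr { char sig[4]; uint32_t length; char oem_id[6]; };
struct acpi_mcfg_allocation { uint64_t address; uint16_t segment;
			      uint8_t start_bus, end_bus; uint32_t reserved; };

int main(void)
{
	/* Pretend the firmware table carries two allocation records. */
	struct acpi_table_mcfg_hdr hdr = {
		.length = sizeof(hdr) + 2 * sizeof(struct acpi_mcfg_allocation),
	};
	unsigned long i = hdr.length - sizeof(hdr);
	int num = 0;

	/* Same counting loop as the parser above. */
	while (i >= sizeof(struct acpi_mcfg_allocation)) {
		num++;
		i -= sizeof(struct acpi_mcfg_allocation);
	}
	printf("MCFG entries: %d\n", num);	/* prints 2 */
	return 0;
}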
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 88e42abf5d88..0df8fcb687d6 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -61,6 +61,7 @@ static int acpi_ac_open_fs(struct inode *inode, struct file *file);
 static int acpi_ac_add(struct acpi_device *device);
 static int acpi_ac_remove(struct acpi_device *device, int type);
 static int acpi_ac_resume(struct acpi_device *device);
+static void acpi_ac_notify(struct acpi_device *device, u32 event);
 
 static const struct acpi_device_id ac_device_ids[] = {
	{"ACPI0003", 0},
@@ -72,10 +73,12 @@ static struct acpi_driver acpi_ac_driver = {
	.name = "ac",
	.class = ACPI_AC_CLASS,
	.ids = ac_device_ids,
+	.flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
	.ops = {
		.add = acpi_ac_add,
		.remove = acpi_ac_remove,
		.resume = acpi_ac_resume,
+		.notify = acpi_ac_notify,
		},
 };
 
@@ -220,16 +223,14 @@ static int acpi_ac_remove_fs(struct acpi_device *device)
                                    Driver Model
    -------------------------------------------------------------------------- */
 
-static void acpi_ac_notify(acpi_handle handle, u32 event, void *data)
+static void acpi_ac_notify(struct acpi_device *device, u32 event)
 {
-	struct acpi_ac *ac = data;
-	struct acpi_device *device = NULL;
+	struct acpi_ac *ac = acpi_driver_data(device);
 
 
	if (!ac)
		return;
 
-	device = ac->device;
	switch (event) {
	default:
		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
@@ -253,7 +254,6 @@ static void acpi_ac_notify(acpi_handle handle, u32 event, void *data)
 static int acpi_ac_add(struct acpi_device *device)
 {
	int result = 0;
-	acpi_status status = AE_OK;
	struct acpi_ac *ac = NULL;
 
 
@@ -286,13 +286,6 @@ static int acpi_ac_add(struct acpi_device *device)
	ac->charger.get_property = get_ac_property;
	power_supply_register(&ac->device->dev, &ac->charger);
 #endif
-	status = acpi_install_notify_handler(device->handle,
-					     ACPI_ALL_NOTIFY, acpi_ac_notify,
-					     ac);
-	if (ACPI_FAILURE(status)) {
-		result = -ENODEV;
-		goto end;
-	}
 
	printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
	       acpi_device_name(device), acpi_device_bid(device),
@@ -328,7 +321,6 @@ static int acpi_ac_resume(struct acpi_device *device)
 
 static int acpi_ac_remove(struct acpi_device *device, int type)
 {
-	acpi_status status = AE_OK;
	struct acpi_ac *ac = NULL;
 
 
@@ -337,8 +329,6 @@ static int acpi_ac_remove(struct acpi_device *device, int type)
 
	ac = acpi_driver_data(device);
 
-	status = acpi_remove_notify_handler(device->handle,
-					    ACPI_ALL_NOTIFY, acpi_ac_notify);
 #ifdef CONFIG_ACPI_SYSFS_POWER
	if (ac->charger.dev)
		power_supply_unregister(&ac->charger);
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index b0de6312919a..58b4517ce712 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -796,13 +796,12 @@ static void acpi_battery_remove_fs(struct acpi_device *device)
                                  Driver Interface
    -------------------------------------------------------------------------- */
 
-static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
+static void acpi_battery_notify(struct acpi_device *device, u32 event)
 {
-	struct acpi_battery *battery = data;
-	struct acpi_device *device;
+	struct acpi_battery *battery = acpi_driver_data(device);
+
	if (!battery)
		return;
-	device = battery->device;
	acpi_battery_update(battery);
	acpi_bus_generate_proc_event(device, event,
				     acpi_battery_present(battery));
@@ -819,7 +818,6 @@ static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
 static int acpi_battery_add(struct acpi_device *device)
 {
	int result = 0;
-	acpi_status status = 0;
	struct acpi_battery *battery = NULL;
	if (!device)
		return -EINVAL;
@@ -834,22 +832,12 @@ static int acpi_battery_add(struct acpi_device *device)
	acpi_battery_update(battery);
 #ifdef CONFIG_ACPI_PROCFS_POWER
	result = acpi_battery_add_fs(device);
-	if (result)
-		goto end;
 #endif
-	status = acpi_install_notify_handler(device->handle,
-					     ACPI_ALL_NOTIFY,
-					     acpi_battery_notify, battery);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, status, "Installing notify handler"));
-		result = -ENODEV;
-		goto end;
-	}
-	printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
-	       ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
-	       device->status.battery_present ? "present" : "absent");
-      end:
-	if (result) {
+	if (!result) {
+		printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
+			ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
+			device->status.battery_present ? "present" : "absent");
+	} else {
 #ifdef CONFIG_ACPI_PROCFS_POWER
		acpi_battery_remove_fs(device);
 #endif
@@ -860,15 +848,11 @@ static int acpi_battery_add(struct acpi_device *device)
 
 static int acpi_battery_remove(struct acpi_device *device, int type)
 {
-	acpi_status status = 0;
	struct acpi_battery *battery = NULL;
 
	if (!device || !acpi_driver_data(device))
		return -EINVAL;
	battery = acpi_driver_data(device);
-	status = acpi_remove_notify_handler(device->handle,
-					    ACPI_ALL_NOTIFY,
-					    acpi_battery_notify);
 #ifdef CONFIG_ACPI_PROCFS_POWER
	acpi_battery_remove_fs(device);
 #endif
@@ -896,10 +880,12 @@ static struct acpi_driver acpi_battery_driver = {
	.name = "battery",
	.class = ACPI_BATTERY_CLASS,
	.ids = battery_device_ids,
+	.flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
	.ops = {
		.add = acpi_battery_add,
		.resume = acpi_battery_resume,
		.remove = acpi_battery_remove,
+		.notify = acpi_battery_notify,
		},
 };
 
diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index 09c69806c1fc..f6baa77deefb 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -192,6 +192,22 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
		     DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile V5505"),
		},
	},
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "Sony VGN-NS10J_S",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "VGN-NS10J_S"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "Sony VGN-SR290J",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "Sony VGN-SR290J"),
+		},
+	},
 
	/*
	 * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index ae862f1798dc..2876fc70c3a9 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -450,18 +450,16 @@ int acpi_bus_receive_event(struct acpi_bus_event *event)
                              Notification Handling
    -------------------------------------------------------------------------- */
 
-static int
-acpi_bus_check_device(struct acpi_device *device, int *status_changed)
+static void acpi_bus_check_device(acpi_handle handle)
 {
-	acpi_status status = 0;
+	struct acpi_device *device;
+	acpi_status status;
	struct acpi_device_status old_status;
 
-
+	if (acpi_bus_get_device(handle, &device))
+		return;
	if (!device)
-		return -EINVAL;
-
-	if (status_changed)
-		*status_changed = 0;
+		return;
 
	old_status = device->status;
 
@@ -471,22 +469,15 @@ acpi_bus_check_device(struct acpi_device *device, int *status_changed)
	 */
	if (device->parent && !device->parent->status.present) {
		device->status = device->parent->status;
-		if (STRUCT_TO_INT(old_status) != STRUCT_TO_INT(device->status)) {
-			if (status_changed)
-				*status_changed = 1;
-		}
-		return 0;
+		return;
	}
 
	status = acpi_bus_get_status(device);
	if (ACPI_FAILURE(status))
-		return -ENODEV;
+		return;
 
	if (STRUCT_TO_INT(old_status) == STRUCT_TO_INT(device->status))
-		return 0;
-
-	if (status_changed)
-		*status_changed = 1;
+		return;
 
	/*
	 * Device Insertion/Removal
@@ -498,33 +489,17 @@ acpi_bus_check_device(struct acpi_device *device, int *status_changed)
		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device removal detected\n"));
		/* TBD: Handle device removal */
	}
-
-	return 0;
 }
 
-static int acpi_bus_check_scope(struct acpi_device *device)
+static void acpi_bus_check_scope(acpi_handle handle)
 {
-	int result = 0;
-	int status_changed = 0;
-
-
-	if (!device)
-		return -EINVAL;
-
	/* Status Change? */
-	result = acpi_bus_check_device(device, &status_changed);
-	if (result)
-		return result;
-
-	if (!status_changed)
-		return 0;
+	acpi_bus_check_device(handle);
 
	/*
	 * TBD: Enumerate child devices within this device's scope and
	 * run acpi_bus_check_device()'s on them.
	 */
-
-	return 0;
 }
 
 static BLOCKING_NOTIFIER_HEAD(acpi_bus_notify_list);
@@ -547,22 +522,19 @@ EXPORT_SYMBOL_GPL(unregister_acpi_bus_notifier);
 */
 static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
 {
-	int result = 0;
	struct acpi_device *device = NULL;
+	struct acpi_driver *driver;
+
+	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Notification %#02x to handle %p\n",
+			  type, handle));
 
	blocking_notifier_call_chain(&acpi_bus_notify_list,
		type, (void *)handle);
 
-	if (acpi_bus_get_device(handle, &device))
-		return;
-
	switch (type) {
 
	case ACPI_NOTIFY_BUS_CHECK:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Received BUS CHECK notification for device [%s]\n",
-				  device->pnp.bus_id));
-		result = acpi_bus_check_scope(device);
+		acpi_bus_check_scope(handle);
		/*
		 * TBD: We'll need to outsource certain events to non-ACPI
		 * drivers via the device manager (device.c).
@@ -570,10 +542,7 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
		break;
 
	case ACPI_NOTIFY_DEVICE_CHECK:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Received DEVICE CHECK notification for device [%s]\n",
-				  device->pnp.bus_id));
-		result = acpi_bus_check_device(device, NULL);
+		acpi_bus_check_device(handle);
		/*
		 * TBD: We'll need to outsource certain events to non-ACPI
		 * drivers via the device manager (device.c).
@@ -581,44 +550,26 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
		break;
 
	case ACPI_NOTIFY_DEVICE_WAKE:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Received DEVICE WAKE notification for device [%s]\n",
-				  device->pnp.bus_id));
		/* TBD */
		break;
 
	case ACPI_NOTIFY_EJECT_REQUEST:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Received EJECT REQUEST notification for device [%s]\n",
-				  device->pnp.bus_id));
		/* TBD */
		break;
 
	case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Received DEVICE CHECK LIGHT notification for device [%s]\n",
-				  device->pnp.bus_id));
		/* TBD: Exactly what does 'light' mean? */
		break;
 
	case ACPI_NOTIFY_FREQUENCY_MISMATCH:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Received FREQUENCY MISMATCH notification for device [%s]\n",
-				  device->pnp.bus_id));
		/* TBD */
		break;
 
	case ACPI_NOTIFY_BUS_MODE_MISMATCH:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Received BUS MODE MISMATCH notification for device [%s]\n",
-				  device->pnp.bus_id));
		/* TBD */
		break;
 
	case ACPI_NOTIFY_POWER_FAULT:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Received POWER FAULT notification for device [%s]\n",
-				  device->pnp.bus_id));
		/* TBD */
		break;
 
@@ -629,7 +580,13 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
		break;
	}
 
-	return;
+	acpi_bus_get_device(handle, &device);
+	if (device) {
+		driver = device->driver;
+		if (driver && driver->ops.notify &&
+		    (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS))
+			driver->ops.notify(device, type);
+	}
 }
 
 /* --------------------------------------------------------------------------
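With this change a single bus-level handler forwards every notification to the owning driver's .notify() callback, provided the driver opted in with the all-events flag; that is what lets ac.c and battery.c above drop their private acpi_install_notify_handler() calls. A hedged stand-alone model of the dispatch, with simplified stand-in types:

#include <stdio.h>

#define DRIVER_ALL_NOTIFY_EVENTS 0x1

struct device;

struct driver_ops {
	void (*notify)(struct device *dev, unsigned int event);
};

struct driver {
	unsigned int flags;
	struct driver_ops ops;
};

struct device {
	const char *name;
	struct driver *driver;
};

static void bus_notify(struct device *dev, unsigned int event)
{
	struct driver *drv = dev ? dev->driver : NULL;

	/* ... bus-level handling of the event would go here ... */
	if (drv && drv->ops.notify && (drv->flags & DRIVER_ALL_NOTIFY_EVENTS))
		drv->ops.notify(dev, event);
}

static void ac_notify(struct device *dev, unsigned int event)
{
	printf("%s: event %#x\n", dev->name, event);
}

int main(void)
{
	struct driver ac_driver = {
		.flags = DRIVER_ALL_NOTIFY_EVENTS,
		.ops = { .notify = ac_notify },
	};
	struct device ac = { .name = "ACPI0003", .driver = &ac_driver };

	bus_notify(&ac, 0x80);	/* status-change notification */
	return 0;
}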
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 8bd2c2a6884d..a8a5c29958c8 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -140,46 +140,6 @@ struct device *acpi_get_physical_device(acpi_handle handle)
 
 EXPORT_SYMBOL(acpi_get_physical_device);
 
-/* ToDo: When a PCI bridge is found, return the PCI device behind the bridge
- *       This should work in general, but did not on a Lenovo T61 for the
- *       graphics card. But this must be fixed when the PCI device is
- *       bound and the kernel device struct is attached to the acpi device
- * Note: A success call will increase reference count by one
- *       Do call put_device(dev) on the returned device then
- */
-struct device *acpi_get_physical_pci_device(acpi_handle handle)
-{
-	struct device *dev;
-	long long device_id;
-	acpi_status status;
-
-	status =
-	    acpi_evaluate_integer(handle, "_ADR", NULL, &device_id);
-
-	if (ACPI_FAILURE(status))
-		return NULL;
-
-	/* We need to attempt to determine whether the _ADR refers to a
-	   PCI device or not. There's no terribly good way to do this,
-	   so the best we can hope for is to assume that there'll never
-	   be a device in the host bridge */
-	if (device_id >= 0x10000) {
-		/* It looks like a PCI device. Does it exist? */
-		dev = acpi_get_physical_device(handle);
-	} else {
-		/* It doesn't look like a PCI device. Does its parent
-		   exist? */
-		acpi_handle phandle;
-		if (acpi_get_parent(handle, &phandle))
-			return NULL;
-		dev = acpi_get_physical_device(phandle);
-	}
-	if (!dev)
-		return NULL;
-	return dev;
-}
-EXPORT_SYMBOL(acpi_get_physical_pci_device);
-
 static int acpi_bind_one(struct device *dev, acpi_handle handle)
 {
	struct acpi_device *acpi_dev;
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index d916bea729f1..71670719d61a 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -79,6 +79,7 @@ static acpi_osd_handler acpi_irq_handler;
 static void *acpi_irq_context;
 static struct workqueue_struct *kacpid_wq;
 static struct workqueue_struct *kacpi_notify_wq;
+static struct workqueue_struct *kacpi_hotplug_wq;
 
 struct acpi_res_list {
 	resource_size_t start;
@@ -192,8 +193,10 @@ acpi_status acpi_os_initialize1(void)
 {
 	kacpid_wq = create_singlethread_workqueue("kacpid");
 	kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify");
+	kacpi_hotplug_wq = create_singlethread_workqueue("kacpi_hotplug");
 	BUG_ON(!kacpid_wq);
 	BUG_ON(!kacpi_notify_wq);
+	BUG_ON(!kacpi_hotplug_wq);
 	return AE_OK;
 }
 
@@ -206,6 +209,7 @@ acpi_status acpi_os_terminate(void)
 
 	destroy_workqueue(kacpid_wq);
 	destroy_workqueue(kacpi_notify_wq);
+	destroy_workqueue(kacpi_hotplug_wq);
 
 	return AE_OK;
 }
@@ -716,6 +720,7 @@ static acpi_status __acpi_os_execute(acpi_execute_type type,
 	acpi_status status = AE_OK;
 	struct acpi_os_dpc *dpc;
 	struct workqueue_struct *queue;
+	work_func_t func;
 	int ret;
 	ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
 			  "Scheduling function [%p(%p)] for deferred execution.\n",
@@ -740,15 +745,17 @@ static acpi_status __acpi_os_execute(acpi_execute_type type,
 	dpc->function = function;
 	dpc->context = context;
 
-	if (!hp) {
-		INIT_WORK(&dpc->work, acpi_os_execute_deferred);
-		queue = (type == OSL_NOTIFY_HANDLER) ?
-			kacpi_notify_wq : kacpid_wq;
-		ret = queue_work(queue, &dpc->work);
-	} else {
-		INIT_WORK(&dpc->work, acpi_os_execute_hp_deferred);
-		ret = schedule_work(&dpc->work);
-	}
+	/*
+	 * We can't run hotplug code in keventd_wq/kacpid_wq/kacpid_notify_wq
+	 * because the hotplug code may call driver .remove() functions,
+	 * which invoke flush_scheduled_work/acpi_os_wait_events_complete
+	 * to flush these workqueues.
+	 */
+	queue = hp ? kacpi_hotplug_wq :
+		(type == OSL_NOTIFY_HANDLER ? kacpi_notify_wq : kacpid_wq);
+	func = hp ? acpi_os_execute_hp_deferred : acpi_os_execute_deferred;
+	INIT_WORK(&dpc->work, func);
+	ret = queue_work(queue, &dpc->work);
 
 	if (!ret) {
 		printk(KERN_ERR PREFIX
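
Note: the dedicated kacpi_hotplug_wq exists because hotplug work can end up in driver .remove() methods that flush kacpid_wq/kacpi_notify_wq; if that work itself ran on one of those queues it would wait on itself. Previously hotplug work went through schedule_work(), i.e. the shared keventd queue, which has the same problem. A sketch of how a notify handler reaches the new queue; my_eject_worker and my_notify are hypothetical names, but acpi_os_hotplug_execute() is the call this series uses in scan.c:

	static void my_eject_worker(void *context)
	{
		acpi_handle handle = context;

		/* Runs on kacpi_hotplug_wq, so flushing kacpid_wq or
		   kacpi_notify_wq from here cannot deadlock. */
	}

	static void my_notify(acpi_handle handle, u32 event, void *data)
	{
		if (event == ACPI_NOTIFY_EJECT_REQUEST)
			acpi_os_hotplug_execute(my_eject_worker, handle);
	}
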
diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index bc46de3d967f..a5a77b78a723 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -24,12 +24,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/spinlock.h>
-#include <linux/pm.h>
 #include <linux/pci.h>
 #include <linux/acpi.h>
 #include <acpi/acpi_bus.h>
@@ -38,310 +33,76 @@
 #define _COMPONENT		ACPI_PCI_COMPONENT
 ACPI_MODULE_NAME("pci_bind");
 
-struct acpi_pci_data {
-	struct acpi_pci_id id;
-	struct pci_bus *bus;
-	struct pci_dev *dev;
-};
-
-static int acpi_pci_unbind(struct acpi_device *device);
-
-static void acpi_pci_data_handler(acpi_handle handle, u32 function,
-				  void *context)
-{
-
-	/* TBD: Anything we need to do here? */
-
-	return;
-}
-
-/**
- * acpi_get_pci_id
- * ------------------
- * This function is used by the ACPI Interpreter (a.k.a. Core Subsystem)
- * to resolve PCI information for ACPI-PCI devices defined in the namespace.
- * This typically occurs when resolving PCI operation region information.
- */
-acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id)
+static int acpi_pci_unbind(struct acpi_device *device)
 {
-	int result = 0;
-	acpi_status status = AE_OK;
-	struct acpi_device *device = NULL;
-	struct acpi_pci_data *data = NULL;
-
-
-	if (!id)
-		return AE_BAD_PARAMETER;
-
-	result = acpi_bus_get_device(handle, &device);
-	if (result) {
-		printk(KERN_ERR PREFIX
-		       "Invalid ACPI Bus context for device %s\n",
-		       acpi_device_bid(device));
-		return AE_NOT_EXIST;
-	}
-
-	status = acpi_get_data(handle, acpi_pci_data_handler, (void **)&data);
-	if (ACPI_FAILURE(status) || !data) {
-		ACPI_EXCEPTION((AE_INFO, status,
-				"Invalid ACPI-PCI context for device %s",
-				acpi_device_bid(device)));
-		return status;
-	}
+	struct pci_dev *dev;
 
-	*id = data->id;
+	dev = acpi_get_pci_dev(device->handle);
+	if (!dev || !dev->subordinate)
+		goto out;
 
-	/*
-	   id->segment = data->id.segment;
-	   id->bus = data->id.bus;
-	   id->device = data->id.device;
-	   id->function = data->id.function;
-	 */
+	acpi_pci_irq_del_prt(dev->subordinate);
 
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-			  "Device %s has PCI address %04x:%02x:%02x.%d\n",
-			  acpi_device_bid(device), id->segment, id->bus,
-			  id->device, id->function));
+	device->ops.bind = NULL;
+	device->ops.unbind = NULL;
 
-	return AE_OK;
+out:
+	pci_dev_put(dev);
+	return 0;
 }
 
-EXPORT_SYMBOL(acpi_get_pci_id);
-
-int acpi_pci_bind(struct acpi_device *device)
+static int acpi_pci_bind(struct acpi_device *device)
 {
-	int result = 0;
 	acpi_status status;
-	struct acpi_pci_data *data;
-	struct acpi_pci_data *pdata;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	acpi_handle handle;
+	struct pci_bus *bus;
+	struct pci_dev *dev;
 
-	if (!device || !device->parent)
-		return -EINVAL;
-
-	data = kzalloc(sizeof(struct acpi_pci_data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
-	if (ACPI_FAILURE(status)) {
-		kfree(data);
-		return -ENODEV;
-	}
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Binding PCI device [%s]...\n",
-			  (char *)buffer.pointer));
-
-	/*
-	 * Segment & Bus
-	 * -------------
-	 * These are obtained via the parent device's ACPI-PCI context.
-	 */
-	status = acpi_get_data(device->parent->handle, acpi_pci_data_handler,
-			       (void **)&pdata);
-	if (ACPI_FAILURE(status) || !pdata || !pdata->bus) {
-		ACPI_EXCEPTION((AE_INFO, status,
-				"Invalid ACPI-PCI context for parent device %s",
-				acpi_device_bid(device->parent)));
-		result = -ENODEV;
-		goto end;
-	}
-	data->id.segment = pdata->id.segment;
-	data->id.bus = pdata->bus->number;
-
-	/*
-	 * Device & Function
-	 * -----------------
-	 * These are simply obtained from the device's _ADR method. Note
-	 * that a value of zero is valid.
-	 */
-	data->id.device = device->pnp.bus_address >> 16;
-	data->id.function = device->pnp.bus_address & 0xFFFF;
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "...to %04x:%02x:%02x.%d\n",
-			  data->id.segment, data->id.bus, data->id.device,
-			  data->id.function));
-
-	/*
-	 * TBD: Support slot devices (e.g. function=0xFFFF).
-	 */
-
-	/*
-	 * Locate PCI Device
-	 * -----------------
-	 * Locate matching device in PCI namespace. If it doesn't exist
-	 * this typically means that the device isn't currently inserted
-	 * (e.g. docking station, port replicator, etc.).
-	 */
-	data->dev = pci_get_slot(pdata->bus,
-				 PCI_DEVFN(data->id.device, data->id.function));
-	if (!data->dev) {
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Device %04x:%02x:%02x.%d not present in PCI namespace\n",
-				  data->id.segment, data->id.bus,
-				  data->id.device, data->id.function));
-		result = -ENODEV;
-		goto end;
-	}
-	if (!data->dev->bus) {
-		printk(KERN_ERR PREFIX
-		       "Device %04x:%02x:%02x.%d has invalid 'bus' field\n",
-		       data->id.segment, data->id.bus,
-		       data->id.device, data->id.function);
-		result = -ENODEV;
-		goto end;
-	}
+	dev = acpi_get_pci_dev(device->handle);
+	if (!dev)
+		return 0;
 
 	/*
-	 * PCI Bridge?
-	 * -----------
-	 * If so, set the 'bus' field and install the 'bind' function to
-	 * facilitate callbacks for all of its children.
+	 * Install the 'bind' function to facilitate callbacks for
+	 * children of the P2P bridge.
 	 */
-	if (data->dev->subordinate) {
+	if (dev->subordinate) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 				  "Device %04x:%02x:%02x.%d is a PCI bridge\n",
-				  data->id.segment, data->id.bus,
-				  data->id.device, data->id.function));
-		data->bus = data->dev->subordinate;
+				  pci_domain_nr(dev->bus), dev->bus->number,
+				  PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)));
 		device->ops.bind = acpi_pci_bind;
 		device->ops.unbind = acpi_pci_unbind;
 	}
 
 	/*
-	 * Attach ACPI-PCI Context
-	 * -----------------------
-	 * Thus binding the ACPI and PCI devices.
-	 */
-	status = acpi_attach_data(device->handle, acpi_pci_data_handler, data);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, status,
-				"Unable to attach ACPI-PCI context to device %s",
-				acpi_device_bid(device)));
-		result = -ENODEV;
-		goto end;
-	}
-
-	/*
-	 * PCI Routing Table
-	 * -----------------
-	 * Evaluate and parse _PRT, if exists. This code is independent of
-	 * PCI bridges (above) to allow parsing of _PRT objects within the
-	 * scope of non-bridge devices. Note that _PRTs within the scope of
-	 * a PCI bridge assume the bridge's subordinate bus number.
+	 * Evaluate and parse _PRT, if exists. This code allows parsing of
+	 * _PRT objects within the scope of non-bridge devices. Note that
+	 * _PRTs within the scope of a PCI bridge assume the bridge's
+	 * subordinate bus number.
 	 *
 	 * TBD: Can _PRTs exist within the scope of non-bridge PCI devices?
 	 */
 	status = acpi_get_handle(device->handle, METHOD_NAME__PRT, &handle);
-	if (ACPI_SUCCESS(status)) {
-		if (data->bus)	/* PCI-PCI bridge */
-			acpi_pci_irq_add_prt(device->handle, data->id.segment,
-					     data->bus->number);
-		else		/* non-bridge PCI device */
-			acpi_pci_irq_add_prt(device->handle, data->id.segment,
-					     data->id.bus);
-	}
-
-      end:
-	kfree(buffer.pointer);
-	if (result) {
-		pci_dev_put(data->dev);
-		kfree(data);
-	}
-	return result;
-}
-
-static int acpi_pci_unbind(struct acpi_device *device)
-{
-	int result = 0;
-	acpi_status status;
-	struct acpi_pci_data *data;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-
-
-	if (!device || !device->parent)
-		return -EINVAL;
-
-	status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
 	if (ACPI_FAILURE(status))
-		return -ENODEV;
+		goto out;
 
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Unbinding PCI device [%s]...\n",
-			  (char *) buffer.pointer));
-	kfree(buffer.pointer);
+	if (dev->subordinate)
+		bus = dev->subordinate;
+	else
+		bus = dev->bus;
 
-	status =
-	    acpi_get_data(device->handle, acpi_pci_data_handler,
-			  (void **)&data);
-	if (ACPI_FAILURE(status)) {
-		result = -ENODEV;
-		goto end;
-	}
+	acpi_pci_irq_add_prt(device->handle, bus);
 
-	status = acpi_detach_data(device->handle, acpi_pci_data_handler);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, status,
-				"Unable to detach data from device %s",
-				acpi_device_bid(device)));
-		result = -ENODEV;
-		goto end;
-	}
-	if (data->dev->subordinate) {
-		acpi_pci_irq_del_prt(data->id.segment, data->bus->number);
-	}
-	pci_dev_put(data->dev);
-	kfree(data);
-
-      end:
-	return result;
+out:
+	pci_dev_put(dev);
+	return 0;
 }
 
-int
-acpi_pci_bind_root(struct acpi_device *device,
-		   struct acpi_pci_id *id, struct pci_bus *bus)
+int acpi_pci_bind_root(struct acpi_device *device)
 {
-	int result = 0;
-	acpi_status status;
-	struct acpi_pci_data *data = NULL;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-
-	if (!device || !id || !bus) {
-		return -EINVAL;
-	}
-
-	data = kzalloc(sizeof(struct acpi_pci_data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->id = *id;
-	data->bus = bus;
 	device->ops.bind = acpi_pci_bind;
 	device->ops.unbind = acpi_pci_unbind;
 
-	status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
-	if (ACPI_FAILURE(status)) {
-		kfree (data);
-		return -ENODEV;
-	}
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Binding PCI root bridge [%s] to "
-			"%04x:%02x\n", (char *)buffer.pointer,
-			id->segment, id->bus));
-
-	status = acpi_attach_data(device->handle, acpi_pci_data_handler, data);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, status,
-				"Unable to attach ACPI-PCI context to device %s",
-				(char *)buffer.pointer));
-		result = -ENODEV;
-		goto end;
-	}
-
-      end:
-	kfree(buffer.pointer);
-	if (result != 0)
-		kfree(data);
-
-	return result;
+	return 0;
 }
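
Note: the rewrite deletes the acpi_pci_data context that used to be attached to every handle (and the ~240 lines of bookkeeping that kept it consistent); bind/unbind now derive everything on demand from acpi_get_pci_dev(). The one rule that survives is which bus a _PRT applies to, sketched here as a stand-alone helper; prt_bus is a hypothetical name, not part of the patch:

	static struct pci_bus *prt_bus(struct pci_dev *dev)
	{
		/* A bridge's _PRT covers its secondary bus; a plain
		   device's _PRT covers the bus the device sits on. */
		return dev->subordinate ? dev->subordinate : dev->bus;
	}
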
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 2faa9e2ac893..b794eb88ab90 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -182,7 +182,7 @@ static void do_prt_fixups(struct acpi_prt_entry *entry,
 	}
 }
 
-static int acpi_pci_irq_add_entry(acpi_handle handle, int segment, int bus,
+static int acpi_pci_irq_add_entry(acpi_handle handle, struct pci_bus *bus,
 				  struct acpi_pci_routing_table *prt)
 {
 	struct acpi_prt_entry *entry;
@@ -196,8 +196,8 @@ static int acpi_pci_irq_add_entry(acpi_handle handle, int segment, int bus,
 	 * 1=INTA, 2=INTB. We use the PCI encoding throughout, so convert
 	 * it here.
 	 */
-	entry->id.segment = segment;
-	entry->id.bus = bus;
+	entry->id.segment = pci_domain_nr(bus);
+	entry->id.bus = bus->number;
 	entry->id.device = (prt->address >> 16) & 0xFFFF;
 	entry->pin = prt->pin + 1;
 
@@ -242,7 +242,7 @@ static int acpi_pci_irq_add_entry(acpi_handle handle, int segment, int bus,
 	return 0;
 }
 
-int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus)
+int acpi_pci_irq_add_prt(acpi_handle handle, struct pci_bus *bus)
 {
 	acpi_status status;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -271,7 +271,7 @@ int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus)
 
 	entry = buffer.pointer;
 	while (entry && (entry->length > 0)) {
-		acpi_pci_irq_add_entry(handle, segment, bus, entry);
+		acpi_pci_irq_add_entry(handle, bus, entry);
 		entry = (struct acpi_pci_routing_table *)
 				((unsigned long)entry + entry->length);
 	}
@@ -280,16 +280,17 @@ int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus)
 	return 0;
 }
 
-void acpi_pci_irq_del_prt(int segment, int bus)
+void acpi_pci_irq_del_prt(struct pci_bus *bus)
 {
 	struct acpi_prt_entry *entry, *tmp;
 
 	printk(KERN_DEBUG
 	       "ACPI: Delete PCI Interrupt Routing Table for %04x:%02x\n",
-	       segment, bus);
+	       pci_domain_nr(bus), bus->number);
 	spin_lock(&acpi_prt_lock);
 	list_for_each_entry_safe(entry, tmp, &acpi_prt_list, list) {
-		if (segment == entry->id.segment && bus == entry->id.bus) {
+		if (pci_domain_nr(bus) == entry->id.segment
+		    && bus->number == entry->id.bus) {
 			list_del(&entry->list);
 			kfree(entry);
 		}
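
Note: passing a struct pci_bus * instead of a raw (segment, bus) pair means callers can no longer hand in a mismatched pair; both numbers are recovered from the bus itself. The equivalence, as a trivial sketch (example is a hypothetical name):

	static void example(struct pci_bus *bus)
	{
		int segment = pci_domain_nr(bus);	/* was the 'segment' argument */
		int busnr = bus->number;		/* was the 'bus' argument */

		printk(KERN_DEBUG "PRT scope %04x:%02x\n", segment, busnr);
	}
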
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 196f97d00956..8a5bf3b356fa 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -63,9 +63,10 @@ static struct acpi_driver acpi_pci_root_driver = {
 
 struct acpi_pci_root {
 	struct list_head node;
-	struct acpi_device * device;
-	struct acpi_pci_id id;
+	struct acpi_device *device;
 	struct pci_bus *bus;
+	u16 segment;
+	u8 bus_nr;
 
 	u32 osc_support_set;	/* _OSC state of support bits */
 	u32 osc_control_set;	/* _OSC state of control bits */
@@ -82,7 +83,7 @@ static DEFINE_MUTEX(osc_lock);
 int acpi_pci_register_driver(struct acpi_pci_driver *driver)
 {
 	int n = 0;
-	struct list_head *entry;
+	struct acpi_pci_root *root;
 
 	struct acpi_pci_driver **pptr = &sub_driver;
 	while (*pptr)
@@ -92,9 +93,7 @@ int acpi_pci_register_driver(struct acpi_pci_driver *driver)
 	if (!driver->add)
 		return 0;
 
-	list_for_each(entry, &acpi_pci_roots) {
-		struct acpi_pci_root *root;
-		root = list_entry(entry, struct acpi_pci_root, node);
+	list_for_each_entry(root, &acpi_pci_roots, node) {
 		driver->add(root->device->handle);
 		n++;
 	}
@@ -106,7 +105,7 @@ EXPORT_SYMBOL(acpi_pci_register_driver);
 
 void acpi_pci_unregister_driver(struct acpi_pci_driver *driver)
 {
-	struct list_head *entry;
+	struct acpi_pci_root *root;
 
 	struct acpi_pci_driver **pptr = &sub_driver;
 	while (*pptr) {
@@ -120,28 +119,48 @@ void acpi_pci_unregister_driver(struct acpi_pci_driver *driver)
 	if (!driver->remove)
 		return;
 
-	list_for_each(entry, &acpi_pci_roots) {
-		struct acpi_pci_root *root;
-		root = list_entry(entry, struct acpi_pci_root, node);
+	list_for_each_entry(root, &acpi_pci_roots, node)
 		driver->remove(root->device->handle);
-	}
 }
 
 EXPORT_SYMBOL(acpi_pci_unregister_driver);
 
 acpi_handle acpi_get_pci_rootbridge_handle(unsigned int seg, unsigned int bus)
 {
-	struct acpi_pci_root *tmp;
+	struct acpi_pci_root *root;
 
-	list_for_each_entry(tmp, &acpi_pci_roots, node) {
-		if ((tmp->id.segment == (u16) seg) && (tmp->id.bus == (u16) bus))
-			return tmp->device->handle;
-	}
+	list_for_each_entry(root, &acpi_pci_roots, node)
+		if ((root->segment == (u16) seg) && (root->bus_nr == (u16) bus))
+			return root->device->handle;
 	return NULL;
 }
 
 EXPORT_SYMBOL_GPL(acpi_get_pci_rootbridge_handle);
 
+/**
+ * acpi_is_root_bridge - determine whether an ACPI CA node is a PCI root bridge
+ * @handle - the ACPI CA node in question.
+ *
+ * Note: we could make this API take a struct acpi_device * instead, but
+ * for now, it's more convenient to operate on an acpi_handle.
+ */
+int acpi_is_root_bridge(acpi_handle handle)
+{
+	int ret;
+	struct acpi_device *device;
+
+	ret = acpi_bus_get_device(handle, &device);
+	if (ret)
+		return 0;
+
+	ret = acpi_match_device_ids(device, root_device_ids);
+	if (ret)
+		return 0;
+	else
+		return 1;
+}
+EXPORT_SYMBOL_GPL(acpi_is_root_bridge);
+
 static acpi_status
 get_root_bridge_busnr_callback(struct acpi_resource *resource, void *data)
 {
@@ -161,19 +180,22 @@ get_root_bridge_busnr_callback(struct acpi_resource *resource, void *data)
 	return AE_OK;
 }
 
-static acpi_status try_get_root_bridge_busnr(acpi_handle handle, int *busnum)
+static acpi_status try_get_root_bridge_busnr(acpi_handle handle,
+					     unsigned long long *bus)
 {
 	acpi_status status;
+	int busnum;
 
-	*busnum = -1;
+	busnum = -1;
 	status =
 	    acpi_walk_resources(handle, METHOD_NAME__CRS,
-				get_root_bridge_busnr_callback, busnum);
+				get_root_bridge_busnr_callback, &busnum);
 	if (ACPI_FAILURE(status))
 		return status;
 	/* Check if we really get a bus number from _CRS */
-	if (*busnum == -1)
+	if (busnum == -1)
 		return AE_ERROR;
+	*bus = busnum;
 	return AE_OK;
 }
 
@@ -298,6 +320,7 @@ static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags)
 static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle)
 {
 	struct acpi_pci_root *root;
+
 	list_for_each_entry(root, &acpi_pci_roots, node) {
 		if (root->device->handle == handle)
 			return root;
@@ -305,6 +328,87 @@ static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle)
 	return NULL;
 }
 
+struct acpi_handle_node {
+	struct list_head node;
+	acpi_handle handle;
+};
+
+/**
+ * acpi_get_pci_dev - convert ACPI CA handle to struct pci_dev
+ * @handle: the handle in question
+ *
+ * Given an ACPI CA handle, the desired PCI device is located in the
+ * list of PCI devices.
+ *
+ * If the device is found, its reference count is increased and this
+ * function returns a pointer to its data structure. The caller must
+ * decrement the reference count by calling pci_dev_put().
+ * If no device is found, %NULL is returned.
+ */
+struct pci_dev *acpi_get_pci_dev(acpi_handle handle)
+{
+	int dev, fn;
+	unsigned long long adr;
+	acpi_status status;
+	acpi_handle phandle;
+	struct pci_bus *pbus;
+	struct pci_dev *pdev = NULL;
+	struct acpi_handle_node *node, *tmp;
+	struct acpi_pci_root *root;
+	LIST_HEAD(device_list);
+
+	/*
+	 * Walk up the ACPI CA namespace until we reach a PCI root bridge.
+	 */
+	phandle = handle;
+	while (!acpi_is_root_bridge(phandle)) {
+		node = kzalloc(sizeof(struct acpi_handle_node), GFP_KERNEL);
+		if (!node)
+			goto out;
+
+		INIT_LIST_HEAD(&node->node);
+		node->handle = phandle;
+		list_add(&node->node, &device_list);
+
+		status = acpi_get_parent(phandle, &phandle);
+		if (ACPI_FAILURE(status))
+			goto out;
+	}
+
+	root = acpi_pci_find_root(phandle);
+	if (!root)
+		goto out;
+
+	pbus = root->bus;
+
+	/*
+	 * Now, walk back down the PCI device tree until we return to our
+	 * original handle. Assumes that everything between the PCI root
+	 * bridge and the device we're looking for must be a P2P bridge.
+	 */
+	list_for_each_entry(node, &device_list, node) {
+		acpi_handle hnd = node->handle;
+		status = acpi_evaluate_integer(hnd, "_ADR", NULL, &adr);
+		if (ACPI_FAILURE(status))
+			goto out;
+		dev = (adr >> 16) & 0xffff;
+		fn = adr & 0xffff;
+
+		pdev = pci_get_slot(pbus, PCI_DEVFN(dev, fn));
+		if (hnd == handle)
+			break;
+
+		pbus = pdev->subordinate;
+		pci_dev_put(pdev);
+	}
+out:
+	list_for_each_entry_safe(node, tmp, &device_list, node)
+		kfree(node);
+
+	return pdev;
+}
+EXPORT_SYMBOL_GPL(acpi_get_pci_dev);
+
 /**
  * acpi_pci_osc_control_set - commit requested control to Firmware
  * @handle: acpi_handle for the target ACPI object
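
Note: acpi_get_pci_dev() works in two phases: walk up via acpi_get_parent() until acpi_is_root_bridge() matches, recording each hop, then replay the recorded hops downward with pci_get_slot(), assuming every intermediate node is a P2P bridge. The downward walk trusts that assumption; if pci_get_slot() ever returned NULL for an intermediate hop, the following pdev->subordinate dereference would oops. Typical usage, with report_pci_for_acpi as a hypothetical caller:

	static void report_pci_for_acpi(acpi_handle handle)
	{
		struct pci_dev *pdev = acpi_get_pci_dev(handle);

		if (!pdev)
			return;
		dev_info(&pdev->dev, "backs the given ACPI node\n");
		pci_dev_put(pdev);	/* the lookup returned a counted reference */
	}
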
@@ -363,31 +467,46 @@ EXPORT_SYMBOL(acpi_pci_osc_control_set);
 
 static int __devinit acpi_pci_root_add(struct acpi_device *device)
 {
-	int result = 0;
-	struct acpi_pci_root *root = NULL;
-	struct acpi_pci_root *tmp;
-	acpi_status status = AE_OK;
-	unsigned long long value = 0;
-	acpi_handle handle = NULL;
+	unsigned long long segment, bus;
+	acpi_status status;
+	int result;
+	struct acpi_pci_root *root;
+	acpi_handle handle;
 	struct acpi_device *child;
 	u32 flags, base_flags;
 
+	segment = 0;
+	status = acpi_evaluate_integer(device->handle, METHOD_NAME__SEG, NULL,
+				       &segment);
+	if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+		printk(KERN_ERR PREFIX "can't evaluate _SEG\n");
+		return -ENODEV;
+	}
 
-	if (!device)
-		return -EINVAL;
+	/* Check _CRS first, then _BBN. If no _BBN, default to zero. */
+	bus = 0;
+	status = try_get_root_bridge_busnr(device->handle, &bus);
+	if (ACPI_FAILURE(status)) {
+		status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL, &bus);
+		if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+			printk(KERN_ERR PREFIX
+			       "no bus number in _CRS and can't evaluate _BBN\n");
+			return -ENODEV;
+		}
+	}
 
 	root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
 	if (!root)
 		return -ENOMEM;
-	INIT_LIST_HEAD(&root->node);
 
+	INIT_LIST_HEAD(&root->node);
 	root->device = device;
+	root->segment = segment & 0xFFFF;
+	root->bus_nr = bus & 0xFF;
 	strcpy(acpi_device_name(device), ACPI_PCI_ROOT_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_PCI_ROOT_CLASS);
 	device->driver_data = root;
 
-	device->ops.bind = acpi_pci_bind;
-
 	/*
 	 * All supported architectures that use ACPI have support for
 	 * PCI domains, so we indicate this in _OSC support capabilities.
@@ -395,79 +514,6 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT;
 	acpi_pci_osc_support(root, flags);
 
-	/*
-	 * Segment
-	 * -------
-	 * Obtained via _SEG, if exists, otherwise assumed to be zero (0).
-	 */
-	status = acpi_evaluate_integer(device->handle, METHOD_NAME__SEG, NULL,
-				       &value);
-	switch (status) {
-	case AE_OK:
-		root->id.segment = (u16) value;
-		break;
-	case AE_NOT_FOUND:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Assuming segment 0 (no _SEG)\n"));
-		root->id.segment = 0;
-		break;
-	default:
-		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _SEG"));
-		result = -ENODEV;
-		goto end;
-	}
-
-	/*
-	 * Bus
-	 * ---
-	 * Obtained via _BBN, if exists, otherwise assumed to be zero (0).
-	 */
-	status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL,
-				       &value);
-	switch (status) {
-	case AE_OK:
-		root->id.bus = (u16) value;
-		break;
-	case AE_NOT_FOUND:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Assuming bus 0 (no _BBN)\n"));
-		root->id.bus = 0;
-		break;
-	default:
-		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BBN"));
-		result = -ENODEV;
-		goto end;
-	}
-
-	/* Some systems have wrong _BBN */
-	list_for_each_entry(tmp, &acpi_pci_roots, node) {
-		if ((tmp->id.segment == root->id.segment)
-		    && (tmp->id.bus == root->id.bus)) {
-			int bus = 0;
-			acpi_status status;
-
-			printk(KERN_ERR PREFIX
-			       "Wrong _BBN value, reboot"
-			       " and use option 'pci=noacpi'\n");
-
-			status = try_get_root_bridge_busnr(device->handle, &bus);
-			if (ACPI_FAILURE(status))
-				break;
-			if (bus != root->id.bus) {
-				printk(KERN_INFO PREFIX
-				       "PCI _CRS %d overrides _BBN 0\n", bus);
-				root->id.bus = bus;
-			}
-			break;
-		}
-	}
-	/*
-	 * Device & Function
-	 * -----------------
-	 * Obtained from _ADR (which has already been evaluated for us).
-	 */
-	root->id.device = device->pnp.bus_address >> 16;
-	root->id.function = device->pnp.bus_address & 0xFFFF;
-
 	/*
 	 * TBD: Need PCI interface for enumeration/configuration of roots.
 	 */
@@ -477,7 +523,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 
 	printk(KERN_INFO PREFIX "%s [%s] (%04x:%02x)\n",
 	       acpi_device_name(device), acpi_device_bid(device),
-	       root->id.segment, root->id.bus);
+	       root->segment, root->bus_nr);
 
 	/*
 	 * Scan the Root Bridge
@@ -486,11 +532,11 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	 * PCI namespace does not get created until this call is made (and
 	 * thus the root bridge's pci_dev does not exist).
 	 */
-	root->bus = pci_acpi_scan_root(device, root->id.segment, root->id.bus);
+	root->bus = pci_acpi_scan_root(device, segment, bus);
 	if (!root->bus) {
 		printk(KERN_ERR PREFIX
 		       "Bus %04x:%02x not present in PCI namespace\n",
-		       root->id.segment, root->id.bus);
+		       root->segment, root->bus_nr);
 		result = -ENODEV;
 		goto end;
 	}
@@ -500,7 +546,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	 * -----------------------
 	 * Thus binding the ACPI and PCI devices.
 	 */
-	result = acpi_pci_bind_root(device, &root->id, root->bus);
+	result = acpi_pci_bind_root(device);
 	if (result)
 		goto end;
 
@@ -511,8 +557,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	 */
 	status = acpi_get_handle(device->handle, METHOD_NAME__PRT, &handle);
 	if (ACPI_SUCCESS(status))
-		result = acpi_pci_irq_add_prt(device->handle, root->id.segment,
-					      root->id.bus);
+		result = acpi_pci_irq_add_prt(device->handle, root->bus);
 
 	/*
 	 * Scan and bind all _ADR-Based Devices
@@ -531,42 +576,28 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	if (flags != base_flags)
 		acpi_pci_osc_support(root, flags);
 
-      end:
-	if (result) {
-		if (!list_empty(&root->node))
-			list_del(&root->node);
-		kfree(root);
-	}
+	return 0;
 
+end:
+	if (!list_empty(&root->node))
+		list_del(&root->node);
+	kfree(root);
 	return result;
 }
 
 static int acpi_pci_root_start(struct acpi_device *device)
 {
-	struct acpi_pci_root *root;
+	struct acpi_pci_root *root = acpi_driver_data(device);
 
-
-	list_for_each_entry(root, &acpi_pci_roots, node) {
-		if (root->device == device) {
-			pci_bus_add_devices(root->bus);
-			return 0;
-		}
-	}
-	return -ENODEV;
+	pci_bus_add_devices(root->bus);
+	return 0;
 }
 
 static int acpi_pci_root_remove(struct acpi_device *device, int type)
 {
-	struct acpi_pci_root *root = NULL;
-
-
-	if (!device || !acpi_driver_data(device))
-		return -EINVAL;
-
-	root = acpi_driver_data(device);
+	struct acpi_pci_root *root = acpi_driver_data(device);
 
 	kfree(root);
-
 	return 0;
 }
 
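
Note: acpi_pci_root_add() now resolves segment and bus before any allocation, and prefers _CRS over _BBN; the old code consulted _CRS only after detecting a duplicate _BBN. The decision order, as a sketch with error handling elided (root_bus_number is a hypothetical helper; try_get_root_bridge_busnr is static to this file):

	static unsigned long long root_bus_number(acpi_handle handle)
	{
		unsigned long long bus = 0;

		/* _CRS wins; fall back to _BBN; absent both, assume bus 0 */
		if (ACPI_FAILURE(try_get_root_bridge_busnr(handle, &bus)))
			acpi_evaluate_integer(handle, METHOD_NAME__BBN,
					      NULL, &bus);
		return bus;
	}

The struct also shrinks from a full acpi_pci_id to a (segment, bus_nr) pair, since a root bridge's device/function were never meaningful for lookup.
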
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 56665a63bf19..d74365d4a6e7 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -194,7 +194,7 @@ static int acpi_power_get_list_state(struct acpi_handle_list *list, int *state)
 
 static int acpi_power_on(acpi_handle handle, struct acpi_device *dev)
 {
-	int result = 0, state;
+	int result = 0;
 	int found = 0;
 	acpi_status status = AE_OK;
 	struct acpi_power_resource *resource = NULL;
@@ -236,18 +236,6 @@ static int acpi_power_on(acpi_handle handle, struct acpi_device *dev)
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
-	if (!acpi_power_nocheck) {
-		/*
-		 * If acpi_power_nocheck is set, it is unnecessary to check
-		 * the power state after power transition.
-		 */
-		result = acpi_power_get_state(resource->device->handle,
-				&state);
-		if (result)
-			return result;
-		if (state != ACPI_POWER_RESOURCE_STATE_ON)
-			return -ENOEXEC;
-	}
 	/* Update the power resource's _device_ power state */
 	resource->device->power.state = ACPI_STATE_D0;
 
@@ -258,7 +246,7 @@ static int acpi_power_on(acpi_handle handle, struct acpi_device *dev)
 
 static int acpi_power_off_device(acpi_handle handle, struct acpi_device *dev)
 {
-	int result = 0, state;
+	int result = 0;
 	acpi_status status = AE_OK;
 	struct acpi_power_resource *resource = NULL;
 	struct list_head *node, *next;
@@ -293,18 +281,6 @@ static int acpi_power_off_device(acpi_handle handle, struct acpi_device *dev)
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
-	if (!acpi_power_nocheck) {
-		/*
-		 * If acpi_power_nocheck is set, it is unnecessary to check
-		 * the power state after power transition.
-		 */
-		result = acpi_power_get_state(handle, &state);
-		if (result)
-			return result;
-		if (state != ACPI_POWER_RESOURCE_STATE_OFF)
-			return -ENOEXEC;
-	}
-
 	/* Update the power resource's _device_ power state */
 	resource->device->power.state = ACPI_STATE_D3;
 
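
Note: both transition paths lose their post-transition state readback; the result of evaluating _ON/_OFF is now trusted and only the cached device power state is updated, which also removes the -ENOEXEC failure seen on firmware that reports stale resource state. The resulting power-on path, sketched (power_on_sketch is a hypothetical name; struct acpi_power_resource is private to this file):

	static int power_on_sketch(struct acpi_power_resource *resource)
	{
		acpi_status status;

		/* evaluate _ON and trust it; no state readback any more */
		status = acpi_evaluate_object(resource->device->handle,
					      "_ON", NULL, NULL);
		if (ACPI_FAILURE(status))
			return -ENODEV;

		resource->device->power.state = ACPI_STATE_D0;
		return 0;
	}
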
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 23f0fb84f1c1..84e0f3c07442 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -89,7 +89,7 @@ static int acpi_processor_handle_eject(struct acpi_processor *pr);
 
 static const struct acpi_device_id processor_device_ids[] = {
 	{ACPI_PROCESSOR_OBJECT_HID, 0},
-	{ACPI_PROCESSOR_HID, 0},
+	{"ACPI0007", 0},
 	{"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, processor_device_ids);
@@ -596,7 +596,21 @@ static int acpi_processor_get_info(struct acpi_device *device)
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 				  "No bus mastering arbitration control\n"));
 
-	if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_HID)) {
+	if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_OBJECT_HID)) {
+		/* Declared with "Processor" statement; match ProcessorID */
+		status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer);
+		if (ACPI_FAILURE(status)) {
+			printk(KERN_ERR PREFIX "Evaluating processor object\n");
+			return -ENODEV;
+		}
+
+		/*
+		 * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP.
+		 *      >>> 'acpi_get_processor_id(acpi_id, &id)' in
+		 *      arch/xxx/acpi.c
+		 */
+		pr->acpi_id = object.processor.proc_id;
+	} else {
 		/*
 		 * Declared with "Device" statement; match _UID.
 		 * Note that we don't handle string _UIDs yet.
@@ -611,20 +625,6 @@ static int acpi_processor_get_info(struct acpi_device *device)
 		}
 		device_declaration = 1;
 		pr->acpi_id = value;
-	} else {
-		/* Declared with "Processor" statement; match ProcessorID */
-		status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer);
-		if (ACPI_FAILURE(status)) {
-			printk(KERN_ERR PREFIX "Evaluating processor object\n");
-			return -ENODEV;
-		}
-
-		/*
-		 * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP.
-		 *      >>> 'acpi_get_processor_id(acpi_id, &id)' in
-		 *      arch/xxx/acpi.c
-		 */
-		pr->acpi_id = object.processor.proc_id;
 	}
 	cpu_index = get_cpu_id(pr->handle, device_declaration, pr->acpi_id);
 
@@ -649,7 +649,16 @@ static int acpi_processor_get_info(struct acpi_device *device)
 			return -ENODEV;
 		}
 	}
-
+	/*
+	 * On some boxes several processors use the same processor bus id.
+	 * But they are located in different scope. For example:
+	 * \_SB.SCK0.CPU0
+	 * \_SB.SCK1.CPU0
+	 * Rename the processor device bus id. And the new bus id will be
+	 * generated as the following format:
+	 * CPU+CPU ID.
	 */
+	sprintf(acpi_device_bid(device), "CPU%X", pr->id);
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id,
 			  pr->acpi_id));
 
@@ -731,6 +740,8 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
 	/* _PDC call should be done before doing anything else (if reqd.). */
 	arch_acpi_processor_init_pdc(pr);
 	acpi_processor_set_pdc(pr);
+	arch_acpi_processor_cleanup_pdc(pr);
+
 #ifdef CONFIG_CPU_FREQ
 	acpi_processor_ppc_has_changed(pr);
 #endif
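
Note: two changes interact here. The "Device"-declared processor HID becomes the literal "ACPI0007", and the probe now tests for the "Processor" statement's synthetic HID first instead of assuming it in the else branch. A hypothetical helper mirroring that dispatch (declared_with_processor_statement is not a name from this patch):

	static bool declared_with_processor_statement(struct acpi_device *d)
	{
		/* Processor (...) objects carry ACPI_PROCESSOR_OBJECT_HID
		   and encode their id in the object; Device declarations
		   carry _HID "ACPI0007" and are matched via _UID. */
		return !strcmp(acpi_device_hid(d), ACPI_PROCESSOR_OBJECT_HID);
	}

The bus-id rename further down means a system exposing both \_SB.SCK0.CPU0 and \_SB.SCK1.CPU0 ends up with distinct sysfs ids ("CPU0", "CPU1", ...) derived from the kernel CPU number rather than the duplicated ACPI name.
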
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 10a2d913635a..0efa59e7e3af 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -139,7 +139,7 @@ static void acpi_safe_halt(void)
  * are affected too. We pick the most conservative approach: we assume
  * that the local APIC stops in both C2 and C3.
  */
-static void acpi_timer_check_state(int state, struct acpi_processor *pr,
+static void lapic_timer_check_state(int state, struct acpi_processor *pr,
 				   struct acpi_processor_cx *cx)
 {
 	struct acpi_processor_power *pwr = &pr->power;
@@ -162,7 +162,7 @@ static void acpi_timer_check_state(int state, struct acpi_processor *pr,
 	pr->power.timer_broadcast_on_state = state;
 }
 
-static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
+static void lapic_timer_propagate_broadcast(struct acpi_processor *pr)
 {
 	unsigned long reason;
 
@@ -173,7 +173,7 @@ static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
 }
 
 /* Power(C) State timer broadcast control */
-static void acpi_state_timer_broadcast(struct acpi_processor *pr,
+static void lapic_timer_state_broadcast(struct acpi_processor *pr,
 				       struct acpi_processor_cx *cx,
 				       int broadcast)
 {
@@ -190,10 +190,10 @@ static void acpi_state_timer_broadcast(struct acpi_processor *pr,
 
 #else
 
-static void acpi_timer_check_state(int state, struct acpi_processor *pr,
+static void lapic_timer_check_state(int state, struct acpi_processor *pr,
 				   struct acpi_processor_cx *cstate) { }
-static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) { }
-static void acpi_state_timer_broadcast(struct acpi_processor *pr,
+static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { }
+static void lapic_timer_state_broadcast(struct acpi_processor *pr,
 				       struct acpi_processor_cx *cx,
 				       int broadcast)
 {
@@ -515,7 +515,8 @@ static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
 static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
 					   struct acpi_processor_cx *cx)
 {
-	static int bm_check_flag;
+	static int bm_check_flag = -1;
+	static int bm_control_flag = -1;
 
 
 	if (!cx->address)
@@ -545,12 +546,14 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
 	}
 
 	/* All the logic here assumes flags.bm_check is same across all CPUs */
-	if (!bm_check_flag) {
+	if (bm_check_flag == -1) {
 		/* Determine whether bm_check is needed based on CPU */
 		acpi_processor_power_init_bm_check(&(pr->flags), pr->id);
 		bm_check_flag = pr->flags.bm_check;
+		bm_control_flag = pr->flags.bm_control;
 	} else {
 		pr->flags.bm_check = bm_check_flag;
+		pr->flags.bm_control = bm_control_flag;
 	}
 
 	if (pr->flags.bm_check) {
@@ -614,29 +617,25 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
 		switch (cx->type) {
 		case ACPI_STATE_C1:
 			cx->valid = 1;
-			acpi_timer_check_state(i, pr, cx);
 			break;
 
 		case ACPI_STATE_C2:
 			acpi_processor_power_verify_c2(cx);
-			if (cx->valid)
-				acpi_timer_check_state(i, pr, cx);
 			break;
 
 		case ACPI_STATE_C3:
 			acpi_processor_power_verify_c3(pr, cx);
-			if (cx->valid)
-				acpi_timer_check_state(i, pr, cx);
 			break;
 		}
-		if (cx->valid)
-			tsc_check_state(cx->type);
+		if (!cx->valid)
+			continue;
 
-		if (cx->valid)
-			working++;
+		lapic_timer_check_state(i, pr, cx);
+		tsc_check_state(cx->type);
+		working++;
 	}
 
-	acpi_propagate_timer_broadcast(pr);
+	lapic_timer_propagate_broadcast(pr);
 
 	return (working);
 }
@@ -839,7 +838,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 		return 0;
 	}
 
-	acpi_state_timer_broadcast(pr, cx, 1);
+	lapic_timer_state_broadcast(pr, cx, 1);
 	kt1 = ktime_get_real();
 	acpi_idle_do_entry(cx);
 	kt2 = ktime_get_real();
@@ -847,7 +846,7 @@
 
 	local_irq_enable();
 	cx->usage++;
-	acpi_state_timer_broadcast(pr, cx, 0);
+	lapic_timer_state_broadcast(pr, cx, 0);
 
 	return idle_time;
 }
@@ -892,7 +891,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 	 * Must be done before busmaster disable as we might need to
 	 * access HPET !
 	 */
-	acpi_state_timer_broadcast(pr, cx, 1);
+	lapic_timer_state_broadcast(pr, cx, 1);
 
 	if (cx->type == ACPI_STATE_C3)
 		ACPI_FLUSH_CPU_CACHE();
@@ -914,7 +913,7 @@
 
 	cx->usage++;
 
-	acpi_state_timer_broadcast(pr, cx, 0);
+	lapic_timer_state_broadcast(pr, cx, 0);
 	cx->time += sleep_ticks;
 	return idle_time;
 }
@@ -981,7 +980,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
 	 * Must be done before busmaster disable as we might need to
 	 * access HPET !
 	 */
-	acpi_state_timer_broadcast(pr, cx, 1);
+	lapic_timer_state_broadcast(pr, cx, 1);
 
 	kt1 = ktime_get_real();
 	/*
@@ -1026,7 +1025,7 @@
 
 	cx->usage++;
 
-	acpi_state_timer_broadcast(pr, cx, 0);
+	lapic_timer_state_broadcast(pr, cx, 0);
 	cx->time += sleep_ticks;
 	return idle_time;
 }
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 8ff510b91d88..781435d7e369 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -95,7 +95,7 @@ acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, cha
 }
 static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL);
 
-static int acpi_bus_hot_remove_device(void *context)
+static void acpi_bus_hot_remove_device(void *context)
 {
 	struct acpi_device *device;
 	acpi_handle handle = context;
@@ -104,10 +104,10 @@ static int acpi_bus_hot_remove_device(void *context)
 	acpi_status status = AE_OK;
 
 	if (acpi_bus_get_device(handle, &device))
-		return 0;
+		return;
 
 	if (!device)
-		return 0;
+		return;
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 		"Hot-removing device %s...\n", dev_name(&device->dev)));
@@ -115,7 +115,7 @@ static int acpi_bus_hot_remove_device(void *context)
 	if (acpi_bus_trim(device, 1)) {
 		printk(KERN_ERR PREFIX
 				"Removing device failed\n");
-		return -1;
+		return;
 	}
 
 	/* power off device */
@@ -142,9 +142,10 @@ static int acpi_bus_hot_remove_device(void *context)
 	 */
 	status = acpi_evaluate_object(handle, "_EJ0", &arg_list, NULL);
 	if (ACPI_FAILURE(status))
-		return -ENODEV;
+		printk(KERN_WARNING PREFIX
+				"Eject device failed\n");
 
-	return 0;
+	return;
 }
 
 static ssize_t
@@ -155,7 +156,6 @@ acpi_eject_store(struct device *d, struct device_attribute *attr,
 	acpi_status status;
 	acpi_object_type type = 0;
 	struct acpi_device *acpi_device = to_acpi_device(d);
-	struct task_struct *task;
 
 	if ((!count) || (buf[0] != '1')) {
 		return -EINVAL;
@@ -172,11 +172,7 @@ acpi_eject_store(struct device *d, struct device_attribute *attr,
 		goto err;
 	}
 
-	/* remove the device in another thread to fix the deadlock issue */
-	task = kthread_run(acpi_bus_hot_remove_device,
-				acpi_device->handle, "acpi_hot_remove_device");
-	if (IS_ERR(task))
-		ret = PTR_ERR(task);
+	acpi_os_hotplug_execute(acpi_bus_hot_remove_device, acpi_device->handle);
 err:
 	return ret;
 }
@@ -198,12 +194,12 @@ acpi_device_path_show(struct device *dev, struct device_attribute *attr, char *b
 	int result;
 
 	result = acpi_get_name(acpi_dev->handle, ACPI_FULL_PATHNAME, &path);
-	if(result)
+	if (result)
 		goto end;
 
 	result = sprintf(buf, "%s\n", (char*)path.pointer);
 	kfree(path.pointer);
- end:
+end:
 	return result;
 }
 static DEVICE_ATTR(path, 0444, acpi_device_path_show, NULL);
@@ -217,21 +213,21 @@ static int acpi_device_setup_files(struct acpi_device *dev)
 	/*
 	 * Devices gotten from FADT don't have a "path" attribute
 	 */
-	if(dev->handle) {
+	if (dev->handle) {
 		result = device_create_file(&dev->dev, &dev_attr_path);
-		if(result)
+		if (result)
 			goto end;
 	}
 
-	if(dev->flags.hardware_id) {
+	if (dev->flags.hardware_id) {
 		result = device_create_file(&dev->dev, &dev_attr_hid);
-		if(result)
+		if (result)
 			goto end;
 	}
 
-	if (dev->flags.hardware_id || dev->flags.compatible_ids){
+	if (dev->flags.hardware_id || dev->flags.compatible_ids) {
 		result = device_create_file(&dev->dev, &dev_attr_modalias);
-		if(result)
+		if (result)
 			goto end;
 	}
 
@@ -242,7 +238,7 @@ static int acpi_device_setup_files(struct acpi_device *dev)
 	status = acpi_get_handle(dev->handle, "_EJ0", &temp);
 	if (ACPI_SUCCESS(status))
 		result = device_create_file(&dev->dev, &dev_attr_eject);
- end:
+end:
 	return result;
 }
 
@@ -262,9 +258,9 @@ static void acpi_device_remove_files(struct acpi_device *dev)
 	if (dev->flags.hardware_id || dev->flags.compatible_ids)
 		device_remove_file(&dev->dev, &dev_attr_modalias);
 
-	if(dev->flags.hardware_id)
+	if (dev->flags.hardware_id)
 		device_remove_file(&dev->dev, &dev_attr_hid);
-	if(dev->handle)
+	if (dev->handle)
 		device_remove_file(&dev->dev, &dev_attr_path);
 }
 /* --------------------------------------------------------------------------
@@ -512,7 +508,7 @@ static int acpi_device_register(struct acpi_device *device,
 			break;
 		}
 	}
-	if(!found) {
+	if (!found) {
 		acpi_device_bus_id = new_bus_id;
 		strcpy(acpi_device_bus_id->bus_id, device->flags.hardware_id ? device->pnp.hardware_id : "device");
 		acpi_device_bus_id->instance_no = 0;
@@ -530,22 +526,21 @@ static int acpi_device_register(struct acpi_device *device,
 	if (device->parent)
 		device->dev.parent = &parent->dev;
 	device->dev.bus = &acpi_bus_type;
-	device_initialize(&device->dev);
 	device->dev.release = &acpi_device_release;
-	result = device_add(&device->dev);
-	if(result) {
-		dev_err(&device->dev, "Error adding device\n");
+	result = device_register(&device->dev);
+	if (result) {
+		dev_err(&device->dev, "Error registering device\n");
 		goto end;
 	}
 
 	result = acpi_device_setup_files(device);
-	if(result)
+	if (result)
 		printk(KERN_ERR PREFIX "Error creating sysfs interface for device %s\n",
 		       dev_name(&device->dev));
 
 	device->removal_type = ACPI_BUS_REMOVAL_NORMAL;
 	return 0;
- end:
+end:
 	mutex_lock(&acpi_device_lock);
 	if (device->parent)
 		list_del(&device->node);
@@ -577,7 +572,7 @@ static void acpi_device_unregister(struct acpi_device *device, int type)
  * @device: the device to add and initialize
  * @driver: driver for the device
 *
- * Used to initialize a device via its device driver. Called whenever a
+ * Used to initialize a device via its device driver.  Called whenever a
 * driver is bound to a device.  Invokes the driver's add() ops.
 */
 static int
@@ -585,7 +580,6 @@ acpi_bus_driver_init(struct acpi_device *device, struct acpi_driver *driver)
 {
 	int result = 0;
 
-
 	if (!device || !driver)
 		return -EINVAL;
 
@@ -802,7 +796,7 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
 	if (!acpi_match_device_ids(device, button_device_ids))
 		device->wakeup.flags.run_wake = 1;
 
- end:
+end:
 	if (ACPI_FAILURE(status))
 		device->flags.wake_capable = 0;
 	return 0;
@@ -1070,7 +1064,7 @@ static void acpi_device_set_id(struct acpi_device *device,
 		break;
 	}
 
-	/* 
+	/*
 	 * \_SB
 	 * ----
 	 * Fix for the system root bus device -- the only root-level device.
@@ -1320,7 +1314,7 @@ acpi_add_single_object(struct acpi_device **child,
 			device->parent->ops.bind(device);
 	}
 
- end:
+end:
 	if (!result)
 		*child = device;
 	else {
@@ -1464,7 +1458,6 @@ acpi_bus_add(struct acpi_device **child,
 
 	return result;
 }
-
 EXPORT_SYMBOL(acpi_bus_add);
 
 int acpi_bus_start(struct acpi_device *device)
@@ -1484,7 +1477,6 @@ int acpi_bus_start(struct acpi_device *device)
 	}
 	return result;
 }
-
 EXPORT_SYMBOL(acpi_bus_start);
 
 int acpi_bus_trim(struct acpi_device *start, int rmdevice)
@@ -1542,7 +1534,6 @@ int acpi_bus_trim(struct acpi_device *start, int rmdevice)
 }
 EXPORT_SYMBOL_GPL(acpi_bus_trim);
 
-
 static int acpi_bus_scan_fixed(struct acpi_device *root)
 {
 	int result = 0;
@@ -1610,6 +1601,6 @@ int __init acpi_scan_init(void)
 	if (result)
 		acpi_device_unregister(acpi_root, ACPI_BUS_REMOVAL_NORMAL);
 
- Done:
+Done:
 	return result;
 }
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 1bdfb37377e3..8851315ce858 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -76,6 +76,7 @@ MODULE_LICENSE("GPL");
76static int brightness_switch_enabled = 1; 76static int brightness_switch_enabled = 1;
77module_param(brightness_switch_enabled, bool, 0644); 77module_param(brightness_switch_enabled, bool, 0644);
78 78
79static int register_count = 0;
79static int acpi_video_bus_add(struct acpi_device *device); 80static int acpi_video_bus_add(struct acpi_device *device);
80static int acpi_video_bus_remove(struct acpi_device *device, int type); 81static int acpi_video_bus_remove(struct acpi_device *device, int type);
81static int acpi_video_resume(struct acpi_device *device); 82static int acpi_video_resume(struct acpi_device *device);
@@ -586,6 +587,14 @@ static struct dmi_system_id video_dmi_table[] __initdata = {
586 DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5315"), 587 DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5315"),
587 }, 588 },
588 }, 589 },
590 {
591 .callback = video_set_bqc_offset,
592 .ident = "Acer Aspire 7720",
593 .matches = {
594 DMI_MATCH(DMI_BOARD_VENDOR, "Acer"),
595 DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7720"),
596 },
597 },
589 {} 598 {}
590}; 599};
591 600
@@ -976,6 +985,11 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
976 device->backlight->props.max_brightness = device->brightness->count-3; 985 device->backlight->props.max_brightness = device->brightness->count-3;
977 kfree(name); 986 kfree(name);
978 987
988 result = sysfs_create_link(&device->backlight->dev.kobj,
989 &device->dev->dev.kobj, "device");
990 if (result)
991 printk(KERN_ERR PREFIX "Create sysfs link\n");
992
979 device->cdev = thermal_cooling_device_register("LCD", 993 device->cdev = thermal_cooling_device_register("LCD",
980 device->dev, &video_cooling_ops); 994 device->dev, &video_cooling_ops);
981 if (IS_ERR(device->cdev)) 995 if (IS_ERR(device->cdev))
@@ -1054,15 +1068,15 @@ static void acpi_video_bus_find_cap(struct acpi_video_bus *video)
1054static int acpi_video_bus_check(struct acpi_video_bus *video) 1068static int acpi_video_bus_check(struct acpi_video_bus *video)
1055{ 1069{
1056 acpi_status status = -ENOENT; 1070 acpi_status status = -ENOENT;
1057 struct device *dev; 1071 struct pci_dev *dev;
1058 1072
1059 if (!video) 1073 if (!video)
1060 return -EINVAL; 1074 return -EINVAL;
1061 1075
1062 dev = acpi_get_physical_pci_device(video->device->handle); 1076 dev = acpi_get_pci_dev(video->device->handle);
1063 if (!dev) 1077 if (!dev)
1064 return -ENODEV; 1078 return -ENODEV;
1065 put_device(dev); 1079 pci_dev_put(dev);
1066 1080
1067 /* Since there is no HID, CID and so on for VGA driver, we have 1081 /* Since there is no HID, CID and so on for VGA driver, we have
1068 * to check well known required nodes. 1082 * to check well known required nodes.
@@ -1990,6 +2004,7 @@ static int acpi_video_bus_put_one_device(struct acpi_video_device *device)
1990 status = acpi_remove_notify_handler(device->dev->handle, 2004 status = acpi_remove_notify_handler(device->dev->handle,
1991 ACPI_DEVICE_NOTIFY, 2005 ACPI_DEVICE_NOTIFY,
1992 acpi_video_device_notify); 2006 acpi_video_device_notify);
2007 sysfs_remove_link(&device->backlight->dev.kobj, "device");
1993 backlight_device_unregister(device->backlight); 2008 backlight_device_unregister(device->backlight);
1994 if (device->cdev) { 2009 if (device->cdev) {
1995 sysfs_remove_link(&device->dev->dev.kobj, 2010 sysfs_remove_link(&device->dev->dev.kobj,
@@ -2318,6 +2333,13 @@ static int __init intel_opregion_present(void)
2318int acpi_video_register(void) 2333int acpi_video_register(void)
2319{ 2334{
2320 int result = 0; 2335 int result = 0;
2336 if (register_count) {
2337 /*
 2338	 * if acpi_video_register() has already been called,
 2339	 * don't register the acpi_video_bus again and return no error.
2340 */
2341 return 0;
2342 }
2321 2343
2322 acpi_video_dir = proc_mkdir(ACPI_VIDEO_CLASS, acpi_root_dir); 2344 acpi_video_dir = proc_mkdir(ACPI_VIDEO_CLASS, acpi_root_dir);
2323 if (!acpi_video_dir) 2345 if (!acpi_video_dir)
@@ -2329,10 +2351,35 @@ int acpi_video_register(void)
2329 return -ENODEV; 2351 return -ENODEV;
2330 } 2352 }
2331 2353
2354 /*
 2355	 * When the acpi_video_bus is loaded successfully, set
 2356	 * the reference counter.
2357 */
2358 register_count = 1;
2359
2332 return 0; 2360 return 0;
2333} 2361}
2334EXPORT_SYMBOL(acpi_video_register); 2362EXPORT_SYMBOL(acpi_video_register);
2335 2363
2364void acpi_video_unregister(void)
2365{
2366 if (!register_count) {
2367 /*
 2368	 * If the acpi_video_bus is already unloaded, don't
 2369	 * unload it again; return directly.
2370 */
2371 return;
2372 }
2373 acpi_bus_unregister_driver(&acpi_video_bus);
2374
2375 remove_proc_entry(ACPI_VIDEO_CLASS, acpi_root_dir);
2376
2377 register_count = 0;
2378
2379 return;
2380}
2381EXPORT_SYMBOL(acpi_video_unregister);
2382
2336/* 2383/*
2337 * This is kind of nasty. Hardware using Intel chipsets may require 2384 * This is kind of nasty. Hardware using Intel chipsets may require
2338 * the video opregion code to be run first in order to initialise 2385 * the video opregion code to be run first in order to initialise
@@ -2350,16 +2397,12 @@ static int __init acpi_video_init(void)
2350 return acpi_video_register(); 2397 return acpi_video_register();
2351} 2398}
2352 2399
2353void acpi_video_exit(void) 2400static void __exit acpi_video_exit(void)
2354{ 2401{
2355 2402 acpi_video_unregister();
2356 acpi_bus_unregister_driver(&acpi_video_bus);
2357
2358 remove_proc_entry(ACPI_VIDEO_CLASS, acpi_root_dir);
2359 2403
2360 return; 2404 return;
2361} 2405}
2362EXPORT_SYMBOL(acpi_video_exit);
2363 2406
2364module_init(acpi_video_init); 2407module_init(acpi_video_init);
2365module_exit(acpi_video_exit); 2408module_exit(acpi_video_exit);
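
The register_count flag above turns acpi_video_register()/acpi_video_unregister() into an idempotent pair, so callers such as the i915 opregion code can request registration or unregistration without tracking whether it already happened. A minimal sketch of the same guard pattern, with illustrative names (do_low_level_register/unregister stand in for the real bus driver calls; none of these symbols come from the patch):

    static int my_register_count;           /* mirrors register_count above */

    int my_driver_register(void)
    {
            if (my_register_count)          /* already registered: succeed quietly */
                    return 0;
            if (do_low_level_register())    /* illustrative helper */
                    return -ENODEV;
            my_register_count = 1;          /* remember the registration */
            return 0;
    }

    void my_driver_unregister(void)
    {
            if (!my_register_count)         /* never registered or already gone */
                    return;
            do_low_level_unregister();      /* illustrative helper */
            my_register_count = 0;
    }
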
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 09737275e25f..7cd2b63435ea 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -10,7 +10,7 @@
 10 * assigned                                                      10 * assigned
11 * 11 *
12 * After PCI devices are glued with ACPI devices 12 * After PCI devices are glued with ACPI devices
13 * acpi_get_physical_pci_device() can be called to identify ACPI graphics 13 * acpi_get_pci_dev() can be called to identify ACPI graphics
14 * devices for which a real graphics card is plugged in 14 * devices for which a real graphics card is plugged in
15 * 15 *
16 * Now acpi_video_get_capabilities() can be called to check which 16 * Now acpi_video_get_capabilities() can be called to check which
@@ -36,6 +36,7 @@
36 36
37#include <linux/acpi.h> 37#include <linux/acpi.h>
38#include <linux/dmi.h> 38#include <linux/dmi.h>
39#include <linux/pci.h>
39 40
40ACPI_MODULE_NAME("video"); 41ACPI_MODULE_NAME("video");
41#define _COMPONENT ACPI_VIDEO_COMPONENT 42#define _COMPONENT ACPI_VIDEO_COMPONENT
@@ -109,7 +110,7 @@ static acpi_status
109find_video(acpi_handle handle, u32 lvl, void *context, void **rv) 110find_video(acpi_handle handle, u32 lvl, void *context, void **rv)
110{ 111{
111 long *cap = context; 112 long *cap = context;
112 struct device *dev; 113 struct pci_dev *dev;
113 struct acpi_device *acpi_dev; 114 struct acpi_device *acpi_dev;
114 115
115 const struct acpi_device_id video_ids[] = { 116 const struct acpi_device_id video_ids[] = {
@@ -120,10 +121,10 @@ find_video(acpi_handle handle, u32 lvl, void *context, void **rv)
120 return AE_OK; 121 return AE_OK;
121 122
122 if (!acpi_match_device_ids(acpi_dev, video_ids)) { 123 if (!acpi_match_device_ids(acpi_dev, video_ids)) {
123 dev = acpi_get_physical_pci_device(handle); 124 dev = acpi_get_pci_dev(handle);
124 if (!dev) 125 if (!dev)
125 return AE_OK; 126 return AE_OK;
126 put_device(dev); 127 pci_dev_put(dev);
127 *cap |= acpi_is_video_device(acpi_dev); 128 *cap |= acpi_is_video_device(acpi_dev);
128 } 129 }
129 return AE_OK; 130 return AE_OK;
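
Note how the conversion keeps the get/put pairing intact: acpi_get_pci_dev() returns a referenced struct pci_dev, and since the result here is only used as an existence test, the reference is dropped immediately with pci_dev_put(). A minimal sketch of that pattern (the helper name is an assumption, and handle is assumed to be a valid acpi_handle):

    /* Existence test only: take the reference, drop it at once. */
    static bool handle_has_pci_dev(acpi_handle handle)
    {
            struct pci_dev *dev = acpi_get_pci_dev(handle);

            if (!dev)
                    return false;   /* no PCI device glued to this ACPI handle */

            pci_dev_put(dev);       /* balance the reference taken above */
            return true;
    }
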
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 9533f43a30bb..52d953eb30c3 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -1048,8 +1048,6 @@ static int mxser_open(struct tty_struct *tty, struct file *filp)
1048 if (retval) 1048 if (retval)
1049 return retval; 1049 return retval;
1050 1050
1051 /* unmark here for very high baud rate (ex. 921600 bps) used */
1052 tty->low_latency = 1;
1053 return 0; 1051 return 0;
1054} 1052}
1055 1053
diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index d6102b644b55..574f1c79b6e6 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -1591,8 +1591,6 @@ static int ntty_open(struct tty_struct *tty, struct file *file)
1591 1591
1592 /* Enable interrupt downlink for channel */ 1592 /* Enable interrupt downlink for channel */
1593 if (port->port.count == 1) { 1593 if (port->port.count == 1) {
1594 /* FIXME: is this needed now ? */
1595 tty->low_latency = 1;
1596 tty->driver_data = port; 1594 tty->driver_data = port;
1597 tty_port_tty_set(&port->port, tty); 1595 tty_port_tty_set(&port->port, tty);
1598 DBG1("open: %d", port->token_dl); 1596 DBG1("open: %d", port->token_dl);
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 1386625fc4ca..a2e67e6df3a1 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -467,7 +467,6 @@ static unsigned int free_tbuf_count(struct slgt_info *info);
467static unsigned int tbuf_bytes(struct slgt_info *info); 467static unsigned int tbuf_bytes(struct slgt_info *info);
468static void reset_tbufs(struct slgt_info *info); 468static void reset_tbufs(struct slgt_info *info);
469static void tdma_reset(struct slgt_info *info); 469static void tdma_reset(struct slgt_info *info);
470static void tdma_start(struct slgt_info *info);
471static void tx_load(struct slgt_info *info, const char *buf, unsigned int count); 470static void tx_load(struct slgt_info *info, const char *buf, unsigned int count);
472 471
473static void get_signals(struct slgt_info *info); 472static void get_signals(struct slgt_info *info);
@@ -795,6 +794,18 @@ static void set_termios(struct tty_struct *tty, struct ktermios *old_termios)
795 } 794 }
796} 795}
797 796
797static void update_tx_timer(struct slgt_info *info)
798{
799 /*
800 * use worst case speed of 1200bps to calculate transmit timeout
801 * based on data in buffers (tbuf_bytes) and FIFO (128 bytes)
802 */
803 if (info->params.mode == MGSL_MODE_HDLC) {
804 int timeout = (tbuf_bytes(info) * 7) + 1000;
805 mod_timer(&info->tx_timer, jiffies + msecs_to_jiffies(timeout));
806 }
807}
808
798static int write(struct tty_struct *tty, 809static int write(struct tty_struct *tty,
799 const unsigned char *buf, int count) 810 const unsigned char *buf, int count)
800{ 811{
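
The 7 ms/byte constant in update_tx_timer() is consistent with the stated 1200 bps worst case: in synchronous HDLC a byte is 8 bits, 8 / 1200 s is about 6.7 ms, rounded up to 7 ms, and the fixed 1000 ms term roughly covers a full 128-byte FIFO (128 * 7 = 896 ms) plus slack. A sketch of the same computation with the constants named (the names are illustrative, not from the driver):

    #define WORST_CASE_BPS  1200    /* slowest supported link speed        */
    #define MS_PER_BYTE     7       /* ceil(8 bits * 1000 / 1200 bps)      */
    #define FIFO_SLACK_MS   1000    /* ~128-byte FIFO at 7 ms/byte, + slack */

    static unsigned int tx_timeout_ms(unsigned int buffered_bytes)
    {
            return buffered_bytes * MS_PER_BYTE + FIFO_SLACK_MS;
    }
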
@@ -838,8 +849,18 @@ start:
838 spin_lock_irqsave(&info->lock,flags); 849 spin_lock_irqsave(&info->lock,flags);
839 if (!info->tx_active) 850 if (!info->tx_active)
840 tx_start(info); 851 tx_start(info);
841 else 852 else if (!(rd_reg32(info, TDCSR) & BIT0)) {
842 tdma_start(info); 853 /* transmit still active but transmit DMA stopped */
854 unsigned int i = info->tbuf_current;
855 if (!i)
856 i = info->tbuf_count;
857 i--;
 858		/* if DMA buf unsent, must try again later after tx idle */
859 if (desc_count(info->tbufs[i]))
860 ret = 0;
861 }
862 if (ret > 0)
863 update_tx_timer(info);
843 spin_unlock_irqrestore(&info->lock,flags); 864 spin_unlock_irqrestore(&info->lock,flags);
844 } 865 }
845 866
@@ -1502,10 +1523,9 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev)
1502 /* save start time for transmit timeout detection */ 1523 /* save start time for transmit timeout detection */
1503 dev->trans_start = jiffies; 1524 dev->trans_start = jiffies;
1504 1525
1505 /* start hardware transmitter if necessary */
1506 spin_lock_irqsave(&info->lock,flags); 1526 spin_lock_irqsave(&info->lock,flags);
1507 if (!info->tx_active) 1527 tx_start(info);
1508 tx_start(info); 1528 update_tx_timer(info);
1509 spin_unlock_irqrestore(&info->lock,flags); 1529 spin_unlock_irqrestore(&info->lock,flags);
1510 1530
1511 return 0; 1531 return 0;
@@ -3946,50 +3966,19 @@ static void tx_start(struct slgt_info *info)
3946 slgt_irq_on(info, IRQ_TXUNDER + IRQ_TXIDLE); 3966 slgt_irq_on(info, IRQ_TXUNDER + IRQ_TXIDLE);
3947 /* clear tx idle and underrun status bits */ 3967 /* clear tx idle and underrun status bits */
3948 wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER)); 3968 wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER));
3949 if (info->params.mode == MGSL_MODE_HDLC)
3950 mod_timer(&info->tx_timer, jiffies +
3951 msecs_to_jiffies(5000));
3952 } else { 3969 } else {
3953 slgt_irq_off(info, IRQ_TXDATA); 3970 slgt_irq_off(info, IRQ_TXDATA);
3954 slgt_irq_on(info, IRQ_TXIDLE); 3971 slgt_irq_on(info, IRQ_TXIDLE);
3955 /* clear tx idle status bit */ 3972 /* clear tx idle status bit */
3956 wr_reg16(info, SSR, IRQ_TXIDLE); 3973 wr_reg16(info, SSR, IRQ_TXIDLE);
3957 } 3974 }
3958 tdma_start(info); 3975 /* set 1st descriptor address and start DMA */
3976 wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
3977 wr_reg32(info, TDCSR, BIT2 + BIT0);
3959 info->tx_active = true; 3978 info->tx_active = true;
3960 } 3979 }
3961} 3980}
3962 3981
3963/*
3964 * start transmit DMA if inactive and there are unsent buffers
3965 */
3966static void tdma_start(struct slgt_info *info)
3967{
3968 unsigned int i;
3969
3970 if (rd_reg32(info, TDCSR) & BIT0)
3971 return;
3972
3973 /* transmit DMA inactive, check for unsent buffers */
3974 i = info->tbuf_start;
3975 while (!desc_count(info->tbufs[i])) {
3976 if (++i == info->tbuf_count)
3977 i = 0;
3978 if (i == info->tbuf_current)
3979 return;
3980 }
3981 info->tbuf_start = i;
3982
3983 /* there are unsent buffers, start transmit DMA */
3984
3985 /* reset needed if previous error condition */
3986 tdma_reset(info);
3987
3988 /* set 1st descriptor address */
3989 wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
3990 wr_reg32(info, TDCSR, BIT2 + BIT0); /* IRQ + DMA enable */
3991}
3992
3993static void tx_stop(struct slgt_info *info) 3982static void tx_stop(struct slgt_info *info)
3994{ 3983{
3995 unsigned short val; 3984 unsigned short val;
@@ -5004,8 +4993,7 @@ static void tx_timeout(unsigned long context)
5004 info->icount.txtimeout++; 4993 info->icount.txtimeout++;
5005 } 4994 }
5006 spin_lock_irqsave(&info->lock,flags); 4995 spin_lock_irqsave(&info->lock,flags);
5007 info->tx_active = false; 4996 tx_stop(info);
5008 info->tx_count = 0;
5009 spin_unlock_irqrestore(&info->lock,flags); 4997 spin_unlock_irqrestore(&info->lock,flags);
5010 4998
5011#if SYNCLINK_GENERIC_HDLC 4999#if SYNCLINK_GENERIC_HDLC
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 62dadfc95e34..4e862a75f7ff 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -193,7 +193,7 @@ int tty_port_block_til_ready(struct tty_port *port,
193{ 193{
194 int do_clocal = 0, retval; 194 int do_clocal = 0, retval;
195 unsigned long flags; 195 unsigned long flags;
196 DECLARE_WAITQUEUE(wait, current); 196 DEFINE_WAIT(wait);
197 int cd; 197 int cd;
198 198
199 /* block if port is in the process of being closed */ 199 /* block if port is in the process of being closed */
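
DEFINE_WAIT differs from DECLARE_WAITQUEUE in that it installs autoremove_wake_function, which pairs with the prepare_to_wait()/finish_wait() style of waiting used in this function. A minimal sketch of that idiom (my_waitq and condition are illustrative, not from tty_port):

    DEFINE_WAIT(wait);

    while (1) {
            prepare_to_wait(&my_waitq, &wait, TASK_INTERRUPTIBLE);
            if (condition)                  /* illustrative wake condition */
                    break;
            if (signal_pending(current))
                    break;                  /* caller maps this to -ERESTARTSYS */
            schedule();
    }
    finish_wait(&my_waitq, &wait);
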
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index 9aa9ea9822c8..88dab52926f4 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -432,23 +432,27 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
432 list_splice_init(&txd->tx_list, &dc->free_list); 432 list_splice_init(&txd->tx_list, &dc->free_list);
433 list_move(&desc->desc_node, &dc->free_list); 433 list_move(&desc->desc_node, &dc->free_list);
434 434
435 /*
436 * We use dma_unmap_page() regardless of how the buffers were
437 * mapped before they were submitted...
438 */
439 if (!ds) { 435 if (!ds) {
440 dma_addr_t dmaaddr; 436 dma_addr_t dmaaddr;
441 if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { 437 if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
442 dmaaddr = is_dmac64(dc) ? 438 dmaaddr = is_dmac64(dc) ?
443 desc->hwdesc.DAR : desc->hwdesc32.DAR; 439 desc->hwdesc.DAR : desc->hwdesc32.DAR;
444 dma_unmap_page(chan2parent(&dc->chan), dmaaddr, 440 if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
445 desc->len, DMA_FROM_DEVICE); 441 dma_unmap_single(chan2parent(&dc->chan),
442 dmaaddr, desc->len, DMA_FROM_DEVICE);
443 else
444 dma_unmap_page(chan2parent(&dc->chan),
445 dmaaddr, desc->len, DMA_FROM_DEVICE);
446 } 446 }
447 if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { 447 if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
448 dmaaddr = is_dmac64(dc) ? 448 dmaaddr = is_dmac64(dc) ?
449 desc->hwdesc.SAR : desc->hwdesc32.SAR; 449 desc->hwdesc.SAR : desc->hwdesc32.SAR;
450 dma_unmap_page(chan2parent(&dc->chan), dmaaddr, 450 if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
451 desc->len, DMA_TO_DEVICE); 451 dma_unmap_single(chan2parent(&dc->chan),
452 dmaaddr, desc->len, DMA_TO_DEVICE);
453 else
454 dma_unmap_page(chan2parent(&dc->chan),
455 dmaaddr, desc->len, DMA_TO_DEVICE);
452 } 456 }
453 } 457 }
454 458
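
The completion path now honors the DMA_COMPL_*_UNMAP_SINGLE flags, so a buffer mapped with dma_map_single() is no longer unconditionally unmapped with dma_unmap_page(). Those flags are set by the submitter; a hedged sketch of the producer side of that contract (dev, chan, buf, src and len are assumed to be in scope):

    /* Buffer was mapped with dma_map_single(), so tell the completion
     * path to use dma_unmap_single() on it. */
    dma_addr_t dst = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
    struct dma_async_tx_descriptor *tx;

    tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len,
                DMA_COMPL_DEST_UNMAP_SINGLE | DMA_COMPL_SKIP_SRC_UNMAP);
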
diff --git a/drivers/gpu/drm/i915/i915_opregion.c b/drivers/gpu/drm/i915/i915_opregion.c
index dc425e74a268..e4b4e8898e39 100644
--- a/drivers/gpu/drm/i915/i915_opregion.c
+++ b/drivers/gpu/drm/i915/i915_opregion.c
@@ -419,7 +419,7 @@ void intel_opregion_free(struct drm_device *dev, int suspend)
419 return; 419 return;
420 420
421 if (!suspend) 421 if (!suspend)
422 acpi_video_exit(); 422 acpi_video_unregister();
423 423
424 opregion->acpi->drdy = 0; 424 opregion->acpi->drdy = 0;
425 425
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 3c259ee7ddda..8206442fbabd 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -326,6 +326,16 @@ config I2C_DAVINCI
326 devices such as DaVinci NIC. 326 devices such as DaVinci NIC.
327 For details please see http://www.ti.com/davinci 327 For details please see http://www.ti.com/davinci
328 328
329config I2C_DESIGNWARE
330 tristate "Synopsys DesignWare"
331 depends on HAVE_CLK
332 help
333 If you say yes to this option, support will be included for the
334 Synopsys DesignWare I2C adapter. Only master mode is supported.
335
336 This driver can also be built as a module. If so, the module
337 will be called i2c-designware.
338
329config I2C_GPIO 339config I2C_GPIO
330 tristate "GPIO-based bitbanging I2C" 340 tristate "GPIO-based bitbanging I2C"
331 depends on GENERIC_GPIO 341 depends on GENERIC_GPIO
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index edeabf003106..e654263bfc01 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_I2C_AU1550) += i2c-au1550.o
30obj-$(CONFIG_I2C_BLACKFIN_TWI) += i2c-bfin-twi.o 30obj-$(CONFIG_I2C_BLACKFIN_TWI) += i2c-bfin-twi.o
31obj-$(CONFIG_I2C_CPM) += i2c-cpm.o 31obj-$(CONFIG_I2C_CPM) += i2c-cpm.o
32obj-$(CONFIG_I2C_DAVINCI) += i2c-davinci.o 32obj-$(CONFIG_I2C_DAVINCI) += i2c-davinci.o
33obj-$(CONFIG_I2C_DESIGNWARE) += i2c-designware.o
33obj-$(CONFIG_I2C_GPIO) += i2c-gpio.o 34obj-$(CONFIG_I2C_GPIO) += i2c-gpio.o
34obj-$(CONFIG_I2C_HIGHLANDER) += i2c-highlander.o 35obj-$(CONFIG_I2C_HIGHLANDER) += i2c-highlander.o
35obj-$(CONFIG_I2C_IBM_IIC) += i2c-ibm_iic.o 36obj-$(CONFIG_I2C_IBM_IIC) += i2c-ibm_iic.o
diff --git a/drivers/i2c/busses/i2c-designware.c b/drivers/i2c/busses/i2c-designware.c
new file mode 100644
index 000000000000..b444762e9b9f
--- /dev/null
+++ b/drivers/i2c/busses/i2c-designware.c
@@ -0,0 +1,624 @@
1/*
2 * Synopsys DesignWare I2C adapter driver (master only).
3 *
4 * Based on the TI DAVINCI I2C adapter driver.
5 *
6 * Copyright (C) 2006 Texas Instruments.
7 * Copyright (C) 2007 MontaVista Software Inc.
8 * Copyright (C) 2009 Provigent Ltd.
9 *
10 * ----------------------------------------------------------------------------
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 * ----------------------------------------------------------------------------
26 *
27 */
28#include <linux/kernel.h>
29#include <linux/module.h>
30#include <linux/delay.h>
31#include <linux/i2c.h>
32#include <linux/clk.h>
33#include <linux/errno.h>
34#include <linux/sched.h>
35#include <linux/err.h>
36#include <linux/interrupt.h>
37#include <linux/platform_device.h>
38#include <linux/io.h>
39
40/*
41 * Registers offset
42 */
43#define DW_IC_CON 0x0
44#define DW_IC_TAR 0x4
45#define DW_IC_DATA_CMD 0x10
46#define DW_IC_SS_SCL_HCNT 0x14
47#define DW_IC_SS_SCL_LCNT 0x18
48#define DW_IC_FS_SCL_HCNT 0x1c
49#define DW_IC_FS_SCL_LCNT 0x20
50#define DW_IC_INTR_STAT 0x2c
51#define DW_IC_INTR_MASK 0x30
52#define DW_IC_CLR_INTR 0x40
53#define DW_IC_ENABLE 0x6c
54#define DW_IC_STATUS 0x70
55#define DW_IC_TXFLR 0x74
56#define DW_IC_RXFLR 0x78
57#define DW_IC_COMP_PARAM_1 0xf4
58#define DW_IC_TX_ABRT_SOURCE 0x80
59
60#define DW_IC_CON_MASTER 0x1
61#define DW_IC_CON_SPEED_STD 0x2
62#define DW_IC_CON_SPEED_FAST 0x4
63#define DW_IC_CON_10BITADDR_MASTER 0x10
64#define DW_IC_CON_RESTART_EN 0x20
65#define DW_IC_CON_SLAVE_DISABLE 0x40
66
67#define DW_IC_INTR_TX_EMPTY 0x10
68#define DW_IC_INTR_TX_ABRT 0x40
69#define DW_IC_INTR_STOP_DET 0x200
70
71#define DW_IC_STATUS_ACTIVITY 0x1
72
73#define DW_IC_ERR_TX_ABRT 0x1
74
75/*
76 * status codes
77 */
78#define STATUS_IDLE 0x0
79#define STATUS_WRITE_IN_PROGRESS 0x1
80#define STATUS_READ_IN_PROGRESS 0x2
81
82#define TIMEOUT 20 /* ms */
83
84/*
85 * hardware abort codes from the DW_IC_TX_ABRT_SOURCE register
86 *
87 * only expected abort codes are listed here
88 * refer to the datasheet for the full list
89 */
90#define ABRT_7B_ADDR_NOACK 0
91#define ABRT_10ADDR1_NOACK 1
92#define ABRT_10ADDR2_NOACK 2
93#define ABRT_TXDATA_NOACK 3
94#define ABRT_GCALL_NOACK 4
95#define ABRT_GCALL_READ 5
96#define ABRT_SBYTE_ACKDET 7
97#define ABRT_SBYTE_NORSTRT 9
98#define ABRT_10B_RD_NORSTRT 10
99#define ARB_MASTER_DIS 11
100#define ARB_LOST 12
101
102static char *abort_sources[] = {
103 [ABRT_7B_ADDR_NOACK] =
104 "slave address not acknowledged (7bit mode)",
105 [ABRT_10ADDR1_NOACK] =
106 "first address byte not acknowledged (10bit mode)",
107 [ABRT_10ADDR2_NOACK] =
108 "second address byte not acknowledged (10bit mode)",
109 [ABRT_TXDATA_NOACK] =
110 "data not acknowledged",
111 [ABRT_GCALL_NOACK] =
112 "no acknowledgement for a general call",
113 [ABRT_GCALL_READ] =
114 "read after general call",
115 [ABRT_SBYTE_ACKDET] =
116 "start byte acknowledged",
117 [ABRT_SBYTE_NORSTRT] =
118 "trying to send start byte when restart is disabled",
119 [ABRT_10B_RD_NORSTRT] =
120 "trying to read when restart is disabled (10bit mode)",
121 [ARB_MASTER_DIS] =
122 "trying to use disabled adapter",
123 [ARB_LOST] =
124 "lost arbitration",
125};
126
127/**
128 * struct dw_i2c_dev - private i2c-designware data
129 * @dev: driver model device node
130 * @base: IO registers pointer
131 * @cmd_complete: tx completion indicator
132 * @pump_msg: continue in-progress transfers
133 * @lock: protect this struct and IO registers
134 * @clk: input reference clock
135 * @cmd_err: run time hardware error code
136 * @msgs: points to an array of messages currently being transferred
137 * @msgs_num: the number of elements in msgs
138 * @msg_write_idx: the element index of the current tx message in the msgs
139 * array
140 * @tx_buf_len: the length of the current tx buffer
141 * @tx_buf: the current tx buffer
142 * @msg_read_idx: the element index of the current rx message in the msgs
143 * array
144 * @rx_buf_len: the length of the current rx buffer
145 * @rx_buf: the current rx buffer
146 * @msg_err: error status of the current transfer
147 * @status: i2c master status, one of STATUS_*
148 * @abort_source: copy of the TX_ABRT_SOURCE register
149 * @irq: interrupt number for the i2c master
150 * @adapter: i2c subsystem adapter node
151 * @tx_fifo_depth: depth of the hardware tx fifo
152 * @rx_fifo_depth: depth of the hardware rx fifo
153 */
154struct dw_i2c_dev {
155 struct device *dev;
156 void __iomem *base;
157 struct completion cmd_complete;
158 struct tasklet_struct pump_msg;
159 struct mutex lock;
160 struct clk *clk;
161 int cmd_err;
162 struct i2c_msg *msgs;
163 int msgs_num;
164 int msg_write_idx;
165 u16 tx_buf_len;
166 u8 *tx_buf;
167 int msg_read_idx;
168 u16 rx_buf_len;
169 u8 *rx_buf;
170 int msg_err;
171 unsigned int status;
172 u16 abort_source;
173 int irq;
174 struct i2c_adapter adapter;
175 unsigned int tx_fifo_depth;
176 unsigned int rx_fifo_depth;
177};
178
179/**
180 * i2c_dw_init() - initialize the designware i2c master hardware
181 * @dev: device private data
182 *
183 * This function configures and enables the I2C master.
184 * It is called during I2C initialization, and again after a timeout
185 * at run time.
186 */
187static void i2c_dw_init(struct dw_i2c_dev *dev)
188{
189 u32 input_clock_khz = clk_get_rate(dev->clk) / 1000;
190 u16 ic_con;
191
192 /* Disable the adapter */
193 writeb(0, dev->base + DW_IC_ENABLE);
194
196	/* set standard and fast speed dividers for high/low periods */
196 writew((input_clock_khz * 40 / 10000)+1, /* std speed high, 4us */
197 dev->base + DW_IC_SS_SCL_HCNT);
198 writew((input_clock_khz * 47 / 10000)+1, /* std speed low, 4.7us */
199 dev->base + DW_IC_SS_SCL_LCNT);
200 writew((input_clock_khz * 6 / 10000)+1, /* fast speed high, 0.6us */
201 dev->base + DW_IC_FS_SCL_HCNT);
202 writew((input_clock_khz * 13 / 10000)+1, /* fast speed low, 1.3us */
203 dev->base + DW_IC_FS_SCL_LCNT);
204
205 /* configure the i2c master */
206 ic_con = DW_IC_CON_MASTER | DW_IC_CON_SLAVE_DISABLE |
207 DW_IC_CON_RESTART_EN | DW_IC_CON_SPEED_FAST;
208 writew(ic_con, dev->base + DW_IC_CON);
209}
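/*
 * Worked example of the divider arithmetic above (a 100 MHz input
 * clock is an illustrative assumption, not from the patch): with
 * input_clock_khz = 100000,
 *
 *   SS_SCL_HCNT = 100000 * 40 / 10000 + 1 = 401   (4.0 us high)
 *   SS_SCL_LCNT = 100000 * 47 / 10000 + 1 = 471   (4.7 us low)
 *   FS_SCL_HCNT = 100000 *  6 / 10000 + 1 =  61   (0.6 us high)
 *   FS_SCL_LCNT = 100000 * 13 / 10000 + 1 = 131   (1.3 us low)
 *
 * i.e. each count register holds the number of input clock ticks in
 * the required SCL phase, plus one tick to round up.
 */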
210
211/*
212 * Wait until the bus is not busy
213 */
214static int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev)
215{
216 int timeout = TIMEOUT;
217
218 while (readb(dev->base + DW_IC_STATUS) & DW_IC_STATUS_ACTIVITY) {
219 if (timeout <= 0) {
220 dev_warn(dev->dev, "timeout waiting for bus ready\n");
221 return -ETIMEDOUT;
222 }
223 timeout--;
224 mdelay(1);
225 }
226
227 return 0;
228}
229
230/*
231 * Initiate low level master read/write transaction.
232 * This function is called from i2c_dw_xfer when starting a transfer.
233 * This function is also called from dw_i2c_pump_msg to continue a transfer
234 * that is longer than the size of the TX FIFO.
235 */
236static void
237i2c_dw_xfer_msg(struct i2c_adapter *adap)
238{
239 struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
240 struct i2c_msg *msgs = dev->msgs;
241 int num = dev->msgs_num;
242 u16 ic_con, intr_mask;
243 int tx_limit = dev->tx_fifo_depth - readb(dev->base + DW_IC_TXFLR);
244 int rx_limit = dev->rx_fifo_depth - readb(dev->base + DW_IC_RXFLR);
245 u16 addr = msgs[dev->msg_write_idx].addr;
246 u16 buf_len = dev->tx_buf_len;
247
248 if (!(dev->status & STATUS_WRITE_IN_PROGRESS)) {
249 /* Disable the adapter */
250 writeb(0, dev->base + DW_IC_ENABLE);
251
252 /* set the slave (target) address */
253 writew(msgs[dev->msg_write_idx].addr, dev->base + DW_IC_TAR);
254
255 /* if the slave address is ten bit address, enable 10BITADDR */
256 ic_con = readw(dev->base + DW_IC_CON);
257 if (msgs[dev->msg_write_idx].flags & I2C_M_TEN)
258 ic_con |= DW_IC_CON_10BITADDR_MASTER;
259 else
260 ic_con &= ~DW_IC_CON_10BITADDR_MASTER;
261 writew(ic_con, dev->base + DW_IC_CON);
262
263 /* Enable the adapter */
264 writeb(1, dev->base + DW_IC_ENABLE);
265 }
266
267 for (; dev->msg_write_idx < num; dev->msg_write_idx++) {
268 /* if target address has changed, we need to
269 * reprogram the target address in the i2c
270 * adapter when we are done with this transfer
271 */
272 if (msgs[dev->msg_write_idx].addr != addr)
273 return;
274
275 if (msgs[dev->msg_write_idx].len == 0) {
276 dev_err(dev->dev,
277 "%s: invalid message length\n", __func__);
278 dev->msg_err = -EINVAL;
279 return;
280 }
281
282 if (!(dev->status & STATUS_WRITE_IN_PROGRESS)) {
283 /* new i2c_msg */
284 dev->tx_buf = msgs[dev->msg_write_idx].buf;
285 buf_len = msgs[dev->msg_write_idx].len;
286 }
287
288 while (buf_len > 0 && tx_limit > 0 && rx_limit > 0) {
289 if (msgs[dev->msg_write_idx].flags & I2C_M_RD) {
290 writew(0x100, dev->base + DW_IC_DATA_CMD);
291 rx_limit--;
292 } else
293 writew(*(dev->tx_buf++),
294 dev->base + DW_IC_DATA_CMD);
295 tx_limit--; buf_len--;
296 }
297 }
298
299 intr_mask = DW_IC_INTR_STOP_DET | DW_IC_INTR_TX_ABRT;
300 if (buf_len > 0) { /* more bytes to be written */
301 intr_mask |= DW_IC_INTR_TX_EMPTY;
302 dev->status |= STATUS_WRITE_IN_PROGRESS;
303 } else
304 dev->status &= ~STATUS_WRITE_IN_PROGRESS;
305 writew(intr_mask, dev->base + DW_IC_INTR_MASK);
306
307 dev->tx_buf_len = buf_len;
308}
309
310static void
311i2c_dw_read(struct i2c_adapter *adap)
312{
313 struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
314 struct i2c_msg *msgs = dev->msgs;
315 int num = dev->msgs_num;
316 u16 addr = msgs[dev->msg_read_idx].addr;
317 int rx_valid = readw(dev->base + DW_IC_RXFLR);
318
319 for (; dev->msg_read_idx < num; dev->msg_read_idx++) {
320 u16 len;
321 u8 *buf;
322
323 if (!(msgs[dev->msg_read_idx].flags & I2C_M_RD))
324 continue;
325
326 /* different i2c client, reprogram the i2c adapter */
327 if (msgs[dev->msg_read_idx].addr != addr)
328 return;
329
330 if (!(dev->status & STATUS_READ_IN_PROGRESS)) {
331 len = msgs[dev->msg_read_idx].len;
332 buf = msgs[dev->msg_read_idx].buf;
333 } else {
334 len = dev->rx_buf_len;
335 buf = dev->rx_buf;
336 }
337
338 for (; len > 0 && rx_valid > 0; len--, rx_valid--)
339 *buf++ = readb(dev->base + DW_IC_DATA_CMD);
340
341 if (len > 0) {
342 dev->status |= STATUS_READ_IN_PROGRESS;
343 dev->rx_buf_len = len;
344 dev->rx_buf = buf;
345 return;
346 } else
347 dev->status &= ~STATUS_READ_IN_PROGRESS;
348 }
349}
350
351/*
352 * Prepare controller for a transaction and call i2c_dw_xfer_msg
353 */
354static int
355i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
356{
357 struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
358 int ret;
359
360 dev_dbg(dev->dev, "%s: msgs: %d\n", __func__, num);
361
362 mutex_lock(&dev->lock);
363
364 INIT_COMPLETION(dev->cmd_complete);
365 dev->msgs = msgs;
366 dev->msgs_num = num;
367 dev->cmd_err = 0;
368 dev->msg_write_idx = 0;
369 dev->msg_read_idx = 0;
370 dev->msg_err = 0;
371 dev->status = STATUS_IDLE;
372
373 ret = i2c_dw_wait_bus_not_busy(dev);
374 if (ret < 0)
375 goto done;
376
377 /* start the transfers */
378 i2c_dw_xfer_msg(adap);
379
380 /* wait for tx to complete */
381 ret = wait_for_completion_interruptible_timeout(&dev->cmd_complete, HZ);
382 if (ret == 0) {
383 dev_err(dev->dev, "controller timed out\n");
384 i2c_dw_init(dev);
385 ret = -ETIMEDOUT;
386 goto done;
387 } else if (ret < 0)
388 goto done;
389
390 if (dev->msg_err) {
391 ret = dev->msg_err;
392 goto done;
393 }
394
395 /* no error */
396 if (likely(!dev->cmd_err)) {
397 /* read rx fifo, and disable the adapter */
398 do {
399 i2c_dw_read(adap);
400 } while (dev->status & STATUS_READ_IN_PROGRESS);
401 writeb(0, dev->base + DW_IC_ENABLE);
402 ret = num;
403 goto done;
404 }
405
406 /* We have an error */
407 if (dev->cmd_err == DW_IC_ERR_TX_ABRT) {
408 unsigned long abort_source = dev->abort_source;
409 int i;
410
411 for_each_bit(i, &abort_source, ARRAY_SIZE(abort_sources)) {
412 dev_err(dev->dev, "%s: %s\n", __func__, abort_sources[i]);
413 }
414 }
415 ret = -EIO;
416
417done:
418 mutex_unlock(&dev->lock);
419
420 return ret;
421}
422
423static u32 i2c_dw_func(struct i2c_adapter *adap)
424{
425 return I2C_FUNC_I2C | I2C_FUNC_10BIT_ADDR;
426}
427
428static void dw_i2c_pump_msg(unsigned long data)
429{
430 struct dw_i2c_dev *dev = (struct dw_i2c_dev *) data;
431 u16 intr_mask;
432
433 i2c_dw_read(&dev->adapter);
434 i2c_dw_xfer_msg(&dev->adapter);
435
436 intr_mask = DW_IC_INTR_STOP_DET | DW_IC_INTR_TX_ABRT;
437 if (dev->status & STATUS_WRITE_IN_PROGRESS)
438 intr_mask |= DW_IC_INTR_TX_EMPTY;
439 writew(intr_mask, dev->base + DW_IC_INTR_MASK);
440}
441
442/*
443 * Interrupt service routine. This gets called whenever an I2C interrupt
444 * occurs.
445 */
446static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
447{
448 struct dw_i2c_dev *dev = dev_id;
449 u16 stat;
450
451 stat = readw(dev->base + DW_IC_INTR_STAT);
452 dev_dbg(dev->dev, "%s: stat=0x%x\n", __func__, stat);
453 if (stat & DW_IC_INTR_TX_ABRT) {
454 dev->abort_source = readw(dev->base + DW_IC_TX_ABRT_SOURCE);
455 dev->cmd_err |= DW_IC_ERR_TX_ABRT;
456 dev->status = STATUS_IDLE;
457 } else if (stat & DW_IC_INTR_TX_EMPTY)
458 tasklet_schedule(&dev->pump_msg);
459
460 readb(dev->base + DW_IC_CLR_INTR); /* clear interrupts */
461 writew(0, dev->base + DW_IC_INTR_MASK); /* disable interrupts */
462 if (stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET))
463 complete(&dev->cmd_complete);
464
465 return IRQ_HANDLED;
466}
467
468static struct i2c_algorithm i2c_dw_algo = {
469 .master_xfer = i2c_dw_xfer,
470 .functionality = i2c_dw_func,
471};
472
473static int __devinit dw_i2c_probe(struct platform_device *pdev)
474{
475 struct dw_i2c_dev *dev;
476 struct i2c_adapter *adap;
477 struct resource *mem, *irq, *ioarea;
478 int r;
479
480 /* NOTE: driver uses the static register mapping */
481 mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
482 if (!mem) {
483 dev_err(&pdev->dev, "no mem resource?\n");
484 return -EINVAL;
485 }
486
487 irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
488 if (!irq) {
489 dev_err(&pdev->dev, "no irq resource?\n");
490 return -EINVAL;
491 }
492
493 ioarea = request_mem_region(mem->start, resource_size(mem),
494 pdev->name);
495 if (!ioarea) {
496 dev_err(&pdev->dev, "I2C region already claimed\n");
497 return -EBUSY;
498 }
499
500 dev = kzalloc(sizeof(struct dw_i2c_dev), GFP_KERNEL);
501 if (!dev) {
502 r = -ENOMEM;
503 goto err_release_region;
504 }
505
506 init_completion(&dev->cmd_complete);
507 tasklet_init(&dev->pump_msg, dw_i2c_pump_msg, (unsigned long) dev);
508 mutex_init(&dev->lock);
509 dev->dev = get_device(&pdev->dev);
510 dev->irq = irq->start;
511 platform_set_drvdata(pdev, dev);
512
513 dev->clk = clk_get(&pdev->dev, NULL);
514 if (IS_ERR(dev->clk)) {
515 r = -ENODEV;
516 goto err_free_mem;
517 }
518 clk_enable(dev->clk);
519
520 dev->base = ioremap(mem->start, resource_size(mem));
521 if (dev->base == NULL) {
522 dev_err(&pdev->dev, "failure mapping io resources\n");
523 r = -EBUSY;
524 goto err_unuse_clocks;
525 }
526 {
527 u32 param1 = readl(dev->base + DW_IC_COMP_PARAM_1);
528
529 dev->tx_fifo_depth = ((param1 >> 16) & 0xff) + 1;
530 dev->rx_fifo_depth = ((param1 >> 8) & 0xff) + 1;
531 }
532 i2c_dw_init(dev);
533
534 writew(0, dev->base + DW_IC_INTR_MASK); /* disable IRQ */
535 r = request_irq(dev->irq, i2c_dw_isr, 0, pdev->name, dev);
536 if (r) {
537 dev_err(&pdev->dev, "failure requesting irq %i\n", dev->irq);
538 goto err_iounmap;
539 }
540
541 adap = &dev->adapter;
542 i2c_set_adapdata(adap, dev);
543 adap->owner = THIS_MODULE;
544 adap->class = I2C_CLASS_HWMON;
545 strlcpy(adap->name, "Synopsys DesignWare I2C adapter",
546 sizeof(adap->name));
547 adap->algo = &i2c_dw_algo;
548 adap->dev.parent = &pdev->dev;
549
550 adap->nr = pdev->id;
551 r = i2c_add_numbered_adapter(adap);
552 if (r) {
553 dev_err(&pdev->dev, "failure adding adapter\n");
554 goto err_free_irq;
555 }
556
557 return 0;
558
559err_free_irq:
560 free_irq(dev->irq, dev);
561err_iounmap:
562 iounmap(dev->base);
563err_unuse_clocks:
564 clk_disable(dev->clk);
565 clk_put(dev->clk);
566 dev->clk = NULL;
567err_free_mem:
568 platform_set_drvdata(pdev, NULL);
569 put_device(&pdev->dev);
570 kfree(dev);
571err_release_region:
572 release_mem_region(mem->start, resource_size(mem));
573
574 return r;
575}
576
577static int __devexit dw_i2c_remove(struct platform_device *pdev)
578{
579 struct dw_i2c_dev *dev = platform_get_drvdata(pdev);
580 struct resource *mem;
581
582 platform_set_drvdata(pdev, NULL);
583 i2c_del_adapter(&dev->adapter);
584 put_device(&pdev->dev);
585
586 clk_disable(dev->clk);
587 clk_put(dev->clk);
588 dev->clk = NULL;
589
590 writeb(0, dev->base + DW_IC_ENABLE);
591 free_irq(dev->irq, dev);
592 kfree(dev);
593
594 mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
595 release_mem_region(mem->start, resource_size(mem));
596 return 0;
597}
598
599/* work with hotplug and coldplug */
600MODULE_ALIAS("platform:i2c_designware");
601
602static struct platform_driver dw_i2c_driver = {
603 .remove = __devexit_p(dw_i2c_remove),
604 .driver = {
605 .name = "i2c_designware",
606 .owner = THIS_MODULE,
607 },
608};
609
610static int __init dw_i2c_init_driver(void)
611{
612 return platform_driver_probe(&dw_i2c_driver, dw_i2c_probe);
613}
614module_init(dw_i2c_init_driver);
615
616static void __exit dw_i2c_exit_driver(void)
617{
618 platform_driver_unregister(&dw_i2c_driver);
619}
620module_exit(dw_i2c_exit_driver);
621
622MODULE_AUTHOR("Baruch Siach <baruch@tkos.co.il>");
623MODULE_DESCRIPTION("Synopsys DesignWare I2C bus adapter");
624MODULE_LICENSE("GPL");
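
From a client driver's point of view the new adapter is reached through the ordinary i2c core, which routes master_xfer to i2c_dw_xfer(). A minimal sketch of a write-then-read transfer through it (the 0x50 slave address and 0x10 register are assumptions for illustration):

    #include <linux/i2c.h>

    static int demo_read_byte(struct i2c_adapter *adap, u8 *val)
    {
            u8 reg = 0x10;                          /* assumed register */
            struct i2c_msg msgs[] = {
                    { .addr = 0x50, .flags = 0,        .len = 1, .buf = &reg },
                    { .addr = 0x50, .flags = I2C_M_RD, .len = 1, .buf = val  },
            };

            /* i2c_transfer() ends up in i2c_dw_xfer() for this adapter */
            return i2c_transfer(adap, msgs, ARRAY_SIZE(msgs)) == 2 ? 0 : -EIO;
    }
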
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index bd066bb9d611..09f98ed0731f 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -135,6 +135,7 @@ static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_devic
135 135
136 ide_pci_setup_ports(dev, d, &hw[0], &hws[0]); 136 ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
137 hw[0].irq = 14; 137 hw[0].irq = 14;
138 hw[1].irq = 15;
138 139
139 return ide_host_add(d, hws, 2, NULL); 140 return ide_host_add(d, hws, 2, NULL);
140} 141}
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 4a19686fcfe9..f0ede5953af8 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -876,9 +876,12 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
876 return stat; 876 return stat;
877 877
878 /* 878 /*
879 * Sanity check the given block size 879 * Sanity check the given block size, in so far as making
880 * sure the sectors_per_frame we give to the caller won't
881 * end up being bogus.
880 */ 882 */
881 blocklen = be32_to_cpu(capbuf.blocklen); 883 blocklen = be32_to_cpu(capbuf.blocklen);
884 blocklen = (blocklen >> SECTOR_BITS) << SECTOR_BITS;
882 switch (blocklen) { 885 switch (blocklen) {
883 case 512: 886 case 512:
884 case 1024: 887 case 1024:
@@ -886,10 +889,9 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
886 case 4096: 889 case 4096:
887 break; 890 break;
888 default: 891 default:
889 printk(KERN_ERR PFX "%s: weird block size %u\n", 892 printk_once(KERN_ERR PFX "%s: weird block size %u; "
893 "setting default block size to 2048\n",
890 drive->name, blocklen); 894 drive->name, blocklen);
891 printk(KERN_ERR PFX "%s: default to 2kb block size\n",
892 drive->name);
893 blocklen = 2048; 895 blocklen = 2048;
894 break; 896 break;
895 } 897 }
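
The added shift pair rounds the reported block size down to a multiple of the 512-byte sector (SECTOR_BITS is 9), so whatever reaches the switch yields a sane sectors_per_frame. A sketch of the masking with worked values (the sample replies are illustrative):

    /* Round the drive-reported block size down to a sector multiple.
     * Sample values:
     *   2352 (raw CD frame)  -> 2048  accepted by the switch
     *   3500 (bogus reply)   -> 3072  hits default, reset to 2048
     *      0 (broken drive)  ->    0  hits default, reset to 2048
     */
    static unsigned int round_to_sector(unsigned int blocklen)
    {
            return (blocklen >> 9) << 9;    /* clear the low nine bits */
    }
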
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 219e6fb78dc6..ee58c88dee5a 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -361,9 +361,6 @@ static int ide_tune_dma(ide_drive_t *drive)
361 if (__ide_dma_bad_drive(drive)) 361 if (__ide_dma_bad_drive(drive))
362 return 0; 362 return 0;
363 363
364 if (ide_id_dma_bug(drive))
365 return 0;
366
367 if (hwif->host_flags & IDE_HFLAG_TRUST_BIOS_FOR_DMA) 364 if (hwif->host_flags & IDE_HFLAG_TRUST_BIOS_FOR_DMA)
368 return config_drive_for_dma(drive); 365 return config_drive_for_dma(drive);
369 366
@@ -394,24 +391,6 @@ static int ide_dma_check(ide_drive_t *drive)
394 return -1; 391 return -1;
395} 392}
396 393
397int ide_id_dma_bug(ide_drive_t *drive)
398{
399 u16 *id = drive->id;
400
401 if (id[ATA_ID_FIELD_VALID] & 4) {
402 if ((id[ATA_ID_UDMA_MODES] >> 8) &&
403 (id[ATA_ID_MWDMA_MODES] >> 8))
404 goto err_out;
405 } else if ((id[ATA_ID_MWDMA_MODES] >> 8) &&
406 (id[ATA_ID_SWDMA_MODES] >> 8))
407 goto err_out;
408
409 return 0;
410err_out:
411 printk(KERN_ERR "%s: bad DMA info in identify block\n", drive->name);
412 return 1;
413}
414
415int ide_set_dma(ide_drive_t *drive) 394int ide_set_dma(ide_drive_t *drive)
416{ 395{
417 int rc; 396 int rc;
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 1059f809b809..93b7886a2d6e 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -476,10 +476,14 @@ void do_ide_request(struct request_queue *q)
476 476
477 if (!ide_lock_port(hwif)) { 477 if (!ide_lock_port(hwif)) {
478 ide_hwif_t *prev_port; 478 ide_hwif_t *prev_port;
479
480 WARN_ON_ONCE(hwif->rq);
481repeat: 479repeat:
482 prev_port = hwif->host->cur_port; 480 prev_port = hwif->host->cur_port;
481
482 if (drive->dev_flags & IDE_DFLAG_BLOCKED)
483 rq = hwif->rq;
484 else
485 WARN_ON_ONCE(hwif->rq);
486
483 if (drive->dev_flags & IDE_DFLAG_SLEEPING && 487 if (drive->dev_flags & IDE_DFLAG_SLEEPING &&
484 time_after(drive->sleep, jiffies)) { 488 time_after(drive->sleep, jiffies)) {
485 ide_unlock_port(hwif); 489 ide_unlock_port(hwif);
@@ -506,43 +510,29 @@ repeat:
506 hwif->cur_dev = drive; 510 hwif->cur_dev = drive;
507 drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); 511 drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
508 512
509 spin_unlock_irq(&hwif->lock); 513 if (rq == NULL) {
510 spin_lock_irq(q->queue_lock); 514 spin_unlock_irq(&hwif->lock);
511 /* 515 spin_lock_irq(q->queue_lock);
512 * we know that the queue isn't empty, but this can happen 516 /*
513 * if the q->prep_rq_fn() decides to kill a request 517 * we know that the queue isn't empty, but this can
514 */ 518 * happen if ->prep_rq_fn() decides to kill a request
515 if (!rq) 519 */
516 rq = blk_fetch_request(drive->queue); 520 rq = blk_fetch_request(drive->queue);
521 spin_unlock_irq(q->queue_lock);
522 spin_lock_irq(&hwif->lock);
517 523
518 spin_unlock_irq(q->queue_lock); 524 if (rq == NULL) {
519 spin_lock_irq(&hwif->lock); 525 ide_unlock_port(hwif);
520 526 goto out;
521 if (!rq) { 527 }
522 ide_unlock_port(hwif);
523 goto out;
524 } 528 }
525 529
526 /* 530 /*
527 * Sanity: don't accept a request that isn't a PM request 531 * Sanity: don't accept a request that isn't a PM request
528 * if we are currently power managed. This is very important as 532 * if we are currently power managed.
529 * blk_stop_queue() doesn't prevent the blk_fetch_request()
530 * above to return us whatever is in the queue. Since we call
531 * ide_do_request() ourselves, we end up taking requests while
532 * the queue is blocked...
533 *
534 * We let requests forced at head of queue with ide-preempt
535 * though. I hope that doesn't happen too much, hopefully not
536 * unless the subdriver triggers such a thing in its own PM
537 * state machine.
538 */ 533 */
539 if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && 534 BUG_ON((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
540 blk_pm_request(rq) == 0 && 535 blk_pm_request(rq) == 0);
541 (rq->cmd_flags & REQ_PREEMPT) == 0) {
542 /* there should be no pending command at this point */
543 ide_unlock_port(hwif);
544 goto plug_device;
545 }
546 536
547 hwif->rq = rq; 537 hwif->rq = rq;
548 538
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index fa047150a1c6..2892b242bbe1 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -210,6 +210,7 @@ EXPORT_SYMBOL_GPL(ide_in_drive_list);
210 */ 210 */
211static const struct drive_list_entry ivb_list[] = { 211static const struct drive_list_entry ivb_list[] = {
212 { "QUANTUM FIREBALLlct10 05" , "A03.0900" }, 212 { "QUANTUM FIREBALLlct10 05" , "A03.0900" },
213 { "QUANTUM FIREBALLlct20 30" , "APL.0900" },
213 { "TSSTcorp CDDVDW SH-S202J" , "SB00" }, 214 { "TSSTcorp CDDVDW SH-S202J" , "SB00" },
214 { "TSSTcorp CDDVDW SH-S202J" , "SB01" }, 215 { "TSSTcorp CDDVDW SH-S202J" , "SB01" },
215 { "TSSTcorp CDDVDW SH-S202N" , "SB00" }, 216 { "TSSTcorp CDDVDW SH-S202N" , "SB00" },
@@ -329,9 +330,6 @@ int ide_driveid_update(ide_drive_t *drive)
329 330
330 kfree(id); 331 kfree(id);
331 332
332 if ((drive->dev_flags & IDE_DFLAG_USING_DMA) && ide_id_dma_bug(drive))
333 ide_dma_off(drive);
334
335 return 1; 333 return 1;
336out_err: 334out_err:
337 if (rc == 2) 335 if (rc == 2)
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 51af4eea0d36..1bb106f6221a 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -818,6 +818,24 @@ static int ide_port_setup_devices(ide_hwif_t *hwif)
818 return j; 818 return j;
819} 819}
820 820
821static void ide_host_enable_irqs(struct ide_host *host)
822{
823 ide_hwif_t *hwif;
824 int i;
825
826 ide_host_for_each_port(i, hwif, host) {
827 if (hwif == NULL)
828 continue;
829
830 /* clear any pending IRQs */
831 hwif->tp_ops->read_status(hwif);
832
833 /* unmask IRQs */
834 if (hwif->io_ports.ctl_addr)
835 hwif->tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
836 }
837}
838
821/* 839/*
822 * This routine sets up the IRQ for an IDE interface. 840 * This routine sets up the IRQ for an IDE interface.
823 */ 841 */
@@ -831,9 +849,6 @@ static int init_irq (ide_hwif_t *hwif)
831 if (irq_handler == NULL) 849 if (irq_handler == NULL)
832 irq_handler = ide_intr; 850 irq_handler = ide_intr;
833 851
834 if (io_ports->ctl_addr)
835 hwif->tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
836
837 if (request_irq(hwif->irq, irq_handler, sa, hwif->name, hwif)) 852 if (request_irq(hwif->irq, irq_handler, sa, hwif->name, hwif))
838 goto out_up; 853 goto out_up;
839 854
@@ -1404,6 +1419,8 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
1404 ide_port_tune_devices(hwif); 1419 ide_port_tune_devices(hwif);
1405 } 1420 }
1406 1421
1422 ide_host_enable_irqs(host);
1423
1407 ide_host_for_each_port(i, hwif, host) { 1424 ide_host_for_each_port(i, hwif, host) {
1408 if (hwif == NULL) 1425 if (hwif == NULL)
1409 continue; 1426 continue;
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 36e0675be9f7..020f9573fd82 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -231,6 +231,17 @@ config DM_MIRROR
231 Allow volume managers to mirror logical volumes, also 231 Allow volume managers to mirror logical volumes, also
232 needed for live data migration tools such as 'pvmove'. 232 needed for live data migration tools such as 'pvmove'.
233 233
234config DM_LOG_USERSPACE
235 tristate "Mirror userspace logging (EXPERIMENTAL)"
236 depends on DM_MIRROR && EXPERIMENTAL && NET
237 select CONNECTOR
238 ---help---
239 The userspace logging module provides a mechanism for
240 relaying the dm-dirty-log API to userspace. Log designs
241 which are more suited to userspace implementation (e.g.
242 shared storage logs) or experimental logs can be implemented
243 by leveraging this framework.
244
234config DM_ZERO 245config DM_ZERO
235 tristate "Zero target" 246 tristate "Zero target"
236 depends on BLK_DEV_DM 247 depends on BLK_DEV_DM
@@ -249,6 +260,25 @@ config DM_MULTIPATH
249 ---help--- 260 ---help---
250 Allow volume managers to support multipath hardware. 261 Allow volume managers to support multipath hardware.
251 262
263config DM_MULTIPATH_QL
264 tristate "I/O Path Selector based on the number of in-flight I/Os"
265 depends on DM_MULTIPATH
266 ---help---
267 This path selector is a dynamic load balancer which selects
268 the path with the least number of in-flight I/Os.
269
270 If unsure, say N.
271
272config DM_MULTIPATH_ST
273 tristate "I/O Path Selector based on the service time"
274 depends on DM_MULTIPATH
275 ---help---
276 This path selector is a dynamic load balancer which selects
277 the path expected to complete the incoming I/O in the shortest
278 time.
279
280 If unsure, say N.
281
252config DM_DELAY 282config DM_DELAY
253 tristate "I/O delaying target (EXPERIMENTAL)" 283 tristate "I/O delaying target (EXPERIMENTAL)"
254 depends on BLK_DEV_DM && EXPERIMENTAL 284 depends on BLK_DEV_DM && EXPERIMENTAL
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 45cc5951d928..1dc4185bd781 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -8,6 +8,8 @@ dm-multipath-y += dm-path-selector.o dm-mpath.o
8dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ 8dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
9 dm-snap-persistent.o 9 dm-snap-persistent.o
10dm-mirror-y += dm-raid1.o 10dm-mirror-y += dm-raid1.o
11dm-log-userspace-y \
12 += dm-log-userspace-base.o dm-log-userspace-transfer.o
11md-mod-y += md.o bitmap.o 13md-mod-y += md.o bitmap.o
12raid456-y += raid5.o 14raid456-y += raid5.o
13raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ 15raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
@@ -36,8 +38,11 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
36obj-$(CONFIG_DM_CRYPT) += dm-crypt.o 38obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
37obj-$(CONFIG_DM_DELAY) += dm-delay.o 39obj-$(CONFIG_DM_DELAY) += dm-delay.o
38obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o 40obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
41obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o
42obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o
39obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o 43obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
40obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o 44obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
45obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
41obj-$(CONFIG_DM_ZERO) += dm-zero.o 46obj-$(CONFIG_DM_ZERO) += dm-zero.o
42 47
43quiet_cmd_unroll = UNROLL $@ 48quiet_cmd_unroll = UNROLL $@
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 53394e863c74..9933eb861c71 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1132,6 +1132,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1132 goto bad_crypt_queue; 1132 goto bad_crypt_queue;
1133 } 1133 }
1134 1134
1135 ti->num_flush_requests = 1;
1135 ti->private = cc; 1136 ti->private = cc;
1136 return 0; 1137 return 0;
1137 1138
@@ -1189,6 +1190,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
1189 union map_info *map_context) 1190 union map_info *map_context)
1190{ 1191{
1191 struct dm_crypt_io *io; 1192 struct dm_crypt_io *io;
1193 struct crypt_config *cc;
1194
1195 if (unlikely(bio_empty_barrier(bio))) {
1196 cc = ti->private;
1197 bio->bi_bdev = cc->dev->bdev;
1198 return DM_MAPIO_REMAPPED;
1199 }
1192 1200
1193 io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin); 1201 io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin);
1194 1202
@@ -1305,9 +1313,17 @@ static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
1305 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 1313 return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
1306} 1314}
1307 1315
1316static int crypt_iterate_devices(struct dm_target *ti,
1317 iterate_devices_callout_fn fn, void *data)
1318{
1319 struct crypt_config *cc = ti->private;
1320
1321 return fn(ti, cc->dev, cc->start, data);
1322}
1323
1308static struct target_type crypt_target = { 1324static struct target_type crypt_target = {
1309 .name = "crypt", 1325 .name = "crypt",
1310 .version= {1, 6, 0}, 1326 .version = {1, 7, 0},
1311 .module = THIS_MODULE, 1327 .module = THIS_MODULE,
1312 .ctr = crypt_ctr, 1328 .ctr = crypt_ctr,
1313 .dtr = crypt_dtr, 1329 .dtr = crypt_dtr,
@@ -1318,6 +1334,7 @@ static struct target_type crypt_target = {
1318 .resume = crypt_resume, 1334 .resume = crypt_resume,
1319 .message = crypt_message, 1335 .message = crypt_message,
1320 .merge = crypt_merge, 1336 .merge = crypt_merge,
1337 .iterate_devices = crypt_iterate_devices,
1321}; 1338};
1322 1339
1323static int __init dm_crypt_init(void) 1340static int __init dm_crypt_init(void)
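
The new .iterate_devices method lets the device-mapper core walk every underlying device of a table, for instance to compute combined queue limits; for a single-device target like dm-crypt it reduces to one callout, as in crypt_iterate_devices() above. A minimal sketch for a hypothetical single-device target (my_target is illustrative):

    struct my_target {
            struct dm_dev *dev;     /* the one backing device  */
            sector_t start;         /* offset into that device */
    };

    static int my_iterate_devices(struct dm_target *ti,
                                  iterate_devices_callout_fn fn, void *data)
    {
            struct my_target *mt = ti->private;

            /* a nonzero return from fn() stops the iteration early */
            return fn(ti, mt->dev, mt->start, data);
    }
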
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 559dbb52bc85..4e5b843cd4d7 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -197,6 +197,7 @@ out:
197 mutex_init(&dc->timer_lock); 197 mutex_init(&dc->timer_lock);
198 atomic_set(&dc->may_delay, 1); 198 atomic_set(&dc->may_delay, 1);
199 199
200 ti->num_flush_requests = 1;
200 ti->private = dc; 201 ti->private = dc;
201 return 0; 202 return 0;
202 203
@@ -278,8 +279,9 @@ static int delay_map(struct dm_target *ti, struct bio *bio,
278 279
279 if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { 280 if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
280 bio->bi_bdev = dc->dev_write->bdev; 281 bio->bi_bdev = dc->dev_write->bdev;
281 bio->bi_sector = dc->start_write + 282 if (bio_sectors(bio))
282 (bio->bi_sector - ti->begin); 283 bio->bi_sector = dc->start_write +
284 (bio->bi_sector - ti->begin);
283 285
284 return delay_bio(dc, dc->write_delay, bio); 286 return delay_bio(dc, dc->write_delay, bio);
285 } 287 }
@@ -316,9 +318,26 @@ static int delay_status(struct dm_target *ti, status_type_t type,
316 return 0; 318 return 0;
317} 319}
318 320
321static int delay_iterate_devices(struct dm_target *ti,
322 iterate_devices_callout_fn fn, void *data)
323{
324 struct delay_c *dc = ti->private;
325 int ret = 0;
326
327 ret = fn(ti, dc->dev_read, dc->start_read, data);
328 if (ret)
329 goto out;
330
331 if (dc->dev_write)
332 ret = fn(ti, dc->dev_write, dc->start_write, data);
333
334out:
335 return ret;
336}
337
319static struct target_type delay_target = { 338static struct target_type delay_target = {
320 .name = "delay", 339 .name = "delay",
321 .version = {1, 0, 2}, 340 .version = {1, 1, 0},
322 .module = THIS_MODULE, 341 .module = THIS_MODULE,
323 .ctr = delay_ctr, 342 .ctr = delay_ctr,
324 .dtr = delay_dtr, 343 .dtr = delay_dtr,
@@ -326,6 +345,7 @@ static struct target_type delay_target = {
326 .presuspend = delay_presuspend, 345 .presuspend = delay_presuspend,
327 .resume = delay_resume, 346 .resume = delay_resume,
328 .status = delay_status, 347 .status = delay_status,
348 .iterate_devices = delay_iterate_devices,
329}; 349};
330 350
331static int __init dm_delay_init(void) 351static int __init dm_delay_init(void)
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 75d8081a9041..c3ae51584b12 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -216,7 +216,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
216 return -EINVAL; 216 return -EINVAL;
217 } 217 }
218 218
219 type = get_type(argv[1]); 219 type = get_type(&persistent);
220 if (!type) { 220 if (!type) {
221 ti->error = "Exception store type not recognised"; 221 ti->error = "Exception store type not recognised";
222 r = -EINVAL; 222 r = -EINVAL;
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index c92701dc5001..2442c8c07898 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -156,7 +156,7 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
156 */ 156 */
157static inline sector_t get_dev_size(struct block_device *bdev) 157static inline sector_t get_dev_size(struct block_device *bdev)
158{ 158{
159 return bdev->bd_inode->i_size >> SECTOR_SHIFT; 159 return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
160} 160}
161 161
162static inline chunk_t sector_to_chunk(struct dm_exception_store *store, 162static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index e73aabd61cd7..3a2e6a2f8bdd 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -22,6 +22,7 @@ struct dm_io_client {
22/* FIXME: can we shrink this ? */ 22/* FIXME: can we shrink this ? */
23struct io { 23struct io {
24 unsigned long error_bits; 24 unsigned long error_bits;
25 unsigned long eopnotsupp_bits;
25 atomic_t count; 26 atomic_t count;
26 struct task_struct *sleeper; 27 struct task_struct *sleeper;
27 struct dm_io_client *client; 28 struct dm_io_client *client;
@@ -107,8 +108,11 @@ static inline unsigned bio_get_region(struct bio *bio)
107 *---------------------------------------------------------------*/ 108 *---------------------------------------------------------------*/
108static void dec_count(struct io *io, unsigned int region, int error) 109static void dec_count(struct io *io, unsigned int region, int error)
109{ 110{
110 if (error) 111 if (error) {
111 set_bit(region, &io->error_bits); 112 set_bit(region, &io->error_bits);
113 if (error == -EOPNOTSUPP)
114 set_bit(region, &io->eopnotsupp_bits);
115 }
112 116
113 if (atomic_dec_and_test(&io->count)) { 117 if (atomic_dec_and_test(&io->count)) {
114 if (io->sleeper) 118 if (io->sleeper)
@@ -360,7 +364,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
360 return -EIO; 364 return -EIO;
361 } 365 }
362 366
367retry:
363 io.error_bits = 0; 368 io.error_bits = 0;
369 io.eopnotsupp_bits = 0;
364 atomic_set(&io.count, 1); /* see dispatch_io() */ 370 atomic_set(&io.count, 1); /* see dispatch_io() */
365 io.sleeper = current; 371 io.sleeper = current;
366 io.client = client; 372 io.client = client;
@@ -377,6 +383,11 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
377 } 383 }
378 set_current_state(TASK_RUNNING); 384 set_current_state(TASK_RUNNING);
379 385
386 if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
387 rw &= ~(1 << BIO_RW_BARRIER);
388 goto retry;
389 }
390
380 if (error_bits) 391 if (error_bits)
381 *error_bits = io.error_bits; 392 *error_bits = io.error_bits;
382 393
@@ -397,6 +408,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
397 408
398 io = mempool_alloc(client->pool, GFP_NOIO); 409 io = mempool_alloc(client->pool, GFP_NOIO);
399 io->error_bits = 0; 410 io->error_bits = 0;
411 io->eopnotsupp_bits = 0;
400 atomic_set(&io->count, 1); /* see dispatch_io() */ 412 atomic_set(&io->count, 1); /* see dispatch_io() */
401 io->sleeper = NULL; 413 io->sleeper = NULL;
402 io->client = client; 414 io->client = client;
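
Note: the hunks above add a one-shot barrier fallback to sync_io(). If any
region fails a barrier write with -EOPNOTSUPP, the BIO_RW_BARRIER bit is
cleared and the whole I/O is reissued as a plain write, which is why both
bit fields are re-zeroed at the retry label. In outline (a sketch using the
driver's own names, with the dispatch step elided):

	retry:
		io.error_bits = 0;
		io.eopnotsupp_bits = 0;
		/* ... dispatch all regions and sleep until completion ... */
		if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
			rw &= ~(1 << BIO_RW_BARRIER);	/* retry without the barrier */
			goto retry;
		}

async_io() zeroes the new field as well but never retries.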
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 1128d3fba797..7f77f18fcafa 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -276,7 +276,7 @@ retry:
276 up_write(&_hash_lock); 276 up_write(&_hash_lock);
277} 277}
278 278
279static int dm_hash_rename(const char *old, const char *new) 279static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
280{ 280{
281 char *new_name, *old_name; 281 char *new_name, *old_name;
282 struct hash_cell *hc; 282 struct hash_cell *hc;
@@ -333,7 +333,7 @@ static int dm_hash_rename(const char *old, const char *new)
333 dm_table_put(table); 333 dm_table_put(table);
334 } 334 }
335 335
336 dm_kobject_uevent(hc->md); 336 dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie);
337 337
338 dm_put(hc->md); 338 dm_put(hc->md);
339 up_write(&_hash_lock); 339 up_write(&_hash_lock);
@@ -680,6 +680,9 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
680 680
681 __hash_remove(hc); 681 __hash_remove(hc);
682 up_write(&_hash_lock); 682 up_write(&_hash_lock);
683
684 dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr);
685
683 dm_put(md); 686 dm_put(md);
684 param->data_size = 0; 687 param->data_size = 0;
685 return 0; 688 return 0;
@@ -715,7 +718,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
715 return r; 718 return r;
716 719
717 param->data_size = 0; 720 param->data_size = 0;
718 return dm_hash_rename(param->name, new_name); 721 return dm_hash_rename(param->event_nr, param->name, new_name);
719} 722}
720 723
721static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) 724static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
@@ -842,8 +845,11 @@ static int do_resume(struct dm_ioctl *param)
842 if (dm_suspended(md)) 845 if (dm_suspended(md))
843 r = dm_resume(md); 846 r = dm_resume(md);
844 847
845 if (!r) 848
849 if (!r) {
850 dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr);
846 r = __dev_status(md, param); 851 r = __dev_status(md, param);
852 }
847 853
848 dm_put(md); 854 dm_put(md);
849 return r; 855 return r;
@@ -1044,6 +1050,12 @@ static int populate_table(struct dm_table *table,
1044 next = spec->next; 1050 next = spec->next;
1045 } 1051 }
1046 1052
1053 r = dm_table_set_type(table);
1054 if (r) {
1055 DMWARN("unable to set table type");
1056 return r;
1057 }
1058
1047 return dm_table_complete(table); 1059 return dm_table_complete(table);
1048} 1060}
1049 1061
@@ -1089,6 +1101,13 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
1089 goto out; 1101 goto out;
1090 } 1102 }
1091 1103
1104 r = dm_table_alloc_md_mempools(t);
1105 if (r) {
1106 DMWARN("unable to allocate mempools for this table");
1107 dm_table_destroy(t);
1108 goto out;
1109 }
1110
1092 down_write(&_hash_lock); 1111 down_write(&_hash_lock);
1093 hc = dm_get_mdptr(md); 1112 hc = dm_get_mdptr(md);
1094 if (!hc || hc->md != md) { 1113 if (!hc || hc->md != md) {
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 79fb53e51c70..9184b6deb868 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -53,6 +53,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
53 goto bad; 53 goto bad;
54 } 54 }
55 55
56 ti->num_flush_requests = 1;
56 ti->private = lc; 57 ti->private = lc;
57 return 0; 58 return 0;
58 59
@@ -81,7 +82,8 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
81 struct linear_c *lc = ti->private; 82 struct linear_c *lc = ti->private;
82 83
83 bio->bi_bdev = lc->dev->bdev; 84 bio->bi_bdev = lc->dev->bdev;
84 bio->bi_sector = linear_map_sector(ti, bio->bi_sector); 85 if (bio_sectors(bio))
86 bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
85} 87}
86 88
87static int linear_map(struct dm_target *ti, struct bio *bio, 89static int linear_map(struct dm_target *ti, struct bio *bio,
@@ -132,9 +134,17 @@ static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
132 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 134 return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
133} 135}
134 136
137static int linear_iterate_devices(struct dm_target *ti,
138 iterate_devices_callout_fn fn, void *data)
139{
140 struct linear_c *lc = ti->private;
141
142 return fn(ti, lc->dev, lc->start, data);
143}
144
135static struct target_type linear_target = { 145static struct target_type linear_target = {
136 .name = "linear", 146 .name = "linear",
137 .version= {1, 0, 3}, 147 .version = {1, 1, 0},
138 .module = THIS_MODULE, 148 .module = THIS_MODULE,
139 .ctr = linear_ctr, 149 .ctr = linear_ctr,
140 .dtr = linear_dtr, 150 .dtr = linear_dtr,
@@ -142,6 +152,7 @@ static struct target_type linear_target = {
142 .status = linear_status, 152 .status = linear_status,
143 .ioctl = linear_ioctl, 153 .ioctl = linear_ioctl,
144 .merge = linear_merge, 154 .merge = linear_merge,
155 .iterate_devices = linear_iterate_devices,
145}; 156};
146 157
147int __init dm_linear_init(void) 158int __init dm_linear_init(void)
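
Note: the converted targets above share one iterate_devices shape: dm core
supplies a callout and the target invokes it once per underlying device,
stopping at the first non-zero return. A minimal sketch of such a callout,
matching the four-argument signature that delay_iterate_devices and
linear_iterate_devices pass through (the callback itself is hypothetical):

	#include <linux/device-mapper.h>

	/* Count the devices a target sits on; non-zero would stop the walk. */
	static int count_devs(struct dm_target *ti, struct dm_dev *dev,
			      sector_t start, void *data)
	{
		unsigned *count = data;

		(*count)++;
		return 0;
	}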
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
new file mode 100644
index 000000000000..e69b96560997
--- /dev/null
+++ b/drivers/md/dm-log-userspace-base.c
@@ -0,0 +1,696 @@
1/*
2 * Copyright (C) 2006-2009 Red Hat, Inc.
3 *
4 * This file is released under the LGPL.
5 */
6
7#include <linux/bio.h>
8#include <linux/dm-dirty-log.h>
9#include <linux/device-mapper.h>
10#include <linux/dm-log-userspace.h>
11
12#include "dm-log-userspace-transfer.h"
13
14struct flush_entry {
15 int type;
16 region_t region;
17 struct list_head list;
18};
19
20struct log_c {
21 struct dm_target *ti;
22 uint32_t region_size;
23 region_t region_count;
24 char uuid[DM_UUID_LEN];
25
26 char *usr_argv_str;
27 uint32_t usr_argc;
28
29 /*
30 * in_sync_hint gets set when doing is_remote_recovering. It
31 * represents the first region that needs recovery. IOW, the
32 * first zero bit of sync_bits. This can be useful to limit
33 * traffic for calls like is_remote_recovering and get_resync_work,
34 * but take care when using it for anything else.
35 */
36 uint64_t in_sync_hint;
37
38 spinlock_t flush_lock;
39 struct list_head flush_list; /* only for clear and mark requests */
40};
41
42static mempool_t *flush_entry_pool;
43
44static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
45{
46 return kmalloc(sizeof(struct flush_entry), gfp_mask);
47}
48
49static void flush_entry_free(void *element, void *pool_data)
50{
51 kfree(element);
52}
53
54static int userspace_do_request(struct log_c *lc, const char *uuid,
55 int request_type, char *data, size_t data_size,
56 char *rdata, size_t *rdata_size)
57{
58 int r;
59
60 /*
61 * If the server isn't there, -ESRCH is returned,
62 * and we must keep trying until the server is
63 * restored.
64 */
65retry:
66 r = dm_consult_userspace(uuid, request_type, data,
67 data_size, rdata, rdata_size);
68
69 if (r != -ESRCH)
70 return r;
71
72 DMERR(" Userspace log server not found.");
73 while (1) {
74 set_current_state(TASK_INTERRUPTIBLE);
75 schedule_timeout(2*HZ);
76 DMWARN("Attempting to contact userspace log server...");
77 r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str,
78 strlen(lc->usr_argv_str) + 1,
79 NULL, NULL);
80 if (!r)
81 break;
82 }
83 DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
84 r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
85 0, NULL, NULL);
86 if (!r)
87 goto retry;
88
89 DMERR("Error trying to resume userspace log: %d", r);
90
91 return -ESRCH;
92}
93
94static int build_constructor_string(struct dm_target *ti,
95 unsigned argc, char **argv,
96 char **ctr_str)
97{
98 int i, str_size;
99 char *str = NULL;
100
101 *ctr_str = NULL;
102
103 for (i = 0, str_size = 0; i < argc; i++)
104 str_size += strlen(argv[i]) + 1; /* +1 for space between args */
105
106 str_size += 20; /* Max number of chars in a printed u64 number */
107
108 str = kzalloc(str_size, GFP_KERNEL);
109 if (!str) {
110 DMWARN("Unable to allocate memory for constructor string");
111 return -ENOMEM;
112 }
113
114 for (i = 0, str_size = 0; i < argc; i++)
115 str_size += sprintf(str + str_size, "%s ", argv[i]);
116 str_size += sprintf(str + str_size, "%llu",
117 (unsigned long long)ti->len);
118
119 *ctr_str = str;
120 return str_size;
121}
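
Note: a worked example of the constructed string, with hypothetical
arguments:

	/*
	 * For argv = { "clustered_disk", "3", "/dev/vg/log", "1024" } and
	 * ti->len == 8388608, the buffer ends up holding
	 *	"clustered_disk 3 /dev/vg/log 1024 8388608"
	 * (arguments joined by single spaces, the target length appended)
	 * and the returned value is that string's length.
	 */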
122
123/*
124 * userspace_ctr
125 *
126 * argv contains:
127 * <UUID> <other args>
128 * Where 'other args' are the userspace-implementation-specific log
129 * arguments. An example might be:
130 * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
131 *
132 * So, this module will strip off the <UUID> for identification purposes
133 * when communicating with userspace about a log; but will pass on everything
134 * else.
135 */
136static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
137 unsigned argc, char **argv)
138{
139 int r = 0;
140 int str_size;
141 char *ctr_str = NULL;
142 struct log_c *lc = NULL;
143 uint64_t rdata;
144 size_t rdata_size = sizeof(rdata);
145
146 if (argc < 3) {
147 DMWARN("Too few arguments to userspace dirty log");
148 return -EINVAL;
149 }
150
151 lc = kmalloc(sizeof(*lc), GFP_KERNEL);
152 if (!lc) {
153 DMWARN("Unable to allocate userspace log context.");
154 return -ENOMEM;
155 }
156
157 lc->ti = ti;
158
159 if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
160 DMWARN("UUID argument too long.");
161 kfree(lc);
162 return -EINVAL;
163 }
164
165 strncpy(lc->uuid, argv[0], DM_UUID_LEN);
166 spin_lock_init(&lc->flush_lock);
167 INIT_LIST_HEAD(&lc->flush_list);
168
169 str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
170 if (str_size < 0) {
171 kfree(lc);
172 return str_size;
173 }
174
175 /* Send table string */
176 r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
177 ctr_str, str_size, NULL, NULL);
178
179 if (r == -ESRCH) {
180 DMERR("Userspace log server not found");
181 goto out;
182 }
183
184 /* Since the region size does not change, get it now */
185 rdata_size = sizeof(rdata);
186 r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
187 NULL, 0, (char *)&rdata, &rdata_size);
188
189 if (r) {
190 DMERR("Failed to get region size of dirty log");
191 goto out;
192 }
193
194 lc->region_size = (uint32_t)rdata;
195 lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
196
197out:
198 if (r) {
199 kfree(lc);
200 kfree(ctr_str);
201 } else {
202 lc->usr_argv_str = ctr_str;
203 lc->usr_argc = argc;
204 log->context = lc;
205 }
206
207 return r;
208}
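
Note: combined with a mirror target, a table line feeding this constructor
might look like the following; every value is hypothetical and the <UUID>
placeholder is deliberate (see Documentation/device-mapper/dm-log.txt for
the authoritative format):

	0 8388608 mirror userspace 6 <UUID> clustered_disk 3 /dev/vg/log 1024 nosync 2 /dev/vg/leg0 0 /dev/vg/leg1 0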
209
210static void userspace_dtr(struct dm_dirty_log *log)
211{
212 int r;
213 struct log_c *lc = log->context;
214
215 r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR,
216 NULL, 0,
217 NULL, NULL);
218
219 kfree(lc->usr_argv_str);
220 kfree(lc);
221
222 return;
223}
224
225static int userspace_presuspend(struct dm_dirty_log *log)
226{
227 int r;
228 struct log_c *lc = log->context;
229
230 r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
231 NULL, 0,
232 NULL, NULL);
233
234 return r;
235}
236
237static int userspace_postsuspend(struct dm_dirty_log *log)
238{
239 int r;
240 struct log_c *lc = log->context;
241
242 r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
243 NULL, 0,
244 NULL, NULL);
245
246 return r;
247}
248
249static int userspace_resume(struct dm_dirty_log *log)
250{
251 int r;
252 struct log_c *lc = log->context;
253
254 lc->in_sync_hint = 0;
255 r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
256 NULL, 0,
257 NULL, NULL);
258
259 return r;
260}
261
262static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
263{
264 struct log_c *lc = log->context;
265
266 return lc->region_size;
267}
268
269/*
270 * userspace_is_clean
271 *
272 * Check whether a region is clean. If there is any sort of
273 * failure when consulting the server, we return not clean.
274 *
275 * Returns: 1 if clean, 0 otherwise
276 */
277static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
278{
279 int r;
280 uint64_t region64 = (uint64_t)region;
281 int64_t is_clean;
282 size_t rdata_size;
283 struct log_c *lc = log->context;
284
285 rdata_size = sizeof(is_clean);
286 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
287 (char *)&region64, sizeof(region64),
288 (char *)&is_clean, &rdata_size);
289
290 return (r) ? 0 : (int)is_clean;
291}
292
293/*
294 * userspace_in_sync
295 *
296 * Check if the region is in-sync. If there is any sort
297 * of failure when consulting the server, we assume that
298 * the region is not in sync.
299 *
300 * If 'can_block' is not set, return -EWOULDBLOCK immediately.
301 *
302 * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
303 */
304static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
305 int can_block)
306{
307 int r;
308 uint64_t region64 = region;
309 int64_t in_sync;
310 size_t rdata_size;
311 struct log_c *lc = log->context;
312
313 /*
314 * We can never respond directly - even if in_sync_hint is
315 * set. This is because another machine could see a device
316 * failure and mark the region out-of-sync. If we don't go
317 * to userspace to ask, we might think the region is in-sync
318 * and allow a read to pick up data that is stale. (This is
319 * very unlikely if a device actually fails; but it is very
320 * likely if a connection to one device from one machine fails.)
321 *
322 * There still might be a problem if the mirror caches the region
323 * state as in-sync... but then this call would not be made. So,
324 * that is a mirror problem.
325 */
326 if (!can_block)
327 return -EWOULDBLOCK;
328
329 rdata_size = sizeof(in_sync);
330 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
331 (char *)&region64, sizeof(region64),
332 (char *)&in_sync, &rdata_size);
333 return (r) ? 0 : (int)in_sync;
334}
335
336/*
337 * userspace_flush
338 *
339 * This function is ok to block.
340 * The flush happens in two stages. First, it sends all
341 * clear/mark requests that are on the list. Then it
342 * tells the server to commit them. This gives the
343 * server a chance to optimise the commit, instead of
344 * doing it for every request.
345 *
346 * Additionally, we could implement another thread that
347 * sends the requests up to the server - reducing the
348 * load on flush. Then the flush would have fewer entries in
349 * its list and be responsible only for the final commit.
350 *
351 * Returns: 0 on success, < 0 on failure
352 */
353static int userspace_flush(struct dm_dirty_log *log)
354{
355 int r = 0;
356 unsigned long flags;
357 struct log_c *lc = log->context;
358 LIST_HEAD(flush_list);
359 struct flush_entry *fe, *tmp_fe;
360
361 spin_lock_irqsave(&lc->flush_lock, flags);
362 list_splice_init(&lc->flush_list, &flush_list);
363 spin_unlock_irqrestore(&lc->flush_lock, flags);
364
365 if (list_empty(&flush_list))
366 return 0;
367
368 /*
369 * FIXME: Count up requests, group request types,
370 * allocate memory to stick all requests in and
371 * send to server in one go. Failing the allocation,
372 * do it one by one.
373 */
374
375 list_for_each_entry(fe, &flush_list, list) {
376 r = userspace_do_request(lc, lc->uuid, fe->type,
377 (char *)&fe->region,
378 sizeof(fe->region),
379 NULL, NULL);
380 if (r)
381 goto fail;
382 }
383
384 r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
385 NULL, 0, NULL, NULL);
386
387fail:
388 /*
389 * We can safely remove these entries, even on failure.
390 * Calling code will receive an error and will know that
391 * the log facility has failed.
392 */
393 list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
394 list_del(&fe->list);
395 mempool_free(fe, flush_entry_pool);
396 }
397
398 if (r)
399 dm_table_event(lc->ti->table);
400
401 return r;
402}
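
Note: with the batching above, a flush covering two dirty regions costs
three round-trips, and the commit is issued once at the end. Illustrative
sequence (request types from dm-log-userspace.h):

	/*
	 * userspace_do_request(lc, uuid, DM_ULOG_MARK_REGION, &r1, ...);
	 * userspace_do_request(lc, uuid, DM_ULOG_MARK_REGION, &r2, ...);
	 * userspace_do_request(lc, uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL);
	 */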
403
404/*
405 * userspace_mark_region
406 *
407 * This function should avoid blocking unless absolutely required.
408 * (Memory allocation is valid for blocking.)
409 */
410static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
411{
412 unsigned long flags;
413 struct log_c *lc = log->context;
414 struct flush_entry *fe;
415
416 /* Wait for an allocation, but _never_ fail */
417 fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
418 BUG_ON(!fe);
419
420 spin_lock_irqsave(&lc->flush_lock, flags);
421 fe->type = DM_ULOG_MARK_REGION;
422 fe->region = region;
423 list_add(&fe->list, &lc->flush_list);
424 spin_unlock_irqrestore(&lc->flush_lock, flags);
425
426 return;
427}
428
429/*
430 * userspace_clear_region
431 *
432 * This function must not block.
433 * So, the alloc can't block. In the worst case, it is ok to
434 * fail. It would simply mean we can't clear the region.
435 * Does nothing to current sync context, but does mean
436 * the region will be re-sync'ed on a reload of the mirror
437 * even though it is in-sync.
438 */
439static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
440{
441 unsigned long flags;
442 struct log_c *lc = log->context;
443 struct flush_entry *fe;
444
445 /*
446 * If we fail to allocate, we skip the clearing of
447 * the region. This doesn't hurt us in any way, except
448 * to cause the region to be resync'ed when the
449 * device is activated next time.
450 */
451 fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
452 if (!fe) {
453 DMERR("Failed to allocate memory to clear region.");
454 return;
455 }
456
457 spin_lock_irqsave(&lc->flush_lock, flags);
458 fe->type = DM_ULOG_CLEAR_REGION;
459 fe->region = region;
460 list_add(&fe->list, &lc->flush_list);
461 spin_unlock_irqrestore(&lc->flush_lock, flags);
462
463 return;
464}
465
466/*
467 * userspace_get_resync_work
468 *
469 * Get a region that needs recovery. It is valid to return
470 * an error for this function.
471 *
472 * Returns: 1 if region filled, 0 if no work, <0 on error
473 */
474static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
475{
476 int r;
477 size_t rdata_size;
478 struct log_c *lc = log->context;
479 struct {
480 int64_t i; /* 64-bit for mixed-arch compatibility */
481 region_t r;
482 } pkg;
483
484 if (lc->in_sync_hint >= lc->region_count)
485 return 0;
486
487 rdata_size = sizeof(pkg);
488 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
489 NULL, 0,
490 (char *)&pkg, &rdata_size);
491
492 *region = pkg.r;
493 return (r) ? r : (int)pkg.i;
494}
495
496/*
497 * userspace_set_region_sync
498 *
499 * Set the sync status of a given region. This function
500 * must not fail.
501 */
502static void userspace_set_region_sync(struct dm_dirty_log *log,
503 region_t region, int in_sync)
504{
505 int r;
506 struct log_c *lc = log->context;
507 struct {
508 region_t r;
509 int64_t i;
510 } pkg;
511
512 pkg.r = region;
513 pkg.i = (int64_t)in_sync;
514
515 r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
516 (char *)&pkg, sizeof(pkg),
517 NULL, NULL);
518
519 /*
520 * It would be nice to be able to report failures.
521 * However, it is easy enough to detect and resolve.
522 */
523 return;
524}
525
526/*
527 * userspace_get_sync_count
528 *
529 * If there is any sort of failure when consulting the server,
530 * we assume that the sync count is zero.
531 *
532 * Returns: sync count on success, 0 on failure
533 */
534static region_t userspace_get_sync_count(struct dm_dirty_log *log)
535{
536 int r;
537 size_t rdata_size;
538 uint64_t sync_count;
539 struct log_c *lc = log->context;
540
541 rdata_size = sizeof(sync_count);
542 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
543 NULL, 0,
544 (char *)&sync_count, &rdata_size);
545
546 if (r)
547 return 0;
548
549 if (sync_count >= lc->region_count)
550 lc->in_sync_hint = lc->region_count;
551
552 return (region_t)sync_count;
553}
554
555/*
556 * userspace_status
557 *
558 * Returns: amount of space consumed
559 */
560static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
561 char *result, unsigned maxlen)
562{
563 int r = 0;
564 size_t sz = (size_t)maxlen;
565 struct log_c *lc = log->context;
566
567 switch (status_type) {
568 case STATUSTYPE_INFO:
569 r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
570 NULL, 0,
571 result, &sz);
572
573 if (r) {
574 sz = 0;
575 DMEMIT("%s 1 COM_FAILURE", log->type->name);
576 }
577 break;
578 case STATUSTYPE_TABLE:
579 sz = 0;
580 DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1,
581 lc->uuid, lc->usr_argv_str);
582 break;
583 }
584 return (r) ? 0 : (int)sz;
585}
586
587/*
588 * userspace_is_remote_recovering
589 *
590 * Returns: 1 if region recovering, 0 otherwise
591 */
592static int userspace_is_remote_recovering(struct dm_dirty_log *log,
593 region_t region)
594{
595 int r;
596 uint64_t region64 = region;
597 struct log_c *lc = log->context;
598 static unsigned long long limit;
599 struct {
600 int64_t is_recovering;
601 uint64_t in_sync_hint;
602 } pkg;
603 size_t rdata_size = sizeof(pkg);
604
605 /*
606 * Once the mirror has been reported to be in-sync,
607 * it will never again ask for recovery work. So,
608 * we can safely say there is not a remote machine
609 * recovering if the device is in-sync. (in_sync_hint
610 * must be reset at resume time.)
611 */
612 if (region < lc->in_sync_hint)
613 return 0;
614 else if (jiffies < limit)
615 return 1;
616
617 limit = jiffies + (HZ / 4);
618 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
619 (char *)&region64, sizeof(region64),
620 (char *)&pkg, &rdata_size);
621 if (r)
622 return 1;
623
624 lc->in_sync_hint = pkg.in_sync_hint;
625
626 return (int)pkg.is_recovering;
627}
628
629static struct dm_dirty_log_type _userspace_type = {
630 .name = "userspace",
631 .module = THIS_MODULE,
632 .ctr = userspace_ctr,
633 .dtr = userspace_dtr,
634 .presuspend = userspace_presuspend,
635 .postsuspend = userspace_postsuspend,
636 .resume = userspace_resume,
637 .get_region_size = userspace_get_region_size,
638 .is_clean = userspace_is_clean,
639 .in_sync = userspace_in_sync,
640 .flush = userspace_flush,
641 .mark_region = userspace_mark_region,
642 .clear_region = userspace_clear_region,
643 .get_resync_work = userspace_get_resync_work,
644 .set_region_sync = userspace_set_region_sync,
645 .get_sync_count = userspace_get_sync_count,
646 .status = userspace_status,
647 .is_remote_recovering = userspace_is_remote_recovering,
648};
649
650static int __init userspace_dirty_log_init(void)
651{
652 int r = 0;
653
654 flush_entry_pool = mempool_create(100, flush_entry_alloc,
655 flush_entry_free, NULL);
656
657 if (!flush_entry_pool) {
658 DMWARN("Unable to create flush_entry_pool: No memory.");
659 return -ENOMEM;
660 }
661
662 r = dm_ulog_tfr_init();
663 if (r) {
664 DMWARN("Unable to initialize userspace log communications");
665 mempool_destroy(flush_entry_pool);
666 return r;
667 }
668
669 r = dm_dirty_log_type_register(&_userspace_type);
670 if (r) {
671 DMWARN("Couldn't register userspace dirty log type");
672 dm_ulog_tfr_exit();
673 mempool_destroy(flush_entry_pool);
674 return r;
675 }
676
677 DMINFO("version 1.0.0 loaded");
678 return 0;
679}
680
681static void __exit userspace_dirty_log_exit(void)
682{
683 dm_dirty_log_type_unregister(&_userspace_type);
684 dm_ulog_tfr_exit();
685 mempool_destroy(flush_entry_pool);
686
687 DMINFO("version 1.0.0 unloaded");
688 return;
689}
690
691module_init(userspace_dirty_log_init);
692module_exit(userspace_dirty_log_exit);
693
694MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
695MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
696MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
new file mode 100644
index 000000000000..0ca1ee768a1f
--- /dev/null
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -0,0 +1,276 @@
1/*
2 * Copyright (C) 2006-2009 Red Hat, Inc.
3 *
4 * This file is released under the LGPL.
5 */
6
7#include <linux/kernel.h>
8#include <linux/module.h>
9#include <net/sock.h>
10#include <linux/workqueue.h>
11#include <linux/connector.h>
12#include <linux/device-mapper.h>
13#include <linux/dm-log-userspace.h>
14
15#include "dm-log-userspace-transfer.h"
16
17static uint32_t dm_ulog_seq;
18
19/*
20 * Netlink/Connector is an unreliable protocol. How long should
21 * we wait for a response before assuming it was lost and retrying?
22 * (If a response does arrive after this time, it will be discarded
23 * and we will wait for the response to the resent request.)
24 */
25#define DM_ULOG_RETRY_TIMEOUT (15 * HZ)
26
27/*
28 * Pre-allocated space for speed
29 */
30#define DM_ULOG_PREALLOCED_SIZE 512
31static struct cn_msg *prealloced_cn_msg;
32static struct dm_ulog_request *prealloced_ulog_tfr;
33
34static struct cb_id ulog_cn_id = {
35 .idx = CN_IDX_DM,
36 .val = CN_VAL_DM_USERSPACE_LOG
37};
38
39static DEFINE_MUTEX(dm_ulog_lock);
40
41struct receiving_pkg {
42 struct list_head list;
43 struct completion complete;
44
45 uint32_t seq;
46
47 int error;
48 size_t *data_size;
49 char *data;
50};
51
52static DEFINE_SPINLOCK(receiving_list_lock);
53static struct list_head receiving_list;
54
55static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
56{
57 int r;
58 struct cn_msg *msg = prealloced_cn_msg;
59
60 memset(msg, 0, sizeof(struct cn_msg));
61
62 msg->id.idx = ulog_cn_id.idx;
63 msg->id.val = ulog_cn_id.val;
64 msg->ack = 0;
65 msg->seq = tfr->seq;
66 msg->len = sizeof(struct dm_ulog_request) + tfr->data_size;
67
68 r = cn_netlink_send(msg, 0, gfp_any());
69
70 return r;
71}
72
73/*
74 * Parameters for this function can be either msg or tfr, but not
75 * both. This function fills in the reply for a waiting request.
76 * If just msg is given, then the reply is simply an ACK from userspace
77 * that the request was received.
78 *
79 * Returns: 0 on success, -ENOENT on failure
80 */
81static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
82{
83 uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0;
84 struct receiving_pkg *pkg;
85
86 /*
87 * The 'receiving_pkg' entries in this list are allocated
88 * on the stack in 'dm_consult_userspace'.
89 * Each process that is waiting for a reply from the user
90 * space server will have an entry in this list.
91 *
92 * We are safe to do it this way because the stack space
93 * is unique to each process, but still addressable by
94 * other processes.
95 */
96 list_for_each_entry(pkg, &receiving_list, list) {
97 if (rtn_seq != pkg->seq)
98 continue;
99
100 if (msg) {
101 pkg->error = -msg->ack;
102 /*
103 * If we are trying again, we will need to know our
104 * storage capacity. Otherwise, along with the
105 * error code, we make explicit that we have no data.
106 */
107 if (pkg->error != -EAGAIN)
108 *(pkg->data_size) = 0;
109 } else if (tfr->data_size > *(pkg->data_size)) {
110 DMERR("Insufficient space to receive package [%u] "
111 "(%u vs %lu)", tfr->request_type,
112 tfr->data_size, *(pkg->data_size));
113
114 *(pkg->data_size) = 0;
115 pkg->error = -ENOSPC;
116 } else {
117 pkg->error = tfr->error;
118 memcpy(pkg->data, tfr->data, tfr->data_size);
119 *(pkg->data_size) = tfr->data_size;
120 }
121 complete(&pkg->complete);
122 return 0;
123 }
124
125 return -ENOENT;
126}
127
128/*
129 * This is the connector callback that delivers data
130 * that was sent from userspace.
131 */
132static void cn_ulog_callback(void *data)
133{
134 struct cn_msg *msg = (struct cn_msg *)data;
135 struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
136
137 spin_lock(&receiving_list_lock);
138 if (msg->len == 0)
139 fill_pkg(msg, NULL);
140 else if (msg->len < sizeof(*tfr))
141 DMERR("Incomplete message received (expected %u, got %u): [%u]",
142 (unsigned)sizeof(*tfr), msg->len, msg->seq);
143 else
144 fill_pkg(NULL, tfr);
145 spin_unlock(&receiving_list_lock);
146}
147
148/**
149 * dm_consult_userspace
150 * @uuid: log's uuid (must be DM_UUID_LEN in size)
151 * @request_type: found in include/linux/dm-log-userspace.h
152 * @data: data to tx to the server
153 * @data_size: size of data in bytes
154 * @rdata: place to put return data from server
155 * @rdata_size: value-result (amount of space given/amount of space used)
156 *
157 * rdata_size is undefined on failure.
158 *
159 * Memory used to communicate with userspace is zeroed
160 * before populating to ensure that no unwanted bits leak
161 * from kernel space to user-space. All userspace log communications
162 * between kernel and user space go through this function.
163 *
164 * Returns: 0 on success, -EXXX on failure
165 **/
166int dm_consult_userspace(const char *uuid, int request_type,
167 char *data, size_t data_size,
168 char *rdata, size_t *rdata_size)
169{
170 int r = 0;
171 size_t dummy = 0;
172 int overhead_size =
173 sizeof(struct dm_ulog_request *) + sizeof(struct cn_msg);
174 struct dm_ulog_request *tfr = prealloced_ulog_tfr;
175 struct receiving_pkg pkg;
176
177 if (data_size > (DM_ULOG_PREALLOCED_SIZE - overhead_size)) {
178 DMINFO("Size of tfr exceeds preallocated size");
179 return -EINVAL;
180 }
181
182 if (!rdata_size)
183 rdata_size = &dummy;
184resend:
185 /*
186 * We serialize the sending of requests so we can
187 * use the preallocated space.
188 */
189 mutex_lock(&dm_ulog_lock);
190
191 memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size);
192 memcpy(tfr->uuid, uuid, DM_UUID_LEN);
193 tfr->seq = dm_ulog_seq++;
194
195 /*
196 * Must be valid request type (all other bits set to
197 * zero). This reserves other bits for possible future
198 * use.
199 */
200 tfr->request_type = request_type & DM_ULOG_REQUEST_MASK;
201
202 tfr->data_size = data_size;
203 if (data && data_size)
204 memcpy(tfr->data, data, data_size);
205
206 memset(&pkg, 0, sizeof(pkg));
207 init_completion(&pkg.complete);
208 pkg.seq = tfr->seq;
209 pkg.data_size = rdata_size;
210 pkg.data = rdata;
211 spin_lock(&receiving_list_lock);
212 list_add(&(pkg.list), &receiving_list);
213 spin_unlock(&receiving_list_lock);
214
215 r = dm_ulog_sendto_server(tfr);
216
217 mutex_unlock(&dm_ulog_lock);
218
219 if (r) {
220 DMERR("Unable to send log request [%u] to userspace: %d",
221 request_type, r);
222 spin_lock(&receiving_list_lock);
223 list_del_init(&(pkg.list));
224 spin_unlock(&receiving_list_lock);
225
226 goto out;
227 }
228
229 r = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT);
230 spin_lock(&receiving_list_lock);
231 list_del_init(&(pkg.list));
232 spin_unlock(&receiving_list_lock);
233 if (!r) {
234 DMWARN("[%s] Request timed out: [%u/%u] - retrying",
235 (strlen(uuid) > 8) ?
236 (uuid + (strlen(uuid) - 8)) : (uuid),
237 request_type, pkg.seq);
238 goto resend;
239 }
240
241 r = pkg.error;
242 if (r == -EAGAIN)
243 goto resend;
244
245out:
246 return r;
247}
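
Note: a typical call, following the pattern userspace_ctr uses (sketch;
since rdata_size is value-result it must be re-initialised before every
call):

	uint64_t rdata;
	size_t rdata_size = sizeof(rdata);
	uint32_t region_size;
	int r;

	r = dm_consult_userspace(uuid, DM_ULOG_GET_REGION_SIZE,
				 NULL, 0, (char *)&rdata, &rdata_size);
	if (!r)
		region_size = (uint32_t)rdata;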
248
249int dm_ulog_tfr_init(void)
250{
251 int r;
252 void *prealloced;
253
254 INIT_LIST_HEAD(&receiving_list);
255
256 prealloced = kmalloc(DM_ULOG_PREALLOCED_SIZE, GFP_KERNEL);
257 if (!prealloced)
258 return -ENOMEM;
259
260 prealloced_cn_msg = prealloced;
261 prealloced_ulog_tfr = prealloced + sizeof(struct cn_msg);
262
263 r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback);
264 if (r) {
265 cn_del_callback(&ulog_cn_id);
266 return r;
267 }
268
269 return 0;
270}
271
272void dm_ulog_tfr_exit(void)
273{
274 cn_del_callback(&ulog_cn_id);
275 kfree(prealloced_cn_msg);
276}
diff --git a/drivers/md/dm-log-userspace-transfer.h b/drivers/md/dm-log-userspace-transfer.h
new file mode 100644
index 000000000000..c26d8e4e2710
--- /dev/null
+++ b/drivers/md/dm-log-userspace-transfer.h
@@ -0,0 +1,18 @@
1/*
2 * Copyright (C) 2006-2009 Red Hat, Inc.
3 *
4 * This file is released under the LGPL.
5 */
6
7#ifndef __DM_LOG_USERSPACE_TRANSFER_H__
8#define __DM_LOG_USERSPACE_TRANSFER_H__
9
10#define DM_MSG_PREFIX "dm-log-userspace"
11
12int dm_ulog_tfr_init(void);
13void dm_ulog_tfr_exit(void);
14int dm_consult_userspace(const char *uuid, int request_type,
15 char *data, size_t data_size,
16 char *rdata, size_t *rdata_size);
17
18#endif /* __DM_LOG_USERSPACE_TRANSFER_H__ */
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 6fa8ccf91c70..9443896ede07 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -412,11 +412,12 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
412 /* 412 /*
413 * Buffer holds both header and bitset. 413 * Buffer holds both header and bitset.
414 */ 414 */
415 buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + 415 buf_size =
416 bitset_size, 416 dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + bitset_size,
417 ti->limits.logical_block_size); 417 bdev_logical_block_size(lc->header_location.
418 bdev));
418 419
419 if (buf_size > dev->bdev->bd_inode->i_size) { 420 if (buf_size > i_size_read(dev->bdev->bd_inode)) {
420 DMWARN("log device %s too small: need %llu bytes", 421 DMWARN("log device %s too small: need %llu bytes",
421 dev->name, (unsigned long long)buf_size); 422 dev->name, (unsigned long long)buf_size);
422 kfree(lc); 423 kfree(lc);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 6a386ab4f7eb..c70604a20897 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -8,7 +8,6 @@
8#include <linux/device-mapper.h> 8#include <linux/device-mapper.h>
9 9
10#include "dm-path-selector.h" 10#include "dm-path-selector.h"
11#include "dm-bio-record.h"
12#include "dm-uevent.h" 11#include "dm-uevent.h"
13 12
14#include <linux/ctype.h> 13#include <linux/ctype.h>
@@ -35,6 +34,7 @@ struct pgpath {
35 34
36 struct dm_path path; 35 struct dm_path path;
37 struct work_struct deactivate_path; 36 struct work_struct deactivate_path;
37 struct work_struct activate_path;
38}; 38};
39 39
40#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) 40#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -64,8 +64,6 @@ struct multipath {
64 spinlock_t lock; 64 spinlock_t lock;
65 65
66 const char *hw_handler_name; 66 const char *hw_handler_name;
67 struct work_struct activate_path;
68 struct pgpath *pgpath_to_activate;
69 unsigned nr_priority_groups; 67 unsigned nr_priority_groups;
70 struct list_head priority_groups; 68 struct list_head priority_groups;
71 unsigned pg_init_required; /* pg_init needs calling? */ 69 unsigned pg_init_required; /* pg_init needs calling? */
@@ -84,7 +82,7 @@ struct multipath {
84 unsigned pg_init_count; /* Number of times pg_init called */ 82 unsigned pg_init_count; /* Number of times pg_init called */
85 83
86 struct work_struct process_queued_ios; 84 struct work_struct process_queued_ios;
87 struct bio_list queued_ios; 85 struct list_head queued_ios;
88 unsigned queue_size; 86 unsigned queue_size;
89 87
90 struct work_struct trigger_event; 88 struct work_struct trigger_event;
@@ -101,7 +99,7 @@ struct multipath {
101 */ 99 */
102struct dm_mpath_io { 100struct dm_mpath_io {
103 struct pgpath *pgpath; 101 struct pgpath *pgpath;
104 struct dm_bio_details details; 102 size_t nr_bytes;
105}; 103};
106 104
107typedef int (*action_fn) (struct pgpath *pgpath); 105typedef int (*action_fn) (struct pgpath *pgpath);
@@ -128,6 +126,7 @@ static struct pgpath *alloc_pgpath(void)
128 if (pgpath) { 126 if (pgpath) {
129 pgpath->is_active = 1; 127 pgpath->is_active = 1;
130 INIT_WORK(&pgpath->deactivate_path, deactivate_path); 128 INIT_WORK(&pgpath->deactivate_path, deactivate_path);
129 INIT_WORK(&pgpath->activate_path, activate_path);
131 } 130 }
132 131
133 return pgpath; 132 return pgpath;
@@ -160,7 +159,6 @@ static struct priority_group *alloc_priority_group(void)
160 159
161static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti) 160static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
162{ 161{
163 unsigned long flags;
164 struct pgpath *pgpath, *tmp; 162 struct pgpath *pgpath, *tmp;
165 struct multipath *m = ti->private; 163 struct multipath *m = ti->private;
166 164
@@ -169,10 +167,6 @@ static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
169 if (m->hw_handler_name) 167 if (m->hw_handler_name)
170 scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev)); 168 scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
171 dm_put_device(ti, pgpath->path.dev); 169 dm_put_device(ti, pgpath->path.dev);
172 spin_lock_irqsave(&m->lock, flags);
173 if (m->pgpath_to_activate == pgpath)
174 m->pgpath_to_activate = NULL;
175 spin_unlock_irqrestore(&m->lock, flags);
176 free_pgpath(pgpath); 170 free_pgpath(pgpath);
177 } 171 }
178} 172}
@@ -198,11 +192,11 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
198 m = kzalloc(sizeof(*m), GFP_KERNEL); 192 m = kzalloc(sizeof(*m), GFP_KERNEL);
199 if (m) { 193 if (m) {
200 INIT_LIST_HEAD(&m->priority_groups); 194 INIT_LIST_HEAD(&m->priority_groups);
195 INIT_LIST_HEAD(&m->queued_ios);
201 spin_lock_init(&m->lock); 196 spin_lock_init(&m->lock);
202 m->queue_io = 1; 197 m->queue_io = 1;
203 INIT_WORK(&m->process_queued_ios, process_queued_ios); 198 INIT_WORK(&m->process_queued_ios, process_queued_ios);
204 INIT_WORK(&m->trigger_event, trigger_event); 199 INIT_WORK(&m->trigger_event, trigger_event);
205 INIT_WORK(&m->activate_path, activate_path);
206 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); 200 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
207 if (!m->mpio_pool) { 201 if (!m->mpio_pool) {
208 kfree(m); 202 kfree(m);
@@ -250,11 +244,12 @@ static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
250 m->pg_init_count = 0; 244 m->pg_init_count = 0;
251} 245}
252 246
253static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg) 247static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
248 size_t nr_bytes)
254{ 249{
255 struct dm_path *path; 250 struct dm_path *path;
256 251
257 path = pg->ps.type->select_path(&pg->ps, &m->repeat_count); 252 path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
258 if (!path) 253 if (!path)
259 return -ENXIO; 254 return -ENXIO;
260 255
@@ -266,7 +261,7 @@ static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
266 return 0; 261 return 0;
267} 262}
268 263
269static void __choose_pgpath(struct multipath *m) 264static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
270{ 265{
271 struct priority_group *pg; 266 struct priority_group *pg;
272 unsigned bypassed = 1; 267 unsigned bypassed = 1;
@@ -278,12 +273,12 @@ static void __choose_pgpath(struct multipath *m)
278 if (m->next_pg) { 273 if (m->next_pg) {
279 pg = m->next_pg; 274 pg = m->next_pg;
280 m->next_pg = NULL; 275 m->next_pg = NULL;
281 if (!__choose_path_in_pg(m, pg)) 276 if (!__choose_path_in_pg(m, pg, nr_bytes))
282 return; 277 return;
283 } 278 }
284 279
285 /* Don't change PG until it has no remaining paths */ 280 /* Don't change PG until it has no remaining paths */
286 if (m->current_pg && !__choose_path_in_pg(m, m->current_pg)) 281 if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
287 return; 282 return;
288 283
289 /* 284 /*
@@ -295,7 +290,7 @@ static void __choose_pgpath(struct multipath *m)
295 list_for_each_entry(pg, &m->priority_groups, list) { 290 list_for_each_entry(pg, &m->priority_groups, list) {
296 if (pg->bypassed == bypassed) 291 if (pg->bypassed == bypassed)
297 continue; 292 continue;
298 if (!__choose_path_in_pg(m, pg)) 293 if (!__choose_path_in_pg(m, pg, nr_bytes))
299 return; 294 return;
300 } 295 }
301 } while (bypassed--); 296 } while (bypassed--);
@@ -322,19 +317,21 @@ static int __must_push_back(struct multipath *m)
322 dm_noflush_suspending(m->ti)); 317 dm_noflush_suspending(m->ti));
323} 318}
324 319
325static int map_io(struct multipath *m, struct bio *bio, 320static int map_io(struct multipath *m, struct request *clone,
326 struct dm_mpath_io *mpio, unsigned was_queued) 321 struct dm_mpath_io *mpio, unsigned was_queued)
327{ 322{
328 int r = DM_MAPIO_REMAPPED; 323 int r = DM_MAPIO_REMAPPED;
324 size_t nr_bytes = blk_rq_bytes(clone);
329 unsigned long flags; 325 unsigned long flags;
330 struct pgpath *pgpath; 326 struct pgpath *pgpath;
327 struct block_device *bdev;
331 328
332 spin_lock_irqsave(&m->lock, flags); 329 spin_lock_irqsave(&m->lock, flags);
333 330
334 /* Do we need to select a new pgpath? */ 331 /* Do we need to select a new pgpath? */
335 if (!m->current_pgpath || 332 if (!m->current_pgpath ||
336 (!m->queue_io && (m->repeat_count && --m->repeat_count == 0))) 333 (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
337 __choose_pgpath(m); 334 __choose_pgpath(m, nr_bytes);
338 335
339 pgpath = m->current_pgpath; 336 pgpath = m->current_pgpath;
340 337
@@ -344,21 +341,28 @@ static int map_io(struct multipath *m, struct bio *bio,
344 if ((pgpath && m->queue_io) || 341 if ((pgpath && m->queue_io) ||
345 (!pgpath && m->queue_if_no_path)) { 342 (!pgpath && m->queue_if_no_path)) {
346 /* Queue for the daemon to resubmit */ 343 /* Queue for the daemon to resubmit */
347 bio_list_add(&m->queued_ios, bio); 344 list_add_tail(&clone->queuelist, &m->queued_ios);
348 m->queue_size++; 345 m->queue_size++;
349 if ((m->pg_init_required && !m->pg_init_in_progress) || 346 if ((m->pg_init_required && !m->pg_init_in_progress) ||
350 !m->queue_io) 347 !m->queue_io)
351 queue_work(kmultipathd, &m->process_queued_ios); 348 queue_work(kmultipathd, &m->process_queued_ios);
352 pgpath = NULL; 349 pgpath = NULL;
353 r = DM_MAPIO_SUBMITTED; 350 r = DM_MAPIO_SUBMITTED;
354 } else if (pgpath) 351 } else if (pgpath) {
355 bio->bi_bdev = pgpath->path.dev->bdev; 352 bdev = pgpath->path.dev->bdev;
356 else if (__must_push_back(m)) 353 clone->q = bdev_get_queue(bdev);
354 clone->rq_disk = bdev->bd_disk;
355 } else if (__must_push_back(m))
357 r = DM_MAPIO_REQUEUE; 356 r = DM_MAPIO_REQUEUE;
358 else 357 else
359 r = -EIO; /* Failed */ 358 r = -EIO; /* Failed */
360 359
361 mpio->pgpath = pgpath; 360 mpio->pgpath = pgpath;
361 mpio->nr_bytes = nr_bytes;
362
363 if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
364 pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
365 nr_bytes);
362 366
363 spin_unlock_irqrestore(&m->lock, flags); 367 spin_unlock_irqrestore(&m->lock, flags);
364 368
@@ -396,30 +400,31 @@ static void dispatch_queued_ios(struct multipath *m)
396{ 400{
397 int r; 401 int r;
398 unsigned long flags; 402 unsigned long flags;
399 struct bio *bio = NULL, *next;
400 struct dm_mpath_io *mpio; 403 struct dm_mpath_io *mpio;
401 union map_info *info; 404 union map_info *info;
405 struct request *clone, *n;
406 LIST_HEAD(cl);
402 407
403 spin_lock_irqsave(&m->lock, flags); 408 spin_lock_irqsave(&m->lock, flags);
404 bio = bio_list_get(&m->queued_ios); 409 list_splice_init(&m->queued_ios, &cl);
405 spin_unlock_irqrestore(&m->lock, flags); 410 spin_unlock_irqrestore(&m->lock, flags);
406 411
407 while (bio) { 412 list_for_each_entry_safe(clone, n, &cl, queuelist) {
408 next = bio->bi_next; 413 list_del_init(&clone->queuelist);
409 bio->bi_next = NULL;
410 414
411 info = dm_get_mapinfo(bio); 415 info = dm_get_rq_mapinfo(clone);
412 mpio = info->ptr; 416 mpio = info->ptr;
413 417
414 r = map_io(m, bio, mpio, 1); 418 r = map_io(m, clone, mpio, 1);
415 if (r < 0) 419 if (r < 0) {
416 bio_endio(bio, r); 420 mempool_free(mpio, m->mpio_pool);
417 else if (r == DM_MAPIO_REMAPPED) 421 dm_kill_unmapped_request(clone, r);
418 generic_make_request(bio); 422 } else if (r == DM_MAPIO_REMAPPED)
419 else if (r == DM_MAPIO_REQUEUE) 423 dm_dispatch_request(clone);
420 bio_endio(bio, -EIO); 424 else if (r == DM_MAPIO_REQUEUE) {
421 425 mempool_free(mpio, m->mpio_pool);
422 bio = next; 426 dm_requeue_unmapped_request(clone);
427 }
423 } 428 }
424} 429}
425 430
@@ -427,8 +432,8 @@ static void process_queued_ios(struct work_struct *work)
427{ 432{
428 struct multipath *m = 433 struct multipath *m =
429 container_of(work, struct multipath, process_queued_ios); 434 container_of(work, struct multipath, process_queued_ios);
430 struct pgpath *pgpath = NULL; 435 struct pgpath *pgpath = NULL, *tmp;
431 unsigned init_required = 0, must_queue = 1; 436 unsigned must_queue = 1;
432 unsigned long flags; 437 unsigned long flags;
433 438
434 spin_lock_irqsave(&m->lock, flags); 439 spin_lock_irqsave(&m->lock, flags);
@@ -437,7 +442,7 @@ static void process_queued_ios(struct work_struct *work)
437 goto out; 442 goto out;
438 443
439 if (!m->current_pgpath) 444 if (!m->current_pgpath)
440 __choose_pgpath(m); 445 __choose_pgpath(m, 0);
441 446
442 pgpath = m->current_pgpath; 447 pgpath = m->current_pgpath;
443 448
@@ -446,19 +451,15 @@ static void process_queued_ios(struct work_struct *work)
446 must_queue = 0; 451 must_queue = 0;
447 452
448 if (m->pg_init_required && !m->pg_init_in_progress && pgpath) { 453 if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
449 m->pgpath_to_activate = pgpath;
450 m->pg_init_count++; 454 m->pg_init_count++;
451 m->pg_init_required = 0; 455 m->pg_init_required = 0;
452 m->pg_init_in_progress = 1; 456 list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
453 init_required = 1; 457 if (queue_work(kmpath_handlerd, &tmp->activate_path))
458 m->pg_init_in_progress++;
459 }
454 } 460 }
455
456out: 461out:
457 spin_unlock_irqrestore(&m->lock, flags); 462 spin_unlock_irqrestore(&m->lock, flags);
458
459 if (init_required)
460 queue_work(kmpath_handlerd, &m->activate_path);
461
462 if (!must_queue) 463 if (!must_queue)
463 dispatch_queued_ios(m); 464 dispatch_queued_ios(m);
464} 465}
@@ -553,6 +554,12 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
553 return -EINVAL; 554 return -EINVAL;
554 } 555 }
555 556
557 if (ps_argc > as->argc) {
558 dm_put_path_selector(pst);
559 ti->error = "not enough arguments for path selector";
560 return -EINVAL;
561 }
562
556 r = pst->create(&pg->ps, ps_argc, as->argv); 563 r = pst->create(&pg->ps, ps_argc, as->argv);
557 if (r) { 564 if (r) {
558 dm_put_path_selector(pst); 565 dm_put_path_selector(pst);
@@ -591,9 +598,20 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
591 } 598 }
592 599
593 if (m->hw_handler_name) { 600 if (m->hw_handler_name) {
594 r = scsi_dh_attach(bdev_get_queue(p->path.dev->bdev), 601 struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
595 m->hw_handler_name); 602
603 r = scsi_dh_attach(q, m->hw_handler_name);
604 if (r == -EBUSY) {
605 /*
606 * Already attached to different hw_handler,
607 * try to reattach with correct one.
608 */
609 scsi_dh_detach(q);
610 r = scsi_dh_attach(q, m->hw_handler_name);
611 }
612
596 if (r < 0) { 613 if (r < 0) {
614 ti->error = "error attaching hardware handler";
597 dm_put_device(ti, p->path.dev); 615 dm_put_device(ti, p->path.dev);
598 goto bad; 616 goto bad;
599 } 617 }
@@ -699,6 +717,11 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
699 if (!hw_argc) 717 if (!hw_argc)
700 return 0; 718 return 0;
701 719
720 if (hw_argc > as->argc) {
721 ti->error = "not enough arguments for hardware handler";
722 return -EINVAL;
723 }
724
702 m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL); 725 m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
703 request_module("scsi_dh_%s", m->hw_handler_name); 726 request_module("scsi_dh_%s", m->hw_handler_name);
704 if (scsi_dh_handler_exist(m->hw_handler_name) == 0) { 727 if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
@@ -823,6 +846,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
823 goto bad; 846 goto bad;
824 } 847 }
825 848
849 ti->num_flush_requests = 1;
850
826 return 0; 851 return 0;
827 852
828 bad: 853 bad:
@@ -836,25 +861,29 @@ static void multipath_dtr(struct dm_target *ti)
836 861
837 flush_workqueue(kmpath_handlerd); 862 flush_workqueue(kmpath_handlerd);
838 flush_workqueue(kmultipathd); 863 flush_workqueue(kmultipathd);
864 flush_scheduled_work();
839 free_multipath(m); 865 free_multipath(m);
840} 866}
841 867
842/* 868/*
843 * Map bios, recording original fields for later in case we have to resubmit 869 * Map cloned requests
844 */ 870 */
845static int multipath_map(struct dm_target *ti, struct bio *bio, 871static int multipath_map(struct dm_target *ti, struct request *clone,
846 union map_info *map_context) 872 union map_info *map_context)
847{ 873{
848 int r; 874 int r;
849 struct dm_mpath_io *mpio; 875 struct dm_mpath_io *mpio;
850 struct multipath *m = (struct multipath *) ti->private; 876 struct multipath *m = (struct multipath *) ti->private;
851 877
852 mpio = mempool_alloc(m->mpio_pool, GFP_NOIO); 878 mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
853 dm_bio_record(&mpio->details, bio); 879 if (!mpio)
880 /* ENOMEM, requeue */
881 return DM_MAPIO_REQUEUE;
882 memset(mpio, 0, sizeof(*mpio));
854 883
855 map_context->ptr = mpio; 884 map_context->ptr = mpio;
856 bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); 885 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
857 r = map_io(m, bio, mpio, 0); 886 r = map_io(m, clone, mpio, 0);
858 if (r < 0 || r == DM_MAPIO_REQUEUE) 887 if (r < 0 || r == DM_MAPIO_REQUEUE)
859 mempool_free(mpio, m->mpio_pool); 888 mempool_free(mpio, m->mpio_pool);
860 889
@@ -924,9 +953,13 @@ static int reinstate_path(struct pgpath *pgpath)
924 953
925 pgpath->is_active = 1; 954 pgpath->is_active = 1;
926 955
927 m->current_pgpath = NULL; 956 if (!m->nr_valid_paths++ && m->queue_size) {
928 if (!m->nr_valid_paths++ && m->queue_size) 957 m->current_pgpath = NULL;
929 queue_work(kmultipathd, &m->process_queued_ios); 958 queue_work(kmultipathd, &m->process_queued_ios);
959 } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
960 if (queue_work(kmpath_handlerd, &pgpath->activate_path))
961 m->pg_init_in_progress++;
962 }
930 963
931 dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti, 964 dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
932 pgpath->path.dev->name, m->nr_valid_paths); 965 pgpath->path.dev->name, m->nr_valid_paths);
@@ -1102,87 +1135,70 @@ static void pg_init_done(struct dm_path *path, int errors)
1102 1135
1103 spin_lock_irqsave(&m->lock, flags); 1136 spin_lock_irqsave(&m->lock, flags);
1104 if (errors) { 1137 if (errors) {
1105 DMERR("Could not failover device. Error %d.", errors); 1138 if (pgpath == m->current_pgpath) {
1106 m->current_pgpath = NULL; 1139 DMERR("Could not failover device. Error %d.", errors);
1107 m->current_pg = NULL; 1140 m->current_pgpath = NULL;
1141 m->current_pg = NULL;
1142 }
1108 } else if (!m->pg_init_required) { 1143 } else if (!m->pg_init_required) {
1109 m->queue_io = 0; 1144 m->queue_io = 0;
1110 pg->bypassed = 0; 1145 pg->bypassed = 0;
1111 } 1146 }
1112 1147
1113 m->pg_init_in_progress = 0; 1148 m->pg_init_in_progress--;
1114 queue_work(kmultipathd, &m->process_queued_ios); 1149 if (!m->pg_init_in_progress)
1150 queue_work(kmultipathd, &m->process_queued_ios);
1115 spin_unlock_irqrestore(&m->lock, flags); 1151 spin_unlock_irqrestore(&m->lock, flags);
1116} 1152}
1117 1153
1118static void activate_path(struct work_struct *work) 1154static void activate_path(struct work_struct *work)
1119{ 1155{
1120 int ret; 1156 int ret;
1121 struct multipath *m = 1157 struct pgpath *pgpath =
1122 container_of(work, struct multipath, activate_path); 1158 container_of(work, struct pgpath, activate_path);
1123 struct dm_path *path;
1124 unsigned long flags;
1125 1159
1126 spin_lock_irqsave(&m->lock, flags); 1160 ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
1127 path = &m->pgpath_to_activate->path; 1161 pg_init_done(&pgpath->path, ret);
1128 m->pgpath_to_activate = NULL;
1129 spin_unlock_irqrestore(&m->lock, flags);
1130 if (!path)
1131 return;
1132 ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
1133 pg_init_done(path, ret);
1134} 1162}
1135 1163
1136/* 1164/*
1137 * end_io handling 1165 * end_io handling
1138 */ 1166 */
1139static int do_end_io(struct multipath *m, struct bio *bio, 1167static int do_end_io(struct multipath *m, struct request *clone,
1140 int error, struct dm_mpath_io *mpio) 1168 int error, struct dm_mpath_io *mpio)
1141{ 1169{
1170 /*
1171 * We don't queue any clone request inside the multipath target
1172 * during end I/O handling, since those clone requests don't have
1173 * bio clones. If we queue them inside the multipath target,
1174 * we need to make bio clones, that requires memory allocation.
1175 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
1176 * don't have bio clones.)
1177 * Instead of queueing the clone request here, we queue the original
1178 * request into dm core, which will remake a clone request and
1179 * clone bios for it and resubmit it later.
1180 */
1181 int r = DM_ENDIO_REQUEUE;
1142 unsigned long flags; 1182 unsigned long flags;
1143 1183
1144 if (!error) 1184 if (!error && !clone->errors)
1145 return 0; /* I/O complete */ 1185 return 0; /* I/O complete */
1146 1186
1147 if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
1148 return error;
1149
1150 if (error == -EOPNOTSUPP) 1187 if (error == -EOPNOTSUPP)
1151 return error; 1188 return error;
1152 1189
1153 spin_lock_irqsave(&m->lock, flags);
1154 if (!m->nr_valid_paths) {
1155 if (__must_push_back(m)) {
1156 spin_unlock_irqrestore(&m->lock, flags);
1157 return DM_ENDIO_REQUEUE;
1158 } else if (!m->queue_if_no_path) {
1159 spin_unlock_irqrestore(&m->lock, flags);
1160 return -EIO;
1161 } else {
1162 spin_unlock_irqrestore(&m->lock, flags);
1163 goto requeue;
1164 }
1165 }
1166 spin_unlock_irqrestore(&m->lock, flags);
1167
1168 if (mpio->pgpath) 1190 if (mpio->pgpath)
1169 fail_path(mpio->pgpath); 1191 fail_path(mpio->pgpath);
1170 1192
1171 requeue:
1172 dm_bio_restore(&mpio->details, bio);
1173
1174 /* queue for the daemon to resubmit or fail */
1175 spin_lock_irqsave(&m->lock, flags); 1193 spin_lock_irqsave(&m->lock, flags);
1176 bio_list_add(&m->queued_ios, bio); 1194 if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
1177 m->queue_size++; 1195 r = -EIO;
1178 if (!m->queue_io)
1179 queue_work(kmultipathd, &m->process_queued_ios);
1180 spin_unlock_irqrestore(&m->lock, flags); 1196 spin_unlock_irqrestore(&m->lock, flags);
1181 1197
1182 return DM_ENDIO_INCOMPLETE; /* io not complete */ 1198 return r;
1183} 1199}
1184 1200
1185static int multipath_end_io(struct dm_target *ti, struct bio *bio, 1201static int multipath_end_io(struct dm_target *ti, struct request *clone,
1186 int error, union map_info *map_context) 1202 int error, union map_info *map_context)
1187{ 1203{
1188 struct multipath *m = ti->private; 1204 struct multipath *m = ti->private;
@@ -1191,14 +1207,13 @@ static int multipath_end_io(struct dm_target *ti, struct bio *bio,
1191 struct path_selector *ps; 1207 struct path_selector *ps;
1192 int r; 1208 int r;
1193 1209
1194 r = do_end_io(m, bio, error, mpio); 1210 r = do_end_io(m, clone, error, mpio);
1195 if (pgpath) { 1211 if (pgpath) {
1196 ps = &pgpath->pg->ps; 1212 ps = &pgpath->pg->ps;
1197 if (ps->type->end_io) 1213 if (ps->type->end_io)
1198 ps->type->end_io(ps, &pgpath->path); 1214 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1199 } 1215 }
1200 if (r != DM_ENDIO_INCOMPLETE) 1216 mempool_free(mpio, m->mpio_pool);
1201 mempool_free(mpio, m->mpio_pool);
1202 1217
1203 return r; 1218 return r;
1204} 1219}
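
Note: mpio->nr_bytes is what lets byte-counting path selectors (the
dm-service-time and dm-queue-length modules added in this series) charge
and credit each I/O. A hedged sketch of the matching selector hooks
(struct path_info and its field are hypothetical; path->pscontext is the
selector-private pointer):

	static int example_start_io(struct path_selector *ps,
				    struct dm_path *path, size_t nr_bytes)
	{
		struct path_info *pi = path->pscontext;

		atomic_add(nr_bytes, &pi->in_flight_size);	/* charge */
		return 0;
	}

	static int example_end_io(struct path_selector *ps,
				  struct dm_path *path, size_t nr_bytes)
	{
		struct path_info *pi = path->pscontext;

		atomic_sub(nr_bytes, &pi->in_flight_size);	/* credit */
		return 0;
	}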
@@ -1411,7 +1426,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
1411 spin_lock_irqsave(&m->lock, flags); 1426 spin_lock_irqsave(&m->lock, flags);
1412 1427
1413 if (!m->current_pgpath) 1428 if (!m->current_pgpath)
1414 __choose_pgpath(m); 1429 __choose_pgpath(m, 0);
1415 1430
1416 if (m->current_pgpath) { 1431 if (m->current_pgpath) {
1417 bdev = m->current_pgpath->path.dev->bdev; 1432 bdev = m->current_pgpath->path.dev->bdev;
@@ -1428,22 +1443,113 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
1428 return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); 1443 return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
1429} 1444}
1430 1445
1446static int multipath_iterate_devices(struct dm_target *ti,
1447 iterate_devices_callout_fn fn, void *data)
1448{
1449 struct multipath *m = ti->private;
1450 struct priority_group *pg;
1451 struct pgpath *p;
1452 int ret = 0;
1453
1454 list_for_each_entry(pg, &m->priority_groups, list) {
1455 list_for_each_entry(p, &pg->pgpaths, list) {
1456 ret = fn(ti, p->path.dev, ti->begin, data);
1457 if (ret)
1458 goto out;
1459 }
1460 }
1461
1462out:
1463 return ret;
1464}
1465
1466static int __pgpath_busy(struct pgpath *pgpath)
1467{
1468 struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1469
1470 return dm_underlying_device_busy(q);
1471}
1472
1473/*
1474 * We return "busy" only when we can map I/Os but the underlying
1475 * devices are busy (so even if we mapped I/Os now, they would just
1476 * wait on the underlying queue).
1477 * In other words, if we want to fail I/Os or queue them inside us
1478 * because no map is available, we don't return "busy"; otherwise,
1479 * dm core won't hand us the I/Os and we can't do what we want.
1480 */
1481static int multipath_busy(struct dm_target *ti)
1482{
1483 int busy = 0, has_active = 0;
1484 struct multipath *m = ti->private;
1485 struct priority_group *pg;
1486 struct pgpath *pgpath;
1487 unsigned long flags;
1488
1489 spin_lock_irqsave(&m->lock, flags);
1490
1491 /* Guess which priority_group will be used at next mapping time */
1492 if (unlikely(!m->current_pgpath && m->next_pg))
1493 pg = m->next_pg;
1494 else if (likely(m->current_pg))
1495 pg = m->current_pg;
1496 else
1497 /*
1498 * We don't know which pg will be used at the next mapping time.
1499 * We don't call __choose_pgpath() here to avoid triggering
1500 * pg_init just because of this busy check.
1501 * So we don't know whether the underlying devices we will be
1502 * using at the next mapping time are busy or not. Just try mapping.
1503 */
1504 goto out;
1505
1506 /*
1507 * If there is at least one non-busy active path, the path selector
1508 * will be able to select it. So we consider such a pg as not busy.
1509 */
1510 busy = 1;
1511 list_for_each_entry(pgpath, &pg->pgpaths, list)
1512 if (pgpath->is_active) {
1513 has_active = 1;
1514
1515 if (!__pgpath_busy(pgpath)) {
1516 busy = 0;
1517 break;
1518 }
1519 }
1520
1521 if (!has_active)
1522 /*
1523 * No active path in this pg, so this pg won't be used and
1524 * the current_pg will be changed at the next mapping time.
1525 * We need to try mapping to determine it.
1526 */
1527 busy = 0;
1528
1529out:
1530 spin_unlock_irqrestore(&m->lock, flags);
1531
1532 return busy;
1533}
1534
1431/*----------------------------------------------------------------- 1535/*-----------------------------------------------------------------
1432 * Module setup 1536 * Module setup
1433 *---------------------------------------------------------------*/ 1537 *---------------------------------------------------------------*/
1434static struct target_type multipath_target = { 1538static struct target_type multipath_target = {
1435 .name = "multipath", 1539 .name = "multipath",
1436 .version = {1, 0, 5}, 1540 .version = {1, 1, 0},
1437 .module = THIS_MODULE, 1541 .module = THIS_MODULE,
1438 .ctr = multipath_ctr, 1542 .ctr = multipath_ctr,
1439 .dtr = multipath_dtr, 1543 .dtr = multipath_dtr,
1440 .map = multipath_map, 1544 .map_rq = multipath_map,
1441 .end_io = multipath_end_io, 1545 .rq_end_io = multipath_end_io,
1442 .presuspend = multipath_presuspend, 1546 .presuspend = multipath_presuspend,
1443 .resume = multipath_resume, 1547 .resume = multipath_resume,
1444 .status = multipath_status, 1548 .status = multipath_status,
1445 .message = multipath_message, 1549 .message = multipath_message,
1446 .ioctl = multipath_ioctl, 1550 .ioctl = multipath_ioctl,
1551 .iterate_devices = multipath_iterate_devices,
1552 .busy = multipath_busy,
1447}; 1553};
1448 1554
1449static int __init dm_multipath_init(void) 1555static int __init dm_multipath_init(void)
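For orientation, here is a minimal userspace model of the decision multipath_busy() above implements (every name in it is invented for illustration, not kernel API): report busy only when the priority group likely to serve the next request has at least one active path and all of its active paths sit on busy underlying devices.

    #include <stdbool.h>

    struct model_path { bool is_active; bool underlying_busy; };

    /* Busy only if some path is active and none of the active ones
     * could take an I/O without it waiting on the underlying queue. */
    static bool model_pg_busy(const struct model_path *p, int n)
    {
            bool has_active = false;
            int i;

            for (i = 0; i < n; i++) {
                    if (!p[i].is_active)
                            continue;
                    has_active = true;
                    if (!p[i].underlying_busy)
                            return false;   /* selector can pick this path */
            }

            /* No active path: the pg will change, so just try mapping. */
            return has_active;
    }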
diff --git a/drivers/md/dm-path-selector.h b/drivers/md/dm-path-selector.h
index 27357b85d73d..e7d1fa8b0459 100644
--- a/drivers/md/dm-path-selector.h
+++ b/drivers/md/dm-path-selector.h
@@ -56,7 +56,8 @@ struct path_selector_type {
56 * the path fails. 56 * the path fails.
57 */ 57 */
58 struct dm_path *(*select_path) (struct path_selector *ps, 58 struct dm_path *(*select_path) (struct path_selector *ps,
59 unsigned *repeat_count); 59 unsigned *repeat_count,
60 size_t nr_bytes);
60 61
61 /* 62 /*
62 * Notify the selector that a path has failed. 63 * Notify the selector that a path has failed.
@@ -75,7 +76,10 @@ struct path_selector_type {
75 int (*status) (struct path_selector *ps, struct dm_path *path, 76 int (*status) (struct path_selector *ps, struct dm_path *path,
76 status_type_t type, char *result, unsigned int maxlen); 77 status_type_t type, char *result, unsigned int maxlen);
77 78
78 int (*end_io) (struct path_selector *ps, struct dm_path *path); 79 int (*start_io) (struct path_selector *ps, struct dm_path *path,
80 size_t nr_bytes);
81 int (*end_io) (struct path_selector *ps, struct dm_path *path,
82 size_t nr_bytes);
79}; 83};
80 84
81/* Register a path selector */ 85/* Register a path selector */
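The nr_bytes argument added to these hooks lets a selector weight its choices by I/O size rather than I/O count. A hedged sketch of the intended start_io()/end_io() pairing (context struct and names invented; dm-service-time.c later in this patch is the real in-tree user): whatever start_io() adds on dispatch, end_io() must subtract with the same nr_bytes on completion, or the load estimate drifts.

    struct example_ctx {
            atomic_t in_flight_bytes;       /* illustrative only */
    };

    static int example_start_io(struct path_selector *ps,
                                struct dm_path *path, size_t nr_bytes)
    {
            struct example_ctx *ctx = path->pscontext;

            atomic_add(nr_bytes, &ctx->in_flight_bytes);
            return 0;
    }

    static int example_end_io(struct path_selector *ps,
                              struct dm_path *path, size_t nr_bytes)
    {
            struct example_ctx *ctx = path->pscontext;

            atomic_sub(nr_bytes, &ctx->in_flight_bytes);
            return 0;
    }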
diff --git a/drivers/md/dm-queue-length.c b/drivers/md/dm-queue-length.c
new file mode 100644
index 000000000000..f92b6cea9d9c
--- /dev/null
+++ b/drivers/md/dm-queue-length.c
@@ -0,0 +1,263 @@
1/*
2 * Copyright (C) 2004-2005 IBM Corp. All Rights Reserved.
3 * Copyright (C) 2006-2009 NEC Corporation.
4 *
5 * dm-queue-length.c
6 *
7 * Module Author: Stefan Bader, IBM
8 * Modified by: Kiyoshi Ueda, NEC
9 *
10 * This file is released under the GPL.
11 *
12 * queue-length path selector - choose a path with the least number of
13 * in-flight I/Os.
14 */
15
16#include "dm.h"
17#include "dm-path-selector.h"
18
19#include <linux/slab.h>
20#include <linux/ctype.h>
21#include <linux/errno.h>
22#include <linux/module.h>
23#include <asm/atomic.h>
24
25#define DM_MSG_PREFIX "multipath queue-length"
26#define QL_MIN_IO 128
27#define QL_VERSION "0.1.0"
28
29struct selector {
30 struct list_head valid_paths;
31 struct list_head failed_paths;
32};
33
34struct path_info {
35 struct list_head list;
36 struct dm_path *path;
37 unsigned repeat_count;
38 atomic_t qlen; /* the number of in-flight I/Os */
39};
40
41static struct selector *alloc_selector(void)
42{
43 struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
44
45 if (s) {
46 INIT_LIST_HEAD(&s->valid_paths);
47 INIT_LIST_HEAD(&s->failed_paths);
48 }
49
50 return s;
51}
52
53static int ql_create(struct path_selector *ps, unsigned argc, char **argv)
54{
55 struct selector *s = alloc_selector();
56
57 if (!s)
58 return -ENOMEM;
59
60 ps->context = s;
61 return 0;
62}
63
64static void ql_free_paths(struct list_head *paths)
65{
66 struct path_info *pi, *next;
67
68 list_for_each_entry_safe(pi, next, paths, list) {
69 list_del(&pi->list);
70 kfree(pi);
71 }
72}
73
74static void ql_destroy(struct path_selector *ps)
75{
76 struct selector *s = ps->context;
77
78 ql_free_paths(&s->valid_paths);
79 ql_free_paths(&s->failed_paths);
80 kfree(s);
81 ps->context = NULL;
82}
83
84static int ql_status(struct path_selector *ps, struct dm_path *path,
85 status_type_t type, char *result, unsigned maxlen)
86{
87 unsigned sz = 0;
88 struct path_info *pi;
89
90 /* When called with NULL path, return selector status/args. */
91 if (!path)
92 DMEMIT("0 ");
93 else {
94 pi = path->pscontext;
95
96 switch (type) {
97 case STATUSTYPE_INFO:
98 DMEMIT("%d ", atomic_read(&pi->qlen));
99 break;
100 case STATUSTYPE_TABLE:
101 DMEMIT("%u ", pi->repeat_count);
102 break;
103 }
104 }
105
106 return sz;
107}
108
109static int ql_add_path(struct path_selector *ps, struct dm_path *path,
110 int argc, char **argv, char **error)
111{
112 struct selector *s = ps->context;
113 struct path_info *pi;
114 unsigned repeat_count = QL_MIN_IO;
115
116 /*
117 * Arguments: [<repeat_count>]
118 * <repeat_count>: The number of I/Os before switching path.
119 * If not given, default (QL_MIN_IO) is used.
120 */
121 if (argc > 1) {
122 *error = "queue-length ps: incorrect number of arguments";
123 return -EINVAL;
124 }
125
126 if ((argc == 1) && (sscanf(argv[0], "%u", &repeat_count) != 1)) {
127 *error = "queue-length ps: invalid repeat count";
128 return -EINVAL;
129 }
130
131 /* Allocate the path information structure */
132 pi = kmalloc(sizeof(*pi), GFP_KERNEL);
133 if (!pi) {
134 *error = "queue-length ps: Error allocating path information";
135 return -ENOMEM;
136 }
137
138 pi->path = path;
139 pi->repeat_count = repeat_count;
140 atomic_set(&pi->qlen, 0);
141
142 path->pscontext = pi;
143
144 list_add_tail(&pi->list, &s->valid_paths);
145
146 return 0;
147}
148
149static void ql_fail_path(struct path_selector *ps, struct dm_path *path)
150{
151 struct selector *s = ps->context;
152 struct path_info *pi = path->pscontext;
153
154 list_move(&pi->list, &s->failed_paths);
155}
156
157static int ql_reinstate_path(struct path_selector *ps, struct dm_path *path)
158{
159 struct selector *s = ps->context;
160 struct path_info *pi = path->pscontext;
161
162 list_move_tail(&pi->list, &s->valid_paths);
163
164 return 0;
165}
166
167/*
168 * Select a path having the minimum number of in-flight I/Os
169 */
170static struct dm_path *ql_select_path(struct path_selector *ps,
171 unsigned *repeat_count, size_t nr_bytes)
172{
173 struct selector *s = ps->context;
174 struct path_info *pi = NULL, *best = NULL;
175
176 if (list_empty(&s->valid_paths))
177 return NULL;
178
179 /* Change preferred (first in list) path to evenly balance. */
180 list_move_tail(s->valid_paths.next, &s->valid_paths);
181
182 list_for_each_entry(pi, &s->valid_paths, list) {
183 if (!best ||
184 (atomic_read(&pi->qlen) < atomic_read(&best->qlen)))
185 best = pi;
186
187 if (!atomic_read(&best->qlen))
188 break;
189 }
190
191 if (!best)
192 return NULL;
193
194 *repeat_count = best->repeat_count;
195
196 return best->path;
197}
198
199static int ql_start_io(struct path_selector *ps, struct dm_path *path,
200 size_t nr_bytes)
201{
202 struct path_info *pi = path->pscontext;
203
204 atomic_inc(&pi->qlen);
205
206 return 0;
207}
208
209static int ql_end_io(struct path_selector *ps, struct dm_path *path,
210 size_t nr_bytes)
211{
212 struct path_info *pi = path->pscontext;
213
214 atomic_dec(&pi->qlen);
215
216 return 0;
217}
218
219static struct path_selector_type ql_ps = {
220 .name = "queue-length",
221 .module = THIS_MODULE,
222 .table_args = 1,
223 .info_args = 1,
224 .create = ql_create,
225 .destroy = ql_destroy,
226 .status = ql_status,
227 .add_path = ql_add_path,
228 .fail_path = ql_fail_path,
229 .reinstate_path = ql_reinstate_path,
230 .select_path = ql_select_path,
231 .start_io = ql_start_io,
232 .end_io = ql_end_io,
233};
234
235static int __init dm_ql_init(void)
236{
237 int r = dm_register_path_selector(&ql_ps);
238
239 if (r < 0)
240 DMERR("register failed %d", r);
241
242 DMINFO("version " QL_VERSION " loaded");
243
244 return r;
245}
246
247static void __exit dm_ql_exit(void)
248{
249 int r = dm_unregister_path_selector(&ql_ps);
250
251 if (r < 0)
252 DMERR("unregister failed %d", r);
253}
254
255module_init(dm_ql_init);
256module_exit(dm_ql_exit);
257
258MODULE_AUTHOR("Stefan Bader <Stefan.Bader at de.ibm.com>");
259MODULE_DESCRIPTION(
260 "(C) Copyright IBM Corp. 2004,2005 All Rights Reserved.\n"
261 DM_NAME " path selector to balance the number of in-flight I/Os"
262);
263MODULE_LICENSE("GPL");
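As a usage illustration (device numbers invented; the dm-queue-length.txt document added by this series is authoritative), a multipath table line selecting this path selector for two paths with a per-path repeat count of 128 might look like:

    0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128

where 'queue-length 0 2 1' reads as: no selector-wide arguments, two paths, one selector argument (the repeat count) after each path.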
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 076fbb4e967a..ce8868c768cc 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1283,9 +1283,23 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
1283 return 0; 1283 return 0;
1284} 1284}
1285 1285
1286static int mirror_iterate_devices(struct dm_target *ti,
1287 iterate_devices_callout_fn fn, void *data)
1288{
1289 struct mirror_set *ms = ti->private;
1290 int ret = 0;
1291 unsigned i;
1292
1293 for (i = 0; !ret && i < ms->nr_mirrors; i++)
1294 ret = fn(ti, ms->mirror[i].dev,
1295 ms->mirror[i].offset, data);
1296
1297 return ret;
1298}
1299
1286static struct target_type mirror_target = { 1300static struct target_type mirror_target = {
1287 .name = "mirror", 1301 .name = "mirror",
1288 .version = {1, 0, 20}, 1302 .version = {1, 12, 0},
1289 .module = THIS_MODULE, 1303 .module = THIS_MODULE,
1290 .ctr = mirror_ctr, 1304 .ctr = mirror_ctr,
1291 .dtr = mirror_dtr, 1305 .dtr = mirror_dtr,
@@ -1295,6 +1309,7 @@ static struct target_type mirror_target = {
1295 .postsuspend = mirror_postsuspend, 1309 .postsuspend = mirror_postsuspend,
1296 .resume = mirror_resume, 1310 .resume = mirror_resume,
1297 .status = mirror_status, 1311 .status = mirror_status,
1312 .iterate_devices = mirror_iterate_devices,
1298}; 1313};
1299 1314
1300static int __init dm_mirror_init(void) 1315static int __init dm_mirror_init(void)
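mirror_iterate_devices() above follows the iterate_devices contract introduced in this series: the callout is invoked once per underlying device and a nonzero return stops the walk early. A hedged sketch of a callout in that style (the function is invented for illustration):

    /* Count the devices behind a target; returning 0 keeps iterating. */
    static int count_devices(struct dm_target *ti, struct dm_dev *dev,
                             sector_t start, void *data)
    {
            unsigned *count = data;

            (*count)++;
            return 0;
    }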
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 7b899be0b087..36dbe29f2fd6 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -283,7 +283,7 @@ static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
283 283
284 nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); 284 nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
285 if (unlikely(!nreg)) 285 if (unlikely(!nreg))
286 nreg = kmalloc(sizeof(*nreg), GFP_NOIO); 286 nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);
287 287
288 nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? 288 nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
289 DM_RH_CLEAN : DM_RH_NOSYNC; 289 DM_RH_CLEAN : DM_RH_NOSYNC;
diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c
index cdfbf65b28cb..24752f449bef 100644
--- a/drivers/md/dm-round-robin.c
+++ b/drivers/md/dm-round-robin.c
@@ -161,7 +161,7 @@ static int rr_reinstate_path(struct path_selector *ps, struct dm_path *p)
161} 161}
162 162
163static struct dm_path *rr_select_path(struct path_selector *ps, 163static struct dm_path *rr_select_path(struct path_selector *ps,
164 unsigned *repeat_count) 164 unsigned *repeat_count, size_t nr_bytes)
165{ 165{
166 struct selector *s = (struct selector *) ps->context; 166 struct selector *s = (struct selector *) ps->context;
167 struct path_info *pi = NULL; 167 struct path_info *pi = NULL;
diff --git a/drivers/md/dm-service-time.c b/drivers/md/dm-service-time.c
new file mode 100644
index 000000000000..cfa668f46c40
--- /dev/null
+++ b/drivers/md/dm-service-time.c
@@ -0,0 +1,339 @@
1/*
2 * Copyright (C) 2007-2009 NEC Corporation. All Rights Reserved.
3 *
4 * Module Author: Kiyoshi Ueda
5 *
6 * This file is released under the GPL.
7 *
8 * Throughput oriented path selector.
9 */
10
11#include "dm.h"
12#include "dm-path-selector.h"
13
14#define DM_MSG_PREFIX "multipath service-time"
15#define ST_MIN_IO 1
16#define ST_MAX_RELATIVE_THROUGHPUT 100
17#define ST_MAX_RELATIVE_THROUGHPUT_SHIFT 7
18#define ST_MAX_INFLIGHT_SIZE ((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT)
19#define ST_VERSION "0.2.0"
20
21struct selector {
22 struct list_head valid_paths;
23 struct list_head failed_paths;
24};
25
26struct path_info {
27 struct list_head list;
28 struct dm_path *path;
29 unsigned repeat_count;
30 unsigned relative_throughput;
31 atomic_t in_flight_size; /* Total size of in-flight I/Os */
32};
33
34static struct selector *alloc_selector(void)
35{
36 struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
37
38 if (s) {
39 INIT_LIST_HEAD(&s->valid_paths);
40 INIT_LIST_HEAD(&s->failed_paths);
41 }
42
43 return s;
44}
45
46static int st_create(struct path_selector *ps, unsigned argc, char **argv)
47{
48 struct selector *s = alloc_selector();
49
50 if (!s)
51 return -ENOMEM;
52
53 ps->context = s;
54 return 0;
55}
56
57static void free_paths(struct list_head *paths)
58{
59 struct path_info *pi, *next;
60
61 list_for_each_entry_safe(pi, next, paths, list) {
62 list_del(&pi->list);
63 kfree(pi);
64 }
65}
66
67static void st_destroy(struct path_selector *ps)
68{
69 struct selector *s = ps->context;
70
71 free_paths(&s->valid_paths);
72 free_paths(&s->failed_paths);
73 kfree(s);
74 ps->context = NULL;
75}
76
77static int st_status(struct path_selector *ps, struct dm_path *path,
78 status_type_t type, char *result, unsigned maxlen)
79{
80 unsigned sz = 0;
81 struct path_info *pi;
82
83 if (!path)
84 DMEMIT("0 ");
85 else {
86 pi = path->pscontext;
87
88 switch (type) {
89 case STATUSTYPE_INFO:
90 DMEMIT("%d %u ", atomic_read(&pi->in_flight_size),
91 pi->relative_throughput);
92 break;
93 case STATUSTYPE_TABLE:
94 DMEMIT("%u %u ", pi->repeat_count,
95 pi->relative_throughput);
96 break;
97 }
98 }
99
100 return sz;
101}
102
103static int st_add_path(struct path_selector *ps, struct dm_path *path,
104 int argc, char **argv, char **error)
105{
106 struct selector *s = ps->context;
107 struct path_info *pi;
108 unsigned repeat_count = ST_MIN_IO;
109 unsigned relative_throughput = 1;
110
111 /*
112 * Arguments: [<repeat_count> [<relative_throughput>]]
113 * <repeat_count>: The number of I/Os before switching path.
114 * If not given, default (ST_MIN_IO) is used.
115 * <relative_throughput>: The relative throughput value of
116 * the path among all paths in the path-group.
117 * The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT>
118 * If not given, minimum value '1' is used.
119 * If '0' is given, the path isn't selected while
120 * other paths having a positive value are
121 * available.
122 */
123 if (argc > 2) {
124 *error = "service-time ps: incorrect number of arguments";
125 return -EINVAL;
126 }
127
128 if (argc && (sscanf(argv[0], "%u", &repeat_count) != 1)) {
129 *error = "service-time ps: invalid repeat count";
130 return -EINVAL;
131 }
132
133 if ((argc == 2) &&
134 (sscanf(argv[1], "%u", &relative_throughput) != 1 ||
135 relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) {
136 *error = "service-time ps: invalid relative_throughput value";
137 return -EINVAL;
138 }
139
140 /* allocate the path */
141 pi = kmalloc(sizeof(*pi), GFP_KERNEL);
142 if (!pi) {
143 *error = "service-time ps: Error allocating path context";
144 return -ENOMEM;
145 }
146
147 pi->path = path;
148 pi->repeat_count = repeat_count;
149 pi->relative_throughput = relative_throughput;
150 atomic_set(&pi->in_flight_size, 0);
151
152 path->pscontext = pi;
153
154 list_add_tail(&pi->list, &s->valid_paths);
155
156 return 0;
157}
158
159static void st_fail_path(struct path_selector *ps, struct dm_path *path)
160{
161 struct selector *s = ps->context;
162 struct path_info *pi = path->pscontext;
163
164 list_move(&pi->list, &s->failed_paths);
165}
166
167static int st_reinstate_path(struct path_selector *ps, struct dm_path *path)
168{
169 struct selector *s = ps->context;
170 struct path_info *pi = path->pscontext;
171
172 list_move_tail(&pi->list, &s->valid_paths);
173
174 return 0;
175}
176
177/*
178 * Compare the estimated service time of 2 paths, pi1 and pi2,
179 * for the incoming I/O.
180 *
181 * Returns:
182 * < 0 : pi1 is better
183 * 0 : no difference between pi1 and pi2
184 * > 0 : pi2 is better
185 *
186 * Description:
187 * Basically, the service time is estimated by:
188 * ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput'
189 * To reduce the calculation, some optimizations are made.
190 * (See comments inline)
191 */
192static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
193 size_t incoming)
194{
195 size_t sz1, sz2, st1, st2;
196
197 sz1 = atomic_read(&pi1->in_flight_size);
198 sz2 = atomic_read(&pi2->in_flight_size);
199
200 /*
201 * Case 1: Both have same throughput value. Choose less loaded path.
202 */
203 if (pi1->relative_throughput == pi2->relative_throughput)
204 return sz1 - sz2;
205
206 /*
207 * Case 2a: Both have same load. Choose higher throughput path.
208 * Case 2b: One path has no throughput value. Choose the other one.
209 */
210 if (sz1 == sz2 ||
211 !pi1->relative_throughput || !pi2->relative_throughput)
212 return pi2->relative_throughput - pi1->relative_throughput;
213
214 /*
215 * Case 3: Calculate service time. Choose faster path.
216 * Service time using pi1:
217 * st1 = (sz1 + incoming) / pi1->relative_throughput
218 * Service time using pi2:
219 * st2 = (sz2 + incoming) / pi2->relative_throughput
220 *
221 * To avoid the division, transform the expression to use
222 * multiplication.
223 * Because ->relative_throughput > 0 here, if st1 < st2,
224 * the two expressions below are equivalent:
225 * (sz1 + incoming) / pi1->relative_throughput <
226 * (sz2 + incoming) / pi2->relative_throughput
227 * (sz1 + incoming) * pi2->relative_throughput <
228 * (sz2 + incoming) * pi1->relative_throughput
229 * So use the latter one.
230 */
231 sz1 += incoming;
232 sz2 += incoming;
233 if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE ||
234 sz2 >= ST_MAX_INFLIGHT_SIZE)) {
235 /*
236 * The sizes may be too large to multiply by
237 * pi->relative_throughput without overflowing.
238 * To avoid overflow and mis-selection, shift both down.
239 */
240 sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
241 sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
242 }
243 st1 = sz1 * pi2->relative_throughput;
244 st2 = sz2 * pi1->relative_throughput;
245 if (st1 != st2)
246 return st1 - st2;
247
248 /*
249 * Case 4: Service time is equal. Choose higher throughput path.
250 */
251 return pi2->relative_throughput - pi1->relative_throughput;
252}
253
254static struct dm_path *st_select_path(struct path_selector *ps,
255 unsigned *repeat_count, size_t nr_bytes)
256{
257 struct selector *s = ps->context;
258 struct path_info *pi = NULL, *best = NULL;
259
260 if (list_empty(&s->valid_paths))
261 return NULL;
262
263 /* Change preferred (first in list) path to evenly balance. */
264 list_move_tail(s->valid_paths.next, &s->valid_paths);
265
266 list_for_each_entry(pi, &s->valid_paths, list)
267 if (!best || (st_compare_load(pi, best, nr_bytes) < 0))
268 best = pi;
269
270 if (!best)
271 return NULL;
272
273 *repeat_count = best->repeat_count;
274
275 return best->path;
276}
277
278static int st_start_io(struct path_selector *ps, struct dm_path *path,
279 size_t nr_bytes)
280{
281 struct path_info *pi = path->pscontext;
282
283 atomic_add(nr_bytes, &pi->in_flight_size);
284
285 return 0;
286}
287
288static int st_end_io(struct path_selector *ps, struct dm_path *path,
289 size_t nr_bytes)
290{
291 struct path_info *pi = path->pscontext;
292
293 atomic_sub(nr_bytes, &pi->in_flight_size);
294
295 return 0;
296}
297
298static struct path_selector_type st_ps = {
299 .name = "service-time",
300 .module = THIS_MODULE,
301 .table_args = 2,
302 .info_args = 2,
303 .create = st_create,
304 .destroy = st_destroy,
305 .status = st_status,
306 .add_path = st_add_path,
307 .fail_path = st_fail_path,
308 .reinstate_path = st_reinstate_path,
309 .select_path = st_select_path,
310 .start_io = st_start_io,
311 .end_io = st_end_io,
312};
313
314static int __init dm_st_init(void)
315{
316 int r = dm_register_path_selector(&st_ps);
317
318 if (r < 0)
319 DMERR("register failed %d", r);
320
321 DMINFO("version " ST_VERSION " loaded");
322
323 return r;
324}
325
326static void __exit dm_st_exit(void)
327{
328 int r = dm_unregister_path_selector(&st_ps);
329
330 if (r < 0)
331 DMERR("unregister failed %d", r);
332}
333
334module_init(dm_st_init);
335module_exit(dm_st_exit);
336
337MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector");
338MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>");
339MODULE_LICENSE("GPL");
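A worked instance of st_compare_load() with invented numbers: let pi1 carry 64 KiB in flight with relative_throughput 10, and pi2 carry 16 KiB with relative_throughput 2, when an 8 KiB I/O arrives. Then

    st1 = (64 + 8) * 2  = 144   (KiB units, scaled by pi2's throughput)
    st2 = (16 + 8) * 10 = 240   (KiB units, scaled by pi1's throughput)

so st1 < st2 and pi1 wins: the higher-throughput path is preferred even though it carries more data, matching the true service times 72/10 = 7.2 versus 24/2 = 12. An illustrative table line with two such paths (syntax per the dm-service-time.txt document in this series) might be:

    0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 1 10 8:16 1 2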
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 2662a41337e7..6e3fe4f14934 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -636,7 +636,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
636 /* 636 /*
637 * Commit exceptions to disk. 637 * Commit exceptions to disk.
638 */ 638 */
639 if (ps->valid && area_io(ps, WRITE)) 639 if (ps->valid && area_io(ps, WRITE_BARRIER))
640 ps->valid = 0; 640 ps->valid = 0;
641 641
642 /* 642 /*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index d73f17fc7778..d573165cd2b7 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -678,6 +678,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
678 678
679 ti->private = s; 679 ti->private = s;
680 ti->split_io = s->store->chunk_size; 680 ti->split_io = s->store->chunk_size;
681 ti->num_flush_requests = 1;
681 682
682 return 0; 683 return 0;
683 684
@@ -1030,6 +1031,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
1030 chunk_t chunk; 1031 chunk_t chunk;
1031 struct dm_snap_pending_exception *pe = NULL; 1032 struct dm_snap_pending_exception *pe = NULL;
1032 1033
1034 if (unlikely(bio_empty_barrier(bio))) {
1035 bio->bi_bdev = s->store->cow->bdev;
1036 return DM_MAPIO_REMAPPED;
1037 }
1038
1033 chunk = sector_to_chunk(s->store, bio->bi_sector); 1039 chunk = sector_to_chunk(s->store, bio->bi_sector);
1034 1040
1035 /* Full snapshots are not usable */ 1041 /* Full snapshots are not usable */
@@ -1338,6 +1344,8 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1338 } 1344 }
1339 1345
1340 ti->private = dev; 1346 ti->private = dev;
1347 ti->num_flush_requests = 1;
1348
1341 return 0; 1349 return 0;
1342} 1350}
1343 1351
@@ -1353,6 +1361,9 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
1353 struct dm_dev *dev = ti->private; 1361 struct dm_dev *dev = ti->private;
1354 bio->bi_bdev = dev->bdev; 1362 bio->bi_bdev = dev->bdev;
1355 1363
1364 if (unlikely(bio_empty_barrier(bio)))
1365 return DM_MAPIO_REMAPPED;
1366
1356 /* Only tell snapshots if this is a write */ 1367 /* Only tell snapshots if this is a write */
1357 return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED; 1368 return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
1358} 1369}
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 41569bc60abc..b240e85ae39a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -167,6 +167,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
167 sc->stripes = stripes; 167 sc->stripes = stripes;
168 sc->stripe_width = width; 168 sc->stripe_width = width;
169 ti->split_io = chunk_size; 169 ti->split_io = chunk_size;
170 ti->num_flush_requests = stripes;
170 171
171 sc->chunk_mask = ((sector_t) chunk_size) - 1; 172 sc->chunk_mask = ((sector_t) chunk_size) - 1;
172 for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++) 173 for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
@@ -211,10 +212,18 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
211 union map_info *map_context) 212 union map_info *map_context)
212{ 213{
213 struct stripe_c *sc = (struct stripe_c *) ti->private; 214 struct stripe_c *sc = (struct stripe_c *) ti->private;
215 sector_t offset, chunk;
216 uint32_t stripe;
214 217
215 sector_t offset = bio->bi_sector - ti->begin; 218 if (unlikely(bio_empty_barrier(bio))) {
216 sector_t chunk = offset >> sc->chunk_shift; 219 BUG_ON(map_context->flush_request >= sc->stripes);
217 uint32_t stripe = sector_div(chunk, sc->stripes); 220 bio->bi_bdev = sc->stripe[map_context->flush_request].dev->bdev;
221 return DM_MAPIO_REMAPPED;
222 }
223
224 offset = bio->bi_sector - ti->begin;
225 chunk = offset >> sc->chunk_shift;
226 stripe = sector_div(chunk, sc->stripes);
218 227
219 bio->bi_bdev = sc->stripe[stripe].dev->bdev; 228 bio->bi_bdev = sc->stripe[stripe].dev->bdev;
220 bio->bi_sector = sc->stripe[stripe].physical_start + 229 bio->bi_sector = sc->stripe[stripe].physical_start +
@@ -304,15 +313,31 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
304 return error; 313 return error;
305} 314}
306 315
316static int stripe_iterate_devices(struct dm_target *ti,
317 iterate_devices_callout_fn fn, void *data)
318{
319 struct stripe_c *sc = ti->private;
320 int ret = 0;
321 unsigned i = 0;
322
323 do
324 ret = fn(ti, sc->stripe[i].dev,
325 sc->stripe[i].physical_start, data);
326 while (!ret && ++i < sc->stripes);
327
328 return ret;
329}
330
307static struct target_type stripe_target = { 331static struct target_type stripe_target = {
308 .name = "striped", 332 .name = "striped",
309 .version = {1, 1, 0}, 333 .version = {1, 2, 0},
310 .module = THIS_MODULE, 334 .module = THIS_MODULE,
311 .ctr = stripe_ctr, 335 .ctr = stripe_ctr,
312 .dtr = stripe_dtr, 336 .dtr = stripe_dtr,
313 .map = stripe_map, 337 .map = stripe_map,
314 .end_io = stripe_end_io, 338 .end_io = stripe_end_io,
315 .status = stripe_status, 339 .status = stripe_status,
340 .iterate_devices = stripe_iterate_devices,
316}; 341};
317 342
318int __init dm_stripe_init(void) 343int __init dm_stripe_init(void)
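Taken together, the flush pieces above work like this: stripe_ctr() sets ti->num_flush_requests to the stripe count, dm core then submits one empty barrier clone per stripe with map_context->flush_request running from 0 to stripes - 1, and stripe_map() routes clone i to sc->stripe[i].dev, so every member device receives the flush. With a hypothetical 3-stripe table, flush_request values 0, 1 and 2 land on stripe[0], stripe[1] and stripe[2] respectively.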
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c
index a2a45e6c7c8b..4b045903a4e2 100644
--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -57,12 +57,21 @@ static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf)
57 return strlen(buf); 57 return strlen(buf);
58} 58}
59 59
60static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf)
61{
62 sprintf(buf, "%d\n", dm_suspended(md));
63
64 return strlen(buf);
65}
66
60static DM_ATTR_RO(name); 67static DM_ATTR_RO(name);
61static DM_ATTR_RO(uuid); 68static DM_ATTR_RO(uuid);
69static DM_ATTR_RO(suspended);
62 70
63static struct attribute *dm_attrs[] = { 71static struct attribute *dm_attrs[] = {
64 &dm_attr_name.attr, 72 &dm_attr_name.attr,
65 &dm_attr_uuid.attr, 73 &dm_attr_uuid.attr,
74 &dm_attr_suspended.attr,
66 NULL, 75 NULL,
67}; 76};
68 77
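With this attribute, userspace can poll suspend state without an ioctl; assuming the usual dm sysfs layout, reading a file such as /sys/block/dm-0/dm/suspended (the minor in the path is illustrative) returns 1 while the device is suspended and 0 otherwise.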
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e9a73bb242b0..4899ebe767c8 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -41,6 +41,7 @@
41struct dm_table { 41struct dm_table {
42 struct mapped_device *md; 42 struct mapped_device *md;
43 atomic_t holders; 43 atomic_t holders;
44 unsigned type;
44 45
45 /* btree table */ 46 /* btree table */
46 unsigned int depth; 47 unsigned int depth;
@@ -62,15 +63,11 @@ struct dm_table {
62 /* a list of devices used by this table */ 63 /* a list of devices used by this table */
63 struct list_head devices; 64 struct list_head devices;
64 65
65 /*
66 * These are optimistic limits taken from all the
67 * targets, some targets will need smaller limits.
68 */
69 struct io_restrictions limits;
70
71 /* events get handed up using this callback */ 66 /* events get handed up using this callback */
72 void (*event_fn)(void *); 67 void (*event_fn)(void *);
73 void *event_context; 68 void *event_context;
69
70 struct dm_md_mempools *mempools;
74}; 71};
75 72
76/* 73/*
@@ -89,43 +86,6 @@ static unsigned int int_log(unsigned int n, unsigned int base)
89} 86}
90 87
91/* 88/*
92 * Returns the minimum that is _not_ zero, unless both are zero.
93 */
94#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
95
96/*
97 * Combine two io_restrictions, always taking the lower value.
98 */
99static void combine_restrictions_low(struct io_restrictions *lhs,
100 struct io_restrictions *rhs)
101{
102 lhs->max_sectors =
103 min_not_zero(lhs->max_sectors, rhs->max_sectors);
104
105 lhs->max_phys_segments =
106 min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);
107
108 lhs->max_hw_segments =
109 min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
110
111 lhs->logical_block_size = max(lhs->logical_block_size,
112 rhs->logical_block_size);
113
114 lhs->max_segment_size =
115 min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
116
117 lhs->max_hw_sectors =
118 min_not_zero(lhs->max_hw_sectors, rhs->max_hw_sectors);
119
120 lhs->seg_boundary_mask =
121 min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
122
123 lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
124
125 lhs->no_cluster |= rhs->no_cluster;
126}
127
128/*
129 * Calculate the index of the child node of the n'th node k'th key. 89 * Calculate the index of the child node of the n'th node k'th key.
130 */ 90 */
131static inline unsigned int get_child(unsigned int n, unsigned int k) 91static inline unsigned int get_child(unsigned int n, unsigned int k)
@@ -267,6 +227,8 @@ static void free_devices(struct list_head *devices)
267 list_for_each_safe(tmp, next, devices) { 227 list_for_each_safe(tmp, next, devices) {
268 struct dm_dev_internal *dd = 228 struct dm_dev_internal *dd =
269 list_entry(tmp, struct dm_dev_internal, list); 229 list_entry(tmp, struct dm_dev_internal, list);
230 DMWARN("dm_table_destroy: dm_put_device call missing for %s",
231 dd->dm_dev.name);
270 kfree(dd); 232 kfree(dd);
271 } 233 }
272} 234}
@@ -296,12 +258,10 @@ void dm_table_destroy(struct dm_table *t)
296 vfree(t->highs); 258 vfree(t->highs);
297 259
298 /* free the device list */ 260 /* free the device list */
299 if (t->devices.next != &t->devices) { 261 if (t->devices.next != &t->devices)
300 DMWARN("devices still present during destroy: "
301 "dm_table_remove_device calls missing");
302
303 free_devices(&t->devices); 262 free_devices(&t->devices);
304 } 263
264 dm_free_md_mempools(t->mempools);
305 265
306 kfree(t); 266 kfree(t);
307} 267}
@@ -385,15 +345,48 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
385/* 345/*
386 * If possible, this checks an area of a destination device is valid. 346 * If possible, this checks an area of a destination device is valid.
387 */ 347 */
388static int check_device_area(struct dm_dev_internal *dd, sector_t start, 348static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
389 sector_t len) 349 sector_t start, void *data)
390{ 350{
391 sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT; 351 struct queue_limits *limits = data;
352 struct block_device *bdev = dev->bdev;
353 sector_t dev_size =
354 i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
355 unsigned short logical_block_size_sectors =
356 limits->logical_block_size >> SECTOR_SHIFT;
357 char b[BDEVNAME_SIZE];
392 358
393 if (!dev_size) 359 if (!dev_size)
394 return 1; 360 return 1;
395 361
396 return ((start < dev_size) && (len <= (dev_size - start))); 362 if ((start >= dev_size) || (start + ti->len > dev_size)) {
363 DMWARN("%s: %s too small for target",
364 dm_device_name(ti->table->md), bdevname(bdev, b));
365 return 0;
366 }
367
368 if (logical_block_size_sectors <= 1)
369 return 1;
370
371 if (start & (logical_block_size_sectors - 1)) {
372 DMWARN("%s: start=%llu not aligned to h/w "
373 "logical block size %hu of %s",
374 dm_device_name(ti->table->md),
375 (unsigned long long)start,
376 limits->logical_block_size, bdevname(bdev, b));
377 return 0;
378 }
379
380 if (ti->len & (logical_block_size_sectors - 1)) {
381 DMWARN("%s: len=%llu not aligned to h/w "
382 "logical block size %hu of %s",
383 dm_device_name(ti->table->md),
384 (unsigned long long)ti->len,
385 limits->logical_block_size, bdevname(bdev, b));
386 return 0;
387 }
388
389 return 1;
397} 390}
398 391
399/* 392/*
@@ -479,38 +472,32 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
479 } 472 }
480 atomic_inc(&dd->count); 473 atomic_inc(&dd->count);
481 474
482 if (!check_device_area(dd, start, len)) {
483 DMWARN("device %s too small for target", path);
484 dm_put_device(ti, &dd->dm_dev);
485 return -EINVAL;
486 }
487
488 *result = &dd->dm_dev; 475 *result = &dd->dm_dev;
489
490 return 0; 476 return 0;
491} 477}
492 478
493void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) 479/*
480 * Returns the minimum that is _not_ zero, unless both are zero.
481 */
482#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
483
484int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
485 sector_t start, void *data)
494{ 486{
487 struct queue_limits *limits = data;
488 struct block_device *bdev = dev->bdev;
495 struct request_queue *q = bdev_get_queue(bdev); 489 struct request_queue *q = bdev_get_queue(bdev);
496 struct io_restrictions *rs = &ti->limits;
497 char b[BDEVNAME_SIZE]; 490 char b[BDEVNAME_SIZE];
498 491
499 if (unlikely(!q)) { 492 if (unlikely(!q)) {
500 DMWARN("%s: Cannot set limits for nonexistent device %s", 493 DMWARN("%s: Cannot set limits for nonexistent device %s",
501 dm_device_name(ti->table->md), bdevname(bdev, b)); 494 dm_device_name(ti->table->md), bdevname(bdev, b));
502 return; 495 return 0;
503 } 496 }
504 497
505 /* 498 if (blk_stack_limits(limits, &q->limits, start) < 0)
506 * Combine the device limits low. 499 DMWARN("%s: target device %s is misaligned",
507 * 500 dm_device_name(ti->table->md), bdevname(bdev, b));
508 * FIXME: if we move an io_restriction struct
509 * into q this would just be a call to
510 * combine_restrictions_low()
511 */
512 rs->max_sectors =
513 min_not_zero(rs->max_sectors, queue_max_sectors(q));
514 501
515 /* 502 /*
516 * Check if merge fn is supported. 503 * Check if merge fn is supported.
@@ -519,48 +506,21 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
519 */ 506 */
520 507
521 if (q->merge_bvec_fn && !ti->type->merge) 508 if (q->merge_bvec_fn && !ti->type->merge)
522 rs->max_sectors = 509 limits->max_sectors =
523 min_not_zero(rs->max_sectors, 510 min_not_zero(limits->max_sectors,
524 (unsigned int) (PAGE_SIZE >> 9)); 511 (unsigned int) (PAGE_SIZE >> 9));
525 512 return 0;
526 rs->max_phys_segments =
527 min_not_zero(rs->max_phys_segments,
528 queue_max_phys_segments(q));
529
530 rs->max_hw_segments =
531 min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q));
532
533 rs->logical_block_size = max(rs->logical_block_size,
534 queue_logical_block_size(q));
535
536 rs->max_segment_size =
537 min_not_zero(rs->max_segment_size, queue_max_segment_size(q));
538
539 rs->max_hw_sectors =
540 min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q));
541
542 rs->seg_boundary_mask =
543 min_not_zero(rs->seg_boundary_mask,
544 queue_segment_boundary(q));
545
546 rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q));
547
548 rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
549} 513}
550EXPORT_SYMBOL_GPL(dm_set_device_limits); 514EXPORT_SYMBOL_GPL(dm_set_device_limits);
551 515
552int dm_get_device(struct dm_target *ti, const char *path, sector_t start, 516int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
553 sector_t len, fmode_t mode, struct dm_dev **result) 517 sector_t len, fmode_t mode, struct dm_dev **result)
554{ 518{
555 int r = __table_get_device(ti->table, ti, path, 519 return __table_get_device(ti->table, ti, path,
556 start, len, mode, result); 520 start, len, mode, result);
557
558 if (!r)
559 dm_set_device_limits(ti, (*result)->bdev);
560
561 return r;
562} 521}
563 522
523
564/* 524/*
565 * Decrement a device's use count and remove it if necessary. 525 * Decrement a device's use count and remove it if necessary.
566 */ 526 */
@@ -675,24 +635,78 @@ int dm_split_args(int *argc, char ***argvp, char *input)
675 return 0; 635 return 0;
676} 636}
677 637
678static void check_for_valid_limits(struct io_restrictions *rs) 638/*
639 * Impose necessary and sufficient conditions on a device's table such
640 * that any incoming bio which respects its logical_block_size can be
641 * processed successfully. If it falls across the boundary between
642 * two or more targets, the size of each piece it gets split into must
643 * be compatible with the logical_block_size of the target processing it.
644 */
645static int validate_hardware_logical_block_alignment(struct dm_table *table,
646 struct queue_limits *limits)
679{ 647{
680 if (!rs->max_sectors) 648 /*
681 rs->max_sectors = SAFE_MAX_SECTORS; 649 * This function uses arithmetic modulo the logical_block_size
682 if (!rs->max_hw_sectors) 650 * (in units of 512-byte sectors).
683 rs->max_hw_sectors = SAFE_MAX_SECTORS; 651 */
684 if (!rs->max_phys_segments) 652 unsigned short device_logical_block_size_sects =
685 rs->max_phys_segments = MAX_PHYS_SEGMENTS; 653 limits->logical_block_size >> SECTOR_SHIFT;
686 if (!rs->max_hw_segments) 654
687 rs->max_hw_segments = MAX_HW_SEGMENTS; 655 /*
688 if (!rs->logical_block_size) 656 * Offset of the start of the next table entry, mod logical_block_size.
689 rs->logical_block_size = 1 << SECTOR_SHIFT; 657 */
690 if (!rs->max_segment_size) 658 unsigned short next_target_start = 0;
691 rs->max_segment_size = MAX_SEGMENT_SIZE; 659
692 if (!rs->seg_boundary_mask) 660 /*
693 rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; 661 * Given an aligned bio that extends beyond the end of a
694 if (!rs->bounce_pfn) 662 * target, how many sectors must the next target handle?
695 rs->bounce_pfn = -1; 663 */
664 unsigned short remaining = 0;
665
666 struct dm_target *uninitialized_var(ti);
667 struct queue_limits ti_limits;
668 unsigned i = 0;
669
670 /*
671 * Check each entry in the table in turn.
672 */
673 while (i < dm_table_get_num_targets(table)) {
674 ti = dm_table_get_target(table, i++);
675
676 blk_set_default_limits(&ti_limits);
677
678 /* combine all target devices' limits */
679 if (ti->type->iterate_devices)
680 ti->type->iterate_devices(ti, dm_set_device_limits,
681 &ti_limits);
682
683 /*
684 * If the remaining sectors fall entirely within this
685 * table entry, are they compatible with its logical_block_size?
686 */
687 if (remaining < ti->len &&
688 remaining & ((ti_limits.logical_block_size >>
689 SECTOR_SHIFT) - 1))
690 break; /* Error */
691
692 next_target_start =
693 (unsigned short) ((next_target_start + ti->len) &
694 (device_logical_block_size_sects - 1));
695 remaining = next_target_start ?
696 device_logical_block_size_sects - next_target_start : 0;
697 }
698
699 if (remaining) {
700 DMWARN("%s: table line %u (start sect %llu len %llu) "
701 "not aligned to h/w logical block size %hu",
702 dm_device_name(table->md), i,
703 (unsigned long long) ti->begin,
704 (unsigned long long) ti->len,
705 limits->logical_block_size);
706 return -EINVAL;
707 }
708
709 return 0;
696} 710}
697 711
698int dm_table_add_target(struct dm_table *t, const char *type, 712int dm_table_add_target(struct dm_table *t, const char *type,
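A worked example of the alignment check above (numbers invented): with a 4096-byte logical block size, device_logical_block_size_sects is 8. If the first target covers sectors 0..11 (len 12), then next_target_start = 12 mod 8 = 4 and remaining = 8 - 4 = 4, meaning a block-aligned bio straddling the boundary leaves a 4-sector piece for the second target. If that target's combined logical_block_size is also 4096 bytes, 4 & (8 - 1) is nonzero, the loop breaks, and the table is rejected with the 'not aligned to h/w logical block size' warning.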
@@ -747,9 +761,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
747 761
748 t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; 762 t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
749 763
750 /* FIXME: the plan is to combine high here and then have
751 * the merge fn apply the target level restrictions. */
752 combine_restrictions_low(&t->limits, &tgt->limits);
753 return 0; 764 return 0;
754 765
755 bad: 766 bad:
@@ -758,6 +769,104 @@ int dm_table_add_target(struct dm_table *t, const char *type,
758 return r; 769 return r;
759} 770}
760 771
772int dm_table_set_type(struct dm_table *t)
773{
774 unsigned i;
775 unsigned bio_based = 0, request_based = 0;
776 struct dm_target *tgt;
777 struct dm_dev_internal *dd;
778 struct list_head *devices;
779
780 for (i = 0; i < t->num_targets; i++) {
781 tgt = t->targets + i;
782 if (dm_target_request_based(tgt))
783 request_based = 1;
784 else
785 bio_based = 1;
786
787 if (bio_based && request_based) {
788 DMWARN("Inconsistent table: different target types"
789 " can't be mixed up");
790 return -EINVAL;
791 }
792 }
793
794 if (bio_based) {
795 /* We must use this table as bio-based */
796 t->type = DM_TYPE_BIO_BASED;
797 return 0;
798 }
799
800 BUG_ON(!request_based); /* No targets in this table */
801
802 /* Non-request-stackable devices can't be used for request-based dm */
803 devices = dm_table_get_devices(t);
804 list_for_each_entry(dd, devices, list) {
805 if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) {
806 DMWARN("table load rejected: including"
807 " non-request-stackable devices");
808 return -EINVAL;
809 }
810 }
811
812 /*
813 * Request-based dm supports only tables that have a single target now.
814 * To support multiple targets, request splitting support is needed,
815 * and that needs lots of changes in the block layer
816 * (e.g. the request completion process for partial completion).
817 */
818 if (t->num_targets > 1) {
819 DMWARN("Request-based dm doesn't support multiple targets yet");
820 return -EINVAL;
821 }
822
823 t->type = DM_TYPE_REQUEST_BASED;
824
825 return 0;
826}
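In practice this means a two-line table mixing types, say a bio-based 'linear' target followed by the now request-based 'multipath' target (hypothetical table lines), is rejected at load time with the 'Inconsistent table' warning above, and even a pure multipath table with two lines is rejected until request splitting lands.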
827
828unsigned dm_table_get_type(struct dm_table *t)
829{
830 return t->type;
831}
832
833bool dm_table_bio_based(struct dm_table *t)
834{
835 return dm_table_get_type(t) == DM_TYPE_BIO_BASED;
836}
837
838bool dm_table_request_based(struct dm_table *t)
839{
840 return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
841}
842
843int dm_table_alloc_md_mempools(struct dm_table *t)
844{
845 unsigned type = dm_table_get_type(t);
846
847 if (unlikely(type == DM_TYPE_NONE)) {
848 DMWARN("no table type is set, can't allocate mempools");
849 return -EINVAL;
850 }
851
852 t->mempools = dm_alloc_md_mempools(type);
853 if (!t->mempools)
854 return -ENOMEM;
855
856 return 0;
857}
858
859void dm_table_free_md_mempools(struct dm_table *t)
860{
861 dm_free_md_mempools(t->mempools);
862 t->mempools = NULL;
863}
864
865struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
866{
867 return t->mempools;
868}
869
761static int setup_indexes(struct dm_table *t) 870static int setup_indexes(struct dm_table *t)
762{ 871{
763 int i; 872 int i;
@@ -792,8 +901,6 @@ int dm_table_complete(struct dm_table *t)
792 int r = 0; 901 int r = 0;
793 unsigned int leaf_nodes; 902 unsigned int leaf_nodes;
794 903
795 check_for_valid_limits(&t->limits);
796
797 /* how many indexes will the btree have ? */ 904 /* how many indexes will the btree have ? */
798 leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); 905 leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
799 t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); 906 t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
@@ -869,6 +976,57 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
869} 976}
870 977
871/* 978/*
979 * Establish the new table's queue_limits and validate them.
980 */
981int dm_calculate_queue_limits(struct dm_table *table,
982 struct queue_limits *limits)
983{
984 struct dm_target *uninitialized_var(ti);
985 struct queue_limits ti_limits;
986 unsigned i = 0;
987
988 blk_set_default_limits(limits);
989
990 while (i < dm_table_get_num_targets(table)) {
991 blk_set_default_limits(&ti_limits);
992
993 ti = dm_table_get_target(table, i++);
994
995 if (!ti->type->iterate_devices)
996 goto combine_limits;
997
998 /*
999 * Combine queue limits of all the devices this target uses.
1000 */
1001 ti->type->iterate_devices(ti, dm_set_device_limits,
1002 &ti_limits);
1003
1004 /*
1005 * Check each device area is consistent with the target's
1006 * overall queue limits.
1007 */
1008 if (!ti->type->iterate_devices(ti, device_area_is_valid,
1009 &ti_limits))
1010 return -EINVAL;
1011
1012combine_limits:
1013 /*
1014 * Merge this target's queue limits into the overall limits
1015 * for the table.
1016 */
1017 if (blk_stack_limits(limits, &ti_limits, 0) < 0)
1018 DMWARN("%s: target device "
1019 "(start sect %llu len %llu) "
1020 "is misaligned",
1021 dm_device_name(table->md),
1022 (unsigned long long) ti->begin,
1023 (unsigned long long) ti->len);
1024 }
1025
1026 return validate_hardware_logical_block_alignment(table, limits);
1027}
1028
1029/*
872 * Set the integrity profile for this device if all devices used have 1030 * Set the integrity profile for this device if all devices used have
873 * matching profiles. 1031 * matching profiles.
874 */ 1032 */
@@ -907,27 +1065,42 @@ no_integrity:
907 return; 1065 return;
908} 1066}
909 1067
910void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) 1068void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1069 struct queue_limits *limits)
911{ 1070{
912 /* 1071 /*
913 * Make sure we obey the optimistic sub devices 1072 * Each target device in the table has a data area that should normally
914 * restrictions. 1073 * be aligned such that the DM device's alignment_offset is 0.
1074 * FIXME: Propagate alignment_offsets up the stack and warn of
1075 * sub-optimal or inconsistent settings.
1076 */
1077 limits->alignment_offset = 0;
1078 limits->misaligned = 0;
1079
1080 /*
1081 * Copy table's limits to the DM device's request_queue
915 */ 1082 */
916 blk_queue_max_sectors(q, t->limits.max_sectors); 1083 q->limits = *limits;
917 blk_queue_max_phys_segments(q, t->limits.max_phys_segments); 1084
918 blk_queue_max_hw_segments(q, t->limits.max_hw_segments); 1085 if (limits->no_cluster)
919 blk_queue_logical_block_size(q, t->limits.logical_block_size);
920 blk_queue_max_segment_size(q, t->limits.max_segment_size);
921 blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
922 blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
923 blk_queue_bounce_limit(q, t->limits.bounce_pfn);
924
925 if (t->limits.no_cluster)
926 queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); 1086 queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
927 else 1087 else
928 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); 1088 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
929 1089
930 dm_table_set_integrity(t); 1090 dm_table_set_integrity(t);
1091
1092 /*
1093 * QUEUE_FLAG_STACKABLE must be set after all queue settings are
1094 * visible to other CPUs because, once the flag is set, incoming bios
1095 * are processed by request-based dm, which refers to the queue
1096 * settings.
1097 * Until the flag is set, bios are passed to bio-based dm and queued
1098 * to md->deferred, where queue settings are not needed yet.
1099 * Those bios are passed to request-based dm at resume time.
1100 */
1101 smp_mb();
1102 if (dm_table_request_based(t))
1103 queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);
931} 1104}
932 1105
933unsigned int dm_table_get_num_targets(struct dm_table *t) 1106unsigned int dm_table_get_num_targets(struct dm_table *t)
@@ -1023,6 +1196,20 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
1023 return r; 1196 return r;
1024} 1197}
1025 1198
1199int dm_table_any_busy_target(struct dm_table *t)
1200{
1201 unsigned i;
1202 struct dm_target *ti;
1203
1204 for (i = 0; i < t->num_targets; i++) {
1205 ti = t->targets + i;
1206 if (ti->type->busy && ti->type->busy(ti))
1207 return 1;
1208 }
1209
1210 return 0;
1211}
1212
1026void dm_table_unplug_all(struct dm_table *t) 1213void dm_table_unplug_all(struct dm_table *t)
1027{ 1214{
1028 struct dm_dev_internal *dd; 1215 struct dm_dev_internal *dd;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 48db308fae67..3c6d4ee8921d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -24,6 +24,13 @@
24 24
25#define DM_MSG_PREFIX "core" 25#define DM_MSG_PREFIX "core"
26 26
27/*
28 * Cookies are numeric values sent with CHANGE and REMOVE
29 * uevents while resuming, removing or renaming the device.
30 */
31#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
32#define DM_COOKIE_LENGTH 24
33
27static const char *_name = DM_NAME; 34static const char *_name = DM_NAME;
28 35
29static unsigned int major = 0; 36static unsigned int major = 0;
@@ -71,7 +78,7 @@ struct dm_rq_target_io {
71 */ 78 */
72struct dm_rq_clone_bio_info { 79struct dm_rq_clone_bio_info {
73 struct bio *orig; 80 struct bio *orig;
74 struct request *rq; 81 struct dm_rq_target_io *tio;
75}; 82};
76 83
77union map_info *dm_get_mapinfo(struct bio *bio) 84union map_info *dm_get_mapinfo(struct bio *bio)
@@ -81,6 +88,14 @@ union map_info *dm_get_mapinfo(struct bio *bio)
81 return NULL; 88 return NULL;
82} 89}
83 90
91union map_info *dm_get_rq_mapinfo(struct request *rq)
92{
93 if (rq && rq->end_io_data)
94 return &((struct dm_rq_target_io *)rq->end_io_data)->info;
95 return NULL;
96}
97EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
98
84#define MINOR_ALLOCED ((void *)-1) 99#define MINOR_ALLOCED ((void *)-1)
85 100
86/* 101/*
@@ -157,13 +172,31 @@ struct mapped_device {
157 * freeze/thaw support require holding onto a super block 172 * freeze/thaw support require holding onto a super block
158 */ 173 */
159 struct super_block *frozen_sb; 174 struct super_block *frozen_sb;
160 struct block_device *suspended_bdev; 175 struct block_device *bdev;
161 176
162 /* forced geometry settings */ 177 /* forced geometry settings */
163 struct hd_geometry geometry; 178 struct hd_geometry geometry;
164 179
180 /* marker of flush suspend for request-based dm */
181 struct request suspend_rq;
182
183 /* For saving the address of __make_request for request based dm */
184 make_request_fn *saved_make_request_fn;
185
165 /* sysfs handle */ 186 /* sysfs handle */
166 struct kobject kobj; 187 struct kobject kobj;
188
189 /* zero-length barrier that will be cloned and submitted to targets */
190 struct bio barrier_bio;
191};
192
193/*
194 * For mempools pre-allocation at the table loading time.
195 */
196struct dm_md_mempools {
197 mempool_t *io_pool;
198 mempool_t *tio_pool;
199 struct bio_set *bs;
167}; 200};
168 201
169#define MIN_IOS 256 202#define MIN_IOS 256
@@ -391,14 +424,29 @@ static void free_io(struct mapped_device *md, struct dm_io *io)
391 mempool_free(io, md->io_pool); 424 mempool_free(io, md->io_pool);
392} 425}
393 426
394static struct dm_target_io *alloc_tio(struct mapped_device *md) 427static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
395{ 428{
396 return mempool_alloc(md->tio_pool, GFP_NOIO); 429 mempool_free(tio, md->tio_pool);
397} 430}
398 431
399static void free_tio(struct mapped_device *md, struct dm_target_io *tio) 432static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
400{ 433{
401 mempool_free(tio, md->tio_pool); 434 return mempool_alloc(md->tio_pool, GFP_ATOMIC);
435}
436
437static void free_rq_tio(struct dm_rq_target_io *tio)
438{
439 mempool_free(tio, tio->md->tio_pool);
440}
441
442static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
443{
444 return mempool_alloc(md->io_pool, GFP_ATOMIC);
445}
446
447static void free_bio_info(struct dm_rq_clone_bio_info *info)
448{
449 mempool_free(info, info->tio->md->io_pool);
402} 450}
403 451
404static void start_io_acct(struct dm_io *io) 452static void start_io_acct(struct dm_io *io)
@@ -464,12 +512,13 @@ static void queue_io(struct mapped_device *md, struct bio *bio)
464struct dm_table *dm_get_table(struct mapped_device *md) 512struct dm_table *dm_get_table(struct mapped_device *md)
465{ 513{
466 struct dm_table *t; 514 struct dm_table *t;
515 unsigned long flags;
467 516
468 read_lock(&md->map_lock); 517 read_lock_irqsave(&md->map_lock, flags);
469 t = md->map; 518 t = md->map;
470 if (t) 519 if (t)
471 dm_table_get(t); 520 dm_table_get(t);
472 read_unlock(&md->map_lock); 521 read_unlock_irqrestore(&md->map_lock, flags);
473 522
474 return t; 523 return t;
475} 524}
@@ -536,9 +585,11 @@ static void dec_pending(struct dm_io *io, int error)
536 * Target requested pushing back the I/O. 585 * Target requested pushing back the I/O.
537 */ 586 */
538 spin_lock_irqsave(&md->deferred_lock, flags); 587 spin_lock_irqsave(&md->deferred_lock, flags);
539 if (__noflush_suspending(md)) 588 if (__noflush_suspending(md)) {
540 bio_list_add_head(&md->deferred, io->bio); 589 if (!bio_barrier(io->bio))
541 else 590 bio_list_add_head(&md->deferred,
591 io->bio);
592 } else
542 /* noflush suspend was interrupted. */ 593 /* noflush suspend was interrupted. */
543 io->error = -EIO; 594 io->error = -EIO;
544 spin_unlock_irqrestore(&md->deferred_lock, flags); 595 spin_unlock_irqrestore(&md->deferred_lock, flags);
@@ -553,7 +604,8 @@ static void dec_pending(struct dm_io *io, int error)
553 * a per-device variable for error reporting. 604 * a per-device variable for error reporting.
554 * Note that you can't touch the bio after end_io_acct 605 * Note that you can't touch the bio after end_io_acct
555 */ 606 */
556 md->barrier_error = io_error; 607 if (!md->barrier_error && io_error != -EOPNOTSUPP)
608 md->barrier_error = io_error;
557 end_io_acct(io); 609 end_io_acct(io);
558 } else { 610 } else {
559 end_io_acct(io); 611 end_io_acct(io);
@@ -607,6 +659,262 @@ static void clone_endio(struct bio *bio, int error)
607 dec_pending(io, error); 659 dec_pending(io, error);
608} 660}
609 661
662/*
663 * Partial completion handling for request-based dm
664 */
665static void end_clone_bio(struct bio *clone, int error)
666{
667 struct dm_rq_clone_bio_info *info = clone->bi_private;
668 struct dm_rq_target_io *tio = info->tio;
669 struct bio *bio = info->orig;
670 unsigned int nr_bytes = info->orig->bi_size;
671
672 bio_put(clone);
673
674 if (tio->error)
675 /*
676 * An error has already been detected on the request.
677 * Once an error has occurred, just let clone->end_io()
678 * handle the remainder.
679 */
680 return;
681 else if (error) {
682 /*
683 * Don't report the error to the upper layer yet.
684 * The error handling decision is made by the target driver
685 * when the request is completed.
686 */
687 tio->error = error;
688 return;
689 }
690
691 /*
692 * I/O for the bio completed successfully.
693 * Report the data completion to the upper layer.
694 */
695
696 /*
697 * bios are processed from the head of the list.
698 * So the completing bio should always be rq->bio.
699 * If it's not, something is wrong.
700 */
701 if (tio->orig->bio != bio)
702 DMERR("bio completion arrived in the middle of the request");
703
704 /*
705 * Update the original request.
706 * Do not use blk_end_request() here, because it may complete
707 * the original request before the clone, and break the ordering.
708 */
709 blk_update_request(tio->orig, 0, nr_bytes);
710}
711
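The comment above hinges on blk_update_request() advancing a request's position and byte accounting without ever finishing it. A minimal sketch of that property, assuming a caller that already owns the original request:

/*
 * Sketch only: credit the original request with the bytes one clone
 * bio carried.  blk_update_request() never completes the request,
 * so the clone's end_io keeps control of the completion order.
 */
static void example_partial_advance(struct request *orig,
				    unsigned int done_bytes)
{
	blk_update_request(orig, 0, done_bytes);	/* 0 == no error */
}
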
712/*
713 * Don't touch any member of the md after calling this function, because
714 * the md may be freed in dm_put() at the end of this function.
715 * Alternatively, do dm_get() before calling this function and dm_put() later.
716 */
717static void rq_completed(struct mapped_device *md, int run_queue)
718{
719 int wakeup_waiters = 0;
720 struct request_queue *q = md->queue;
721 unsigned long flags;
722
723 spin_lock_irqsave(q->queue_lock, flags);
724 if (!queue_in_flight(q))
725 wakeup_waiters = 1;
726 spin_unlock_irqrestore(q->queue_lock, flags);
727
728 /* nudge anyone waiting on suspend queue */
729 if (wakeup_waiters)
730 wake_up(&md->wait);
731
732 if (run_queue)
733 blk_run_queue(q);
734
735 /*
736 * dm_put() must be at the end of this function. See the comment above.
737 */
738 dm_put(md);
739}
740
741static void dm_unprep_request(struct request *rq)
742{
743 struct request *clone = rq->special;
744 struct dm_rq_target_io *tio = clone->end_io_data;
745
746 rq->special = NULL;
747 rq->cmd_flags &= ~REQ_DONTPREP;
748
749 blk_rq_unprep_clone(clone);
750 free_rq_tio(tio);
751}
752
753/*
754 * Requeue the original request of a clone.
755 */
756void dm_requeue_unmapped_request(struct request *clone)
757{
758 struct dm_rq_target_io *tio = clone->end_io_data;
759 struct mapped_device *md = tio->md;
760 struct request *rq = tio->orig;
761 struct request_queue *q = rq->q;
762 unsigned long flags;
763
764 dm_unprep_request(rq);
765
766 spin_lock_irqsave(q->queue_lock, flags);
767 if (elv_queue_empty(q))
768 blk_plug_device(q);
769 blk_requeue_request(q, rq);
770 spin_unlock_irqrestore(q->queue_lock, flags);
771
772 rq_completed(md, 0);
773}
774EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
775
776static void __stop_queue(struct request_queue *q)
777{
778 blk_stop_queue(q);
779}
780
781static void stop_queue(struct request_queue *q)
782{
783 unsigned long flags;
784
785 spin_lock_irqsave(q->queue_lock, flags);
786 __stop_queue(q);
787 spin_unlock_irqrestore(q->queue_lock, flags);
788}
789
790static void __start_queue(struct request_queue *q)
791{
792 if (blk_queue_stopped(q))
793 blk_start_queue(q);
794}
795
796static void start_queue(struct request_queue *q)
797{
798 unsigned long flags;
799
800 spin_lock_irqsave(q->queue_lock, flags);
801 __start_queue(q);
802 spin_unlock_irqrestore(q->queue_lock, flags);
803}
804
805/*
806 * Complete the clone and the original request.
807 * Must be called without queue lock.
808 */
809static void dm_end_request(struct request *clone, int error)
810{
811 struct dm_rq_target_io *tio = clone->end_io_data;
812 struct mapped_device *md = tio->md;
813 struct request *rq = tio->orig;
814
815 if (blk_pc_request(rq)) {
816 rq->errors = clone->errors;
817 rq->resid_len = clone->resid_len;
818
819 if (rq->sense)
820 /*
821 * We are using the sense buffer of the original
822 * request.
823 * So setting the length of the sense data is enough.
824 */
825 rq->sense_len = clone->sense_len;
826 }
827
828 BUG_ON(clone->bio);
829 free_rq_tio(tio);
830
831 blk_end_request_all(rq, error);
832
833 rq_completed(md, 1);
834}
835
836/*
837 * Request completion handler for request-based dm
838 */
839static void dm_softirq_done(struct request *rq)
840{
841 struct request *clone = rq->completion_data;
842 struct dm_rq_target_io *tio = clone->end_io_data;
843 dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
844 int error = tio->error;
845
846 if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io)
847 error = rq_end_io(tio->ti, clone, error, &tio->info);
848
849 if (error <= 0)
850 /* The target wants to complete the I/O */
851 dm_end_request(clone, error);
852 else if (error == DM_ENDIO_INCOMPLETE)
853 /* The target will handle the I/O */
854 return;
855 else if (error == DM_ENDIO_REQUEUE)
856 /* The target wants to requeue the I/O */
857 dm_requeue_unmapped_request(clone);
858 else {
859 DMWARN("unimplemented target endio return value: %d", error);
860 BUG();
861 }
862}
863
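To make the return-value contract above concrete, here is a hypothetical target rq_end_io; the function name and its transient-error test are assumptions, not part of this patch:

static int example_rq_end_io(struct dm_target *ti, struct request *clone,
			     int error, union map_info *map_context)
{
	if (!error)
		return 0;			/* complete the original request */

	if (error == -EBUSY)			/* assumed transient condition */
		return DM_ENDIO_REQUEUE;	/* dm_requeue_unmapped_request() */

	return error;				/* complete with this error */
}

A target returning DM_ENDIO_INCOMPLETE instead would take over the completion itself, which is why dm_softirq_done() simply returns in that branch.
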
864/*
865 * Complete the clone and the original request with the error status
866 * through softirq context.
867 */
868static void dm_complete_request(struct request *clone, int error)
869{
870 struct dm_rq_target_io *tio = clone->end_io_data;
871 struct request *rq = tio->orig;
872
873 tio->error = error;
874 rq->completion_data = clone;
875 blk_complete_request(rq);
876}
877
878/*
879 * Complete the not-mapped clone and the original request with the error status
880 * through softirq context.
881 * Target's rq_end_io() function isn't called.
882 * This may be used when the target's map_rq() function fails.
883 */
884void dm_kill_unmapped_request(struct request *clone, int error)
885{
886 struct dm_rq_target_io *tio = clone->end_io_data;
887 struct request *rq = tio->orig;
888
889 rq->cmd_flags |= REQ_FAILED;
890 dm_complete_request(clone, error);
891}
892EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);
893
894/*
895 * Called with the queue lock held
896 */
897static void end_clone_request(struct request *clone, int error)
898{
899 /*
900 * This just cleans up the bookkeeping of the queue in which
901 * the clone was dispatched.
902 * The clone is *NOT* actually freed here, because it is allocated
903 * from dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
904 */
905 __blk_put_request(clone->q, clone);
906
907 /*
908 * Actual request completion is done in a softirq context which doesn't
909 * hold the queue lock. Otherwise, deadlock could occur because:
910 * - the upper-level driver of the stack may submit another request
911 * during the completion
912 * - that submission may target this very queue and would need to
913 * take its queue lock
914 */
915 dm_complete_request(clone, error);
916}
917
610static sector_t max_io_len(struct mapped_device *md, 918static sector_t max_io_len(struct mapped_device *md,
611 sector_t sector, struct dm_target *ti) 919 sector_t sector, struct dm_target *ti)
612{ 920{
@@ -634,11 +942,6 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
634 sector_t sector; 942 sector_t sector;
635 struct mapped_device *md; 943 struct mapped_device *md;
636 944
637 /*
638 * Sanity checks.
639 */
640 BUG_ON(!clone->bi_size);
641
642 clone->bi_end_io = clone_endio; 945 clone->bi_end_io = clone_endio;
643 clone->bi_private = tio; 946 clone->bi_private = tio;
644 947
@@ -752,6 +1055,48 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
752 return clone; 1055 return clone;
753} 1056}
754 1057
1058static struct dm_target_io *alloc_tio(struct clone_info *ci,
1059 struct dm_target *ti)
1060{
1061 struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO);
1062
1063 tio->io = ci->io;
1064 tio->ti = ti;
1065 memset(&tio->info, 0, sizeof(tio->info));
1066
1067 return tio;
1068}
1069
1070static void __flush_target(struct clone_info *ci, struct dm_target *ti,
1071 unsigned flush_nr)
1072{
1073 struct dm_target_io *tio = alloc_tio(ci, ti);
1074 struct bio *clone;
1075
1076 tio->info.flush_request = flush_nr;
1077
1078 clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
1079 __bio_clone(clone, ci->bio);
1080 clone->bi_destructor = dm_bio_destructor;
1081
1082 __map_bio(ti, clone, tio);
1083}
1084
1085static int __clone_and_map_empty_barrier(struct clone_info *ci)
1086{
1087 unsigned target_nr = 0, flush_nr;
1088 struct dm_target *ti;
1089
1090 while ((ti = dm_table_get_target(ci->map, target_nr++)))
1091 for (flush_nr = 0; flush_nr < ti->num_flush_requests;
1092 flush_nr++)
1093 __flush_target(ci, ti, flush_nr);
1094
1095 ci->sector_count = 0;
1096
1097 return 0;
1098}
1099
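The empty-barrier fan-out above only reaches targets that opt in through num_flush_requests. A hedged sketch of a constructor doing so (the target itself is hypothetical):

static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	/*
	 * Receive one flush clone per empty barrier; a target mirroring
	 * N devices would typically set this to N.
	 */
	ti->num_flush_requests = 1;

	return 0;
}
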
755static int __clone_and_map(struct clone_info *ci) 1100static int __clone_and_map(struct clone_info *ci)
756{ 1101{
757 struct bio *clone, *bio = ci->bio; 1102 struct bio *clone, *bio = ci->bio;
@@ -759,6 +1104,9 @@ static int __clone_and_map(struct clone_info *ci)
759 sector_t len = 0, max; 1104 sector_t len = 0, max;
760 struct dm_target_io *tio; 1105 struct dm_target_io *tio;
761 1106
1107 if (unlikely(bio_empty_barrier(bio)))
1108 return __clone_and_map_empty_barrier(ci);
1109
762 ti = dm_table_find_target(ci->map, ci->sector); 1110 ti = dm_table_find_target(ci->map, ci->sector);
763 if (!dm_target_is_valid(ti)) 1111 if (!dm_target_is_valid(ti))
764 return -EIO; 1112 return -EIO;
@@ -768,10 +1116,7 @@ static int __clone_and_map(struct clone_info *ci)
768 /* 1116 /*
769 * Allocate a target io object. 1117 * Allocate a target io object.
770 */ 1118 */
771 tio = alloc_tio(ci->md); 1119 tio = alloc_tio(ci, ti);
772 tio->io = ci->io;
773 tio->ti = ti;
774 memset(&tio->info, 0, sizeof(tio->info));
775 1120
776 if (ci->sector_count <= max) { 1121 if (ci->sector_count <= max) {
777 /* 1122 /*
@@ -827,10 +1172,7 @@ static int __clone_and_map(struct clone_info *ci)
827 1172
828 max = max_io_len(ci->md, ci->sector, ti); 1173 max = max_io_len(ci->md, ci->sector, ti);
829 1174
830 tio = alloc_tio(ci->md); 1175 tio = alloc_tio(ci, ti);
831 tio->io = ci->io;
832 tio->ti = ti;
833 memset(&tio->info, 0, sizeof(tio->info));
834 } 1176 }
835 1177
836 len = min(remaining, max); 1178 len = min(remaining, max);
@@ -865,7 +1207,8 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
865 if (!bio_barrier(bio)) 1207 if (!bio_barrier(bio))
866 bio_io_error(bio); 1208 bio_io_error(bio);
867 else 1209 else
868 md->barrier_error = -EIO; 1210 if (!md->barrier_error)
1211 md->barrier_error = -EIO;
869 return; 1212 return;
870 } 1213 }
871 1214
@@ -878,6 +1221,8 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
878 ci.io->md = md; 1221 ci.io->md = md;
879 ci.sector = bio->bi_sector; 1222 ci.sector = bio->bi_sector;
880 ci.sector_count = bio_sectors(bio); 1223 ci.sector_count = bio_sectors(bio);
1224 if (unlikely(bio_empty_barrier(bio)))
1225 ci.sector_count = 1;
881 ci.idx = bio->bi_idx; 1226 ci.idx = bio->bi_idx;
882 1227
883 start_io_acct(ci.io); 1228 start_io_acct(ci.io);
@@ -925,6 +1270,16 @@ static int dm_merge_bvec(struct request_queue *q,
925 */ 1270 */
926 if (max_size && ti->type->merge) 1271 if (max_size && ti->type->merge)
927 max_size = ti->type->merge(ti, bvm, biovec, max_size); 1272 max_size = ti->type->merge(ti, bvm, biovec, max_size);
1273 /*
1274 * If the target doesn't support a merge method and some of the devices
1275 * provided their merge_bvec method (we know this by looking at
1276 * queue_max_hw_sectors), then we can't allow bios with multiple vector
1277 * entries. So always set max_size to 0, and the code below allows
1278 * just one page.
1279 */
1280 else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
1281
1282 max_size = 0;
928 1283
929out_table: 1284out_table:
930 dm_table_put(map); 1285 dm_table_put(map);
@@ -943,7 +1298,7 @@ out:
943 * The request function that just remaps the bio built up by 1298 * The request function that just remaps the bio built up by
944 * dm_merge_bvec. 1299 * dm_merge_bvec.
945 */ 1300 */
946static int dm_request(struct request_queue *q, struct bio *bio) 1301static int _dm_request(struct request_queue *q, struct bio *bio)
947{ 1302{
948 int rw = bio_data_dir(bio); 1303 int rw = bio_data_dir(bio);
949 struct mapped_device *md = q->queuedata; 1304 struct mapped_device *md = q->queuedata;
@@ -980,12 +1335,274 @@ static int dm_request(struct request_queue *q, struct bio *bio)
980 return 0; 1335 return 0;
981} 1336}
982 1337
1338static int dm_make_request(struct request_queue *q, struct bio *bio)
1339{
1340 struct mapped_device *md = q->queuedata;
1341
1342 if (unlikely(bio_barrier(bio))) {
1343 bio_endio(bio, -EOPNOTSUPP);
1344 return 0;
1345 }
1346
1347 return md->saved_make_request_fn(q, bio); /* call __make_request() */
1348}
1349
1350static int dm_request_based(struct mapped_device *md)
1351{
1352 return blk_queue_stackable(md->queue);
1353}
1354
1355static int dm_request(struct request_queue *q, struct bio *bio)
1356{
1357 struct mapped_device *md = q->queuedata;
1358
1359 if (dm_request_based(md))
1360 return dm_make_request(q, bio);
1361
1362 return _dm_request(q, bio);
1363}
1364
1365void dm_dispatch_request(struct request *rq)
1366{
1367 int r;
1368
1369 if (blk_queue_io_stat(rq->q))
1370 rq->cmd_flags |= REQ_IO_STAT;
1371
1372 rq->start_time = jiffies;
1373 r = blk_insert_cloned_request(rq->q, rq);
1374 if (r)
1375 dm_complete_request(rq, r);
1376}
1377EXPORT_SYMBOL_GPL(dm_dispatch_request);
1378
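Besides map_request()'s DM_MAPIO_REMAPPED path, dm_dispatch_request() is also the hand-off for targets that returned DM_MAPIO_SUBMITTED and dispatch later. A minimal sketch of such a deferred dispatch, assuming the target parked the clone until its own worker found it a path:

static void example_deferred_dispatch(struct request *clone)
{
	/* assumed to run from the target's worker, queue lock not held */
	dm_dispatch_request(clone);
}
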
1379static void dm_rq_bio_destructor(struct bio *bio)
1380{
1381 struct dm_rq_clone_bio_info *info = bio->bi_private;
1382 struct mapped_device *md = info->tio->md;
1383
1384 free_bio_info(info);
1385 bio_free(bio, md->bs);
1386}
1387
1388static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
1389 void *data)
1390{
1391 struct dm_rq_target_io *tio = data;
1392 struct mapped_device *md = tio->md;
1393 struct dm_rq_clone_bio_info *info = alloc_bio_info(md);
1394
1395 if (!info)
1396 return -ENOMEM;
1397
1398 info->orig = bio_orig;
1399 info->tio = tio;
1400 bio->bi_end_io = end_clone_bio;
1401 bio->bi_private = info;
1402 bio->bi_destructor = dm_rq_bio_destructor;
1403
1404 return 0;
1405}
1406
1407static int setup_clone(struct request *clone, struct request *rq,
1408 struct dm_rq_target_io *tio)
1409{
1410 int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
1411 dm_rq_bio_constructor, tio);
1412
1413 if (r)
1414 return r;
1415
1416 clone->cmd = rq->cmd;
1417 clone->cmd_len = rq->cmd_len;
1418 clone->sense = rq->sense;
1419 clone->buffer = rq->buffer;
1420 clone->end_io = end_clone_request;
1421 clone->end_io_data = tio;
1422
1423 return 0;
1424}
1425
1426static int dm_rq_flush_suspending(struct mapped_device *md)
1427{
1428 return !md->suspend_rq.special;
1429}
1430
1431/*
1432 * Called with the queue lock held.
1433 */
1434static int dm_prep_fn(struct request_queue *q, struct request *rq)
1435{
1436 struct mapped_device *md = q->queuedata;
1437 struct dm_rq_target_io *tio;
1438 struct request *clone;
1439
1440 if (unlikely(rq == &md->suspend_rq)) {
1441 if (dm_rq_flush_suspending(md))
1442 return BLKPREP_OK;
1443 else
1444 /* The flush suspend was interrupted */
1445 return BLKPREP_KILL;
1446 }
1447
1448 if (unlikely(rq->special)) {
1449 DMWARN("Already has something in rq->special.");
1450 return BLKPREP_KILL;
1451 }
1452
1453 tio = alloc_rq_tio(md); /* Only one for each original request */
1454 if (!tio)
1455 /* -ENOMEM */
1456 return BLKPREP_DEFER;
1457
1458 tio->md = md;
1459 tio->ti = NULL;
1460 tio->orig = rq;
1461 tio->error = 0;
1462 memset(&tio->info, 0, sizeof(tio->info));
1463
1464 clone = &tio->clone;
1465 if (setup_clone(clone, rq, tio)) {
1466 /* -ENOMEM */
1467 free_rq_tio(tio);
1468 return BLKPREP_DEFER;
1469 }
1470
1471 rq->special = clone;
1472 rq->cmd_flags |= REQ_DONTPREP;
1473
1474 return BLKPREP_OK;
1475}
1476
1477static void map_request(struct dm_target *ti, struct request *rq,
1478 struct mapped_device *md)
1479{
1480 int r;
1481 struct request *clone = rq->special;
1482 struct dm_rq_target_io *tio = clone->end_io_data;
1483
1484 /*
1485 * Hold the md reference here for the in-flight I/O.
1486 * We can't rely on the reference count taken by the device
1487 * opener, because the device may be closed during request
1488 * completion when all bios are completed.
1489 * See the comment in rq_completed() too.
1490 */
1491 dm_get(md);
1492
1493 tio->ti = ti;
1494 r = ti->type->map_rq(ti, clone, &tio->info);
1495 switch (r) {
1496 case DM_MAPIO_SUBMITTED:
1497 /* The target has taken the I/O to submit by itself later */
1498 break;
1499 case DM_MAPIO_REMAPPED:
1500 /* The target has remapped the I/O so dispatch it */
1501 dm_dispatch_request(clone);
1502 break;
1503 case DM_MAPIO_REQUEUE:
1504 /* The target wants to requeue the I/O */
1505 dm_requeue_unmapped_request(clone);
1506 break;
1507 default:
1508 if (r > 0) {
1509 DMWARN("unimplemented target map return value: %d", r);
1510 BUG();
1511 }
1512
1513 /* The target wants to complete the I/O */
1514 dm_kill_unmapped_request(clone, r);
1515 break;
1516 }
1517}
1518
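For reference, a hypothetical map_rq that exercises the DM_MAPIO_REMAPPED branch of map_request(); struct example_ctx and its dev member (a struct dm_dev *) are assumptions:

static int example_map_rq(struct dm_target *ti, struct request *clone,
			  union map_info *map_context)
{
	struct example_ctx *ec = ti->private;	/* assumed per-target state */

	/* Point the clone at the underlying device before dispatch */
	clone->q = bdev_get_queue(ec->dev->bdev);
	clone->rq_disk = ec->dev->bdev->bd_disk;

	return DM_MAPIO_REMAPPED;	/* map_request() dispatches the clone */
}
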
1519/*
1520 * q->request_fn for request-based dm.
1521 * Called with the queue lock held.
1522 */
1523static void dm_request_fn(struct request_queue *q)
1524{
1525 struct mapped_device *md = q->queuedata;
1526 struct dm_table *map = dm_get_table(md);
1527 struct dm_target *ti;
1528 struct request *rq;
1529
1530 /*
1531 * For noflush suspend, check blk_queue_stopped() to immediately
1532 * quit I/O dispatching.
1533 */
1534 while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
1535 rq = blk_peek_request(q);
1536 if (!rq)
1537 goto plug_and_out;
1538
1539 if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend marker */
1540 if (queue_in_flight(q))
1541 /* Not quiet yet. Wait more */
1542 goto plug_and_out;
1543
1544 /* This device should be quiet now */
1545 __stop_queue(q);
1546 blk_start_request(rq);
1547 __blk_end_request_all(rq, 0);
1548 wake_up(&md->wait);
1549 goto out;
1550 }
1551
1552 ti = dm_table_find_target(map, blk_rq_pos(rq));
1553 if (ti->type->busy && ti->type->busy(ti))
1554 goto plug_and_out;
1555
1556 blk_start_request(rq);
1557 spin_unlock(q->queue_lock);
1558 map_request(ti, rq, md);
1559 spin_lock_irq(q->queue_lock);
1560 }
1561
1562 goto out;
1563
1564plug_and_out:
1565 if (!elv_queue_empty(q))
1566 /* Some requests still remain, retry later */
1567 blk_plug_device(q);
1568
1569out:
1570 dm_table_put(map);
1571
1572 return;
1573}
1574
1575int dm_underlying_device_busy(struct request_queue *q)
1576{
1577 return blk_lld_busy(q);
1578}
1579EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
1580
1581static int dm_lld_busy(struct request_queue *q)
1582{
1583 int r;
1584 struct mapped_device *md = q->queuedata;
1585 struct dm_table *map = dm_get_table(md);
1586
1587 if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
1588 r = 1;
1589 else
1590 r = dm_table_any_busy_target(map);
1591
1592 dm_table_put(map);
1593
1594 return r;
1595}
1596
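dm_underlying_device_busy() exists for precisely the ti->type->busy hook consulted in dm_request_fn() above. A sketch, reusing the assumed example_ctx from the map_rq sketch earlier:

static int example_busy(struct dm_target *ti)
{
	struct example_ctx *ec = ti->private;	/* assumed per-target state */

	/* Non-zero makes dm_request_fn() plug the queue and retry later */
	return dm_underlying_device_busy(bdev_get_queue(ec->dev->bdev));
}
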
983static void dm_unplug_all(struct request_queue *q) 1597static void dm_unplug_all(struct request_queue *q)
984{ 1598{
985 struct mapped_device *md = q->queuedata; 1599 struct mapped_device *md = q->queuedata;
986 struct dm_table *map = dm_get_table(md); 1600 struct dm_table *map = dm_get_table(md);
987 1601
988 if (map) { 1602 if (map) {
1603 if (dm_request_based(md))
1604 generic_unplug_device(q);
1605
989 dm_table_unplug_all(map); 1606 dm_table_unplug_all(map);
990 dm_table_put(map); 1607 dm_table_put(map);
991 } 1608 }
@@ -1000,7 +1617,16 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
1000 if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { 1617 if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
1001 map = dm_get_table(md); 1618 map = dm_get_table(md);
1002 if (map) { 1619 if (map) {
1003 r = dm_table_any_congested(map, bdi_bits); 1620 /*
1621 * For request-based dm, only the device's own queue matters
1622 * when answering a congestion query on the request_queue
1623 */
1624 if (dm_request_based(md))
1625 r = md->queue->backing_dev_info.state &
1626 bdi_bits;
1627 else
1628 r = dm_table_any_congested(map, bdi_bits);
1629
1004 dm_table_put(map); 1630 dm_table_put(map);
1005 } 1631 }
1006 } 1632 }
@@ -1123,30 +1749,32 @@ static struct mapped_device *alloc_dev(int minor)
1123 INIT_LIST_HEAD(&md->uevent_list); 1749 INIT_LIST_HEAD(&md->uevent_list);
1124 spin_lock_init(&md->uevent_lock); 1750 spin_lock_init(&md->uevent_lock);
1125 1751
1126 md->queue = blk_alloc_queue(GFP_KERNEL); 1752 md->queue = blk_init_queue(dm_request_fn, NULL);
1127 if (!md->queue) 1753 if (!md->queue)
1128 goto bad_queue; 1754 goto bad_queue;
1129 1755
1756 /*
1757 * Request-based dm devices cannot be stacked on top of bio-based dm
1758 * devices. The type of this dm device has not been decided yet,
1759 * although we initialized the queue using blk_init_queue().
1760 * The type is decided when the first table is loaded.
1761 * To prevent problematic device stacking, clear the queue flag
1762 * for request stacking support until then.
1763 *
1764 * This queue is new, so no concurrency on the queue_flags.
1765 */
1766 queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
1767 md->saved_make_request_fn = md->queue->make_request_fn;
1130 md->queue->queuedata = md; 1768 md->queue->queuedata = md;
1131 md->queue->backing_dev_info.congested_fn = dm_any_congested; 1769 md->queue->backing_dev_info.congested_fn = dm_any_congested;
1132 md->queue->backing_dev_info.congested_data = md; 1770 md->queue->backing_dev_info.congested_data = md;
1133 blk_queue_make_request(md->queue, dm_request); 1771 blk_queue_make_request(md->queue, dm_request);
1134 blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
1135 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 1772 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1136 md->queue->unplug_fn = dm_unplug_all; 1773 md->queue->unplug_fn = dm_unplug_all;
1137 blk_queue_merge_bvec(md->queue, dm_merge_bvec); 1774 blk_queue_merge_bvec(md->queue, dm_merge_bvec);
1138 1775 blk_queue_softirq_done(md->queue, dm_softirq_done);
1139 md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); 1776 blk_queue_prep_rq(md->queue, dm_prep_fn);
1140 if (!md->io_pool) 1777 blk_queue_lld_busy(md->queue, dm_lld_busy);
1141 goto bad_io_pool;
1142
1143 md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
1144 if (!md->tio_pool)
1145 goto bad_tio_pool;
1146
1147 md->bs = bioset_create(16, 0);
1148 if (!md->bs)
1149 goto bad_no_bioset;
1150 1778
1151 md->disk = alloc_disk(1); 1779 md->disk = alloc_disk(1);
1152 if (!md->disk) 1780 if (!md->disk)
@@ -1170,6 +1798,10 @@ static struct mapped_device *alloc_dev(int minor)
1170 if (!md->wq) 1798 if (!md->wq)
1171 goto bad_thread; 1799 goto bad_thread;
1172 1800
1801 md->bdev = bdget_disk(md->disk, 0);
1802 if (!md->bdev)
1803 goto bad_bdev;
1804
1173 /* Populate the mapping, nobody knows we exist yet */ 1805 /* Populate the mapping, nobody knows we exist yet */
1174 spin_lock(&_minor_lock); 1806 spin_lock(&_minor_lock);
1175 old_md = idr_replace(&_minor_idr, md, minor); 1807 old_md = idr_replace(&_minor_idr, md, minor);
@@ -1179,15 +1811,11 @@ static struct mapped_device *alloc_dev(int minor)
1179 1811
1180 return md; 1812 return md;
1181 1813
1814bad_bdev:
1815 destroy_workqueue(md->wq);
1182bad_thread: 1816bad_thread:
1183 put_disk(md->disk); 1817 put_disk(md->disk);
1184bad_disk: 1818bad_disk:
1185 bioset_free(md->bs);
1186bad_no_bioset:
1187 mempool_destroy(md->tio_pool);
1188bad_tio_pool:
1189 mempool_destroy(md->io_pool);
1190bad_io_pool:
1191 blk_cleanup_queue(md->queue); 1819 blk_cleanup_queue(md->queue);
1192bad_queue: 1820bad_queue:
1193 free_minor(minor); 1821 free_minor(minor);
@@ -1204,14 +1832,15 @@ static void free_dev(struct mapped_device *md)
1204{ 1832{
1205 int minor = MINOR(disk_devt(md->disk)); 1833 int minor = MINOR(disk_devt(md->disk));
1206 1834
1207 if (md->suspended_bdev) { 1835 unlock_fs(md);
1208 unlock_fs(md); 1836 bdput(md->bdev);
1209 bdput(md->suspended_bdev);
1210 }
1211 destroy_workqueue(md->wq); 1837 destroy_workqueue(md->wq);
1212 mempool_destroy(md->tio_pool); 1838 if (md->tio_pool)
1213 mempool_destroy(md->io_pool); 1839 mempool_destroy(md->tio_pool);
1214 bioset_free(md->bs); 1840 if (md->io_pool)
1841 mempool_destroy(md->io_pool);
1842 if (md->bs)
1843 bioset_free(md->bs);
1215 blk_integrity_unregister(md->disk); 1844 blk_integrity_unregister(md->disk);
1216 del_gendisk(md->disk); 1845 del_gendisk(md->disk);
1217 free_minor(minor); 1846 free_minor(minor);
@@ -1226,6 +1855,29 @@ static void free_dev(struct mapped_device *md)
1226 kfree(md); 1855 kfree(md);
1227} 1856}
1228 1857
1858static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
1859{
1860 struct dm_md_mempools *p;
1861
1862 if (md->io_pool && md->tio_pool && md->bs)
1863 /* the md already has the necessary mempools */
1864 goto out;
1865
1866 p = dm_table_get_md_mempools(t);
1867 BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
1868
1869 md->io_pool = p->io_pool;
1870 p->io_pool = NULL;
1871 md->tio_pool = p->tio_pool;
1872 p->tio_pool = NULL;
1873 md->bs = p->bs;
1874 p->bs = NULL;
1875
1876out:
1877 /* mempool bind completed; the table no longer needs any mempools */
1878 dm_table_free_md_mempools(t);
1879}
1880
1229/* 1881/*
1230 * Bind a table to the device. 1882 * Bind a table to the device.
1231 */ 1883 */
@@ -1249,15 +1901,17 @@ static void __set_size(struct mapped_device *md, sector_t size)
1249{ 1901{
1250 set_capacity(md->disk, size); 1902 set_capacity(md->disk, size);
1251 1903
1252 mutex_lock(&md->suspended_bdev->bd_inode->i_mutex); 1904 mutex_lock(&md->bdev->bd_inode->i_mutex);
1253 i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); 1905 i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
1254 mutex_unlock(&md->suspended_bdev->bd_inode->i_mutex); 1906 mutex_unlock(&md->bdev->bd_inode->i_mutex);
1255} 1907}
1256 1908
1257static int __bind(struct mapped_device *md, struct dm_table *t) 1909static int __bind(struct mapped_device *md, struct dm_table *t,
1910 struct queue_limits *limits)
1258{ 1911{
1259 struct request_queue *q = md->queue; 1912 struct request_queue *q = md->queue;
1260 sector_t size; 1913 sector_t size;
1914 unsigned long flags;
1261 1915
1262 size = dm_table_get_size(t); 1916 size = dm_table_get_size(t);
1263 1917
@@ -1267,8 +1921,7 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
1267 if (size != get_capacity(md->disk)) 1921 if (size != get_capacity(md->disk))
1268 memset(&md->geometry, 0, sizeof(md->geometry)); 1922 memset(&md->geometry, 0, sizeof(md->geometry));
1269 1923
1270 if (md->suspended_bdev) 1924 __set_size(md, size);
1271 __set_size(md, size);
1272 1925
1273 if (!size) { 1926 if (!size) {
1274 dm_table_destroy(t); 1927 dm_table_destroy(t);
@@ -1277,10 +1930,22 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
1277 1930
1278 dm_table_event_callback(t, event_callback, md); 1931 dm_table_event_callback(t, event_callback, md);
1279 1932
1280 write_lock(&md->map_lock); 1933 /*
1934 * The queue hasn't been stopped yet, if the old table type wasn't
1935 * for request-based during suspension. So stop it to prevent
1936 * I/O mapping before resume.
1937 * This must be done before setting the queue restrictions,
1938 * because request-based dm may be run just after the setting.
1939 */
1940 if (dm_table_request_based(t) && !blk_queue_stopped(q))
1941 stop_queue(q);
1942
1943 __bind_mempools(md, t);
1944
1945 write_lock_irqsave(&md->map_lock, flags);
1281 md->map = t; 1946 md->map = t;
1282 dm_table_set_restrictions(t, q); 1947 dm_table_set_restrictions(t, q, limits);
1283 write_unlock(&md->map_lock); 1948 write_unlock_irqrestore(&md->map_lock, flags);
1284 1949
1285 return 0; 1950 return 0;
1286} 1951}
@@ -1288,14 +1953,15 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
1288static void __unbind(struct mapped_device *md) 1953static void __unbind(struct mapped_device *md)
1289{ 1954{
1290 struct dm_table *map = md->map; 1955 struct dm_table *map = md->map;
1956 unsigned long flags;
1291 1957
1292 if (!map) 1958 if (!map)
1293 return; 1959 return;
1294 1960
1295 dm_table_event_callback(map, NULL, NULL); 1961 dm_table_event_callback(map, NULL, NULL);
1296 write_lock(&md->map_lock); 1962 write_lock_irqsave(&md->map_lock, flags);
1297 md->map = NULL; 1963 md->map = NULL;
1298 write_unlock(&md->map_lock); 1964 write_unlock_irqrestore(&md->map_lock, flags);
1299 dm_table_destroy(map); 1965 dm_table_destroy(map);
1300} 1966}
1301 1967
@@ -1399,6 +2065,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
1399{ 2065{
1400 int r = 0; 2066 int r = 0;
1401 DECLARE_WAITQUEUE(wait, current); 2067 DECLARE_WAITQUEUE(wait, current);
2068 struct request_queue *q = md->queue;
2069 unsigned long flags;
1402 2070
1403 dm_unplug_all(md->queue); 2071 dm_unplug_all(md->queue);
1404 2072
@@ -1408,7 +2076,14 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
1408 set_current_state(interruptible); 2076 set_current_state(interruptible);
1409 2077
1410 smp_mb(); 2078 smp_mb();
1411 if (!atomic_read(&md->pending)) 2079 if (dm_request_based(md)) {
2080 spin_lock_irqsave(q->queue_lock, flags);
2081 if (!queue_in_flight(q) && blk_queue_stopped(q)) {
2082 spin_unlock_irqrestore(q->queue_lock, flags);
2083 break;
2084 }
2085 spin_unlock_irqrestore(q->queue_lock, flags);
2086 } else if (!atomic_read(&md->pending))
1412 break; 2087 break;
1413 2088
1414 if (interruptible == TASK_INTERRUPTIBLE && 2089 if (interruptible == TASK_INTERRUPTIBLE &&
@@ -1426,34 +2101,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
1426 return r; 2101 return r;
1427} 2102}
1428 2103
1429static int dm_flush(struct mapped_device *md) 2104static void dm_flush(struct mapped_device *md)
1430{ 2105{
1431 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); 2106 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
1432 return 0; 2107
2108 bio_init(&md->barrier_bio);
2109 md->barrier_bio.bi_bdev = md->bdev;
2110 md->barrier_bio.bi_rw = WRITE_BARRIER;
2111 __split_and_process_bio(md, &md->barrier_bio);
2112
2113 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
1433} 2114}
1434 2115
1435static void process_barrier(struct mapped_device *md, struct bio *bio) 2116static void process_barrier(struct mapped_device *md, struct bio *bio)
1436{ 2117{
1437 int error = dm_flush(md); 2118 md->barrier_error = 0;
1438
1439 if (unlikely(error)) {
1440 bio_endio(bio, error);
1441 return;
1442 }
1443 if (bio_empty_barrier(bio)) {
1444 bio_endio(bio, 0);
1445 return;
1446 }
1447
1448 __split_and_process_bio(md, bio);
1449 2119
1450 error = dm_flush(md); 2120 dm_flush(md);
1451 2121
1452 if (!error && md->barrier_error) 2122 if (!bio_empty_barrier(bio)) {
1453 error = md->barrier_error; 2123 __split_and_process_bio(md, bio);
2124 dm_flush(md);
2125 }
1454 2126
1455 if (md->barrier_error != DM_ENDIO_REQUEUE) 2127 if (md->barrier_error != DM_ENDIO_REQUEUE)
1456 bio_endio(bio, error); 2128 bio_endio(bio, md->barrier_error);
2129 else {
2130 spin_lock_irq(&md->deferred_lock);
2131 bio_list_add_head(&md->deferred, bio);
2132 spin_unlock_irq(&md->deferred_lock);
2133 }
1457} 2134}
1458 2135
1459/* 2136/*
@@ -1479,10 +2156,14 @@ static void dm_wq_work(struct work_struct *work)
1479 2156
1480 up_write(&md->io_lock); 2157 up_write(&md->io_lock);
1481 2158
1482 if (bio_barrier(c)) 2159 if (dm_request_based(md))
1483 process_barrier(md, c); 2160 generic_make_request(c);
1484 else 2161 else {
1485 __split_and_process_bio(md, c); 2162 if (bio_barrier(c))
2163 process_barrier(md, c);
2164 else
2165 __split_and_process_bio(md, c);
2166 }
1486 2167
1487 down_write(&md->io_lock); 2168 down_write(&md->io_lock);
1488 } 2169 }
@@ -1502,6 +2183,7 @@ static void dm_queue_flush(struct mapped_device *md)
1502 */ 2183 */
1503int dm_swap_table(struct mapped_device *md, struct dm_table *table) 2184int dm_swap_table(struct mapped_device *md, struct dm_table *table)
1504{ 2185{
2186 struct queue_limits limits;
1505 int r = -EINVAL; 2187 int r = -EINVAL;
1506 2188
1507 mutex_lock(&md->suspend_lock); 2189 mutex_lock(&md->suspend_lock);
@@ -1510,19 +2192,96 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
1510 if (!dm_suspended(md)) 2192 if (!dm_suspended(md))
1511 goto out; 2193 goto out;
1512 2194
1513 /* without bdev, the device size cannot be changed */ 2195 r = dm_calculate_queue_limits(table, &limits);
1514 if (!md->suspended_bdev) 2196 if (r)
1515 if (get_capacity(md->disk) != dm_table_get_size(table)) 2197 goto out;
1516 goto out; 2198
2199 /* cannot change the device type once a table is bound */
2200 if (md->map &&
2201 (dm_table_get_type(md->map) != dm_table_get_type(table))) {
2202 DMWARN("can't change the device type after a table is bound");
2203 goto out;
2204 }
2205
2206 /*
2207 * It is enough that blk_queue_ordered() is called only once, when
2208 * the first bio-based table is bound.
2209 *
2210 * This setting should be moved to alloc_dev() when request-based dm
2211 * supports barriers.
2212 */
2213 if (!md->map && dm_table_bio_based(table))
2214 blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
1517 2215
1518 __unbind(md); 2216 __unbind(md);
1519 r = __bind(md, table); 2217 r = __bind(md, table, &limits);
1520 2218
1521out: 2219out:
1522 mutex_unlock(&md->suspend_lock); 2220 mutex_unlock(&md->suspend_lock);
1523 return r; 2221 return r;
1524} 2222}
1525 2223
2224static void dm_rq_invalidate_suspend_marker(struct mapped_device *md)
2225{
2226 md->suspend_rq.special = (void *)0x1;
2227}
2228
2229static void dm_rq_abort_suspend(struct mapped_device *md, int noflush)
2230{
2231 struct request_queue *q = md->queue;
2232 unsigned long flags;
2233
2234 spin_lock_irqsave(q->queue_lock, flags);
2235 if (!noflush)
2236 dm_rq_invalidate_suspend_marker(md);
2237 __start_queue(q);
2238 spin_unlock_irqrestore(q->queue_lock, flags);
2239}
2240
2241static void dm_rq_start_suspend(struct mapped_device *md, int noflush)
2242{
2243 struct request *rq = &md->suspend_rq;
2244 struct request_queue *q = md->queue;
2245
2246 if (noflush)
2247 stop_queue(q);
2248 else {
2249 blk_rq_init(q, rq);
2250 blk_insert_request(q, rq, 0, NULL);
2251 }
2252}
2253
2254static int dm_rq_suspend_available(struct mapped_device *md, int noflush)
2255{
2256 int r = 1;
2257 struct request *rq = &md->suspend_rq;
2258 struct request_queue *q = md->queue;
2259 unsigned long flags;
2260
2261 if (noflush)
2262 return r;
2263
2264 /* The marker must be protected by queue lock if it is in use */
2265 spin_lock_irqsave(q->queue_lock, flags);
2266 if (unlikely(rq->ref_count)) {
2267 /*
2268 * This can happen when the previous flush suspend was
2269 * interrupted and this flush suspend is invoked while the
2270 * marker is still in the queue, because we don't remove the
2271 * marker at the time of suspend interruption.
2272 * We have only one marker per mapped_device, so we can't
2273 * start another flush suspend while it is in use.
2274 */
2275 BUG_ON(!rq->special); /* The marker should be invalidated */
2276 DMWARN("Invalidating the previous flush suspend is still in"
2277 " progress. Please retry later.");
2278 r = 0;
2279 }
2280 spin_unlock_irqrestore(q->queue_lock, flags);
2281
2282 return r;
2283}
2284
1526/* 2285/*
1527 * Functions to lock and unlock any filesystem running on the 2286 * Functions to lock and unlock any filesystem running on the
1528 * device. 2287 * device.
@@ -1533,7 +2292,7 @@ static int lock_fs(struct mapped_device *md)
1533 2292
1534 WARN_ON(md->frozen_sb); 2293 WARN_ON(md->frozen_sb);
1535 2294
1536 md->frozen_sb = freeze_bdev(md->suspended_bdev); 2295 md->frozen_sb = freeze_bdev(md->bdev);
1537 if (IS_ERR(md->frozen_sb)) { 2296 if (IS_ERR(md->frozen_sb)) {
1538 r = PTR_ERR(md->frozen_sb); 2297 r = PTR_ERR(md->frozen_sb);
1539 md->frozen_sb = NULL; 2298 md->frozen_sb = NULL;
@@ -1542,9 +2301,6 @@ static int lock_fs(struct mapped_device *md)
1542 2301
1543 set_bit(DMF_FROZEN, &md->flags); 2302 set_bit(DMF_FROZEN, &md->flags);
1544 2303
1545 /* don't bdput right now, we don't want the bdev
1546 * to go away while it is locked.
1547 */
1548 return 0; 2304 return 0;
1549} 2305}
1550 2306
@@ -1553,7 +2309,7 @@ static void unlock_fs(struct mapped_device *md)
1553 if (!test_bit(DMF_FROZEN, &md->flags)) 2309 if (!test_bit(DMF_FROZEN, &md->flags))
1554 return; 2310 return;
1555 2311
1556 thaw_bdev(md->suspended_bdev, md->frozen_sb); 2312 thaw_bdev(md->bdev, md->frozen_sb);
1557 md->frozen_sb = NULL; 2313 md->frozen_sb = NULL;
1558 clear_bit(DMF_FROZEN, &md->flags); 2314 clear_bit(DMF_FROZEN, &md->flags);
1559} 2315}
@@ -1565,6 +2321,53 @@ static void unlock_fs(struct mapped_device *md)
1565 * dm_bind_table, dm_suspend must be called to flush any in 2321 * dm_bind_table, dm_suspend must be called to flush any in
1566 * flight bios and ensure that any further io gets deferred. 2322 * flight bios and ensure that any further io gets deferred.
1567 */ 2323 */
2324/*
2325 * Suspend mechanism in request-based dm.
2326 *
2327 * After the suspend starts, further incoming requests are kept in
2328 * the request_queue and deferred.
2329 * Remaining requests in the request_queue at the start of suspend are flushed
2330 * if this is a flush suspend.
2331 * The suspend completes when the following conditions have been satisfied,
2332 * so wait for it:
2333 * 1. q->in_flight is 0 (which means no in_flight request)
2334 * 2. queue has been stopped (which means no request dispatching)
2335 *
2336 *
2337 * Noflush suspend
2338 * ---------------
2339 * Noflush suspend doesn't need to dispatch remaining requests.
2340 * So stop the queue immediately. Then, wait for all in_flight requests
2341 * to be completed or requeued.
2342 *
2343 * To abort noflush suspend, start the queue.
2344 *
2345 *
2346 * Flush suspend
2347 * -------------
2348 * Flush suspend needs to dispatch remaining requests. So stop the queue
2349 * after the remaining requests are completed. (Requeued requests must also be
2350 * re-dispatched and completed. Until then, we can't stop the queue.)
2351 *
2352 * While the remaining requests are being flushed, further incoming
2353 * requests are also inserted into the same queue. To distinguish which
2354 * requests are to be flushed, we insert a marker request into the queue when starting the
2355 * flush suspend, like a barrier.
2356 * Dispatching is blocked when the marker is found at the head of the queue.
2357 * And the queue is stopped when all in_flight requests are completed, since
2358 * that means the remaining requests are completely flushed.
2359 * Then, the marker is removed from the queue.
2360 *
2361 * To abort flush suspend, we also need to take care of the marker, not only
2362 * starting the queue.
2363 * We don't remove the marker forcibly from the queue, since that
2364 * would violate block-layer conventions. Instead, we mark the marker invalidated.
2365 * When the invalidated marker is found at the head of the queue, it is
2366 * immediately removed from the queue, so it doesn't block dispatching.
2367 * Because we have only one marker per mapped_device, we can't start another
2368 * flush suspend until the invalidated marker is removed from the queue.
2369 * In such a case, fail and return -EBUSY.
2370 */
1568int dm_suspend(struct mapped_device *md, unsigned suspend_flags) 2371int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1569{ 2372{
1570 struct dm_table *map = NULL; 2373 struct dm_table *map = NULL;
@@ -1579,6 +2382,11 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1579 goto out_unlock; 2382 goto out_unlock;
1580 } 2383 }
1581 2384
2385 if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) {
2386 r = -EBUSY;
2387 goto out_unlock;
2388 }
2389
1582 map = dm_get_table(md); 2390 map = dm_get_table(md);
1583 2391
1584 /* 2392 /*
@@ -1591,24 +2399,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1591 /* This does not get reverted if there's an error later. */ 2399 /* This does not get reverted if there's an error later. */
1592 dm_table_presuspend_targets(map); 2400 dm_table_presuspend_targets(map);
1593 2401
1594 /* bdget() can stall if the pending I/Os are not flushed */ 2402 /*
1595 if (!noflush) { 2403 * Flush I/O to the device. noflush supersedes do_lockfs,
1596 md->suspended_bdev = bdget_disk(md->disk, 0); 2404 * because lock_fs() needs to flush I/Os.
1597 if (!md->suspended_bdev) { 2405 */
1598 DMWARN("bdget failed in dm_suspend"); 2406 if (!noflush && do_lockfs) {
1599 r = -ENOMEM; 2407 r = lock_fs(md);
2408 if (r)
1600 goto out; 2409 goto out;
1601 }
1602
1603 /*
1604 * Flush I/O to the device. noflush supersedes do_lockfs,
1605 * because lock_fs() needs to flush I/Os.
1606 */
1607 if (do_lockfs) {
1608 r = lock_fs(md);
1609 if (r)
1610 goto out;
1611 }
1612 } 2410 }
1613 2411
1614 /* 2412 /*
@@ -1634,6 +2432,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1634 2432
1635 flush_workqueue(md->wq); 2433 flush_workqueue(md->wq);
1636 2434
2435 if (dm_request_based(md))
2436 dm_rq_start_suspend(md, noflush);
2437
1637 /* 2438 /*
1638 * At this point no more requests are entering target request routines. 2439 * At this point no more requests are entering target request routines.
1639 * We call dm_wait_for_completion to wait for all existing requests 2440 * We call dm_wait_for_completion to wait for all existing requests
@@ -1650,6 +2451,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1650 if (r < 0) { 2451 if (r < 0) {
1651 dm_queue_flush(md); 2452 dm_queue_flush(md);
1652 2453
2454 if (dm_request_based(md))
2455 dm_rq_abort_suspend(md, noflush);
2456
1653 unlock_fs(md); 2457 unlock_fs(md);
1654 goto out; /* pushback list is already flushed, so skip flush */ 2458 goto out; /* pushback list is already flushed, so skip flush */
1655 } 2459 }
@@ -1665,11 +2469,6 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
1665 set_bit(DMF_SUSPENDED, &md->flags); 2469 set_bit(DMF_SUSPENDED, &md->flags);
1666 2470
1667out: 2471out:
1668 if (r && md->suspended_bdev) {
1669 bdput(md->suspended_bdev);
1670 md->suspended_bdev = NULL;
1671 }
1672
1673 dm_table_put(map); 2472 dm_table_put(map);
1674 2473
1675out_unlock: 2474out_unlock:
@@ -1696,21 +2495,20 @@ int dm_resume(struct mapped_device *md)
1696 2495
1697 dm_queue_flush(md); 2496 dm_queue_flush(md);
1698 2497
1699 unlock_fs(md); 2498 /*
2499 * Flushing deferred I/Os must be done after targets are resumed
2500 * so that the targets can map them correctly.
2501 * Request-based dm queues its deferred I/Os in its request_queue.
2502 */
2503 if (dm_request_based(md))
2504 start_queue(md->queue);
1700 2505
1701 if (md->suspended_bdev) { 2506 unlock_fs(md);
1702 bdput(md->suspended_bdev);
1703 md->suspended_bdev = NULL;
1704 }
1705 2507
1706 clear_bit(DMF_SUSPENDED, &md->flags); 2508 clear_bit(DMF_SUSPENDED, &md->flags);
1707 2509
1708 dm_table_unplug_all(map); 2510 dm_table_unplug_all(map);
1709
1710 dm_kobject_uevent(md);
1711
1712 r = 0; 2511 r = 0;
1713
1714out: 2512out:
1715 dm_table_put(map); 2513 dm_table_put(map);
1716 mutex_unlock(&md->suspend_lock); 2514 mutex_unlock(&md->suspend_lock);
@@ -1721,9 +2519,19 @@ out:
1721/*----------------------------------------------------------------- 2519/*-----------------------------------------------------------------
1722 * Event notification. 2520 * Event notification.
1723 *---------------------------------------------------------------*/ 2521 *---------------------------------------------------------------*/
1724void dm_kobject_uevent(struct mapped_device *md) 2522void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
1725{ 2523 unsigned cookie)
1726 kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); 2524{
2525 char udev_cookie[DM_COOKIE_LENGTH];
2526 char *envp[] = { udev_cookie, NULL };
2527
2528 if (!cookie)
2529 kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
2530 else {
2531 snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
2532 DM_COOKIE_ENV_VAR_NAME, cookie);
2533 kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);
2534 }
1727} 2535}
1728 2536
1729uint32_t dm_next_uevent_seq(struct mapped_device *md) 2537uint32_t dm_next_uevent_seq(struct mapped_device *md)
@@ -1777,6 +2585,10 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
1777 if (&md->kobj != kobj) 2585 if (&md->kobj != kobj)
1778 return NULL; 2586 return NULL;
1779 2587
2588 if (test_bit(DMF_FREEING, &md->flags) ||
2589 test_bit(DMF_DELETING, &md->flags))
2590 return NULL;
2591
1780 dm_get(md); 2592 dm_get(md);
1781 return md; 2593 return md;
1782} 2594}
@@ -1797,6 +2609,61 @@ int dm_noflush_suspending(struct dm_target *ti)
1797} 2609}
1798EXPORT_SYMBOL_GPL(dm_noflush_suspending); 2610EXPORT_SYMBOL_GPL(dm_noflush_suspending);
1799 2611
2612struct dm_md_mempools *dm_alloc_md_mempools(unsigned type)
2613{
2614 struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
2615
2616 if (!pools)
2617 return NULL;
2618
2619 pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
2620 mempool_create_slab_pool(MIN_IOS, _io_cache) :
2621 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
2622 if (!pools->io_pool)
2623 goto free_pools_and_out;
2624
2625 pools->tio_pool = (type == DM_TYPE_BIO_BASED) ?
2626 mempool_create_slab_pool(MIN_IOS, _tio_cache) :
2627 mempool_create_slab_pool(MIN_IOS, _rq_tio_cache);
2628 if (!pools->tio_pool)
2629 goto free_io_pool_and_out;
2630
2631 pools->bs = (type == DM_TYPE_BIO_BASED) ?
2632 bioset_create(16, 0) : bioset_create(MIN_IOS, 0);
2633 if (!pools->bs)
2634 goto free_tio_pool_and_out;
2635
2636 return pools;
2637
2638free_tio_pool_and_out:
2639 mempool_destroy(pools->tio_pool);
2640
2641free_io_pool_and_out:
2642 mempool_destroy(pools->io_pool);
2643
2644free_pools_and_out:
2645 kfree(pools);
2646
2647 return NULL;
2648}
2649
2650void dm_free_md_mempools(struct dm_md_mempools *pools)
2651{
2652 if (!pools)
2653 return;
2654
2655 if (pools->io_pool)
2656 mempool_destroy(pools->io_pool);
2657
2658 if (pools->tio_pool)
2659 mempool_destroy(pools->tio_pool);
2660
2661 if (pools->bs)
2662 bioset_free(pools->bs);
2663
2664 kfree(pools);
2665}
2666
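A hedged sketch of the intended alloc/free lifecycle; the mempools field on struct dm_table is an assumption here, standing in for the bookkeeping behind dm_table_alloc_md_mempools() declared in dm.h below:

static int example_table_alloc_pools(struct dm_table *t, unsigned type)
{
	struct dm_md_mempools *pools = dm_alloc_md_mempools(type);

	if (!pools)
		return -ENOMEM;

	t->mempools = pools;	/* assumed field; __bind_mempools() drains it */

	return 0;
}

On any later table-load failure, dm_free_md_mempools() gives the unused pools back.
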
1800static struct block_device_operations dm_blk_dops = { 2667static struct block_device_operations dm_blk_dops = {
1801 .open = dm_blk_open, 2668 .open = dm_blk_open,
1802 .release = dm_blk_close, 2669 .release = dm_blk_close,
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index a31506d93e91..23278ae80f08 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -23,6 +23,13 @@
23#define DM_SUSPEND_NOFLUSH_FLAG (1 << 1) 23#define DM_SUSPEND_NOFLUSH_FLAG (1 << 1)
24 24
25/* 25/*
26 * Type of table and mapped_device's mempool
27 */
28#define DM_TYPE_NONE 0
29#define DM_TYPE_BIO_BASED 1
30#define DM_TYPE_REQUEST_BASED 2
31
32/*
26 * List of devices that a metadevice uses and should open/close. 33 * List of devices that a metadevice uses and should open/close.
27 */ 34 */
28struct dm_dev_internal { 35struct dm_dev_internal {
@@ -32,6 +39,7 @@ struct dm_dev_internal {
32}; 39};
33 40
34struct dm_table; 41struct dm_table;
42struct dm_md_mempools;
35 43
36/*----------------------------------------------------------------- 44/*-----------------------------------------------------------------
37 * Internal table functions. 45 * Internal table functions.
@@ -41,18 +49,34 @@ void dm_table_event_callback(struct dm_table *t,
41 void (*fn)(void *), void *context); 49 void (*fn)(void *), void *context);
42struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); 50struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
43struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); 51struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
44void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q); 52int dm_calculate_queue_limits(struct dm_table *table,
53 struct queue_limits *limits);
54void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
55 struct queue_limits *limits);
45struct list_head *dm_table_get_devices(struct dm_table *t); 56struct list_head *dm_table_get_devices(struct dm_table *t);
46void dm_table_presuspend_targets(struct dm_table *t); 57void dm_table_presuspend_targets(struct dm_table *t);
47void dm_table_postsuspend_targets(struct dm_table *t); 58void dm_table_postsuspend_targets(struct dm_table *t);
48int dm_table_resume_targets(struct dm_table *t); 59int dm_table_resume_targets(struct dm_table *t);
49int dm_table_any_congested(struct dm_table *t, int bdi_bits); 60int dm_table_any_congested(struct dm_table *t, int bdi_bits);
61int dm_table_any_busy_target(struct dm_table *t);
62int dm_table_set_type(struct dm_table *t);
63unsigned dm_table_get_type(struct dm_table *t);
64bool dm_table_bio_based(struct dm_table *t);
65bool dm_table_request_based(struct dm_table *t);
66int dm_table_alloc_md_mempools(struct dm_table *t);
67void dm_table_free_md_mempools(struct dm_table *t);
68struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
50 69
51/* 70/*
52 * To check the return value from dm_table_find_target(). 71 * To check the return value from dm_table_find_target().
53 */ 72 */
54#define dm_target_is_valid(t) ((t)->table) 73#define dm_target_is_valid(t) ((t)->table)
55 74
75/*
76 * To check whether the target type is request-based or not (bio-based).
77 */
78#define dm_target_request_based(t) ((t)->type->map_rq != NULL)
79
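The macro above is the building block for table type resolution. A sketch of the assumed logic (the real version is dm_table_set_type(), declared earlier in this header):

static unsigned example_resolve_type(struct dm_table *t)
{
	unsigned i, bio_based = 0, request_based = 0;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		if (dm_target_request_based(dm_table_get_target(t, i)))
			request_based++;
		else
			bio_based++;
	}

	/* Hybrid tables are rejected: the queue model must be uniform */
	if (bio_based && request_based)
		return DM_TYPE_NONE;

	return request_based ? DM_TYPE_REQUEST_BASED : DM_TYPE_BIO_BASED;
}
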
56/*----------------------------------------------------------------- 80/*-----------------------------------------------------------------
57 * A registry of target types. 81 * A registry of target types.
58 *---------------------------------------------------------------*/ 82 *---------------------------------------------------------------*/
@@ -92,9 +116,16 @@ void dm_stripe_exit(void);
92int dm_open_count(struct mapped_device *md); 116int dm_open_count(struct mapped_device *md);
93int dm_lock_for_deletion(struct mapped_device *md); 117int dm_lock_for_deletion(struct mapped_device *md);
94 118
95void dm_kobject_uevent(struct mapped_device *md); 119void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
120 unsigned cookie);
96 121
97int dm_kcopyd_init(void); 122int dm_kcopyd_init(void);
98void dm_kcopyd_exit(void); 123void dm_kcopyd_exit(void);
99 124
125/*
126 * Mempool operations
127 */
128struct dm_md_mempools *dm_alloc_md_mempools(unsigned type);
129void dm_free_md_mempools(struct dm_md_mempools *pools);
130
100#endif 131#endif
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 1dc721517e4c..c155bd3ec9f1 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -1725,6 +1725,7 @@ config TLAN
1725 1725
1726config KS8842 1726config KS8842
1727 tristate "Micrel KSZ8842" 1727 tristate "Micrel KSZ8842"
1728 depends on HAS_IOMEM
1728 help 1729 help
1729 This platform driver is for Micrel KSZ8842 chip. 1730 This platform driver is for Micrel KSZ8842 chip.
1730 1731
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 38f1c3375d7f..b70cc99962fc 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -6825,6 +6825,14 @@ bnx2_nway_reset(struct net_device *dev)
6825 return 0; 6825 return 0;
6826} 6826}
6827 6827
6828static u32
6829bnx2_get_link(struct net_device *dev)
6830{
6831 struct bnx2 *bp = netdev_priv(dev);
6832
6833 return bp->link_up;
6834}
6835
6828static int 6836static int
6829bnx2_get_eeprom_len(struct net_device *dev) 6837bnx2_get_eeprom_len(struct net_device *dev)
6830{ 6838{
@@ -7392,7 +7400,7 @@ static const struct ethtool_ops bnx2_ethtool_ops = {
7392 .get_wol = bnx2_get_wol, 7400 .get_wol = bnx2_get_wol,
7393 .set_wol = bnx2_set_wol, 7401 .set_wol = bnx2_set_wol,
7394 .nway_reset = bnx2_nway_reset, 7402 .nway_reset = bnx2_nway_reset,
7395 .get_link = ethtool_op_get_link, 7403 .get_link = bnx2_get_link,
7396 .get_eeprom_len = bnx2_get_eeprom_len, 7404 .get_eeprom_len = bnx2_get_eeprom_len,
7397 .get_eeprom = bnx2_get_eeprom, 7405 .get_eeprom = bnx2_get_eeprom,
7398 .set_eeprom = bnx2_set_eeprom, 7406 .set_eeprom = bnx2_set_eeprom,
diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index d5e18812bf49..33821a81cbf8 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -36,7 +36,7 @@ config CAN_CALC_BITTIMING
36 If unsure, say Y. 36 If unsure, say Y.
37 37
38config CAN_SJA1000 38config CAN_SJA1000
39 depends on CAN_DEV 39 depends on CAN_DEV && HAS_IOMEM
40 tristate "Philips SJA1000" 40 tristate "Philips SJA1000"
41 ---help--- 41 ---help---
42 Driver for the SJA1000 CAN controllers from Philips or NXP 42 Driver for the SJA1000 CAN controllers from Philips or NXP
diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c
index bdb143d2b5c7..055bb61d6e77 100644
--- a/drivers/net/netxen/netxen_nic_init.c
+++ b/drivers/net/netxen/netxen_nic_init.c
@@ -944,28 +944,31 @@ int netxen_phantom_init(struct netxen_adapter *adapter, int pegtune_val)
944 u32 val = 0; 944 u32 val = 0;
945 int retries = 60; 945 int retries = 60;
946 946
947 if (!pegtune_val) { 947 if (pegtune_val)
948 do { 948 return 0;
949 val = NXRD32(adapter, CRB_CMDPEG_STATE);
950 949
951 if (val == PHAN_INITIALIZE_COMPLETE || 950 do {
952 val == PHAN_INITIALIZE_ACK) 951 val = NXRD32(adapter, CRB_CMDPEG_STATE);
953 return 0;
954 952
955 msleep(500); 953 switch (val) {
954 case PHAN_INITIALIZE_COMPLETE:
955 case PHAN_INITIALIZE_ACK:
956 return 0;
957 case PHAN_INITIALIZE_FAILED:
958 goto out_err;
959 default:
960 break;
961 }
956 962
957 } while (--retries); 963 msleep(500);
958 964
959 if (!retries) { 965 } while (--retries);
960 pegtune_val = NXRD32(adapter,
961 NETXEN_ROMUSB_GLB_PEGTUNE_DONE);
962 printk(KERN_WARNING "netxen_phantom_init: init failed, "
963 "pegtune_val=%x\n", pegtune_val);
964 return -1;
965 }
966 }
967 966
968 return 0; 967 NXWR32(adapter, CRB_CMDPEG_STATE, PHAN_INITIALIZE_FAILED);
968
969out_err:
970 dev_warn(&adapter->pdev->dev, "firmware init failed\n");
971 return -EIO;
969} 972}
970 973
971static int 974static int
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index 71daa3d5f114..2919a2d12bf4 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -705,7 +705,7 @@ netxen_start_firmware(struct netxen_adapter *adapter, int request_fw)
705 first_driver = (adapter->ahw.pci_func == 0); 705 first_driver = (adapter->ahw.pci_func == 0);
706 706
707 if (!first_driver) 707 if (!first_driver)
708 return 0; 708 goto wait_init;
709 709
710 first_boot = NXRD32(adapter, NETXEN_CAM_RAM(0x1fc)); 710 first_boot = NXRD32(adapter, NETXEN_CAM_RAM(0x1fc));
711 711
@@ -752,6 +752,7 @@ netxen_start_firmware(struct netxen_adapter *adapter, int request_fw)
752 | (_NETXEN_NIC_LINUX_SUBVERSION); 752 | (_NETXEN_NIC_LINUX_SUBVERSION);
753 NXWR32(adapter, CRB_DRIVER_VERSION, val); 753 NXWR32(adapter, CRB_DRIVER_VERSION, val);
754 754
755wait_init:
755 /* Handshake with the card before we register the devices. */ 756 /* Handshake with the card before we register the devices. */
756 err = netxen_phantom_init(adapter, NETXEN_NIC_PEG_TUNE); 757 err = netxen_phantom_init(adapter, NETXEN_NIC_PEG_TUNE);
757 if (err) { 758 if (err) {
@@ -1178,6 +1179,7 @@ static void __devexit netxen_nic_remove(struct pci_dev *pdev)
1178 free_netdev(netdev); 1179 free_netdev(netdev);
1179} 1180}
1180 1181
1182#ifdef CONFIG_PM
1181static int 1183static int
1182netxen_nic_suspend(struct pci_dev *pdev, pm_message_t state) 1184netxen_nic_suspend(struct pci_dev *pdev, pm_message_t state)
1183{ 1185{
@@ -1242,6 +1244,7 @@ netxen_nic_resume(struct pci_dev *pdev)
1242 1244
1243 return 0; 1245 return 0;
1244} 1246}
1247#endif
1245 1248
1246static int netxen_nic_open(struct net_device *netdev) 1249static int netxen_nic_open(struct net_device *netdev)
1247{ 1250{
@@ -1771,8 +1774,10 @@ static struct pci_driver netxen_driver = {
1771 .id_table = netxen_pci_tbl, 1774 .id_table = netxen_pci_tbl,
1772 .probe = netxen_nic_probe, 1775 .probe = netxen_nic_probe,
1773 .remove = __devexit_p(netxen_nic_remove), 1776 .remove = __devexit_p(netxen_nic_remove),
1777#ifdef CONFIG_PM
1774 .suspend = netxen_nic_suspend, 1778 .suspend = netxen_nic_suspend,
1775 .resume = netxen_nic_resume 1779 .resume = netxen_nic_resume
1780#endif
1776}; 1781};
1777 1782
1778/* Driver Registration on NetXen card */ 1783/* Driver Registration on NetXen card */
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index bbc6d4d3cc94..3e4b67aaa6ea 100644
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -3142,6 +3142,7 @@ static int ql_adapter_initialize(struct ql3_adapter *qdev)
3142 (void __iomem *)port_regs; 3142 (void __iomem *)port_regs;
3143 u32 delay = 10; 3143 u32 delay = 10;
3144 int status = 0; 3144 int status = 0;
3145 unsigned long hw_flags = 0;
3145 3146
3146 if(ql_mii_setup(qdev)) 3147 if(ql_mii_setup(qdev))
3147 return -1; 3148 return -1;
@@ -3150,7 +3151,8 @@ static int ql_adapter_initialize(struct ql3_adapter *qdev)
3150 ql_write_common_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg, 3151 ql_write_common_reg(qdev, &port_regs->CommonRegs.serialPortInterfaceReg,
3151 (ISP_SERIAL_PORT_IF_WE | 3152 (ISP_SERIAL_PORT_IF_WE |
3152 (ISP_SERIAL_PORT_IF_WE << 16))); 3153 (ISP_SERIAL_PORT_IF_WE << 16)));
3153 3154 /* Give the PHY time to come out of reset. */
3155 mdelay(100);
3154 qdev->port_link_state = LS_DOWN; 3156 qdev->port_link_state = LS_DOWN;
3155 netif_carrier_off(qdev->ndev); 3157 netif_carrier_off(qdev->ndev);
3156 3158
@@ -3350,7 +3352,9 @@ static int ql_adapter_initialize(struct ql3_adapter *qdev)
3350 value = ql_read_page0_reg(qdev, &port_regs->portStatus); 3352 value = ql_read_page0_reg(qdev, &port_regs->portStatus);
3351 if (value & PORT_STATUS_IC) 3353 if (value & PORT_STATUS_IC)
3352 break; 3354 break;
3355 spin_unlock_irqrestore(&qdev->hw_lock, hw_flags);
3353 msleep(500); 3356 msleep(500);
3357 spin_lock_irqsave(&qdev->hw_lock, hw_flags);
3354 } while (--delay); 3358 } while (--delay);
3355 3359
3356 if (delay == 0) { 3360 if (delay == 0) {
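The qla3xxx hunk drops qdev->hw_lock around msleep() because sleeping with a spinlock held (and interrupts disabled) is a bug. A self-contained sketch of the unlock-sleep-relock polling pattern; struct foo_dev and foo_ready() are placeholders, not from the driver:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/spinlock.h>

struct foo_dev {
	spinlock_t lock;
	int ready;		/* stands in for a hardware status bit */
};

static int foo_ready(struct foo_dev *dev)
{
	return dev->ready;
}

static int foo_wait_ready(struct foo_dev *dev, int retries)
{
	unsigned long flags;

	spin_lock_irqsave(&dev->lock, flags);
	while (!foo_ready(dev) && --retries) {
		/* msleep() may schedule; never sleep holding a spinlock */
		spin_unlock_irqrestore(&dev->lock, flags);
		msleep(500);
		spin_lock_irqsave(&dev->lock, flags);
	}
	spin_unlock_irqrestore(&dev->lock, flags);

	return retries ? 0 : -ETIMEDOUT;
}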
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index fbc63d5e459f..eb159587d0bf 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -354,7 +354,7 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
354 status = acpi_run_hpp(handle, hpp); 354 status = acpi_run_hpp(handle, hpp);
355 if (ACPI_SUCCESS(status)) 355 if (ACPI_SUCCESS(status))
356 break; 356 break;
357 if (acpi_root_bridge(handle)) 357 if (acpi_is_root_bridge(handle))
358 break; 358 break;
359 status = acpi_get_parent(handle, &phandle); 359 status = acpi_get_parent(handle, &phandle);
360 if (ACPI_FAILURE(status)) 360 if (ACPI_FAILURE(status))
@@ -428,7 +428,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
428 status = acpi_run_oshp(handle); 428 status = acpi_run_oshp(handle);
429 if (ACPI_SUCCESS(status)) 429 if (ACPI_SUCCESS(status))
430 goto got_one; 430 goto got_one;
431 if (acpi_root_bridge(handle)) 431 if (acpi_is_root_bridge(handle))
432 break; 432 break;
433 chandle = handle; 433 chandle = handle;
434 status = acpi_get_parent(chandle, &handle); 434 status = acpi_get_parent(chandle, &handle);
@@ -449,42 +449,6 @@ got_one:
449} 449}
450EXPORT_SYMBOL(acpi_get_hp_hw_control_from_firmware); 450EXPORT_SYMBOL(acpi_get_hp_hw_control_from_firmware);
451 451
452/* acpi_root_bridge - check to see if this acpi object is a root bridge
453 *
454 * @handle - the acpi object in question.
455 */
456int acpi_root_bridge(acpi_handle handle)
457{
458 acpi_status status;
459 struct acpi_device_info *info;
460 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
461 int i;
462
463 status = acpi_get_object_info(handle, &buffer);
464 if (ACPI_SUCCESS(status)) {
465 info = buffer.pointer;
466 if ((info->valid & ACPI_VALID_HID) &&
467 !strcmp(PCI_ROOT_HID_STRING,
468 info->hardware_id.value)) {
469 kfree(buffer.pointer);
470 return 1;
471 }
472 if (info->valid & ACPI_VALID_CID) {
473 for (i=0; i < info->compatibility_id.count; i++) {
474 if (!strcmp(PCI_ROOT_HID_STRING,
475 info->compatibility_id.id[i].value)) {
476 kfree(buffer.pointer);
477 return 1;
478 }
479 }
480 }
481 kfree(buffer.pointer);
482 }
483 return 0;
484}
485EXPORT_SYMBOL_GPL(acpi_root_bridge);
486
487
488static int is_ejectable(acpi_handle handle) 452static int is_ejectable(acpi_handle handle)
489{ 453{
490 acpi_status status; 454 acpi_status status;
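Both acpi_pcihp.c call sites keep the same shape after switching to the shared acpi_is_root_bridge(): try an operation at each ancestor, stopping at the PCI root bridge. A condensed sketch of that upward walk; try_op() is a placeholder for acpi_run_hpp()/acpi_run_oshp():

#include <acpi/acpi_bus.h>

static acpi_status try_op(acpi_handle handle);	/* placeholder */

static acpi_status walk_up_to_root(acpi_handle handle)
{
	acpi_handle phandle;
	acpi_status status;

	while (handle) {
		status = try_op(handle);
		if (ACPI_SUCCESS(status))
			return status;
		if (acpi_is_root_bridge(handle))
			break;	/* nothing useful above the root bridge */
		if (ACPI_FAILURE(acpi_get_parent(handle, &phandle)))
			break;
		handle = phandle;
	}
	return AE_NOT_FOUND;
}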
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 3a6064bce561..0cb0f830a993 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -678,18 +678,9 @@ static void remove_bridge(acpi_handle handle)
678 678
679static struct pci_dev * get_apic_pci_info(acpi_handle handle) 679static struct pci_dev * get_apic_pci_info(acpi_handle handle)
680{ 680{
681 struct acpi_pci_id id;
682 struct pci_bus *bus;
683 struct pci_dev *dev; 681 struct pci_dev *dev;
684 682
685 if (ACPI_FAILURE(acpi_get_pci_id(handle, &id))) 683 dev = acpi_get_pci_dev(handle);
686 return NULL;
687
688 bus = pci_find_bus(id.segment, id.bus);
689 if (!bus)
690 return NULL;
691
692 dev = pci_get_slot(bus, PCI_DEVFN(id.device, id.function));
693 if (!dev) 684 if (!dev)
694 return NULL; 685 return NULL;
695 686
@@ -1396,19 +1387,16 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus)
1396/* Program resources in newly inserted bridge */ 1387/* Program resources in newly inserted bridge */
1397static int acpiphp_configure_bridge (acpi_handle handle) 1388static int acpiphp_configure_bridge (acpi_handle handle)
1398{ 1389{
1399 struct acpi_pci_id pci_id; 1390 struct pci_dev *dev;
1400 struct pci_bus *bus; 1391 struct pci_bus *bus;
1401 1392
1402 if (ACPI_FAILURE(acpi_get_pci_id(handle, &pci_id))) { 1393 dev = acpi_get_pci_dev(handle);
1394 if (!dev) {
1403 err("cannot get PCI domain and bus number for bridge\n"); 1395 err("cannot get PCI domain and bus number for bridge\n");
1404 return -EINVAL; 1396 return -EINVAL;
1405 } 1397 }
1406 bus = pci_find_bus(pci_id.segment, pci_id.bus); 1398
1407 if (!bus) { 1399 bus = dev->bus;
1408 err("cannot find bus %d:%d\n",
1409 pci_id.segment, pci_id.bus);
1410 return -EINVAL;
1411 }
1412 1400
1413 pci_bus_size_bridges(bus); 1401 pci_bus_size_bridges(bus);
1414 pci_bus_assign_resources(bus); 1402 pci_bus_assign_resources(bus);
@@ -1416,6 +1404,7 @@ static int acpiphp_configure_bridge (acpi_handle handle)
1416 acpiphp_set_hpp_values(handle, bus); 1404 acpiphp_set_hpp_values(handle, bus);
1417 pci_enable_bridges(bus); 1405 pci_enable_bridges(bus);
1418 acpiphp_configure_ioapics(handle); 1406 acpiphp_configure_ioapics(handle);
1407 pci_dev_put(dev);
1419 return 0; 1408 return 0;
1420} 1409}
1421 1410
@@ -1631,7 +1620,7 @@ find_root_bridges(acpi_handle handle, u32 lvl, void *context, void **rv)
1631{ 1620{
1632 int *count = (int *)context; 1621 int *count = (int *)context;
1633 1622
1634 if (acpi_root_bridge(handle)) { 1623 if (acpi_is_root_bridge(handle)) {
1635 acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY, 1624 acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
1636 handle_hotplug_event_bridge, NULL); 1625 handle_hotplug_event_bridge, NULL);
1637 (*count)++; 1626 (*count)++;
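acpi_get_pci_dev() returns a referenced struct pci_dev, which is why acpiphp_configure_bridge() now ends with pci_dev_put(). A hedged sketch of that lookup-use-release pattern, mirroring the hunk above:

#include <linux/pci.h>
#include <acpi/acpi_bus.h>

static int configure_bridge_bus(acpi_handle handle)
{
	struct pci_dev *dev = acpi_get_pci_dev(handle);
	struct pci_bus *bus;

	if (!dev)
		return -ENODEV;

	bus = dev->bus;
	pci_bus_size_bridges(bus);
	pci_bus_assign_resources(bus);

	pci_dev_put(dev);	/* balance the acpi_get_pci_dev() reference */
	return 0;
}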
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 178853a07440..e53eacd75c8d 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -39,6 +39,7 @@
39#include <linux/sysdev.h> 39#include <linux/sysdev.h>
40#include <asm/cacheflush.h> 40#include <asm/cacheflush.h>
41#include <asm/iommu.h> 41#include <asm/iommu.h>
42#include <asm/e820.h>
42#include "pci.h" 43#include "pci.h"
43 44
44#define ROOT_SIZE VTD_PAGE_SIZE 45#define ROOT_SIZE VTD_PAGE_SIZE
@@ -217,6 +218,14 @@ static inline bool dma_pte_present(struct dma_pte *pte)
217 return (pte->val & 3) != 0; 218 return (pte->val & 3) != 0;
218} 219}
219 220
221/*
 222 * This domain is a static identity mapping domain.
 223 * 1. This domain creates a static 1:1 mapping to all usable memory.
 224 * 2. It maps to each iommu if successful.
 225 * 3. Each iommu maps to this domain if successful.
226 */
227struct dmar_domain *si_domain;
228
220/* devices under the same p2p bridge are owned in one domain */ 229/* devices under the same p2p bridge are owned in one domain */
221#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) 230#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
222 231
@@ -225,6 +234,9 @@ static inline bool dma_pte_present(struct dma_pte *pte)
225 */ 234 */
226#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1) 235#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
227 236
 237/* si_domain contains multiple devices */
238#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
239
228struct dmar_domain { 240struct dmar_domain {
229 int id; /* domain id */ 241 int id; /* domain id */
230 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ 242 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
@@ -435,12 +447,14 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
435 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); 447 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
436} 448}
437 449
 438/* in native case, each domain is related to only one iommu */ 450/* This function only returns a single iommu in a domain */
439static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) 451static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
440{ 452{
441 int iommu_id; 453 int iommu_id;
442 454
455 /* si_domain and vm domain should not get here. */
443 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); 456 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
457 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
444 458
445 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); 459 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
446 if (iommu_id < 0 || iommu_id >= g_num_of_iommus) 460 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
@@ -1189,48 +1203,71 @@ void free_dmar_iommu(struct intel_iommu *iommu)
1189 free_context_table(iommu); 1203 free_context_table(iommu);
1190} 1204}
1191 1205
1192static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) 1206static struct dmar_domain *alloc_domain(void)
1193{ 1207{
1194 unsigned long num;
1195 unsigned long ndomains;
1196 struct dmar_domain *domain; 1208 struct dmar_domain *domain;
1197 unsigned long flags;
1198 1209
1199 domain = alloc_domain_mem(); 1210 domain = alloc_domain_mem();
1200 if (!domain) 1211 if (!domain)
1201 return NULL; 1212 return NULL;
1202 1213
1214 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1215 domain->flags = 0;
1216
1217 return domain;
1218}
1219
1220static int iommu_attach_domain(struct dmar_domain *domain,
1221 struct intel_iommu *iommu)
1222{
1223 int num;
1224 unsigned long ndomains;
1225 unsigned long flags;
1226
1203 ndomains = cap_ndoms(iommu->cap); 1227 ndomains = cap_ndoms(iommu->cap);
1204 1228
1205 spin_lock_irqsave(&iommu->lock, flags); 1229 spin_lock_irqsave(&iommu->lock, flags);
1230
1206 num = find_first_zero_bit(iommu->domain_ids, ndomains); 1231 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1207 if (num >= ndomains) { 1232 if (num >= ndomains) {
1208 spin_unlock_irqrestore(&iommu->lock, flags); 1233 spin_unlock_irqrestore(&iommu->lock, flags);
1209 free_domain_mem(domain);
1210 printk(KERN_ERR "IOMMU: no free domain ids\n"); 1234 printk(KERN_ERR "IOMMU: no free domain ids\n");
1211 return NULL; 1235 return -ENOMEM;
1212 } 1236 }
1213 1237
1214 set_bit(num, iommu->domain_ids);
1215 domain->id = num; 1238 domain->id = num;
1216 memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); 1239 set_bit(num, iommu->domain_ids);
1217 set_bit(iommu->seq_id, &domain->iommu_bmp); 1240 set_bit(iommu->seq_id, &domain->iommu_bmp);
1218 domain->flags = 0;
1219 iommu->domains[num] = domain; 1241 iommu->domains[num] = domain;
1220 spin_unlock_irqrestore(&iommu->lock, flags); 1242 spin_unlock_irqrestore(&iommu->lock, flags);
1221 1243
1222 return domain; 1244 return 0;
1223} 1245}
1224 1246
1225static void iommu_free_domain(struct dmar_domain *domain) 1247static void iommu_detach_domain(struct dmar_domain *domain,
1248 struct intel_iommu *iommu)
1226{ 1249{
1227 unsigned long flags; 1250 unsigned long flags;
1228 struct intel_iommu *iommu; 1251 int num, ndomains;
1229 1252 int found = 0;
1230 iommu = domain_get_iommu(domain);
1231 1253
1232 spin_lock_irqsave(&iommu->lock, flags); 1254 spin_lock_irqsave(&iommu->lock, flags);
1233 clear_bit(domain->id, iommu->domain_ids); 1255 ndomains = cap_ndoms(iommu->cap);
1256 num = find_first_bit(iommu->domain_ids, ndomains);
1257 for (; num < ndomains; ) {
1258 if (iommu->domains[num] == domain) {
1259 found = 1;
1260 break;
1261 }
1262 num = find_next_bit(iommu->domain_ids,
1263 cap_ndoms(iommu->cap), num+1);
1264 }
1265
1266 if (found) {
1267 clear_bit(num, iommu->domain_ids);
1268 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1269 iommu->domains[num] = NULL;
1270 }
1234 spin_unlock_irqrestore(&iommu->lock, flags); 1271 spin_unlock_irqrestore(&iommu->lock, flags);
1235} 1272}
1236 1273
@@ -1350,6 +1387,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1350 1387
1351static void domain_exit(struct dmar_domain *domain) 1388static void domain_exit(struct dmar_domain *domain)
1352{ 1389{
1390 struct dmar_drhd_unit *drhd;
1391 struct intel_iommu *iommu;
1353 u64 end; 1392 u64 end;
1354 1393
1355 /* Domain 0 is reserved, so dont process it */ 1394 /* Domain 0 is reserved, so dont process it */
@@ -1368,7 +1407,10 @@ static void domain_exit(struct dmar_domain *domain)
1368 /* free page tables */ 1407 /* free page tables */
1369 dma_pte_free_pagetable(domain, 0, end); 1408 dma_pte_free_pagetable(domain, 0, end);
1370 1409
1371 iommu_free_domain(domain); 1410 for_each_active_iommu(iommu, drhd)
1411 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1412 iommu_detach_domain(domain, iommu);
1413
1372 free_domain_mem(domain); 1414 free_domain_mem(domain);
1373} 1415}
1374 1416
@@ -1408,7 +1450,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1408 id = domain->id; 1450 id = domain->id;
1409 pgd = domain->pgd; 1451 pgd = domain->pgd;
1410 1452
1411 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { 1453 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1454 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1412 int found = 0; 1455 int found = 0;
1413 1456
1414 /* find an available domain id for this device in iommu */ 1457 /* find an available domain id for this device in iommu */
@@ -1433,6 +1476,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1433 } 1476 }
1434 1477
1435 set_bit(num, iommu->domain_ids); 1478 set_bit(num, iommu->domain_ids);
1479 set_bit(iommu->seq_id, &domain->iommu_bmp);
1436 iommu->domains[num] = domain; 1480 iommu->domains[num] = domain;
1437 id = num; 1481 id = num;
1438 } 1482 }
@@ -1675,6 +1719,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1675 unsigned long flags; 1719 unsigned long flags;
1676 int bus = 0, devfn = 0; 1720 int bus = 0, devfn = 0;
1677 int segment; 1721 int segment;
1722 int ret;
1678 1723
1679 domain = find_domain(pdev); 1724 domain = find_domain(pdev);
1680 if (domain) 1725 if (domain)
@@ -1707,6 +1752,10 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1707 } 1752 }
1708 } 1753 }
1709 1754
1755 domain = alloc_domain();
1756 if (!domain)
1757 goto error;
1758
1710 /* Allocate new domain for the device */ 1759 /* Allocate new domain for the device */
1711 drhd = dmar_find_matched_drhd_unit(pdev); 1760 drhd = dmar_find_matched_drhd_unit(pdev);
1712 if (!drhd) { 1761 if (!drhd) {
@@ -1716,9 +1765,11 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1716 } 1765 }
1717 iommu = drhd->iommu; 1766 iommu = drhd->iommu;
1718 1767
1719 domain = iommu_alloc_domain(iommu); 1768 ret = iommu_attach_domain(domain, iommu);
1720 if (!domain) 1769 if (ret) {
1770 domain_exit(domain);
1721 goto error; 1771 goto error;
1772 }
1722 1773
1723 if (domain_init(domain, gaw)) { 1774 if (domain_init(domain, gaw)) {
1724 domain_exit(domain); 1775 domain_exit(domain);
@@ -1792,6 +1843,8 @@ error:
1792 return find_domain(pdev); 1843 return find_domain(pdev);
1793} 1844}
1794 1845
1846static int iommu_identity_mapping;
1847
1795static int iommu_prepare_identity_map(struct pci_dev *pdev, 1848static int iommu_prepare_identity_map(struct pci_dev *pdev,
1796 unsigned long long start, 1849 unsigned long long start,
1797 unsigned long long end) 1850 unsigned long long end)
@@ -1804,8 +1857,11 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
1804 printk(KERN_INFO 1857 printk(KERN_INFO
1805 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 1858 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1806 pci_name(pdev), start, end); 1859 pci_name(pdev), start, end);
1807 /* page table init */ 1860 if (iommu_identity_mapping)
1808 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 1861 domain = si_domain;
1862 else
1863 /* page table init */
1864 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1809 if (!domain) 1865 if (!domain)
1810 return -ENOMEM; 1866 return -ENOMEM;
1811 1867
@@ -1952,7 +2008,110 @@ static int __init init_context_pass_through(void)
1952 return 0; 2008 return 0;
1953} 2009}
1954 2010
1955static int __init init_dmars(void) 2011static int md_domain_init(struct dmar_domain *domain, int guest_width);
2012static int si_domain_init(void)
2013{
2014 struct dmar_drhd_unit *drhd;
2015 struct intel_iommu *iommu;
2016 int ret = 0;
2017
2018 si_domain = alloc_domain();
2019 if (!si_domain)
2020 return -EFAULT;
2021
2022
2023 for_each_active_iommu(iommu, drhd) {
2024 ret = iommu_attach_domain(si_domain, iommu);
2025 if (ret) {
2026 domain_exit(si_domain);
2027 return -EFAULT;
2028 }
2029 }
2030
2031 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2032 domain_exit(si_domain);
2033 return -EFAULT;
2034 }
2035
2036 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2037
2038 return 0;
2039}
2040
2041static void domain_remove_one_dev_info(struct dmar_domain *domain,
2042 struct pci_dev *pdev);
2043static int identity_mapping(struct pci_dev *pdev)
2044{
2045 struct device_domain_info *info;
2046
2047 if (likely(!iommu_identity_mapping))
2048 return 0;
2049
2050
2051 list_for_each_entry(info, &si_domain->devices, link)
2052 if (info->dev == pdev)
2053 return 1;
2054 return 0;
2055}
2056
2057static int domain_add_dev_info(struct dmar_domain *domain,
2058 struct pci_dev *pdev)
2059{
2060 struct device_domain_info *info;
2061 unsigned long flags;
2062
2063 info = alloc_devinfo_mem();
2064 if (!info)
2065 return -ENOMEM;
2066
2067 info->segment = pci_domain_nr(pdev->bus);
2068 info->bus = pdev->bus->number;
2069 info->devfn = pdev->devfn;
2070 info->dev = pdev;
2071 info->domain = domain;
2072
2073 spin_lock_irqsave(&device_domain_lock, flags);
2074 list_add(&info->link, &domain->devices);
2075 list_add(&info->global, &device_domain_list);
2076 pdev->dev.archdata.iommu = info;
2077 spin_unlock_irqrestore(&device_domain_lock, flags);
2078
2079 return 0;
2080}
2081
2082static int iommu_prepare_static_identity_mapping(void)
2083{
2084 int i;
2085 struct pci_dev *pdev = NULL;
2086 int ret;
2087
2088 ret = si_domain_init();
2089 if (ret)
2090 return -EFAULT;
2091
2092 printk(KERN_INFO "IOMMU: Setting identity map:\n");
2093 for_each_pci_dev(pdev) {
2094 for (i = 0; i < e820.nr_map; i++) {
2095 struct e820entry *ei = &e820.map[i];
2096
2097 if (ei->type == E820_RAM) {
2098 ret = iommu_prepare_identity_map(pdev,
2099 ei->addr, ei->addr + ei->size);
2100 if (ret) {
2101 printk(KERN_INFO "1:1 mapping to one domain failed.\n");
2102 return -EFAULT;
2103 }
2104 }
2105 }
2106 ret = domain_add_dev_info(si_domain, pdev);
2107 if (ret)
2108 return ret;
2109 }
2110
2111 return 0;
2112}
2113
2114int __init init_dmars(void)
1956{ 2115{
1957 struct dmar_drhd_unit *drhd; 2116 struct dmar_drhd_unit *drhd;
1958 struct dmar_rmrr_unit *rmrr; 2117 struct dmar_rmrr_unit *rmrr;
@@ -1962,6 +2121,13 @@ static int __init init_dmars(void)
1962 int pass_through = 1; 2121 int pass_through = 1;
1963 2122
1964 /* 2123 /*
 2124 * In case pass-through cannot be enabled, the iommu tries to use identity
2125 * mapping.
2126 */
2127 if (iommu_pass_through)
2128 iommu_identity_mapping = 1;
2129
2130 /*
1965 * for each drhd 2131 * for each drhd
1966 * allocate root 2132 * allocate root
1967 * initialize and program root entry to not present 2133 * initialize and program root entry to not present
@@ -2090,9 +2256,12 @@ static int __init init_dmars(void)
2090 2256
2091 /* 2257 /*
2092 * If pass through is not set or not enabled, setup context entries for 2258 * If pass through is not set or not enabled, setup context entries for
2093 * identity mappings for rmrr, gfx, and isa. 2259 * identity mappings for rmrr, gfx, and isa and may fall back to static
2260 * identity mapping if iommu_identity_mapping is set.
2094 */ 2261 */
2095 if (!iommu_pass_through) { 2262 if (!iommu_pass_through) {
2263 if (iommu_identity_mapping)
2264 iommu_prepare_static_identity_mapping();
2096 /* 2265 /*
2097 * For each rmrr 2266 * For each rmrr
2098 * for each dev attached to rmrr 2267 * for each dev attached to rmrr
@@ -2107,6 +2276,7 @@ static int __init init_dmars(void)
2107 * endfor 2276 * endfor
2108 * endfor 2277 * endfor
2109 */ 2278 */
2279 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2110 for_each_rmrr_units(rmrr) { 2280 for_each_rmrr_units(rmrr) {
2111 for (i = 0; i < rmrr->devices_cnt; i++) { 2281 for (i = 0; i < rmrr->devices_cnt; i++) {
2112 pdev = rmrr->devices[i]; 2282 pdev = rmrr->devices[i];
@@ -2248,6 +2418,52 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
2248 return domain; 2418 return domain;
2249} 2419}
2250 2420
2421static int iommu_dummy(struct pci_dev *pdev)
2422{
2423 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2424}
2425
 2426/* Check if the pdev needs to go through the non-identity map/unmap process. */
2427static int iommu_no_mapping(struct pci_dev *pdev)
2428{
2429 int found;
2430
2431 if (!iommu_identity_mapping)
2432 return iommu_dummy(pdev);
2433
2434 found = identity_mapping(pdev);
2435 if (found) {
2436 if (pdev->dma_mask > DMA_BIT_MASK(32))
2437 return 1;
2438 else {
2439 /*
 2440 * A 32-bit DMA device is removed from si_domain and falls back
2441 * to non-identity mapping.
2442 */
2443 domain_remove_one_dev_info(si_domain, pdev);
2444 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2445 pci_name(pdev));
2446 return 0;
2447 }
2448 } else {
2449 /*
 2450 * In case a 64-bit DMA device is detached from a VM, the device
2451 * is put into si_domain for identity mapping.
2452 */
2453 if (pdev->dma_mask > DMA_BIT_MASK(32)) {
2454 int ret;
2455 ret = domain_add_dev_info(si_domain, pdev);
2456 if (!ret) {
2457 printk(KERN_INFO "64bit %s uses identity mapping\n",
2458 pci_name(pdev));
2459 return 1;
2460 }
2461 }
2462 }
2463
2464 return iommu_dummy(pdev);
2465}
2466
2251static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, 2467static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2252 size_t size, int dir, u64 dma_mask) 2468 size_t size, int dir, u64 dma_mask)
2253{ 2469{
@@ -2260,7 +2476,8 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2260 struct intel_iommu *iommu; 2476 struct intel_iommu *iommu;
2261 2477
2262 BUG_ON(dir == DMA_NONE); 2478 BUG_ON(dir == DMA_NONE);
2263 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2479
2480 if (iommu_no_mapping(pdev))
2264 return paddr; 2481 return paddr;
2265 2482
2266 domain = get_valid_domain_for_dev(pdev); 2483 domain = get_valid_domain_for_dev(pdev);
@@ -2401,8 +2618,9 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2401 struct iova *iova; 2618 struct iova *iova;
2402 struct intel_iommu *iommu; 2619 struct intel_iommu *iommu;
2403 2620
2404 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2621 if (iommu_no_mapping(pdev))
2405 return; 2622 return;
2623
2406 domain = find_domain(pdev); 2624 domain = find_domain(pdev);
2407 BUG_ON(!domain); 2625 BUG_ON(!domain);
2408 2626
@@ -2492,7 +2710,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2492 struct scatterlist *sg; 2710 struct scatterlist *sg;
2493 struct intel_iommu *iommu; 2711 struct intel_iommu *iommu;
2494 2712
2495 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2713 if (iommu_no_mapping(pdev))
2496 return; 2714 return;
2497 2715
2498 domain = find_domain(pdev); 2716 domain = find_domain(pdev);
@@ -2553,7 +2771,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2553 struct intel_iommu *iommu; 2771 struct intel_iommu *iommu;
2554 2772
2555 BUG_ON(dir == DMA_NONE); 2773 BUG_ON(dir == DMA_NONE);
2556 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2774 if (iommu_no_mapping(pdev))
2557 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); 2775 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2558 2776
2559 domain = get_valid_domain_for_dev(pdev); 2777 domain = get_valid_domain_for_dev(pdev);
@@ -2951,31 +3169,6 @@ int __init intel_iommu_init(void)
2951 return 0; 3169 return 0;
2952} 3170}
2953 3171
2954static int vm_domain_add_dev_info(struct dmar_domain *domain,
2955 struct pci_dev *pdev)
2956{
2957 struct device_domain_info *info;
2958 unsigned long flags;
2959
2960 info = alloc_devinfo_mem();
2961 if (!info)
2962 return -ENOMEM;
2963
2964 info->segment = pci_domain_nr(pdev->bus);
2965 info->bus = pdev->bus->number;
2966 info->devfn = pdev->devfn;
2967 info->dev = pdev;
2968 info->domain = domain;
2969
2970 spin_lock_irqsave(&device_domain_lock, flags);
2971 list_add(&info->link, &domain->devices);
2972 list_add(&info->global, &device_domain_list);
2973 pdev->dev.archdata.iommu = info;
2974 spin_unlock_irqrestore(&device_domain_lock, flags);
2975
2976 return 0;
2977}
2978
2979static void iommu_detach_dependent_devices(struct intel_iommu *iommu, 3172static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
2980 struct pci_dev *pdev) 3173 struct pci_dev *pdev)
2981{ 3174{
@@ -3003,7 +3196,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3003 } 3196 }
3004} 3197}
3005 3198
3006static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, 3199static void domain_remove_one_dev_info(struct dmar_domain *domain,
3007 struct pci_dev *pdev) 3200 struct pci_dev *pdev)
3008{ 3201{
3009 struct device_domain_info *info; 3202 struct device_domain_info *info;
@@ -3136,7 +3329,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
3136 return domain; 3329 return domain;
3137} 3330}
3138 3331
3139static int vm_domain_init(struct dmar_domain *domain, int guest_width) 3332static int md_domain_init(struct dmar_domain *domain, int guest_width)
3140{ 3333{
3141 int adjust_width; 3334 int adjust_width;
3142 3335
@@ -3227,7 +3420,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)
3227 "intel_iommu_domain_init: dmar_domain == NULL\n"); 3420 "intel_iommu_domain_init: dmar_domain == NULL\n");
3228 return -ENOMEM; 3421 return -ENOMEM;
3229 } 3422 }
3230 if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 3423 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3231 printk(KERN_ERR 3424 printk(KERN_ERR
3232 "intel_iommu_domain_init() failed\n"); 3425 "intel_iommu_domain_init() failed\n");
3233 vm_domain_exit(dmar_domain); 3426 vm_domain_exit(dmar_domain);
@@ -3262,8 +3455,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
3262 3455
3263 old_domain = find_domain(pdev); 3456 old_domain = find_domain(pdev);
3264 if (old_domain) { 3457 if (old_domain) {
3265 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) 3458 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3266 vm_domain_remove_one_dev_info(old_domain, pdev); 3459 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3460 domain_remove_one_dev_info(old_domain, pdev);
3267 else 3461 else
3268 domain_remove_dev_info(old_domain); 3462 domain_remove_dev_info(old_domain);
3269 } 3463 }
@@ -3285,7 +3479,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
3285 return -EFAULT; 3479 return -EFAULT;
3286 } 3480 }
3287 3481
3288 ret = vm_domain_add_dev_info(dmar_domain, pdev); 3482 ret = domain_add_dev_info(dmar_domain, pdev);
3289 if (ret) 3483 if (ret)
3290 return ret; 3484 return ret;
3291 3485
@@ -3299,7 +3493,7 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
3299 struct dmar_domain *dmar_domain = domain->priv; 3493 struct dmar_domain *dmar_domain = domain->priv;
3300 struct pci_dev *pdev = to_pci_dev(dev); 3494 struct pci_dev *pdev = to_pci_dev(dev);
3301 3495
3302 vm_domain_remove_one_dev_info(dmar_domain, pdev); 3496 domain_remove_one_dev_info(dmar_domain, pdev);
3303} 3497}
3304 3498
3305static int intel_iommu_map_range(struct iommu_domain *domain, 3499static int intel_iommu_map_range(struct iommu_domain *domain,
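Taken together, the intel-iommu hunks introduce one si_domain shared by all IOMMUs and route per-device decisions through iommu_no_mapping(): devices whose DMA mask exceeds 32 bits stay identity-mapped, the rest drop back to a translated private domain. Restating the decision from the code above in one function (no new API, just the logic condensed):

/* Nonzero means the device keeps its 1:1 mapping and DMA ops should
 * pass physical addresses through untranslated. */
static int wants_identity_map(struct pci_dev *pdev)
{
	if (!iommu_identity_mapping)
		return iommu_dummy(pdev);

	if (identity_mapping(pdev)) {
		if (pdev->dma_mask > DMA_BIT_MASK(32))
			return 1;		/* stay in si_domain */
		domain_remove_one_dev_info(si_domain, pdev);
		return 0;			/* 32-bit device: translate */
	}

	/* 64-bit device not yet in si_domain: add it for identity mapping */
	if (pdev->dma_mask > DMA_BIT_MASK(32) &&
	    !domain_add_dev_info(si_domain, pdev))
		return 1;

	return iommu_dummy(pdev);
}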
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 1e83c8c5f985..4f5b8712931f 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -10,6 +10,8 @@
10#include <linux/intel-iommu.h> 10#include <linux/intel-iommu.h>
11#include "intr_remapping.h" 11#include "intr_remapping.h"
12#include <acpi/acpi.h> 12#include <acpi/acpi.h>
13#include <asm/pci-direct.h>
14#include "pci.h"
13 15
14static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; 16static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
15static int ir_ioapic_num; 17static int ir_ioapic_num;
@@ -314,7 +316,8 @@ int modify_irte(int irq, struct irte *irte_modified)
314 index = irq_iommu->irte_index + irq_iommu->sub_handle; 316 index = irq_iommu->irte_index + irq_iommu->sub_handle;
315 irte = &iommu->ir_table->base[index]; 317 irte = &iommu->ir_table->base[index];
316 318
317 set_64bit((unsigned long *)irte, irte_modified->low); 319 set_64bit((unsigned long *)&irte->low, irte_modified->low);
320 set_64bit((unsigned long *)&irte->high, irte_modified->high);
318 __iommu_flush_cache(iommu, irte, sizeof(*irte)); 321 __iommu_flush_cache(iommu, irte, sizeof(*irte));
319 322
320 rc = qi_flush_iec(iommu, index, 0); 323 rc = qi_flush_iec(iommu, index, 0);
@@ -369,12 +372,32 @@ struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
369 return drhd->iommu; 372 return drhd->iommu;
370} 373}
371 374
375static int clear_entries(struct irq_2_iommu *irq_iommu)
376{
377 struct irte *start, *entry, *end;
378 struct intel_iommu *iommu;
379 int index;
380
381 if (irq_iommu->sub_handle)
382 return 0;
383
384 iommu = irq_iommu->iommu;
385 index = irq_iommu->irte_index + irq_iommu->sub_handle;
386
387 start = iommu->ir_table->base + index;
388 end = start + (1 << irq_iommu->irte_mask);
389
390 for (entry = start; entry < end; entry++) {
391 set_64bit((unsigned long *)&entry->low, 0);
392 set_64bit((unsigned long *)&entry->high, 0);
393 }
394
395 return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
396}
397
372int free_irte(int irq) 398int free_irte(int irq)
373{ 399{
374 int rc = 0; 400 int rc = 0;
375 int index, i;
376 struct irte *irte;
377 struct intel_iommu *iommu;
378 struct irq_2_iommu *irq_iommu; 401 struct irq_2_iommu *irq_iommu;
379 unsigned long flags; 402 unsigned long flags;
380 403
@@ -385,16 +408,7 @@ int free_irte(int irq)
385 return -1; 408 return -1;
386 } 409 }
387 410
388 iommu = irq_iommu->iommu; 411 rc = clear_entries(irq_iommu);
389
390 index = irq_iommu->irte_index + irq_iommu->sub_handle;
391 irte = &iommu->ir_table->base[index];
392
393 if (!irq_iommu->sub_handle) {
394 for (i = 0; i < (1 << irq_iommu->irte_mask); i++)
395 set_64bit((unsigned long *)(irte + i), 0);
396 rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask);
397 }
398 412
399 irq_iommu->iommu = NULL; 413 irq_iommu->iommu = NULL;
400 irq_iommu->irte_index = 0; 414 irq_iommu->irte_index = 0;
@@ -406,6 +420,91 @@ int free_irte(int irq)
406 return rc; 420 return rc;
407} 421}
408 422
423/*
424 * source validation type
425 */
426#define SVT_NO_VERIFY 0x0 /* no verification is required */
 427#define SVT_VERIFY_SID_SQ 0x1 /* verify using SID and SQ fields */
428#define SVT_VERIFY_BUS 0x2 /* verify bus of request-id */
429
430/*
431 * source-id qualifier
432 */
433#define SQ_ALL_16 0x0 /* verify all 16 bits of request-id */
434#define SQ_13_IGNORE_1 0x1 /* verify most significant 13 bits, ignore
435 * the third least significant bit
436 */
437#define SQ_13_IGNORE_2 0x2 /* verify most significant 13 bits, ignore
438 * the second and third least significant bits
439 */
440#define SQ_13_IGNORE_3 0x3 /* verify most significant 13 bits, ignore
 441 * the three least significant bits
442 */
443
444/*
445 * set SVT, SQ and SID fields of irte to verify
446 * source ids of interrupt requests
447 */
448static void set_irte_sid(struct irte *irte, unsigned int svt,
449 unsigned int sq, unsigned int sid)
450{
451 irte->svt = svt;
452 irte->sq = sq;
453 irte->sid = sid;
454}
455
456int set_ioapic_sid(struct irte *irte, int apic)
457{
458 int i;
459 u16 sid = 0;
460
461 if (!irte)
462 return -1;
463
464 for (i = 0; i < MAX_IO_APICS; i++) {
465 if (ir_ioapic[i].id == apic) {
466 sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
467 break;
468 }
469 }
470
471 if (sid == 0) {
472 pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic);
473 return -1;
474 }
475
476 set_irte_sid(irte, 1, 0, sid);
477
478 return 0;
479}
480
481int set_msi_sid(struct irte *irte, struct pci_dev *dev)
482{
483 struct pci_dev *bridge;
484
485 if (!irte || !dev)
486 return -1;
487
488 /* PCIe device or Root Complex integrated PCI device */
489 if (dev->is_pcie || !dev->bus->parent) {
490 set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
491 (dev->bus->number << 8) | dev->devfn);
492 return 0;
493 }
494
495 bridge = pci_find_upstream_pcie_bridge(dev);
496 if (bridge) {
497 if (bridge->is_pcie) /* this is a PCIE-to-PCI/PCIX bridge */
498 set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
499 (bridge->bus->number << 8) | dev->bus->number);
500 else /* this is a legacy PCI bridge */
501 set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
502 (bridge->bus->number << 8) | bridge->devfn);
503 }
504
505 return 0;
506}
507
409static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) 508static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
410{ 509{
411 u64 addr; 510 u64 addr;
@@ -612,6 +711,35 @@ error:
612 return -1; 711 return -1;
613} 712}
614 713
714static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
715 struct intel_iommu *iommu)
716{
717 struct acpi_dmar_pci_path *path;
718 u8 bus;
719 int count;
720
721 bus = scope->bus;
722 path = (struct acpi_dmar_pci_path *)(scope + 1);
723 count = (scope->length - sizeof(struct acpi_dmar_device_scope))
724 / sizeof(struct acpi_dmar_pci_path);
725
726 while (--count > 0) {
727 /*
 728 * Access PCI directly because the PCI
729 * subsystem isn't initialized yet.
730 */
731 bus = read_pci_config_byte(bus, path->dev, path->fn,
732 PCI_SECONDARY_BUS);
733 path++;
734 }
735
736 ir_ioapic[ir_ioapic_num].bus = bus;
737 ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->dev, path->fn);
738 ir_ioapic[ir_ioapic_num].iommu = iommu;
739 ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
740 ir_ioapic_num++;
741}
742
615static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, 743static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
616 struct intel_iommu *iommu) 744 struct intel_iommu *iommu)
617{ 745{
@@ -636,9 +764,7 @@ static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
636 " 0x%Lx\n", scope->enumeration_id, 764 " 0x%Lx\n", scope->enumeration_id,
637 drhd->address); 765 drhd->address);
638 766
639 ir_ioapic[ir_ioapic_num].iommu = iommu; 767 ir_parse_one_ioapic_scope(scope, iommu);
640 ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
641 ir_ioapic_num++;
642 } 768 }
643 start += scope->length; 769 start += scope->length;
644 } 770 }
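The new SVT/SQ/SID fields let the remapping hardware verify which requester may signal each remapped interrupt. For a PCIe device the source-id is simply the bus number in the high byte and devfn in the low byte, as set_msi_sid() above shows; a minimal sketch of that packing:

#include <linux/pci.h>

/* Build the 16-bit source-id for a PCIe requester (bus << 8 | devfn).
 * Devices behind a PCIe-to-PCI bridge are instead validated by bus
 * number, as in set_msi_sid() above. */
static u16 pcie_source_id(struct pci_dev *dev)
{
	return (dev->bus->number << 8) | dev->devfn;
}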
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
index ca48f0df8ac9..63a263c18415 100644
--- a/drivers/pci/intr_remapping.h
+++ b/drivers/pci/intr_remapping.h
@@ -3,6 +3,8 @@
3struct ioapic_scope { 3struct ioapic_scope {
4 struct intel_iommu *iommu; 4 struct intel_iommu *iommu;
5 unsigned int id; 5 unsigned int id;
6 unsigned int bus; /* PCI bus number */
7 unsigned int devfn; /* PCI devfn number */
6}; 8};
7 9
8#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) 10#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index c682ac536415..7232fe7104aa 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -34,10 +34,27 @@ config ACER_WMI
34 If you have an ACPI-WMI compatible Acer/ Wistron laptop, say Y or M 34 If you have an ACPI-WMI compatible Acer/ Wistron laptop, say Y or M
35 here. 35 here.
36 36
37config ACERHDF
38 tristate "Acer Aspire One temperature and fan driver"
39 depends on THERMAL && THERMAL_HWMON && ACPI
40 ---help---
 41 This is a driver for Acer Aspire One netbooks. It provides access to
 42 the temperature sensor and allows controlling the fan.
43
44 After loading this driver the BIOS is still in control of the fan.
45 To let the kernel handle the fan, do:
46 echo -n enabled > /sys/class/thermal/thermal_zone0/mode
47
48 For more information about this driver see
49 <http://piie.net/files/acerhdf_README.txt>
50
51 If you have an Acer Aspire One netbook, say Y or M
52 here.
53
37config ASUS_LAPTOP 54config ASUS_LAPTOP
38 tristate "Asus Laptop Extras (EXPERIMENTAL)" 55 tristate "Asus Laptop Extras"
39 depends on ACPI 56 depends on ACPI
40 depends on EXPERIMENTAL && !ACPI_ASUS 57 depends on !ACPI_ASUS
41 select LEDS_CLASS 58 select LEDS_CLASS
42 select NEW_LEDS 59 select NEW_LEDS
43 select BACKLIGHT_CLASS_DEVICE 60 select BACKLIGHT_CLASS_DEVICE
@@ -45,12 +62,12 @@ config ASUS_LAPTOP
45 ---help--- 62 ---help---
46 This is the new Linux driver for Asus laptops. It may also support some 63 This is the new Linux driver for Asus laptops. It may also support some
47 MEDION, JVC or VICTOR laptops. It makes all the extra buttons generate 64 MEDION, JVC or VICTOR laptops. It makes all the extra buttons generate
48 standard ACPI events that go through /proc/acpi/events. It also adds 65 standard ACPI events and input events. It also adds
49 support for video output switching, LCD backlight control, Bluetooth and 66 support for video output switching, LCD backlight control, Bluetooth and
50 Wlan control, and most importantly, allows you to blink those fancy LEDs. 67 Wlan control, and most importantly, allows you to blink those fancy LEDs.
51 68
52 For more information and a userspace daemon for handling the extra 69 For more information and a userspace daemon for handling the extra
53 buttons see <http://acpi4asus.sf.net/>. 70 buttons see <http://acpi4asus.sf.net>.
54 71
55 If you have an ACPI-compatible ASUS laptop, say Y or M here. 72 If you have an ACPI-compatible ASUS laptop, say Y or M here.
56 73
@@ -342,7 +359,10 @@ config EEEPC_LAPTOP
342 select HWMON 359 select HWMON
343 ---help--- 360 ---help---
344 This driver supports the Fn-Fx keys on Eee PC laptops. 361 This driver supports the Fn-Fx keys on Eee PC laptops.
345 It also adds the ability to switch camera/wlan on/off. 362
363 It also gives access to some extra laptop functionalities like
 364 Bluetooth and backlight control, and allows powering some other
 365 devices on and off.
346 366
347 If you have an Eee PC laptop, say Y or M here. 367 If you have an Eee PC laptop, say Y or M here.
348 368
@@ -369,7 +389,7 @@ config ACPI_WMI
369 any ACPI-WMI devices. 389 any ACPI-WMI devices.
370 390
371config ACPI_ASUS 391config ACPI_ASUS
372 tristate "ASUS/Medion Laptop Extras" 392 tristate "ASUS/Medion Laptop Extras (DEPRECATED)"
373 depends on ACPI 393 depends on ACPI
374 select BACKLIGHT_CLASS_DEVICE 394 select BACKLIGHT_CLASS_DEVICE
375 ---help--- 395 ---help---
@@ -390,7 +410,7 @@ config ACPI_ASUS
390 parameters. 410 parameters.
391 411
392 More information and a userspace daemon for handling the extra buttons 412 More information and a userspace daemon for handling the extra buttons
393 at <http://sourceforge.net/projects/acpi4asus/>. 413 at <http://acpi4asus.sf.net>.
394 414
395 If you have an ACPI-compatible ASUS laptop, say Y or M here. This 415 If you have an ACPI-compatible ASUS laptop, say Y or M here. This
396 driver is still under development, so if your laptop is unsupported or 416 driver is still under development, so if your laptop is unsupported or
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index e40c7bd1b87e..641b8bfa5538 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_COMPAL_LAPTOP) += compal-laptop.o
9obj-$(CONFIG_DELL_LAPTOP) += dell-laptop.o 9obj-$(CONFIG_DELL_LAPTOP) += dell-laptop.o
10obj-$(CONFIG_DELL_WMI) += dell-wmi.o 10obj-$(CONFIG_DELL_WMI) += dell-wmi.o
11obj-$(CONFIG_ACER_WMI) += acer-wmi.o 11obj-$(CONFIG_ACER_WMI) += acer-wmi.o
12obj-$(CONFIG_ACERHDF) += acerhdf.o
12obj-$(CONFIG_HP_WMI) += hp-wmi.o 13obj-$(CONFIG_HP_WMI) += hp-wmi.o
13obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o 14obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
14obj-$(CONFIG_SONY_LAPTOP) += sony-laptop.o 15obj-$(CONFIG_SONY_LAPTOP) += sony-laptop.o
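The ACERHDF help text above hands fan control to the kernel by writing "enabled" to the thermal zone's mode attribute. As a hedged illustration in C (zone index 0 is an assumption; it depends on enumeration order, so check the zone's type attribute on a real system):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/class/thermal/thermal_zone0/mode", "w");

	if (!f) {
		perror("open thermal zone");
		return 1;
	}
	fputs("enabled", f);	/* "disabled" reverts to BIOS fan control */
	fclose(f);
	return 0;
}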
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
new file mode 100644
index 000000000000..bdfee177eefb
--- /dev/null
+++ b/drivers/platform/x86/acerhdf.c
@@ -0,0 +1,602 @@
1/*
2 * acerhdf - A driver which monitors the temperature
 3 * of the Aspire One netbook, turns the fan on/off
4 * as soon as the upper/lower threshold is reached.
5 *
6 * (C) 2009 - Peter Feuerer peter (a) piie.net
7 * http://piie.net
8 * 2009 Borislav Petkov <petkovbb@gmail.com>
9 *
10 * Inspired by and many thanks to:
11 * o acerfand - Rachel Greenham
12 * o acer_ec.pl - Michael Kurz michi.kurz (at) googlemail.com
13 * - Petr Tomasek tomasek (#) etf,cuni,cz
14 * - Carlos Corbacho cathectic (at) gmail.com
15 * o lkml - Matthew Garrett
16 * - Borislav Petkov
17 * - Andreas Mohr
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 */
33
34#define pr_fmt(fmt) "acerhdf: " fmt
35
36#include <linux/kernel.h>
37#include <linux/module.h>
38#include <linux/fs.h>
39#include <linux/dmi.h>
40#include <acpi/acpi_drivers.h>
41#include <linux/sched.h>
42#include <linux/thermal.h>
43#include <linux/platform_device.h>
44
45/*
 46 * The driver is started with "kernel mode off" by default. That means the BIOS
 47 * is still in control of the fan. In this mode the driver allows reading the
 48 * temperature of the CPU, and a userspace tool may take over control of the fan.
49 * If the driver is switched to "kernel mode" (e.g. via module parameter) the
50 * driver is in full control of the fan. If you want the module to be started in
51 * kernel mode by default, define the following:
52 */
53#undef START_IN_KERNEL_MODE
54
55#define DRV_VER "0.5.13"
56
57/*
58 * According to the Atom N270 datasheet,
59 * (http://download.intel.com/design/processor/datashts/320032.pdf) the
60 * CPU's optimal operating limits denoted in junction temperature as
61 * measured by the on-die thermal monitor are within 0 <= Tj <= 90. So,
 62 * assume 89°C is the critical temperature.
63 */
64#define ACERHDF_TEMP_CRIT 89
65#define ACERHDF_FAN_OFF 0
66#define ACERHDF_FAN_AUTO 1
67
68/*
69 * No matter what value the user puts into the fanon variable, turn on the fan
 70 * at 80 degrees Celsius to prevent hardware damage
71 */
72#define ACERHDF_MAX_FANON 80
73
74/*
75 * Maximum interval between two temperature checks is 15 seconds, as the die
76 * can get hot really fast under heavy load (plus we shouldn't forget about
77 * possible impact of _external_ aggressive sources such as heaters, sun etc.)
78 */
79#define ACERHDF_MAX_INTERVAL 15
80
81#ifdef START_IN_KERNEL_MODE
82static int kernelmode = 1;
83#else
84static int kernelmode;
85#endif
86
87static unsigned int interval = 10;
88static unsigned int fanon = 63;
89static unsigned int fanoff = 58;
90static unsigned int verbose;
91static unsigned int fanstate = ACERHDF_FAN_AUTO;
92static char force_bios[16];
93static unsigned int prev_interval;
94struct thermal_zone_device *thz_dev;
95struct thermal_cooling_device *cl_dev;
96struct platform_device *acerhdf_dev;
97
98module_param(kernelmode, uint, 0);
99MODULE_PARM_DESC(kernelmode, "Kernel mode fan control on / off");
100module_param(interval, uint, 0600);
101MODULE_PARM_DESC(interval, "Polling interval of temperature check");
102module_param(fanon, uint, 0600);
103MODULE_PARM_DESC(fanon, "Turn the fan on above this temperature");
104module_param(fanoff, uint, 0600);
105MODULE_PARM_DESC(fanoff, "Turn the fan off below this temperature");
106module_param(verbose, uint, 0600);
107MODULE_PARM_DESC(verbose, "Enable verbose dmesg output");
108module_param_string(force_bios, force_bios, 16, 0);
109MODULE_PARM_DESC(force_bios, "Force BIOS version and omit BIOS check");
110
111/* BIOS settings */
112struct bios_settings_t {
113 const char *vendor;
114 const char *version;
115 unsigned char fanreg;
116 unsigned char tempreg;
117 unsigned char fancmd[2]; /* fan off and auto commands */
118};
119
120/* Register addresses and values for different BIOS versions */
121static const struct bios_settings_t bios_tbl[] = {
122 {"Acer", "v0.3109", 0x55, 0x58, {0x1f, 0x00} },
123 {"Acer", "v0.3114", 0x55, 0x58, {0x1f, 0x00} },
124 {"Acer", "v0.3301", 0x55, 0x58, {0xaf, 0x00} },
125 {"Acer", "v0.3304", 0x55, 0x58, {0xaf, 0x00} },
126 {"Acer", "v0.3305", 0x55, 0x58, {0xaf, 0x00} },
127 {"Acer", "v0.3308", 0x55, 0x58, {0x21, 0x00} },
128 {"Acer", "v0.3309", 0x55, 0x58, {0x21, 0x00} },
129 {"Acer", "v0.3310", 0x55, 0x58, {0x21, 0x00} },
130 {"Gateway", "v0.3103", 0x55, 0x58, {0x21, 0x00} },
131 {"Packard Bell", "v0.3105", 0x55, 0x58, {0x21, 0x00} },
132 {"", "", 0, 0, {0, 0} }
133};
134
135static const struct bios_settings_t *bios_cfg __read_mostly;
136
137
138static int acerhdf_get_temp(int *temp)
139{
140 u8 read_temp;
141
142 if (ec_read(bios_cfg->tempreg, &read_temp))
143 return -EINVAL;
144
145 *temp = read_temp;
146
147 return 0;
148}
149
150static int acerhdf_get_fanstate(int *state)
151{
152 u8 fan;
153 bool tmp;
154
155 if (ec_read(bios_cfg->fanreg, &fan))
156 return -EINVAL;
157
158 tmp = (fan == bios_cfg->fancmd[ACERHDF_FAN_OFF]);
159 *state = tmp ? ACERHDF_FAN_OFF : ACERHDF_FAN_AUTO;
160
161 return 0;
162}
163
164static void acerhdf_change_fanstate(int state)
165{
166 unsigned char cmd;
167
168 if (verbose)
169 pr_notice("fan %s\n", (state == ACERHDF_FAN_OFF) ?
170 "OFF" : "ON");
171
172 if ((state != ACERHDF_FAN_OFF) && (state != ACERHDF_FAN_AUTO)) {
173 pr_err("invalid fan state %d requested, setting to auto!\n",
174 state);
175 state = ACERHDF_FAN_AUTO;
176 }
177
178 cmd = bios_cfg->fancmd[state];
179 fanstate = state;
180
181 ec_write(bios_cfg->fanreg, cmd);
182}
183
184static void acerhdf_check_param(struct thermal_zone_device *thermal)
185{
186 if (fanon > ACERHDF_MAX_FANON) {
187 pr_err("fanon temperature too high, set to %d\n",
188 ACERHDF_MAX_FANON);
189 fanon = ACERHDF_MAX_FANON;
190 }
191
192 if (kernelmode && prev_interval != interval) {
193 if (interval > ACERHDF_MAX_INTERVAL) {
194 pr_err("interval too high, set to %d\n",
195 ACERHDF_MAX_INTERVAL);
196 interval = ACERHDF_MAX_INTERVAL;
197 }
198 if (verbose)
199 pr_notice("interval changed to: %d\n",
200 interval);
201 thermal->polling_delay = interval*1000;
202 prev_interval = interval;
203 }
204}
205
206/*
207 * This is the thermal zone callback which does the delayed polling of the fan
 208 * state. We check sysfs-originating settings here in acerhdf_check_param(),
 209 * at polling-interval granularity, since we can't do that in the respective
210 * accessors of the module parameters.
211 */
212static int acerhdf_get_ec_temp(struct thermal_zone_device *thermal,
213 unsigned long *t)
214{
215 int temp, err = 0;
216
217 acerhdf_check_param(thermal);
218
219 err = acerhdf_get_temp(&temp);
220 if (err)
221 return err;
222
223 if (verbose)
224 pr_notice("temp %d\n", temp);
225
226 *t = temp;
227 return 0;
228}
229
230static int acerhdf_bind(struct thermal_zone_device *thermal,
231 struct thermal_cooling_device *cdev)
232{
 233 /* if the cooling device is the one from acerhdf, bind it */
234 if (cdev != cl_dev)
235 return 0;
236
237 if (thermal_zone_bind_cooling_device(thermal, 0, cdev)) {
238 pr_err("error binding cooling dev\n");
239 return -EINVAL;
240 }
241 return 0;
242}
243
244static int acerhdf_unbind(struct thermal_zone_device *thermal,
245 struct thermal_cooling_device *cdev)
246{
247 if (cdev != cl_dev)
248 return 0;
249
250 if (thermal_zone_unbind_cooling_device(thermal, 0, cdev)) {
251 pr_err("error unbinding cooling dev\n");
252 return -EINVAL;
253 }
254 return 0;
255}
256
257static inline void acerhdf_revert_to_bios_mode(void)
258{
259 acerhdf_change_fanstate(ACERHDF_FAN_AUTO);
260 kernelmode = 0;
261 if (thz_dev)
262 thz_dev->polling_delay = 0;
263 pr_notice("kernel mode fan control OFF\n");
264}
265static inline void acerhdf_enable_kernelmode(void)
266{
267 kernelmode = 1;
268
269 thz_dev->polling_delay = interval*1000;
270 thermal_zone_device_update(thz_dev);
271 pr_notice("kernel mode fan control ON\n");
272}
273
274static int acerhdf_get_mode(struct thermal_zone_device *thermal,
275 enum thermal_device_mode *mode)
276{
277 if (verbose)
278 pr_notice("kernel mode fan control %d\n", kernelmode);
279
280 *mode = (kernelmode) ? THERMAL_DEVICE_ENABLED
281 : THERMAL_DEVICE_DISABLED;
282
283 return 0;
284}
285
286/*
287 * set operation mode;
 288 * enabled: the thermal layer of the kernel takes care of
289 * the temperature and the fan.
290 * disabled: the BIOS takes control of the fan.
291 */
292static int acerhdf_set_mode(struct thermal_zone_device *thermal,
293 enum thermal_device_mode mode)
294{
295 if (mode == THERMAL_DEVICE_DISABLED && kernelmode)
296 acerhdf_revert_to_bios_mode();
297 else if (mode == THERMAL_DEVICE_ENABLED && !kernelmode)
298 acerhdf_enable_kernelmode();
299
300 return 0;
301}
302
303static int acerhdf_get_trip_type(struct thermal_zone_device *thermal, int trip,
304 enum thermal_trip_type *type)
305{
306 if (trip == 0)
307 *type = THERMAL_TRIP_ACTIVE;
308
309 return 0;
310}
311
312static int acerhdf_get_trip_temp(struct thermal_zone_device *thermal, int trip,
313 unsigned long *temp)
314{
315 if (trip == 0)
316 *temp = fanon;
317
318 return 0;
319}
320
321static int acerhdf_get_crit_temp(struct thermal_zone_device *thermal,
322 unsigned long *temperature)
323{
324 *temperature = ACERHDF_TEMP_CRIT;
325 return 0;
326}
327
328/* bind callback functions to thermalzone */
329struct thermal_zone_device_ops acerhdf_dev_ops = {
330 .bind = acerhdf_bind,
331 .unbind = acerhdf_unbind,
332 .get_temp = acerhdf_get_ec_temp,
333 .get_mode = acerhdf_get_mode,
334 .set_mode = acerhdf_set_mode,
335 .get_trip_type = acerhdf_get_trip_type,
336 .get_trip_temp = acerhdf_get_trip_temp,
337 .get_crit_temp = acerhdf_get_crit_temp,
338};
339
340
341/*
342 * cooling device callback functions
343 * get maximal fan cooling state
344 */
345static int acerhdf_get_max_state(struct thermal_cooling_device *cdev,
346 unsigned long *state)
347{
348 *state = 1;
349
350 return 0;
351}
352
353static int acerhdf_get_cur_state(struct thermal_cooling_device *cdev,
354 unsigned long *state)
355{
356 int err = 0, tmp;
357
358 err = acerhdf_get_fanstate(&tmp);
359 if (err)
360 return err;
361
362 *state = (tmp == ACERHDF_FAN_AUTO) ? 1 : 0;
363 return 0;
364}
365
366/* change current fan state - is overwritten when running in kernel mode */
367static int acerhdf_set_cur_state(struct thermal_cooling_device *cdev,
368 unsigned long state)
369{
370 int cur_temp, cur_state, err = 0;
371
372 if (!kernelmode)
373 return 0;
374
375 err = acerhdf_get_temp(&cur_temp);
376 if (err) {
377 pr_err("error reading temperature, hand off control to BIOS\n");
378 goto err_out;
379 }
380
381 err = acerhdf_get_fanstate(&cur_state);
382 if (err) {
383 pr_err("error reading fan state, hand off control to BIOS\n");
384 goto err_out;
385 }
386
387 if (state == 0) {
388 /* turn fan off only if below fanoff temperature */
389 if ((cur_state == ACERHDF_FAN_AUTO) &&
390 (cur_temp < fanoff))
391 acerhdf_change_fanstate(ACERHDF_FAN_OFF);
392 } else {
393 if (cur_state == ACERHDF_FAN_OFF)
394 acerhdf_change_fanstate(ACERHDF_FAN_AUTO);
395 }
396 return 0;
397
398err_out:
399 acerhdf_revert_to_bios_mode();
400 return -EINVAL;
401}
402
403/* bind fan callbacks to fan device */
404struct thermal_cooling_device_ops acerhdf_cooling_ops = {
405 .get_max_state = acerhdf_get_max_state,
406 .get_cur_state = acerhdf_get_cur_state,
407 .set_cur_state = acerhdf_set_cur_state,
408};
409
410/* suspend / resume functionality */
411static int acerhdf_suspend(struct platform_device *dev, pm_message_t state)
412{
413 if (kernelmode)
414 acerhdf_change_fanstate(ACERHDF_FAN_AUTO);
415
416 if (verbose)
417 pr_notice("going suspend\n");
418
419 return 0;
420}
421
422static int acerhdf_resume(struct platform_device *device)
423{
424 if (verbose)
425 pr_notice("resuming\n");
426
427 return 0;
428}
429
430static int __devinit acerhdf_probe(struct platform_device *device)
431{
432 return 0;
433}
434
435static int acerhdf_remove(struct platform_device *device)
436{
437 return 0;
438}
439
440struct platform_driver acerhdf_drv = {
441 .driver = {
442 .name = "acerhdf",
443 .owner = THIS_MODULE,
444 },
445 .probe = acerhdf_probe,
446 .remove = acerhdf_remove,
447 .suspend = acerhdf_suspend,
448 .resume = acerhdf_resume,
449};
450
451
452/* check hardware */
453static int acerhdf_check_hardware(void)
454{
455 char const *vendor, *version, *product;
456 int i;
457
458 /* get BIOS data */
459 vendor = dmi_get_system_info(DMI_SYS_VENDOR);
460 version = dmi_get_system_info(DMI_BIOS_VERSION);
461 product = dmi_get_system_info(DMI_PRODUCT_NAME);
462
463 pr_info("Acer Aspire One Fan driver, v.%s\n", DRV_VER);
464
465 if (!force_bios[0]) {
466 if (strncmp(product, "AO", 2)) {
467 pr_err("no Aspire One hardware found\n");
468 return -EINVAL;
469 }
470 } else {
471 pr_info("forcing BIOS version: %s\n", version);
472 version = force_bios;
473 kernelmode = 0;
474 }
475
476 if (verbose)
477 pr_info("BIOS info: %s %s, product: %s\n",
478 vendor, version, product);
479
480 /* search BIOS version and vendor in BIOS settings table */
481 for (i = 0; bios_tbl[i].version[0]; i++) {
482 if (!strcmp(bios_tbl[i].vendor, vendor) &&
483 !strcmp(bios_tbl[i].version, version)) {
484 bios_cfg = &bios_tbl[i];
485 break;
486 }
487 }
488
489 if (!bios_cfg) {
490 pr_err("unknown (unsupported) BIOS version %s/%s, "
491 "please report, aborting!\n", vendor, version);
492 return -EINVAL;
493 }
494
495 /*
496 * if started with kernel mode off, prevent the kernel from switching
497 * off the fan
498 */
499 if (!kernelmode) {
500 pr_notice("Fan control off, to enable do:\n");
501 pr_notice("echo -n \"enabled\" > "
502 "/sys/class/thermal/thermal_zone0/mode\n");
503 }
504
505 return 0;
506}
507
508static int acerhdf_register_platform(void)
509{
510 int err = 0;
511
512 err = platform_driver_register(&acerhdf_drv);
513 if (err)
514 return err;
515
516 acerhdf_dev = platform_device_alloc("acerhdf", -1);
517 platform_device_add(acerhdf_dev);
518
519 return 0;
520}
521
522static void acerhdf_unregister_platform(void)
523{
524 if (!acerhdf_dev)
525 return;
526
527 platform_device_del(acerhdf_dev);
528 platform_driver_unregister(&acerhdf_drv);
529}
530
531static int acerhdf_register_thermal(void)
532{
533 cl_dev = thermal_cooling_device_register("acerhdf-fan", NULL,
534 &acerhdf_cooling_ops);
535
536 if (IS_ERR(cl_dev))
537 return -EINVAL;
538
539 thz_dev = thermal_zone_device_register("acerhdf", 1, NULL,
540 &acerhdf_dev_ops, 0, 0, 0,
541 (kernelmode) ? interval*1000 : 0);
542 if (IS_ERR(thz_dev))
543 return -EINVAL;
544
545 return 0;
546}
547
548static void acerhdf_unregister_thermal(void)
549{
550 if (cl_dev) {
551 thermal_cooling_device_unregister(cl_dev);
552 cl_dev = NULL;
553 }
554
555 if (thz_dev) {
556 thermal_zone_device_unregister(thz_dev);
557 thz_dev = NULL;
558 }
559}
560
561static int __init acerhdf_init(void)
562{
563 int err = 0;
564
565 err = acerhdf_check_hardware();
566 if (err)
567 goto out_err;
568
569 err = acerhdf_register_platform();
570 if (err)
571 goto err_unreg;
572
573 err = acerhdf_register_thermal();
574 if (err)
575 goto err_unreg;
576
577 return 0;
578
579err_unreg:
580 acerhdf_unregister_thermal();
581 acerhdf_unregister_platform();
582
583out_err:
584 return -ENODEV;
585}
586
587static void __exit acerhdf_exit(void)
588{
589 acerhdf_change_fanstate(ACERHDF_FAN_AUTO);
590 acerhdf_unregister_thermal();
591 acerhdf_unregister_platform();
592}
593
594MODULE_LICENSE("GPL");
595MODULE_AUTHOR("Peter Feuerer");
596MODULE_DESCRIPTION("Aspire One temperature and fan driver");
597MODULE_ALIAS("dmi:*:*Acer*:*:");
598MODULE_ALIAS("dmi:*:*Gateway*:*:");
599MODULE_ALIAS("dmi:*:*Packard Bell*:*:");
600
601module_init(acerhdf_init);
602module_exit(acerhdf_exit);
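acerhdf_set_cur_state() above implements a simple hysteresis: the fan goes off only once the temperature drops below fanoff, and back to auto above fanon. The decision isolated into one function (a restatement with the thresholds passed explicitly; assumes off_temp < on_temp):

static int next_fan_state(int cur_state, int temp, int off_temp, int on_temp)
{
	if (cur_state == ACERHDF_FAN_AUTO && temp < off_temp)
		return ACERHDF_FAN_OFF;		/* cooled down: stop the fan */
	if (cur_state == ACERHDF_FAN_OFF && temp >= on_temp)
		return ACERHDF_FAN_AUTO;	/* too hot: fan back to auto */
	return cur_state;			/* inside the hysteresis band */
}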
diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index bfc1a8892a32..db657bbeec90 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -33,6 +33,8 @@
33 * Sam Lin - GPS support 33 * Sam Lin - GPS support
34 */ 34 */
35 35
36#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37
36#include <linux/kernel.h> 38#include <linux/kernel.h>
37#include <linux/module.h> 39#include <linux/module.h>
38#include <linux/init.h> 40#include <linux/init.h>
@@ -53,9 +55,10 @@
53#define ASUS_HOTK_NAME "Asus Laptop Support" 55#define ASUS_HOTK_NAME "Asus Laptop Support"
54#define ASUS_HOTK_CLASS "hotkey" 56#define ASUS_HOTK_CLASS "hotkey"
55#define ASUS_HOTK_DEVICE_NAME "Hotkey" 57#define ASUS_HOTK_DEVICE_NAME "Hotkey"
56#define ASUS_HOTK_FILE "asus-laptop" 58#define ASUS_HOTK_FILE KBUILD_MODNAME
57#define ASUS_HOTK_PREFIX "\\_SB.ATKD." 59#define ASUS_HOTK_PREFIX "\\_SB.ATKD."
58 60
61
59/* 62/*
60 * Some events we use, same for all Asus 63 * Some events we use, same for all Asus
61 */ 64 */
@@ -207,13 +210,17 @@ MODULE_DEVICE_TABLE(acpi, asus_device_ids);
207 210
208static int asus_hotk_add(struct acpi_device *device); 211static int asus_hotk_add(struct acpi_device *device);
209static int asus_hotk_remove(struct acpi_device *device, int type); 212static int asus_hotk_remove(struct acpi_device *device, int type);
213static void asus_hotk_notify(struct acpi_device *device, u32 event);
214
210static struct acpi_driver asus_hotk_driver = { 215static struct acpi_driver asus_hotk_driver = {
211 .name = ASUS_HOTK_NAME, 216 .name = ASUS_HOTK_NAME,
212 .class = ASUS_HOTK_CLASS, 217 .class = ASUS_HOTK_CLASS,
213 .ids = asus_device_ids, 218 .ids = asus_device_ids,
219 .flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
214 .ops = { 220 .ops = {
215 .add = asus_hotk_add, 221 .add = asus_hotk_add,
216 .remove = asus_hotk_remove, 222 .remove = asus_hotk_remove,
223 .notify = asus_hotk_notify,
217 }, 224 },
218}; 225};
219 226
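
This hunk is the core of the conversion: instead of installing a raw ACPICA handler in .add(), the driver sets ACPI_DRIVER_ALL_NOTIFY_EVENTS and lets the ACPI core dispatch both system and device notifies to the .notify() callback. The generic pattern, sketched with placeholder example_* names:

static void example_notify(struct acpi_device *device, u32 event)
{
	/* called for system notifies (<= ACPI_MAX_SYS_NOTIFY) and,
	 * because of the flag below, device-specific ones as well */
}

static struct acpi_driver example_driver = {
	.name	= "example",
	.ids	= example_ids,
	.flags	= ACPI_DRIVER_ALL_NOTIFY_EVENTS,
	.ops	= {
		.add	= example_add,
		.remove	= example_remove,
		.notify	= example_notify,
	},
};
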
@@ -323,7 +330,7 @@ static int read_wireless_status(int mask)
323 330
324 rv = acpi_evaluate_integer(wireless_status_handle, NULL, NULL, &status); 331 rv = acpi_evaluate_integer(wireless_status_handle, NULL, NULL, &status);
325 if (ACPI_FAILURE(rv)) 332 if (ACPI_FAILURE(rv))
326 printk(ASUS_WARNING "Error reading Wireless status\n"); 333 pr_warning("Error reading Wireless status\n");
327 else 334 else
328 return (status & mask) ? 1 : 0; 335 return (status & mask) ? 1 : 0;
329 336
@@ -337,7 +344,7 @@ static int read_gps_status(void)
337 344
338 rv = acpi_evaluate_integer(gps_status_handle, NULL, NULL, &status); 345 rv = acpi_evaluate_integer(gps_status_handle, NULL, NULL, &status);
339 if (ACPI_FAILURE(rv)) 346 if (ACPI_FAILURE(rv))
340 printk(ASUS_WARNING "Error reading GPS status\n"); 347 pr_warning("Error reading GPS status\n");
341 else 348 else
342 return status ? 1 : 0; 349 return status ? 1 : 0;
343 350
@@ -377,7 +384,7 @@ static void write_status(acpi_handle handle, int out, int mask)
377 } 384 }
378 385
379 if (write_acpi_int(handle, NULL, out, NULL)) 386 if (write_acpi_int(handle, NULL, out, NULL))
380 printk(ASUS_WARNING " write failed %x\n", mask); 387 pr_warning(" write failed %x\n", mask);
381} 388}
382 389
383/* /sys/class/led handlers */ 390/* /sys/class/led handlers */
@@ -420,7 +427,7 @@ static int set_lcd_state(int value)
420 NULL, NULL, NULL); 427 NULL, NULL, NULL);
421 428
422 if (ACPI_FAILURE(status)) 429 if (ACPI_FAILURE(status))
423 printk(ASUS_WARNING "Error switching LCD\n"); 430 pr_warning("Error switching LCD\n");
424 } 431 }
425 432
426 write_status(NULL, lcd, LCD_ON); 433 write_status(NULL, lcd, LCD_ON);
@@ -444,7 +451,7 @@ static int read_brightness(struct backlight_device *bd)
444 451
445 rv = acpi_evaluate_integer(brightness_get_handle, NULL, NULL, &value); 452 rv = acpi_evaluate_integer(brightness_get_handle, NULL, NULL, &value);
446 if (ACPI_FAILURE(rv)) 453 if (ACPI_FAILURE(rv))
447 printk(ASUS_WARNING "Error reading brightness\n"); 454 pr_warning("Error reading brightness\n");
448 455
449 return value; 456 return value;
450} 457}
@@ -457,7 +464,7 @@ static int set_brightness(struct backlight_device *bd, int value)
457 /* 0 <= value <= 15 */ 464 /* 0 <= value <= 15 */
458 465
459 if (write_acpi_int(brightness_set_handle, NULL, value, NULL)) { 466 if (write_acpi_int(brightness_set_handle, NULL, value, NULL)) {
460 printk(ASUS_WARNING "Error changing brightness\n"); 467 pr_warning("Error changing brightness\n");
461 ret = -EIO; 468 ret = -EIO;
462 } 469 }
463 470
@@ -587,7 +594,7 @@ static ssize_t store_ledd(struct device *dev, struct device_attribute *attr,
587 rv = parse_arg(buf, count, &value); 594 rv = parse_arg(buf, count, &value);
588 if (rv > 0) { 595 if (rv > 0) {
589 if (write_acpi_int(ledd_set_handle, NULL, value, NULL)) 596 if (write_acpi_int(ledd_set_handle, NULL, value, NULL))
590 printk(ASUS_WARNING "LED display write failed\n"); 597 pr_warning("LED display write failed\n");
591 else 598 else
592 hotk->ledd_status = (u32) value; 599 hotk->ledd_status = (u32) value;
593 } 600 }
@@ -632,7 +639,7 @@ static void set_display(int value)
632{ 639{
633 /* no sanity check needed for now */ 640 /* no sanity check needed for now */
634 if (write_acpi_int(display_set_handle, NULL, value, NULL)) 641 if (write_acpi_int(display_set_handle, NULL, value, NULL))
635 printk(ASUS_WARNING "Error setting display\n"); 642 pr_warning("Error setting display\n");
636 return; 643 return;
637} 644}
638 645
@@ -647,7 +654,7 @@ static int read_display(void)
647 rv = acpi_evaluate_integer(display_get_handle, NULL, 654 rv = acpi_evaluate_integer(display_get_handle, NULL,
648 NULL, &value); 655 NULL, &value);
649 if (ACPI_FAILURE(rv)) 656 if (ACPI_FAILURE(rv))
650 printk(ASUS_WARNING "Error reading display status\n"); 657 pr_warning("Error reading display status\n");
651 } 658 }
652 659
653 value &= 0x0F; /* needed for some models, shouldn't hurt others */ 660 value &= 0x0F; /* needed for some models, shouldn't hurt others */
@@ -689,7 +696,7 @@ static ssize_t store_disp(struct device *dev, struct device_attribute *attr,
689static void set_light_sens_switch(int value) 696static void set_light_sens_switch(int value)
690{ 697{
691 if (write_acpi_int(ls_switch_handle, NULL, value, NULL)) 698 if (write_acpi_int(ls_switch_handle, NULL, value, NULL))
692 printk(ASUS_WARNING "Error setting light sensor switch\n"); 699 pr_warning("Error setting light sensor switch\n");
693 hotk->light_switch = value; 700 hotk->light_switch = value;
694} 701}
695 702
@@ -714,7 +721,7 @@ static ssize_t store_lssw(struct device *dev, struct device_attribute *attr,
714static void set_light_sens_level(int value) 721static void set_light_sens_level(int value)
715{ 722{
716 if (write_acpi_int(ls_level_handle, NULL, value, NULL)) 723 if (write_acpi_int(ls_level_handle, NULL, value, NULL))
717 printk(ASUS_WARNING "Error setting light sensor level\n"); 724 pr_warning("Error setting light sensor level\n");
718 hotk->light_level = value; 725 hotk->light_level = value;
719} 726}
720 727
@@ -812,7 +819,7 @@ static int asus_setkeycode(struct input_dev *dev, int scancode, int keycode)
812 return -EINVAL; 819 return -EINVAL;
813} 820}
814 821
815static void asus_hotk_notify(acpi_handle handle, u32 event, void *data) 822static void asus_hotk_notify(struct acpi_device *device, u32 event)
816{ 823{
817 static struct key_entry *key; 824 static struct key_entry *key;
818 u16 count; 825 u16 count;
@@ -975,11 +982,11 @@ static int asus_hotk_get_info(void)
975 */ 982 */
976 status = acpi_get_table(ACPI_SIG_DSDT, 1, &asus_info); 983 status = acpi_get_table(ACPI_SIG_DSDT, 1, &asus_info);
977 if (ACPI_FAILURE(status)) 984 if (ACPI_FAILURE(status))
978 printk(ASUS_WARNING "Couldn't get the DSDT table header\n"); 985 pr_warning("Couldn't get the DSDT table header\n");
979 986
980 /* We have to write 0 on init this far for all ASUS models */ 987 /* We have to write 0 on init this far for all ASUS models */
981 if (write_acpi_int(hotk->handle, "INIT", 0, &buffer)) { 988 if (write_acpi_int(hotk->handle, "INIT", 0, &buffer)) {
982 printk(ASUS_ERR "Hotkey initialization failed\n"); 989 pr_err("Hotkey initialization failed\n");
983 return -ENODEV; 990 return -ENODEV;
984 } 991 }
985 992
@@ -987,9 +994,9 @@ static int asus_hotk_get_info(void)
987 status = 994 status =
988 acpi_evaluate_integer(hotk->handle, "BSTS", NULL, &bsts_result); 995 acpi_evaluate_integer(hotk->handle, "BSTS", NULL, &bsts_result);
989 if (ACPI_FAILURE(status)) 996 if (ACPI_FAILURE(status))
990 printk(ASUS_WARNING "Error calling BSTS\n"); 997 pr_warning("Error calling BSTS\n");
991 else if (bsts_result) 998 else if (bsts_result)
992 printk(ASUS_NOTICE "BSTS called, 0x%02x returned\n", 999 pr_notice("BSTS called, 0x%02x returned\n",
993 (uint) bsts_result); 1000 (uint) bsts_result);
994 1001
995 /* This too ... */ 1002 /* This too ... */
@@ -1020,7 +1027,7 @@ static int asus_hotk_get_info(void)
1020 return -ENOMEM; 1027 return -ENOMEM;
1021 1028
1022 if (*string) 1029 if (*string)
1023 printk(ASUS_NOTICE " %s model detected\n", string); 1030 pr_notice(" %s model detected\n", string);
1024 1031
1025 ASUS_HANDLE_INIT(mled_set); 1032 ASUS_HANDLE_INIT(mled_set);
1026 ASUS_HANDLE_INIT(tled_set); 1033 ASUS_HANDLE_INIT(tled_set);
@@ -1077,7 +1084,7 @@ static int asus_input_init(void)
1077 1084
1078 hotk->inputdev = input_allocate_device(); 1085 hotk->inputdev = input_allocate_device();
1079 if (!hotk->inputdev) { 1086 if (!hotk->inputdev) {
1080 printk(ASUS_INFO "Unable to allocate input device\n"); 1087 pr_info("Unable to allocate input device\n");
1081 return 0; 1088 return 0;
1082 } 1089 }
1083 hotk->inputdev->name = "Asus Laptop extra buttons"; 1090 hotk->inputdev->name = "Asus Laptop extra buttons";
@@ -1096,7 +1103,7 @@ static int asus_input_init(void)
1096 } 1103 }
1097 result = input_register_device(hotk->inputdev); 1104 result = input_register_device(hotk->inputdev);
1098 if (result) { 1105 if (result) {
1099 printk(ASUS_INFO "Unable to register input device\n"); 1106 pr_info("Unable to register input device\n");
1100 input_free_device(hotk->inputdev); 1107 input_free_device(hotk->inputdev);
1101 } 1108 }
1102 return result; 1109 return result;
@@ -1113,7 +1120,7 @@ static int asus_hotk_check(void)
1113 if (hotk->device->status.present) { 1120 if (hotk->device->status.present) {
1114 result = asus_hotk_get_info(); 1121 result = asus_hotk_get_info();
1115 } else { 1122 } else {
1116 printk(ASUS_ERR "Hotkey device not present, aborting\n"); 1123 pr_err("Hotkey device not present, aborting\n");
1117 return -EINVAL; 1124 return -EINVAL;
1118 } 1125 }
1119 1126
@@ -1124,13 +1131,12 @@ static int asus_hotk_found;
1124 1131
1125static int asus_hotk_add(struct acpi_device *device) 1132static int asus_hotk_add(struct acpi_device *device)
1126{ 1133{
1127 acpi_status status = AE_OK;
1128 int result; 1134 int result;
1129 1135
1130 if (!device) 1136 if (!device)
1131 return -EINVAL; 1137 return -EINVAL;
1132 1138
1133 printk(ASUS_NOTICE "Asus Laptop Support version %s\n", 1139 pr_notice("Asus Laptop Support version %s\n",
1134 ASUS_LAPTOP_VERSION); 1140 ASUS_LAPTOP_VERSION);
1135 1141
1136 hotk = kzalloc(sizeof(struct asus_hotk), GFP_KERNEL); 1142 hotk = kzalloc(sizeof(struct asus_hotk), GFP_KERNEL);
@@ -1149,15 +1155,6 @@ static int asus_hotk_add(struct acpi_device *device)
1149 1155
1150 asus_hotk_add_fs(); 1156 asus_hotk_add_fs();
1151 1157
1152 /*
1153 * We install the handler, it will receive the hotk in parameter, so, we
1154 * could add other data to the hotk struct
1155 */
1156 status = acpi_install_notify_handler(hotk->handle, ACPI_ALL_NOTIFY,
1157 asus_hotk_notify, hotk);
1158 if (ACPI_FAILURE(status))
1159 printk(ASUS_ERR "Error installing notify handler\n");
1160
1161 asus_hotk_found = 1; 1158 asus_hotk_found = 1;
1162 1159
1163 /* WLED and BLED are on by default */ 1160 /* WLED and BLED are on by default */
@@ -1198,16 +1195,9 @@ end:
1198 1195
1199static int asus_hotk_remove(struct acpi_device *device, int type) 1196static int asus_hotk_remove(struct acpi_device *device, int type)
1200{ 1197{
1201 acpi_status status = 0;
1202
1203 if (!device || !acpi_driver_data(device)) 1198 if (!device || !acpi_driver_data(device))
1204 return -EINVAL; 1199 return -EINVAL;
1205 1200
1206 status = acpi_remove_notify_handler(hotk->handle, ACPI_ALL_NOTIFY,
1207 asus_hotk_notify);
1208 if (ACPI_FAILURE(status))
1209 printk(ASUS_ERR "Error removing notify handler\n");
1210
1211 kfree(hotk->name); 1201 kfree(hotk->name);
1212 kfree(hotk); 1202 kfree(hotk);
1213 1203
@@ -1260,8 +1250,7 @@ static int asus_backlight_init(struct device *dev)
1260 bd = backlight_device_register(ASUS_HOTK_FILE, dev, 1250 bd = backlight_device_register(ASUS_HOTK_FILE, dev,
1261 NULL, &asusbl_ops); 1251 NULL, &asusbl_ops);
1262 if (IS_ERR(bd)) { 1252 if (IS_ERR(bd)) {
1263 printk(ASUS_ERR 1253 pr_err("Could not register asus backlight device\n");
1264 "Could not register asus backlight device\n");
1265 asus_backlight_device = NULL; 1254 asus_backlight_device = NULL;
1266 return PTR_ERR(bd); 1255 return PTR_ERR(bd);
1267 } 1256 }
@@ -1334,7 +1323,6 @@ out:
1334 1323
1335static int __init asus_laptop_init(void) 1324static int __init asus_laptop_init(void)
1336{ 1325{
1337 struct device *dev;
1338 int result; 1326 int result;
1339 1327
1340 if (acpi_disabled) 1328 if (acpi_disabled)
@@ -1356,24 +1344,10 @@ static int __init asus_laptop_init(void)
1356 return -ENODEV; 1344 return -ENODEV;
1357 } 1345 }
1358 1346
1359 dev = acpi_get_physical_device(hotk->device->handle);
1360
1361 if (!acpi_video_backlight_support()) {
1362 result = asus_backlight_init(dev);
1363 if (result)
1364 goto fail_backlight;
1365 } else
1366 printk(ASUS_INFO "Brightness ignored, must be controlled by "
1367 "ACPI video driver\n");
1368
1369 result = asus_input_init(); 1347 result = asus_input_init();
1370 if (result) 1348 if (result)
1371 goto fail_input; 1349 goto fail_input;
1372 1350
1373 result = asus_led_init(dev);
1374 if (result)
1375 goto fail_led;
1376
1377 /* Register platform stuff */ 1351 /* Register platform stuff */
1378 result = platform_driver_register(&asuspf_driver); 1352 result = platform_driver_register(&asuspf_driver);
1379 if (result) 1353 if (result)
@@ -1394,8 +1368,27 @@ static int __init asus_laptop_init(void)
1394 if (result) 1368 if (result)
1395 goto fail_sysfs; 1369 goto fail_sysfs;
1396 1370
1371 result = asus_led_init(&asuspf_device->dev);
1372 if (result)
1373 goto fail_led;
1374
1375 if (!acpi_video_backlight_support()) {
1376 result = asus_backlight_init(&asuspf_device->dev);
1377 if (result)
1378 goto fail_backlight;
1379 } else
1380 pr_info("Brightness ignored, must be controlled by "
1381 "ACPI video driver\n");
1382
1397 return 0; 1383 return 0;
1398 1384
1385fail_backlight:
1386 asus_led_exit();
1387
1388fail_led:
1389 sysfs_remove_group(&asuspf_device->dev.kobj,
1390 &asuspf_attribute_group);
1391
1399fail_sysfs: 1392fail_sysfs:
1400 platform_device_del(asuspf_device); 1393 platform_device_del(asuspf_device);
1401 1394
@@ -1406,15 +1399,9 @@ fail_platform_device1:
1406 platform_driver_unregister(&asuspf_driver); 1399 platform_driver_unregister(&asuspf_driver);
1407 1400
1408fail_platform_driver: 1401fail_platform_driver:
1409 asus_led_exit();
1410
1411fail_led:
1412 asus_input_exit(); 1402 asus_input_exit();
1413 1403
1414fail_input: 1404fail_input:
1415 asus_backlight_exit();
1416
1417fail_backlight:
1418 1405
1419 return result; 1406 return result;
1420} 1407}
diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c
index ba1f7497e4b9..ddf5240ade8c 100644
--- a/drivers/platform/x86/asus_acpi.c
+++ b/drivers/platform/x86/asus_acpi.c
@@ -455,6 +455,8 @@ static struct asus_hotk *hotk;
455 */ 455 */
456static int asus_hotk_add(struct acpi_device *device); 456static int asus_hotk_add(struct acpi_device *device);
457static int asus_hotk_remove(struct acpi_device *device, int type); 457static int asus_hotk_remove(struct acpi_device *device, int type);
458static void asus_hotk_notify(struct acpi_device *device, u32 event);
459
458static const struct acpi_device_id asus_device_ids[] = { 460static const struct acpi_device_id asus_device_ids[] = {
459 {"ATK0100", 0}, 461 {"ATK0100", 0},
460 {"", 0}, 462 {"", 0},
@@ -465,9 +467,11 @@ static struct acpi_driver asus_hotk_driver = {
465 .name = "asus_acpi", 467 .name = "asus_acpi",
466 .class = ACPI_HOTK_CLASS, 468 .class = ACPI_HOTK_CLASS,
467 .ids = asus_device_ids, 469 .ids = asus_device_ids,
470 .flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
468 .ops = { 471 .ops = {
469 .add = asus_hotk_add, 472 .add = asus_hotk_add,
470 .remove = asus_hotk_remove, 473 .remove = asus_hotk_remove,
474 .notify = asus_hotk_notify,
471 }, 475 },
472}; 476};
473 477
@@ -1101,12 +1105,20 @@ static int asus_hotk_remove_fs(struct acpi_device *device)
1101 return 0; 1105 return 0;
1102} 1106}
1103 1107
1104static void asus_hotk_notify(acpi_handle handle, u32 event, void *data) 1108static void asus_hotk_notify(struct acpi_device *device, u32 event)
1105{ 1109{
1106 /* TODO Find a better way to handle events count. */ 1110 /* TODO Find a better way to handle events count. */
1107 if (!hotk) 1111 if (!hotk)
1108 return; 1112 return;
1109 1113
1114 /*
1115 * The BIOS *should* be sending us device events, but apparently
1116 * Asus uses system events instead, so just ignore any device
1117 * events we get.
1118 */
1119 if (event > ACPI_MAX_SYS_NOTIFY)
1120 return;
1121
1110 if ((event & ~((u32) BR_UP)) < 16) 1122 if ((event & ~((u32) BR_UP)) < 16)
1111 hotk->brightness = (event & ~((u32) BR_UP)); 1123 hotk->brightness = (event & ~((u32) BR_UP));
1112 else if ((event & ~((u32) BR_DOWN)) < 16) 1124 else if ((event & ~((u32) BR_DOWN)) < 16)
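
The new ACPI_MAX_SYS_NOTIFY check encodes the split in the ACPI spec: notify values up to 0x7f are system-defined, higher values are device-specific. A worked check of the filter; the 0x7f constant is quoted from memory of the ACPICA headers, so verify against actypes.h if it matters:

#include <assert.h>

#define ACPI_MAX_SYS_NOTIFY 0x7f	/* boundary per the ACPICA headers */

int main(void)
{
	assert(0x10 <= ACPI_MAX_SYS_NOTIFY);	/* brightness events pass */
	assert(0x80 >  ACPI_MAX_SYS_NOTIFY);	/* device notifies are dropped */
	return 0;
}
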
@@ -1346,15 +1358,6 @@ static int asus_hotk_add(struct acpi_device *device)
1346 if (result) 1358 if (result)
1347 goto end; 1359 goto end;
1348 1360
1349 /*
1350 * We install the handler, it will receive the hotk in parameter, so, we
1351 * could add other data to the hotk struct
1352 */
1353 status = acpi_install_notify_handler(hotk->handle, ACPI_SYSTEM_NOTIFY,
1354 asus_hotk_notify, hotk);
1355 if (ACPI_FAILURE(status))
1356 printk(KERN_ERR " Error installing notify handler\n");
1357
1358 /* For laptops without GPLV: init the hotk->brightness value */ 1361 /* For laptops without GPLV: init the hotk->brightness value */
1359 if ((!hotk->methods->brightness_get) 1362 if ((!hotk->methods->brightness_get)
1360 && (!hotk->methods->brightness_status) 1363 && (!hotk->methods->brightness_status)
@@ -1389,16 +1392,9 @@ end:
1389 1392
1390static int asus_hotk_remove(struct acpi_device *device, int type) 1393static int asus_hotk_remove(struct acpi_device *device, int type)
1391{ 1394{
1392 acpi_status status = 0;
1393
1394 if (!device || !acpi_driver_data(device)) 1395 if (!device || !acpi_driver_data(device))
1395 return -EINVAL; 1396 return -EINVAL;
1396 1397
1397 status = acpi_remove_notify_handler(hotk->handle, ACPI_SYSTEM_NOTIFY,
1398 asus_hotk_notify);
1399 if (ACPI_FAILURE(status))
1400 printk(KERN_ERR "Asus ACPI: Error removing notify handler\n");
1401
1402 asus_hotk_remove_fs(device); 1398 asus_hotk_remove_fs(device);
1403 1399
1404 kfree(hotk); 1400 kfree(hotk);
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 2fab94162147..0f900cc9fa7a 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -46,10 +46,53 @@ struct key_entry {
46 u16 keycode; 46 u16 keycode;
47}; 47};
48 48
49enum { KE_KEY, KE_SW, KE_END }; 49enum { KE_KEY, KE_SW, KE_IGNORE, KE_END };
50
51/*
52 * Certain keys are flagged as KE_IGNORE. All of these are either
53 * notifications (rather than requests for change) or are also sent
54 * via the keyboard controller so should not be sent again.
55 */
50 56
51static struct key_entry dell_wmi_keymap[] = { 57static struct key_entry dell_wmi_keymap[] = {
52 {KE_KEY, 0xe045, KEY_PROG1}, 58 {KE_KEY, 0xe045, KEY_PROG1},
59 {KE_KEY, 0xe009, KEY_EJECTCD},
60
61 /* These also contain the brightness level at offset 6 */
62 {KE_KEY, 0xe006, KEY_BRIGHTNESSUP},
63 {KE_KEY, 0xe005, KEY_BRIGHTNESSDOWN},
64
65 /* Battery health status button */
66 {KE_KEY, 0xe007, KEY_BATTERY},
67
68 /* This is actually for all radios. Although physically a
69 * switch, the notification does not provide an indication of
70 * state and so it should be reported as a key */
71 {KE_KEY, 0xe008, KEY_WLAN},
72
73 /* The next device is at offset 6, the active devices are at
74 offset 8 and the attached devices at offset 10 */
75 {KE_KEY, 0xe00b, KEY_DISPLAYTOGGLE},
76
77 {KE_IGNORE, 0xe00c, KEY_KBDILLUMTOGGLE},
78
79 /* BIOS error detected */
80 {KE_IGNORE, 0xe00d, KEY_RESERVED},
81
82 /* Wifi Catcher */
83 {KE_KEY, 0xe011, KEY_PROG2},
84
85 /* Ambient light sensor toggle */
86 {KE_IGNORE, 0xe013, KEY_RESERVED},
87
88 {KE_IGNORE, 0xe020, KEY_MUTE},
89 {KE_IGNORE, 0xe02e, KEY_VOLUMEDOWN},
90 {KE_IGNORE, 0xe030, KEY_VOLUMEUP},
91 {KE_IGNORE, 0xe033, KEY_KBDILLUMUP},
92 {KE_IGNORE, 0xe034, KEY_KBDILLUMDOWN},
93 {KE_IGNORE, 0xe03a, KEY_CAPSLOCK},
94 {KE_IGNORE, 0xe045, KEY_NUMLOCK},
95 {KE_IGNORE, 0xe046, KEY_SCROLLLOCK},
53 {KE_END, 0} 96 {KE_END, 0}
54}; 97};
55 98
@@ -122,15 +165,20 @@ static void dell_wmi_notify(u32 value, void *context)
122 165
123 if (obj && obj->type == ACPI_TYPE_BUFFER) { 166 if (obj && obj->type == ACPI_TYPE_BUFFER) {
124 int *buffer = (int *)obj->buffer.pointer; 167 int *buffer = (int *)obj->buffer.pointer;
125 key = dell_wmi_get_entry_by_scancode(buffer[1]); 168 /*
169 * The upper bytes of the event may contain
170 * additional information, so mask them off for the
171 * scancode lookup
172 */
173 key = dell_wmi_get_entry_by_scancode(buffer[1] & 0xFFFF);
126 if (key) { 174 if (key) {
127 input_report_key(dell_wmi_input_dev, key->keycode, 1); 175 input_report_key(dell_wmi_input_dev, key->keycode, 1);
128 input_sync(dell_wmi_input_dev); 176 input_sync(dell_wmi_input_dev);
129 input_report_key(dell_wmi_input_dev, key->keycode, 0); 177 input_report_key(dell_wmi_input_dev, key->keycode, 0);
130 input_sync(dell_wmi_input_dev); 178 input_sync(dell_wmi_input_dev);
131 } else 179 } else if (buffer[1] & 0xFFFF)
132 printk(KERN_INFO "dell-wmi: Unknown key %x pressed\n", 180 printk(KERN_INFO "dell-wmi: Unknown key %x pressed\n",
133 buffer[1]); 181 buffer[1] & 0xFFFF);
134 } 182 }
135} 183}
136 184
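
dell_wmi_get_entry_by_scancode() itself is outside this hunk; judging from the analogous helper in hp-wmi it is a linear scan over the keymap, ending at the KE_END sentinel. Note that KE_IGNORE entries still match, which is exactly what keeps those scancodes out of the "Unknown key" message. A sketch, with the `code` field name assumed from the keymap initializers above:

static struct key_entry *dell_wmi_get_entry_by_scancode(int code)
{
	struct key_entry *key;

	for (key = dell_wmi_keymap; key->type != KE_END; key++)
		if (code == key->code)
			return key;

	return NULL;
}
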
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 8153b3e59189..4207b26ff990 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -62,7 +62,10 @@ enum {
62 DISABLE_ASL_GPS = 0x0020, 62 DISABLE_ASL_GPS = 0x0020,
63 DISABLE_ASL_DISPLAYSWITCH = 0x0040, 63 DISABLE_ASL_DISPLAYSWITCH = 0x0040,
64 DISABLE_ASL_MODEM = 0x0080, 64 DISABLE_ASL_MODEM = 0x0080,
65 DISABLE_ASL_CARDREADER = 0x0100 65 DISABLE_ASL_CARDREADER = 0x0100,
66 DISABLE_ASL_3G = 0x0200,
67 DISABLE_ASL_WIMAX = 0x0400,
68 DISABLE_ASL_HWCF = 0x0800
66}; 69};
67 70
68enum { 71enum {
@@ -87,7 +90,13 @@ enum {
87 CM_ASL_USBPORT3, 90 CM_ASL_USBPORT3,
88 CM_ASL_MODEM, 91 CM_ASL_MODEM,
89 CM_ASL_CARDREADER, 92 CM_ASL_CARDREADER,
90 CM_ASL_LID 93 CM_ASL_3G,
94 CM_ASL_WIMAX,
95 CM_ASL_HWCF,
96 CM_ASL_LID,
97 CM_ASL_TYPE,
98 CM_ASL_PANELPOWER, /*P901*/
99 CM_ASL_TPD
91}; 100};
92 101
93static const char *cm_getv[] = { 102static const char *cm_getv[] = {
@@ -96,7 +105,8 @@ static const char *cm_getv[] = {
96 NULL, "PBLG", NULL, NULL, 105 NULL, "PBLG", NULL, NULL,
97 "CFVG", NULL, NULL, NULL, 106 "CFVG", NULL, NULL, NULL,
98 "USBG", NULL, NULL, "MODG", 107 "USBG", NULL, NULL, "MODG",
99 "CRDG", "LIDG" 108 "CRDG", "M3GG", "WIMG", "HWCF",
109 "LIDG", "TYPE", "PBPG", "TPDG"
100}; 110};
101 111
102static const char *cm_setv[] = { 112static const char *cm_setv[] = {
@@ -105,7 +115,8 @@ static const char *cm_setv[] = {
105 "SDSP", "PBLS", "HDPS", NULL, 115 "SDSP", "PBLS", "HDPS", NULL,
106 "CFVS", NULL, NULL, NULL, 116 "CFVS", NULL, NULL, NULL,
107 "USBG", NULL, NULL, "MODS", 117 "USBG", NULL, NULL, "MODS",
108 "CRDS", NULL 118 "CRDS", "M3GS", "WIMS", NULL,
119 NULL, NULL, "PBPS", "TPDS"
109}; 120};
110 121
111#define EEEPC_EC "\\_SB.PCI0.SBRG.EC0." 122#define EEEPC_EC "\\_SB.PCI0.SBRG.EC0."
@@ -181,6 +192,7 @@ static struct key_entry eeepc_keymap[] = {
181static int eeepc_hotk_add(struct acpi_device *device); 192static int eeepc_hotk_add(struct acpi_device *device);
182static int eeepc_hotk_remove(struct acpi_device *device, int type); 193static int eeepc_hotk_remove(struct acpi_device *device, int type);
183static int eeepc_hotk_resume(struct acpi_device *device); 194static int eeepc_hotk_resume(struct acpi_device *device);
195static void eeepc_hotk_notify(struct acpi_device *device, u32 event);
184 196
185static const struct acpi_device_id eeepc_device_ids[] = { 197static const struct acpi_device_id eeepc_device_ids[] = {
186 {EEEPC_HOTK_HID, 0}, 198 {EEEPC_HOTK_HID, 0},
@@ -192,10 +204,12 @@ static struct acpi_driver eeepc_hotk_driver = {
192 .name = EEEPC_HOTK_NAME, 204 .name = EEEPC_HOTK_NAME,
193 .class = EEEPC_HOTK_CLASS, 205 .class = EEEPC_HOTK_CLASS,
194 .ids = eeepc_device_ids, 206 .ids = eeepc_device_ids,
207 .flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
195 .ops = { 208 .ops = {
196 .add = eeepc_hotk_add, 209 .add = eeepc_hotk_add,
197 .remove = eeepc_hotk_remove, 210 .remove = eeepc_hotk_remove,
198 .resume = eeepc_hotk_resume, 211 .resume = eeepc_hotk_resume,
212 .notify = eeepc_hotk_notify,
199 }, 213 },
200}; 214};
201 215
@@ -318,6 +332,15 @@ static const struct rfkill_ops eeepc_rfkill_ops = {
318 .set_block = eeepc_rfkill_set, 332 .set_block = eeepc_rfkill_set,
319}; 333};
320 334
335static void __init eeepc_enable_camera(void)
336{
337 /*
338 * If the following call to set_acpi() fails, it's because there's no
339 * camera so we can ignore the error.
340 */
341 set_acpi(CM_ASL_CAMERA, 1);
342}
343
321/* 344/*
322 * Sys helpers 345 * Sys helpers
323 */ 346 */
@@ -369,13 +392,88 @@ static ssize_t show_sys_acpi(int cm, char *buf)
369EEEPC_CREATE_DEVICE_ATTR(camera, CM_ASL_CAMERA); 392EEEPC_CREATE_DEVICE_ATTR(camera, CM_ASL_CAMERA);
370EEEPC_CREATE_DEVICE_ATTR(cardr, CM_ASL_CARDREADER); 393EEEPC_CREATE_DEVICE_ATTR(cardr, CM_ASL_CARDREADER);
371EEEPC_CREATE_DEVICE_ATTR(disp, CM_ASL_DISPLAYSWITCH); 394EEEPC_CREATE_DEVICE_ATTR(disp, CM_ASL_DISPLAYSWITCH);
372EEEPC_CREATE_DEVICE_ATTR(cpufv, CM_ASL_CPUFV); 395
396struct eeepc_cpufv {
397 int num;
398 int cur;
399};
400
401static int get_cpufv(struct eeepc_cpufv *c)
402{
403 c->cur = get_acpi(CM_ASL_CPUFV);
404 c->num = (c->cur >> 8) & 0xff;
405 c->cur &= 0xff;
406 if (c->cur < 0 || c->num <= 0 || c->num > 12)
407 return -ENODEV;
408 return 0;
409}
410
411static ssize_t show_available_cpufv(struct device *dev,
412 struct device_attribute *attr,
413 char *buf)
414{
415 struct eeepc_cpufv c;
416 int i;
417 ssize_t len = 0;
418
419 if (get_cpufv(&c))
420 return -ENODEV;
421 for (i = 0; i < c.num; i++)
422 len += sprintf(buf + len, "%d ", i);
423 len += sprintf(buf + len, "\n");
424 return len;
425}
426
427static ssize_t show_cpufv(struct device *dev,
428 struct device_attribute *attr,
429 char *buf)
430{
431 struct eeepc_cpufv c;
432
433 if (get_cpufv(&c))
434 return -ENODEV;
435 return sprintf(buf, "%#x\n", (c.num << 8) | c.cur);
436}
437
438static ssize_t store_cpufv(struct device *dev,
439 struct device_attribute *attr,
440 const char *buf, size_t count)
441{
442 struct eeepc_cpufv c;
443 int rv, value;
444
445 if (get_cpufv(&c))
446 return -ENODEV;
447 rv = parse_arg(buf, count, &value);
448 if (rv < 0)
449 return rv;
450 if (!rv || value < 0 || value >= c.num)
451 return -EINVAL;
452 set_acpi(CM_ASL_CPUFV, value);
453 return rv;
454}
455
456static struct device_attribute dev_attr_cpufv = {
457 .attr = {
458 .name = "cpufv",
459 .mode = 0644 },
460 .show = show_cpufv,
461 .store = store_cpufv
462};
463
464static struct device_attribute dev_attr_available_cpufv = {
465 .attr = {
466 .name = "available_cpufv",
467 .mode = 0444 },
468 .show = show_available_cpufv
469};
373 470
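
A worked example of the CM_ASL_CPUFV encoding these handlers share: the mode count lives in bits 15:8 and the current mode in bits 7:0 (the raw value below is hypothetical):

#include <assert.h>

int main(void)
{
	int raw = 0x0301;		/* as if returned by get_acpi(CM_ASL_CPUFV) */
	int num = (raw >> 8) & 0xff;	/* 3 modes available */
	int cur = raw & 0xff;		/* currently in mode 1 */

	assert(num == 3 && cur == 1);
	/* available_cpufv would read "0 1 2", cpufv would read "0x301",
	 * and writing "2" to cpufv selects the third mode */
	return 0;
}
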
374static struct attribute *platform_attributes[] = { 471static struct attribute *platform_attributes[] = {
375 &dev_attr_camera.attr, 472 &dev_attr_camera.attr,
376 &dev_attr_cardr.attr, 473 &dev_attr_cardr.attr,
377 &dev_attr_disp.attr, 474 &dev_attr_disp.attr,
378 &dev_attr_cpufv.attr, 475 &dev_attr_cpufv.attr,
476 &dev_attr_available_cpufv.attr,
379 NULL 477 NULL
380}; 478};
381 479
@@ -558,7 +656,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data)
558 eeepc_rfkill_hotplug(); 656 eeepc_rfkill_hotplug();
559} 657}
560 658
561static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data) 659static void eeepc_hotk_notify(struct acpi_device *device, u32 event)
562{ 660{
563 static struct key_entry *key; 661 static struct key_entry *key;
564 u16 count; 662 u16 count;
@@ -566,6 +664,8 @@ static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data)
566 664
567 if (!ehotk) 665 if (!ehotk)
568 return; 666 return;
667 if (event > ACPI_MAX_SYS_NOTIFY)
668 return;
569 if (event >= NOTIFY_BRN_MIN && event <= NOTIFY_BRN_MAX) 669 if (event >= NOTIFY_BRN_MIN && event <= NOTIFY_BRN_MAX)
570 brn = notify_brn(); 670 brn = notify_brn();
571 count = ehotk->event_count[event % 128]++; 671 count = ehotk->event_count[event % 128]++;
@@ -646,7 +746,6 @@ static void eeepc_unregister_rfkill_notifier(char *node)
646 746
647static int eeepc_hotk_add(struct acpi_device *device) 747static int eeepc_hotk_add(struct acpi_device *device)
648{ 748{
649 acpi_status status = AE_OK;
650 int result; 749 int result;
651 750
652 if (!device) 751 if (!device)
@@ -664,10 +763,6 @@ static int eeepc_hotk_add(struct acpi_device *device)
664 result = eeepc_hotk_check(); 763 result = eeepc_hotk_check();
665 if (result) 764 if (result)
666 goto ehotk_fail; 765 goto ehotk_fail;
667 status = acpi_install_notify_handler(ehotk->handle, ACPI_SYSTEM_NOTIFY,
668 eeepc_hotk_notify, ehotk);
669 if (ACPI_FAILURE(status))
670 printk(EEEPC_ERR "Error installing notify handler\n");
671 766
672 eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6"); 767 eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6");
673 eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); 768 eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7");
@@ -725,14 +820,8 @@ static int eeepc_hotk_add(struct acpi_device *device)
725 820
726static int eeepc_hotk_remove(struct acpi_device *device, int type) 821static int eeepc_hotk_remove(struct acpi_device *device, int type)
727{ 822{
728 acpi_status status = 0;
729
730 if (!device || !acpi_driver_data(device)) 823 if (!device || !acpi_driver_data(device))
731 return -EINVAL; 824 return -EINVAL;
732 status = acpi_remove_notify_handler(ehotk->handle, ACPI_SYSTEM_NOTIFY,
733 eeepc_hotk_notify);
734 if (ACPI_FAILURE(status))
735 printk(EEEPC_ERR "Error removing notify handler\n");
736 825
737 eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P6"); 826 eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P6");
738 eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7"); 827 eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7");
@@ -989,6 +1078,9 @@ static int __init eeepc_laptop_init(void)
989 result = eeepc_hwmon_init(dev); 1078 result = eeepc_hwmon_init(dev);
990 if (result) 1079 if (result)
991 goto fail_hwmon; 1080 goto fail_hwmon;
1081
1082 eeepc_enable_camera();
1083
992 /* Register platform stuff */ 1084 /* Register platform stuff */
993 result = platform_driver_register(&platform_driver); 1085 result = platform_driver_register(&platform_driver);
994 if (result) 1086 if (result)
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 16fffe44e333..4ac2311c00af 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -47,7 +47,7 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
47#define HPWMI_DISPLAY_QUERY 0x1 47#define HPWMI_DISPLAY_QUERY 0x1
48#define HPWMI_HDDTEMP_QUERY 0x2 48#define HPWMI_HDDTEMP_QUERY 0x2
49#define HPWMI_ALS_QUERY 0x3 49#define HPWMI_ALS_QUERY 0x3
50#define HPWMI_DOCK_QUERY 0x4 50#define HPWMI_HARDWARE_QUERY 0x4
51#define HPWMI_WIRELESS_QUERY 0x5 51#define HPWMI_WIRELESS_QUERY 0x5
52#define HPWMI_HOTKEY_QUERY 0xc 52#define HPWMI_HOTKEY_QUERY 0xc
53 53
@@ -75,10 +75,9 @@ struct key_entry {
75 u16 keycode; 75 u16 keycode;
76}; 76};
77 77
78enum { KE_KEY, KE_SW, KE_END }; 78enum { KE_KEY, KE_END };
79 79
80static struct key_entry hp_wmi_keymap[] = { 80static struct key_entry hp_wmi_keymap[] = {
81 {KE_SW, 0x01, SW_DOCK},
82 {KE_KEY, 0x02, KEY_BRIGHTNESSUP}, 81 {KE_KEY, 0x02, KEY_BRIGHTNESSUP},
83 {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN}, 82 {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
84 {KE_KEY, 0x20e6, KEY_PROG1}, 83 {KE_KEY, 0x20e6, KEY_PROG1},
@@ -151,7 +150,22 @@ static int hp_wmi_als_state(void)
151 150
152static int hp_wmi_dock_state(void) 151static int hp_wmi_dock_state(void)
153{ 152{
154 return hp_wmi_perform_query(HPWMI_DOCK_QUERY, 0, 0); 153 int ret = hp_wmi_perform_query(HPWMI_HARDWARE_QUERY, 0, 0);
154
155 if (ret < 0)
156 return ret;
157
158 return ret & 0x1;
159}
160
161static int hp_wmi_tablet_state(void)
162{
163 int ret = hp_wmi_perform_query(HPWMI_HARDWARE_QUERY, 0, 0);
164
165 if (ret < 0)
166 return ret;
167
168 return (ret & 0x4) ? 1 : 0;
155} 169}
156 170
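
Both helpers now share HPWMI_HARDWARE_QUERY and pick their bit out of the same word: bit 0 is the dock state, bit 2 the tablet switch. A decode of a hypothetical raw value:

#include <assert.h>

int main(void)
{
	int ret = 0x5;	/* hypothetical: docked and in tablet mode */

	assert((ret & 0x1) == 1);		/* hp_wmi_dock_state() */
	assert(((ret & 0x4) ? 1 : 0) == 1);	/* hp_wmi_tablet_state() */
	return 0;
}
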
157static int hp_wmi_set_block(void *data, bool blocked) 171static int hp_wmi_set_block(void *data, bool blocked)
@@ -232,6 +246,15 @@ static ssize_t show_dock(struct device *dev, struct device_attribute *attr,
232 return sprintf(buf, "%d\n", value); 246 return sprintf(buf, "%d\n", value);
233} 247}
234 248
249static ssize_t show_tablet(struct device *dev, struct device_attribute *attr,
250 char *buf)
251{
252 int value = hp_wmi_tablet_state();
253 if (value < 0)
254 return -EINVAL;
255 return sprintf(buf, "%d\n", value);
256}
257
235static ssize_t set_als(struct device *dev, struct device_attribute *attr, 258static ssize_t set_als(struct device *dev, struct device_attribute *attr,
236 const char *buf, size_t count) 259 const char *buf, size_t count)
237{ 260{
@@ -244,6 +267,7 @@ static DEVICE_ATTR(display, S_IRUGO, show_display, NULL);
244static DEVICE_ATTR(hddtemp, S_IRUGO, show_hddtemp, NULL); 267static DEVICE_ATTR(hddtemp, S_IRUGO, show_hddtemp, NULL);
245static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als); 268static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als);
246static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL); 269static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL);
270static DEVICE_ATTR(tablet, S_IRUGO, show_tablet, NULL);
247 271
248static struct key_entry *hp_wmi_get_entry_by_scancode(int code) 272static struct key_entry *hp_wmi_get_entry_by_scancode(int code)
249{ 273{
@@ -326,13 +350,13 @@ static void hp_wmi_notify(u32 value, void *context)
326 key->keycode, 0); 350 key->keycode, 0);
327 input_sync(hp_wmi_input_dev); 351 input_sync(hp_wmi_input_dev);
328 break; 352 break;
329 case KE_SW:
330 input_report_switch(hp_wmi_input_dev,
331 key->keycode,
332 hp_wmi_dock_state());
333 input_sync(hp_wmi_input_dev);
334 break;
335 } 353 }
354 } else if (eventcode == 0x1) {
355 input_report_switch(hp_wmi_input_dev, SW_DOCK,
356 hp_wmi_dock_state());
357 input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
358 hp_wmi_tablet_state());
359 input_sync(hp_wmi_input_dev);
336 } else if (eventcode == 0x5) { 360 } else if (eventcode == 0x5) {
337 if (wifi_rfkill) 361 if (wifi_rfkill)
338 rfkill_set_sw_state(wifi_rfkill, 362 rfkill_set_sw_state(wifi_rfkill,
@@ -369,18 +393,19 @@ static int __init hp_wmi_input_setup(void)
369 set_bit(EV_KEY, hp_wmi_input_dev->evbit); 393 set_bit(EV_KEY, hp_wmi_input_dev->evbit);
370 set_bit(key->keycode, hp_wmi_input_dev->keybit); 394 set_bit(key->keycode, hp_wmi_input_dev->keybit);
371 break; 395 break;
372 case KE_SW:
373 set_bit(EV_SW, hp_wmi_input_dev->evbit);
374 set_bit(key->keycode, hp_wmi_input_dev->swbit);
375
376 /* Set initial dock state */
377 input_report_switch(hp_wmi_input_dev, key->keycode,
378 hp_wmi_dock_state());
379 input_sync(hp_wmi_input_dev);
380 break;
381 } 396 }
382 } 397 }
383 398
399 set_bit(EV_SW, hp_wmi_input_dev->evbit);
400 set_bit(SW_DOCK, hp_wmi_input_dev->swbit);
401 set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit);
402
403 /* Set initial hardware state */
404 input_report_switch(hp_wmi_input_dev, SW_DOCK, hp_wmi_dock_state());
405 input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
406 hp_wmi_tablet_state());
407 input_sync(hp_wmi_input_dev);
408
384 err = input_register_device(hp_wmi_input_dev); 409 err = input_register_device(hp_wmi_input_dev);
385 410
386 if (err) { 411 if (err) {
@@ -397,6 +422,7 @@ static void cleanup_sysfs(struct platform_device *device)
397 device_remove_file(&device->dev, &dev_attr_hddtemp); 422 device_remove_file(&device->dev, &dev_attr_hddtemp);
398 device_remove_file(&device->dev, &dev_attr_als); 423 device_remove_file(&device->dev, &dev_attr_als);
399 device_remove_file(&device->dev, &dev_attr_dock); 424 device_remove_file(&device->dev, &dev_attr_dock);
425 device_remove_file(&device->dev, &dev_attr_tablet);
400} 426}
401 427
402static int __init hp_wmi_bios_setup(struct platform_device *device) 428static int __init hp_wmi_bios_setup(struct platform_device *device)
@@ -416,6 +442,9 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
416 err = device_create_file(&device->dev, &dev_attr_dock); 442 err = device_create_file(&device->dev, &dev_attr_dock);
417 if (err) 443 if (err)
418 goto add_sysfs_error; 444 goto add_sysfs_error;
445 err = device_create_file(&device->dev, &dev_attr_tablet);
446 if (err)
447 goto add_sysfs_error;
419 448
420 if (wireless & 0x1) { 449 if (wireless & 0x1) {
421 wifi_rfkill = rfkill_alloc("hp-wifi", &device->dev, 450 wifi_rfkill = rfkill_alloc("hp-wifi", &device->dev,
@@ -485,23 +514,17 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device)
485 514
486static int hp_wmi_resume_handler(struct platform_device *device) 515static int hp_wmi_resume_handler(struct platform_device *device)
487{ 516{
488 struct key_entry *key;
489
490 /* 517 /*
491 * Docking state may have changed while suspended, so trigger 518 * Hardware state may have changed while suspended, so trigger
492 * an input event for the current state. As this is a switch, 519 * input events for the current state. As this is a switch,
493 * the input layer will only actually pass it on if the state 520 * the input layer will only actually pass it on if the state
494 * changed. 521 * changed.
495 */ 522 */
496 for (key = hp_wmi_keymap; key->type != KE_END; key++) { 523
497 switch (key->type) { 524 input_report_switch(hp_wmi_input_dev, SW_DOCK, hp_wmi_dock_state());
498 case KE_SW: 525 input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE,
499 input_report_switch(hp_wmi_input_dev, key->keycode, 526 hp_wmi_tablet_state());
500 hp_wmi_dock_state()); 527 input_sync(hp_wmi_input_dev);
501 input_sync(hp_wmi_input_dev);
502 break;
503 }
504 }
505 528
506 return 0; 529 return 0;
507} 530}
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 40d64c03278c..a463fd72c495 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -22,7 +22,7 @@
22 */ 22 */
23 23
24#define TPACPI_VERSION "0.23" 24#define TPACPI_VERSION "0.23"
25#define TPACPI_SYSFS_VERSION 0x020300 25#define TPACPI_SYSFS_VERSION 0x020400
26 26
27/* 27/*
28 * Changelog: 28 * Changelog:
@@ -257,6 +257,8 @@ static struct {
257 u32 wan:1; 257 u32 wan:1;
258 u32 uwb:1; 258 u32 uwb:1;
259 u32 fan_ctrl_status_undef:1; 259 u32 fan_ctrl_status_undef:1;
260 u32 second_fan:1;
261 u32 beep_needs_two_args:1;
260 u32 input_device_registered:1; 262 u32 input_device_registered:1;
261 u32 platform_drv_registered:1; 263 u32 platform_drv_registered:1;
262 u32 platform_drv_attrs_registered:1; 264 u32 platform_drv_attrs_registered:1;
@@ -277,8 +279,10 @@ struct thinkpad_id_data {
277 char *bios_version_str; /* Something like 1ZET51WW (1.03z) */ 279 char *bios_version_str; /* Something like 1ZET51WW (1.03z) */
278 char *ec_version_str; /* Something like 1ZHT51WW-1.04a */ 280 char *ec_version_str; /* Something like 1ZHT51WW-1.04a */
279 281
280 u16 bios_model; /* Big Endian, TP-1Y = 0x5931, 0 = unknown */ 282 u16 bios_model; /* 1Y = 0x5931, 0 = unknown */
281 u16 ec_model; 283 u16 ec_model;
284 u16 bios_release; /* 1ZETK1WW = 0x314b, 0 = unknown */
285 u16 ec_release;
282 286
283 char *model_str; /* ThinkPad T43 */ 287 char *model_str; /* ThinkPad T43 */
284 char *nummodel_str; /* 9384A9C for a 9384-A9C model */ 288 char *nummodel_str; /* 9384A9C for a 9384-A9C model */
@@ -355,6 +359,73 @@ static void tpacpi_log_usertask(const char * const what)
355 } \ 359 } \
356 } while (0) 360 } while (0)
357 361
362/*
363 * Quirk handling helpers
364 *
365 * ThinkPad IDs and versions seen in the field so far
366 * are two characters from the set [0-9A-Z], i.e. base 36.
367 *
368 * We use values well outside that range as specials.
369 */
370
371#define TPACPI_MATCH_ANY 0xffffU
372#define TPACPI_MATCH_UNKNOWN 0U
373
374/* TPID('1', 'Y') == 0x5931 */
375#define TPID(__c1, __c2) (((__c2) << 8) | (__c1))
376
377#define TPACPI_Q_IBM(__id1, __id2, __quirk) \
378 { .vendor = PCI_VENDOR_ID_IBM, \
379 .bios = TPID(__id1, __id2), \
380 .ec = TPACPI_MATCH_ANY, \
381 .quirks = (__quirk) }
382
383#define TPACPI_Q_LNV(__id1, __id2, __quirk) \
384 { .vendor = PCI_VENDOR_ID_LENOVO, \
385 .bios = TPID(__id1, __id2), \
386 .ec = TPACPI_MATCH_ANY, \
387 .quirks = (__quirk) }
388
389struct tpacpi_quirk {
390 unsigned int vendor;
391 u16 bios;
392 u16 ec;
393 unsigned long quirks;
394};
395
396/**
397 * tpacpi_check_quirks() - search BIOS/EC version on a list
398 * @qlist: array of &struct tpacpi_quirk
399 * @qlist_size: number of elements in @qlist
400 *
401 * Iterates over a quirks list until one is found that matches the
402 * ThinkPad's vendor, BIOS and EC model.
403 *
404 * Returns 0 if nothing matches, otherwise returns the quirks field of
405 * the matching &struct tpacpi_quirk entry.
406 *
407 * The match criteria are: vendor, bios and ec must all match.
408 */
409static unsigned long __init tpacpi_check_quirks(
410 const struct tpacpi_quirk *qlist,
411 unsigned int qlist_size)
412{
413 while (qlist_size) {
414 if ((qlist->vendor == thinkpad_id.vendor ||
415 qlist->vendor == TPACPI_MATCH_ANY) &&
416 (qlist->bios == thinkpad_id.bios_model ||
417 qlist->bios == TPACPI_MATCH_ANY) &&
418 (qlist->ec == thinkpad_id.ec_model ||
419 qlist->ec == TPACPI_MATCH_ANY))
420 return qlist->quirks;
421
422 qlist_size--;
423 qlist++;
424 }
425 return 0;
426}
427
428
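
The TPID() packing is little more than two ASCII bytes side by side, which is easy to verify against the 0x5931 example given in the comment above:

#include <assert.h>

#define TPID(c1, c2) (((c2) << 8) | (c1))

int main(void)
{
	/* '1' is 0x31 and 'Y' is 0x59, so "TP-1Y" packs to 0x5931 */
	assert(TPID('1', 'Y') == 0x5931);
	return 0;
}
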
358/**************************************************************************** 429/****************************************************************************
359 **************************************************************************** 430 ****************************************************************************
360 * 431 *
@@ -2880,7 +2951,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
2880 /* update bright_acpimode... */ 2951 /* update bright_acpimode... */
2881 tpacpi_check_std_acpi_brightness_support(); 2952 tpacpi_check_std_acpi_brightness_support();
2882 2953
2883 if (tp_features.bright_acpimode) { 2954 if (tp_features.bright_acpimode && acpi_video_backlight_support()) {
2884 printk(TPACPI_INFO 2955 printk(TPACPI_INFO
2885 "This ThinkPad has standard ACPI backlight " 2956 "This ThinkPad has standard ACPI backlight "
2886 "brightness control, supported by the ACPI " 2957 "brightness control, supported by the ACPI "
@@ -4773,7 +4844,7 @@ TPACPI_HANDLE(led, ec, "SLED", /* 570 */
4773 "LED", /* all others */ 4844 "LED", /* all others */
4774 ); /* R30, R31 */ 4845 ); /* R30, R31 */
4775 4846
4776#define TPACPI_LED_NUMLEDS 8 4847#define TPACPI_LED_NUMLEDS 16
4777static struct tpacpi_led_classdev *tpacpi_leds; 4848static struct tpacpi_led_classdev *tpacpi_leds;
4778static enum led_status_t tpacpi_led_state_cache[TPACPI_LED_NUMLEDS]; 4849static enum led_status_t tpacpi_led_state_cache[TPACPI_LED_NUMLEDS];
4779static const char * const tpacpi_led_names[TPACPI_LED_NUMLEDS] = { 4850static const char * const tpacpi_led_names[TPACPI_LED_NUMLEDS] = {
@@ -4786,15 +4857,20 @@ static const char * const tpacpi_led_names[TPACPI_LED_NUMLEDS] = {
4786 "tpacpi::dock_batt", 4857 "tpacpi::dock_batt",
4787 "tpacpi::unknown_led", 4858 "tpacpi::unknown_led",
4788 "tpacpi::standby", 4859 "tpacpi::standby",
4860 "tpacpi::dock_status1",
4861 "tpacpi::dock_status2",
4862 "tpacpi::unknown_led2",
4863 "tpacpi::unknown_led3",
4864 "tpacpi::thinkvantage",
4789}; 4865};
4790#define TPACPI_SAFE_LEDS 0x0081U 4866#define TPACPI_SAFE_LEDS 0x1081U
4791 4867
4792static inline bool tpacpi_is_led_restricted(const unsigned int led) 4868static inline bool tpacpi_is_led_restricted(const unsigned int led)
4793{ 4869{
4794#ifdef CONFIG_THINKPAD_ACPI_UNSAFE_LEDS 4870#ifdef CONFIG_THINKPAD_ACPI_UNSAFE_LEDS
4795 return false; 4871 return false;
4796#else 4872#else
4797 return (TPACPI_SAFE_LEDS & (1 << led)) == 0; 4873 return (1U & (TPACPI_SAFE_LEDS >> led)) == 0;
4798#endif 4874#endif
4799} 4875}
4800 4876
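
The new TPACPI_SAFE_LEDS value 0x1081 has bits 0, 7 and 12 set; going by the name table, that whitelists the power, standby and thinkvantage LEDs (the first entries of the table sit outside this hunk, so the mapping of bit 0 is inferred). A quick check of the rewritten restriction test:

#include <assert.h>

#define TPACPI_SAFE_LEDS 0x1081U

static int is_restricted(unsigned int led)
{
	return (1U & (TPACPI_SAFE_LEDS >> led)) == 0;
}

int main(void)
{
	assert(!is_restricted(0));	/* tpacpi::power */
	assert(!is_restricted(7));	/* tpacpi::standby */
	assert(!is_restricted(12));	/* tpacpi::thinkvantage */
	assert(is_restricted(8));	/* tpacpi::dock_status1 stays off-limits */
	return 0;
}
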
@@ -4956,6 +5032,10 @@ static int __init tpacpi_init_led(unsigned int led)
4956 5032
4957 tpacpi_leds[led].led = led; 5033 tpacpi_leds[led].led = led;
4958 5034
5035 /* LEDs with no name don't get registered */
5036 if (!tpacpi_led_names[led])
5037 return 0;
5038
4959 tpacpi_leds[led].led_classdev.brightness_set = &led_sysfs_set; 5039 tpacpi_leds[led].led_classdev.brightness_set = &led_sysfs_set;
4960 tpacpi_leds[led].led_classdev.blink_set = &led_sysfs_blink_set; 5040 tpacpi_leds[led].led_classdev.blink_set = &led_sysfs_blink_set;
4961 if (led_supported == TPACPI_LED_570) 5041 if (led_supported == TPACPI_LED_570)
@@ -4974,10 +5054,59 @@ static int __init tpacpi_init_led(unsigned int led)
4974 return rc; 5054 return rc;
4975} 5055}
4976 5056
5057static const struct tpacpi_quirk led_useful_qtable[] __initconst = {
5058 TPACPI_Q_IBM('1', 'E', 0x009f), /* A30 */
5059 TPACPI_Q_IBM('1', 'N', 0x009f), /* A31 */
5060 TPACPI_Q_IBM('1', 'G', 0x009f), /* A31 */
5061
5062 TPACPI_Q_IBM('1', 'I', 0x0097), /* T30 */
5063 TPACPI_Q_IBM('1', 'R', 0x0097), /* T40, T41, T42, R50, R51 */
5064 TPACPI_Q_IBM('7', '0', 0x0097), /* T43, R52 */
5065 TPACPI_Q_IBM('1', 'Y', 0x0097), /* T43 */
5066 TPACPI_Q_IBM('1', 'W', 0x0097), /* R50e */
5067 TPACPI_Q_IBM('1', 'V', 0x0097), /* R51 */
5068 TPACPI_Q_IBM('7', '8', 0x0097), /* R51e */
5069 TPACPI_Q_IBM('7', '6', 0x0097), /* R52 */
5070
5071 TPACPI_Q_IBM('1', 'K', 0x00bf), /* X30 */
5072 TPACPI_Q_IBM('1', 'Q', 0x00bf), /* X31, X32 */
5073 TPACPI_Q_IBM('1', 'U', 0x00bf), /* X40 */
5074 TPACPI_Q_IBM('7', '4', 0x00bf), /* X41 */
5075 TPACPI_Q_IBM('7', '5', 0x00bf), /* X41t */
5076
5077 TPACPI_Q_IBM('7', '9', 0x1f97), /* T60 (1) */
5078 TPACPI_Q_IBM('7', '7', 0x1f97), /* Z60* (1) */
5079 TPACPI_Q_IBM('7', 'F', 0x1f97), /* Z61* (1) */
5080 TPACPI_Q_IBM('7', 'B', 0x1fb7), /* X60 (1) */
5081
5082 /* (1) - may have excess LEDs enabled in the MSB */
5083
5084 /* Defaults (order matters, keep last, don't reorder!) */
5085 { /* Lenovo */
5086 .vendor = PCI_VENDOR_ID_LENOVO,
5087 .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_ANY,
5088 .quirks = 0x1fffU,
5089 },
5090 { /* IBM ThinkPads with no EC version string */
5091 .vendor = PCI_VENDOR_ID_IBM,
5092 .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_UNKNOWN,
5093 .quirks = 0x00ffU,
5094 },
5095 { /* IBM ThinkPads with EC version string */
5096 .vendor = PCI_VENDOR_ID_IBM,
5097 .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_ANY,
5098 .quirks = 0x00bfU,
5099 },
5100};
5101
5102#undef TPACPI_LEDQ_IBM
5103#undef TPACPI_LEDQ_LNV
5104
4977static int __init led_init(struct ibm_init_struct *iibm) 5105static int __init led_init(struct ibm_init_struct *iibm)
4978{ 5106{
4979 unsigned int i; 5107 unsigned int i;
4980 int rc; 5108 int rc;
5109 unsigned long useful_leds;
4981 5110
4982 vdbg_printk(TPACPI_DBG_INIT, "initializing LED subdriver\n"); 5111 vdbg_printk(TPACPI_DBG_INIT, "initializing LED subdriver\n");
4983 5112
@@ -4999,6 +5128,9 @@ static int __init led_init(struct ibm_init_struct *iibm)
4999 vdbg_printk(TPACPI_DBG_INIT, "LED commands are %s, mode %d\n", 5128 vdbg_printk(TPACPI_DBG_INIT, "LED commands are %s, mode %d\n",
5000 str_supported(led_supported), led_supported); 5129 str_supported(led_supported), led_supported);
5001 5130
5131 if (led_supported == TPACPI_LED_NONE)
5132 return 1;
5133
5002 tpacpi_leds = kzalloc(sizeof(*tpacpi_leds) * TPACPI_LED_NUMLEDS, 5134 tpacpi_leds = kzalloc(sizeof(*tpacpi_leds) * TPACPI_LED_NUMLEDS,
5003 GFP_KERNEL); 5135 GFP_KERNEL);
5004 if (!tpacpi_leds) { 5136 if (!tpacpi_leds) {
@@ -5006,8 +5138,12 @@ static int __init led_init(struct ibm_init_struct *iibm)
5006 return -ENOMEM; 5138 return -ENOMEM;
5007 } 5139 }
5008 5140
5141 useful_leds = tpacpi_check_quirks(led_useful_qtable,
5142 ARRAY_SIZE(led_useful_qtable));
5143
5009 for (i = 0; i < TPACPI_LED_NUMLEDS; i++) { 5144 for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
5010 if (!tpacpi_is_led_restricted(i)) { 5145 if (!tpacpi_is_led_restricted(i) &&
5146 test_bit(i, &useful_leds)) {
5011 rc = tpacpi_init_led(i); 5147 rc = tpacpi_init_led(i);
5012 if (rc < 0) { 5148 if (rc < 0) {
5013 led_exit(); 5149 led_exit();
@@ -5017,12 +5153,11 @@ static int __init led_init(struct ibm_init_struct *iibm)
5017 } 5153 }
5018 5154
5019#ifdef CONFIG_THINKPAD_ACPI_UNSAFE_LEDS 5155#ifdef CONFIG_THINKPAD_ACPI_UNSAFE_LEDS
5020 if (led_supported != TPACPI_LED_NONE) 5156 printk(TPACPI_NOTICE
5021 printk(TPACPI_NOTICE 5157 "warning: userspace override of important "
5022 "warning: userspace override of important " 5158 "firmware LEDs is enabled\n");
5023 "firmware LEDs is enabled\n");
5024#endif 5159#endif
5025 return (led_supported != TPACPI_LED_NONE)? 0 : 1; 5160 return 0;
5026} 5161}
5027 5162
5028#define str_led_status(s) \ 5163#define str_led_status(s) \
@@ -5052,7 +5187,7 @@ static int led_read(char *p)
5052 } 5187 }
5053 5188
5054 len += sprintf(p + len, "commands:\t" 5189 len += sprintf(p + len, "commands:\t"
5055 "<led> on, <led> off, <led> blink (<led> is 0-7)\n"); 5190 "<led> on, <led> off, <led> blink (<led> is 0-15)\n");
5056 5191
5057 return len; 5192 return len;
5058} 5193}
@@ -5067,7 +5202,7 @@ static int led_write(char *buf)
5067 return -ENODEV; 5202 return -ENODEV;
5068 5203
5069 while ((cmd = next_cmd(&buf))) { 5204 while ((cmd = next_cmd(&buf))) {
5070 if (sscanf(cmd, "%d", &led) != 1 || led < 0 || led > 7) 5205 if (sscanf(cmd, "%d", &led) != 1 || led < 0 || led > 15)
5071 return -EINVAL; 5206 return -EINVAL;
5072 5207
5073 if (strstr(cmd, "off")) { 5208 if (strstr(cmd, "off")) {
@@ -5101,8 +5236,17 @@ static struct ibm_struct led_driver_data = {
5101 5236
5102TPACPI_HANDLE(beep, ec, "BEEP"); /* all except R30, R31 */ 5237TPACPI_HANDLE(beep, ec, "BEEP"); /* all except R30, R31 */
5103 5238
5239#define TPACPI_BEEP_Q1 0x0001
5240
5241static const struct tpacpi_quirk beep_quirk_table[] __initconst = {
5242 TPACPI_Q_IBM('I', 'M', TPACPI_BEEP_Q1), /* 570 */
5243 TPACPI_Q_IBM('I', 'U', TPACPI_BEEP_Q1), /* 570E - unverified */
5244};
5245
5104static int __init beep_init(struct ibm_init_struct *iibm) 5246static int __init beep_init(struct ibm_init_struct *iibm)
5105{ 5247{
5248 unsigned long quirks;
5249
5106 vdbg_printk(TPACPI_DBG_INIT, "initializing beep subdriver\n"); 5250 vdbg_printk(TPACPI_DBG_INIT, "initializing beep subdriver\n");
5107 5251
5108 TPACPI_ACPIHANDLE_INIT(beep); 5252 TPACPI_ACPIHANDLE_INIT(beep);
@@ -5110,6 +5254,11 @@ static int __init beep_init(struct ibm_init_struct *iibm)
5110 vdbg_printk(TPACPI_DBG_INIT, "beep is %s\n", 5254 vdbg_printk(TPACPI_DBG_INIT, "beep is %s\n",
5111 str_supported(beep_handle != NULL)); 5255 str_supported(beep_handle != NULL));
5112 5256
5257 quirks = tpacpi_check_quirks(beep_quirk_table,
5258 ARRAY_SIZE(beep_quirk_table));
5259
5260 tp_features.beep_needs_two_args = !!(quirks & TPACPI_BEEP_Q1);
5261
5113 return (beep_handle)? 0 : 1; 5262 return (beep_handle)? 0 : 1;
5114} 5263}
5115 5264
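
To see the quirk machinery end to end, here is a self-contained rendition of the matching loop from tpacpi_check_quirks() run against a one-entry table; 0x1014 is IBM's PCI vendor ID, and TPID('I', 'M') is the 570 entry from beep_quirk_table above:

#include <stdio.h>

#define MATCH_ANY 0xffffU
#define TPID(c1, c2) (((c2) << 8) | (c1))

struct quirk { unsigned int vendor, bios, ec; unsigned long quirks; };

static unsigned long check_quirks(const struct quirk *q, unsigned int n,
				  unsigned int vendor, unsigned int bios,
				  unsigned int ec)
{
	for (; n; n--, q++)
		if ((q->vendor == vendor || q->vendor == MATCH_ANY) &&
		    (q->bios == bios || q->bios == MATCH_ANY) &&
		    (q->ec == ec || q->ec == MATCH_ANY))
			return q->quirks;
	return 0;
}

int main(void)
{
	const struct quirk tbl[] = {
		{ 0x1014, TPID('I', 'M'), MATCH_ANY, 0x0001 },	/* 570 */
	};

	/* a 570 reporting BIOS "TP-IM" picks up TPACPI_BEEP_Q1 */
	printf("quirks: %#lx\n",
	       check_quirks(tbl, 1, 0x1014, TPID('I', 'M'), 0));
	return 0;
}
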
@@ -5141,8 +5290,15 @@ static int beep_write(char *buf)
5141 /* beep_cmd set */ 5290 /* beep_cmd set */
5142 } else 5291 } else
5143 return -EINVAL; 5292 return -EINVAL;
5144 if (!acpi_evalf(beep_handle, NULL, NULL, "vdd", beep_cmd, 0)) 5293 if (tp_features.beep_needs_two_args) {
5145 return -EIO; 5294 if (!acpi_evalf(beep_handle, NULL, NULL, "vdd",
5295 beep_cmd, 0))
5296 return -EIO;
5297 } else {
5298 if (!acpi_evalf(beep_handle, NULL, NULL, "vd",
5299 beep_cmd))
5300 return -EIO;
5301 }
5146 } 5302 }
5147 5303
5148 return 0; 5304 return 0;
@@ -5569,6 +5725,10 @@ static struct ibm_struct ecdump_driver_data = {
5569 * Bit 3-0: backlight brightness level 5725 * Bit 3-0: backlight brightness level
5570 * 5726 *
5571 * brightness_get_raw returns status data in the HBRV layout 5727 * brightness_get_raw returns status data in the HBRV layout
5728 *
5729 * WARNING: The X61 has been verified to use HBRV for something else, so
5730 * this should be used _only_ on IBM ThinkPads, and maybe with some careful
5731 * testing on the very early *60 Lenovo models...
5572 */ 5732 */
5573 5733
5574enum { 5734enum {
@@ -5869,6 +6029,12 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
5869 brightness_mode); 6029 brightness_mode);
5870 } 6030 }
5871 6031
6032 /* Safety */
6033 if (thinkpad_id.vendor != PCI_VENDOR_ID_IBM &&
6034 (brightness_mode == TPACPI_BRGHT_MODE_ECNVRAM ||
6035 brightness_mode == TPACPI_BRGHT_MODE_EC))
6036 return -EINVAL;
6037
5872 if (tpacpi_brightness_get_raw(&b) < 0) 6038 if (tpacpi_brightness_get_raw(&b) < 0)
5873 return 1; 6039 return 1;
5874 6040
@@ -6161,6 +6327,21 @@ static struct ibm_struct volume_driver_data = {
6161 * For firmware bugs, refer to: 6327 * For firmware bugs, refer to:
6162 * http://thinkwiki.org/wiki/Embedded_Controller_Firmware#Firmware_Issues 6328 * http://thinkwiki.org/wiki/Embedded_Controller_Firmware#Firmware_Issues
6163 * 6329 *
6330 * ----
6331 *
6332 * ThinkPad EC register 0x31 bit 0 (only on select models)
6333 *
6334 * When bit 0 of EC register 0x31 is zero, the tachometer registers
6335 * show the speed of the main fan. When bit 0 of EC register 0x31
6336 * is one, the tachometer registers show the speed of the auxiliary
6337 * fan.
6338 *
6339 * Fan control seems to affect both fans, regardless of the state
6340 * of this bit.
6341 *
6342 * So far, only the firmware for the X60/X61 non-tablet versions
6343 * seems to support this (firmware TP-7M).
6344 *
6164 * TPACPI_FAN_WR_ACPI_FANS: 6345 * TPACPI_FAN_WR_ACPI_FANS:
6165 * ThinkPad X31, X40, X41. Not available in the X60. 6346 * ThinkPad X31, X40, X41. Not available in the X60.
6166 * 6347 *
@@ -6187,6 +6368,8 @@ enum { /* Fan control constants */
6187 fan_status_offset = 0x2f, /* EC register 0x2f */ 6368 fan_status_offset = 0x2f, /* EC register 0x2f */
6188 fan_rpm_offset = 0x84, /* EC register 0x84: LSB, 0x85 MSB (RPM) 6369 fan_rpm_offset = 0x84, /* EC register 0x84: LSB, 0x85 MSB (RPM)
6189 * 0x84 must be read before 0x85 */ 6370 * 0x84 must be read before 0x85 */
6371 fan_select_offset = 0x31, /* EC register 0x31 (Firmware 7M)
6372 * bit 0 selects which fan is active */
6190 6373
6191 TP_EC_FAN_FULLSPEED = 0x40, /* EC fan mode: full speed */ 6374 TP_EC_FAN_FULLSPEED = 0x40, /* EC fan mode: full speed */
6192 TP_EC_FAN_AUTO = 0x80, /* EC fan mode: auto fan control */ 6375 TP_EC_FAN_AUTO = 0x80, /* EC fan mode: auto fan control */
@@ -6249,30 +6432,18 @@ TPACPI_HANDLE(sfan, ec, "SFAN", /* 570 */
  * We assume 0x07 really means auto mode while this quirk is active,
  * as this is far more likely than the ThinkPad being in level 7,
  * which is only used by the firmware during thermal emergencies.
+ *
+ * Enable for TP-1Y (T43), TP-78 (R51e), TP-76 (R52),
+ * TP-70 (T43, R52), which are known to be buggy.
  */
 
-static void fan_quirk1_detect(void)
+static void fan_quirk1_setup(void)
 {
-	/* In some ThinkPads, neither the EC nor the ACPI
-	 * DSDT initialize the HFSP register, and it ends up
-	 * being initially set to 0x07 when it *could* be
-	 * either 0x07 or 0x80.
-	 *
-	 * Enable for TP-1Y (T43), TP-78 (R51e),
-	 * TP-76 (R52), TP-70 (T43, R52), which are known
-	 * to be buggy. */
 	if (fan_control_initial_status == 0x07) {
-		switch (thinkpad_id.ec_model) {
-		case 0x5931: /* TP-1Y */
-		case 0x3837: /* TP-78 */
-		case 0x3637: /* TP-76 */
-		case 0x3037: /* TP-70 */
-			printk(TPACPI_NOTICE
-			       "fan_init: initial fan status is unknown, "
-			       "assuming it is in auto mode\n");
-			tp_features.fan_ctrl_status_undef = 1;
-			;;
-		}
+		printk(TPACPI_NOTICE
+		       "fan_init: initial fan status is unknown, "
+		       "assuming it is in auto mode\n");
+		tp_features.fan_ctrl_status_undef = 1;
 	}
 }
 
@@ -6292,6 +6463,38 @@ static void fan_quirk1_handle(u8 *fan_status)
 	}
 }
 
+/* Select main fan on X60/X61, NOOP on others */
+static bool fan_select_fan1(void)
+{
+	if (tp_features.second_fan) {
+		u8 val;
+
+		if (ec_read(fan_select_offset, &val) < 0)
+			return false;
+		val &= 0xFEU;
+		if (ec_write(fan_select_offset, val) < 0)
+			return false;
+	}
+	return true;
+}
+
+/* Select secondary fan on X60/X61 */
+static bool fan_select_fan2(void)
+{
+	u8 val;
+
+	if (!tp_features.second_fan)
+		return false;
+
+	if (ec_read(fan_select_offset, &val) < 0)
+		return false;
+	val |= 0x01U;
+	if (ec_write(fan_select_offset, val) < 0)
+		return false;
+
+	return true;
+}
+
 /*
  * Call with fan_mutex held
  */
@@ -6369,6 +6572,8 @@ static int fan_get_speed(unsigned int *speed)
 	switch (fan_status_access_mode) {
 	case TPACPI_FAN_RD_TPEC:
 		/* all except 570, 600e/x, 770e, 770x */
+		if (unlikely(!fan_select_fan1()))
+			return -EIO;
 		if (unlikely(!acpi_ec_read(fan_rpm_offset, &lo) ||
 			     !acpi_ec_read(fan_rpm_offset + 1, &hi)))
 			return -EIO;
@@ -6385,6 +6590,34 @@ static int fan_get_speed(unsigned int *speed)
 	return 0;
 }
 
+static int fan2_get_speed(unsigned int *speed)
+{
+	u8 hi, lo;
+	bool rc;
+
+	switch (fan_status_access_mode) {
+	case TPACPI_FAN_RD_TPEC:
+		/* all except 570, 600e/x, 770e, 770x */
+		if (unlikely(!fan_select_fan2()))
+			return -EIO;
+		rc = !acpi_ec_read(fan_rpm_offset, &lo) ||
+		     !acpi_ec_read(fan_rpm_offset + 1, &hi);
+		fan_select_fan1(); /* play it safe */
+		if (rc)
+			return -EIO;
+
+		if (likely(speed))
+			*speed = (hi << 8) | lo;
+
+		break;
+
+	default:
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
 static int fan_set_level(int level)
 {
 	if (!fan_control_allowed)
@@ -6790,6 +7023,25 @@ static struct device_attribute dev_attr_fan_fan1_input =
 	__ATTR(fan1_input, S_IRUGO,
 		fan_fan1_input_show, NULL);
 
+/* sysfs fan fan2_input ------------------------------------------------ */
+static ssize_t fan_fan2_input_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	int res;
+	unsigned int speed;
+
+	res = fan2_get_speed(&speed);
+	if (res < 0)
+		return res;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", speed);
+}
+
+static struct device_attribute dev_attr_fan_fan2_input =
+	__ATTR(fan2_input, S_IRUGO,
+		fan_fan2_input_show, NULL);
+
 /* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */
 static ssize_t fan_fan_watchdog_show(struct device_driver *drv,
 				     char *buf)
@@ -6823,6 +7075,7 @@ static DRIVER_ATTR(fan_watchdog, S_IWUSR | S_IRUGO,
 static struct attribute *fan_attributes[] = {
 	&dev_attr_fan_pwm1_enable.attr, &dev_attr_fan_pwm1.attr,
 	&dev_attr_fan_fan1_input.attr,
+	NULL, /* for fan2_input */
 	NULL
 };
 
@@ -6830,9 +7083,36 @@ static const struct attribute_group fan_attr_group = {
 	.attrs = fan_attributes,
 };
 
+#define TPACPI_FAN_Q1	0x0001		/* Uninitialized HFSP */
+#define TPACPI_FAN_2FAN	0x0002		/* EC 0x31 bit 0 selects fan2 */
+
+#define TPACPI_FAN_QI(__id1, __id2, __quirks)	\
+	{ .vendor = PCI_VENDOR_ID_IBM,		\
+	  .bios = TPACPI_MATCH_ANY,		\
+	  .ec = TPID(__id1, __id2),		\
+	  .quirks = __quirks }
+
+#define TPACPI_FAN_QL(__id1, __id2, __quirks)	\
+	{ .vendor = PCI_VENDOR_ID_LENOVO,	\
+	  .bios = TPACPI_MATCH_ANY,		\
+	  .ec = TPID(__id1, __id2),		\
+	  .quirks = __quirks }
+
+static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
+	TPACPI_FAN_QI('1', 'Y', TPACPI_FAN_Q1),
+	TPACPI_FAN_QI('7', '8', TPACPI_FAN_Q1),
+	TPACPI_FAN_QI('7', '6', TPACPI_FAN_Q1),
+	TPACPI_FAN_QI('7', '0', TPACPI_FAN_Q1),
+	TPACPI_FAN_QL('7', 'M', TPACPI_FAN_2FAN),
+};
+
+#undef TPACPI_FAN_QL
+#undef TPACPI_FAN_QI
+
 static int __init fan_init(struct ibm_init_struct *iibm)
 {
 	int rc;
+	unsigned long quirks;
 
 	vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_FAN,
 		    "initializing fan subdriver\n");
@@ -6843,12 +7123,16 @@ static int __init fan_init(struct ibm_init_struct *iibm)
 	fan_control_commands = 0;
 	fan_watchdog_maxinterval = 0;
 	tp_features.fan_ctrl_status_undef = 0;
+	tp_features.second_fan = 0;
 	fan_control_desired_level = 7;
 
 	TPACPI_ACPIHANDLE_INIT(fans);
 	TPACPI_ACPIHANDLE_INIT(gfan);
 	TPACPI_ACPIHANDLE_INIT(sfan);
 
+	quirks = tpacpi_check_quirks(fan_quirk_table,
+				     ARRAY_SIZE(fan_quirk_table));
+
 	if (gfan_handle) {
 		/* 570, 600e/x, 770e, 770x */
 		fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN;
@@ -6858,7 +7142,13 @@ static int __init fan_init(struct ibm_init_struct *iibm)
 		if (likely(acpi_ec_read(fan_status_offset,
 					&fan_control_initial_status))) {
 			fan_status_access_mode = TPACPI_FAN_RD_TPEC;
-			fan_quirk1_detect();
+			if (quirks & TPACPI_FAN_Q1)
+				fan_quirk1_setup();
+			if (quirks & TPACPI_FAN_2FAN) {
+				tp_features.second_fan = 1;
+				dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_FAN,
+					"secondary fan support enabled\n");
+			}
 		} else {
 			printk(TPACPI_ERR
 			       "ThinkPad ACPI EC access misbehaving, "
@@ -6914,6 +7204,11 @@ static int __init fan_init(struct ibm_init_struct *iibm)
 
 	if (fan_status_access_mode != TPACPI_FAN_NONE ||
 	    fan_control_access_mode != TPACPI_FAN_WR_NONE) {
+		if (tp_features.second_fan) {
+			/* attach second fan tachometer */
+			fan_attributes[ARRAY_SIZE(fan_attributes)-2] =
+					&dev_attr_fan_fan2_input.attr;
+		}
 		rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
 					&fan_attr_group);
 		if (rc < 0)
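The array indexing in this hunk leans on the spare NULL slot added to
fan_attributes[] earlier in the patch: the array is NULL-terminated, with one
extra NULL reserved ahead of the terminator, so ARRAY_SIZE(fan_attributes) - 2
is precisely that spare slot. Filling it before sysfs_create_group() runs
makes fan2_input register together with the rest of the group. Sketched out:

	/* layout after this patch (illustrative):
	 *   [0] pwm1_enable   [1] pwm1   [2] fan1_input
	 *   [3] NULL  <- replaced by &dev_attr_fan_fan2_input.attr
	 *                when tp_features.second_fan is set
	 *   [4] NULL  <- terminator the sysfs code stops at
	 */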
@@ -7385,6 +7680,24 @@ err_out:
 
 /* Probing */
 
+static bool __pure __init tpacpi_is_fw_digit(const char c)
+{
+	return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z');
+}
+
+/* Most models: xxyTkkWW (#.##c); ancient 570/600 and -SL lack (#.##c) */
+static bool __pure __init tpacpi_is_valid_fw_id(const char * const s,
+						const char t)
+{
+	return s && strlen(s) >= 8 &&
+		tpacpi_is_fw_digit(s[0]) &&
+		tpacpi_is_fw_digit(s[1]) &&
+		s[2] == t && s[3] == 'T' &&
+		tpacpi_is_fw_digit(s[4]) &&
+		tpacpi_is_fw_digit(s[5]) &&
+		s[6] == 'W' && s[7] == 'W';
+}
+
 /* returns 0 - probe ok, or < 0 - probe error.
  * Probe ok doesn't mean thinkpad found.
  * On error, kfree() cleanup on tp->* is not performed, caller must do it */
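A worked example of the pattern being enforced: a hypothetical BIOS version
string "7LET51WW" passes tpacpi_is_valid_fw_id(s, 'E'), since '7' and 'L' are
firmware digits, s[2] is 'E', s[3] is 'T', '5' and '1' are digits, and the
string ends in "WW"; EC version strings use 'H' in position 2 instead. The
probe code below then packs the fields as:

	/* for the hypothetical s = "7LET51WW": */
	tp->bios_model   = s[0] | (s[1] << 8);	/* '7','L' -> 0x4c37 */
	tp->bios_release = (s[4] << 8) | s[5];	/* '5','1' -> 0x3531 */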
@@ -7411,10 +7724,15 @@ static int __must_check __init get_thinkpad_model_data(
 	tp->bios_version_str = kstrdup(s, GFP_KERNEL);
 	if (s && !tp->bios_version_str)
 		return -ENOMEM;
-	if (!tp->bios_version_str)
+
+	/* Really ancient ThinkPad 240X will fail this, which is fine */
+	if (!tpacpi_is_valid_fw_id(tp->bios_version_str, 'E'))
 		return 0;
+
 	tp->bios_model = tp->bios_version_str[0]
 			 | (tp->bios_version_str[1] << 8);
+	tp->bios_release = (tp->bios_version_str[4] << 8)
+			 | tp->bios_version_str[5];
 
 	/*
 	 * ThinkPad T23 or newer, A31 or newer, R50e or newer,
@@ -7433,8 +7751,21 @@ static int __must_check __init get_thinkpad_model_data(
 			tp->ec_version_str = kstrdup(ec_fw_string, GFP_KERNEL);
 			if (!tp->ec_version_str)
 				return -ENOMEM;
-			tp->ec_model = ec_fw_string[0]
-					| (ec_fw_string[1] << 8);
+
+			if (tpacpi_is_valid_fw_id(ec_fw_string, 'H')) {
+				tp->ec_model = ec_fw_string[0]
+						| (ec_fw_string[1] << 8);
+				tp->ec_release = (ec_fw_string[4] << 8)
+						| ec_fw_string[5];
+			} else {
+				printk(TPACPI_NOTICE
+					"ThinkPad firmware release %s "
+					"doesn't match the known patterns\n",
+					ec_fw_string);
+				printk(TPACPI_NOTICE
+					"please report this to %s\n",
+					TPACPI_MAIL);
+			}
 			break;
 		}
 	}
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 7f207f335bec..ef3a2cd3a7a0 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -287,6 +287,25 @@ static void pnpacpi_parse_allocated_address_space(struct pnp_dev *dev,
 					      ACPI_DECODE_16);
 }
 
+static void pnpacpi_parse_allocated_ext_address_space(struct pnp_dev *dev,
+						      struct acpi_resource *res)
+{
+	struct acpi_resource_extended_address64 *p = &res->data.ext_address64;
+
+	if (p->producer_consumer == ACPI_PRODUCER)
+		return;
+
+	if (p->resource_type == ACPI_MEMORY_RANGE)
+		pnpacpi_parse_allocated_memresource(dev,
+			p->minimum, p->address_length,
+			p->info.mem.write_protect);
+	else if (p->resource_type == ACPI_IO_RANGE)
+		pnpacpi_parse_allocated_ioresource(dev,
+			p->minimum, p->address_length,
+			p->granularity == 0xfff ? ACPI_DECODE_10 :
+				ACPI_DECODE_16);
+}
+
 static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
 					      void *data)
 {
@@ -400,8 +419,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
 		break;
 
 	case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
-		if (res->data.ext_address64.producer_consumer == ACPI_PRODUCER)
-			return AE_OK;
+		pnpacpi_parse_allocated_ext_address_space(dev, res);
 		break;
 
 	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
@@ -630,6 +648,28 @@ static __init void pnpacpi_parse_address_option(struct pnp_dev *dev,
 					IORESOURCE_IO_FIXED);
 }
 
+static __init void pnpacpi_parse_ext_address_option(struct pnp_dev *dev,
+						    unsigned int option_flags,
+						    struct acpi_resource *r)
+{
+	struct acpi_resource_extended_address64 *p = &r->data.ext_address64;
+	unsigned char flags = 0;
+
+	if (p->address_length == 0)
+		return;
+
+	if (p->resource_type == ACPI_MEMORY_RANGE) {
+		if (p->info.mem.write_protect == ACPI_READ_WRITE_MEMORY)
+			flags = IORESOURCE_MEM_WRITEABLE;
+		pnp_register_mem_resource(dev, option_flags, p->minimum,
+					  p->minimum, 0, p->address_length,
+					  flags);
+	} else if (p->resource_type == ACPI_IO_RANGE)
+		pnp_register_port_resource(dev, option_flags, p->minimum,
+					   p->minimum, 0, p->address_length,
+					   IORESOURCE_IO_FIXED);
+}
+
 struct acpipnp_parse_option_s {
 	struct pnp_dev *dev;
 	unsigned int option_flags;
@@ -711,6 +751,7 @@ static __init acpi_status pnpacpi_option_resource(struct acpi_resource *res,
 		break;
 
 	case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
+		pnpacpi_parse_ext_address_option(dev, option_flags, res);
 		break;
 
 	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
@@ -765,6 +806,7 @@ static int pnpacpi_supported_resource(struct acpi_resource *res)
 	case ACPI_RESOURCE_TYPE_ADDRESS16:
 	case ACPI_RESOURCE_TYPE_ADDRESS32:
 	case ACPI_RESOURCE_TYPE_ADDRESS64:
+	case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
 	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
 		return 1;
 	}
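For context on the two parsers added above: an ACPI Extended Address Space
descriptor carries the same minimum/length/resource-type fields as the plain
ADDRESS64 descriptor, and only ACPI_CONSUMER descriptors describe resources
the device itself consumes, which is why producers are skipped rather than
parsed. The granularity test simply mirrors the heuristic already used by
pnpacpi_parse_allocated_address_space() above it, where a granularity of
0xfff is taken to mean a 10-bit ISA-style decode:

	/* illustrative restatement of the decode selection used above */
	int decode = (p->granularity == 0xfff) ? ACPI_DECODE_10
					       : ACPI_DECODE_16;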
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 33da1127992a..7eda34838bfe 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -82,6 +82,14 @@ config BATTERY_DA9030
 	  Say Y here to enable support for batteries charger integrated into
 	  DA9030 PMIC.
 
+config BATTERY_MAX17040
+	tristate "Maxim MAX17040 Fuel Gauge"
+	depends on I2C
+	help
+	  The MAX17040 is a fuel-gauge system for lithium-ion (Li+) batteries
+	  in handheld and portable equipment. It is configured to operate
+	  with a single lithium cell.
+
 config CHARGER_PCF50633
 	tristate "NXP PCF50633 MBC"
 	depends on MFD_PCF50633
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 2fcf41d13e5c..daf3179689aa 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -25,4 +25,5 @@ obj-$(CONFIG_BATTERY_TOSA)	+= tosa_battery.o
 obj-$(CONFIG_BATTERY_WM97XX)	+= wm97xx_battery.o
 obj-$(CONFIG_BATTERY_BQ27x00)	+= bq27x00_battery.o
 obj-$(CONFIG_BATTERY_DA9030)	+= da9030_battery.o
-obj-$(CONFIG_CHARGER_PCF50633)	+= pcf50633-charger.o
\ No newline at end of file
+obj-$(CONFIG_BATTERY_MAX17040)	+= max17040_battery.o
+obj-$(CONFIG_CHARGER_PCF50633)	+= pcf50633-charger.o
diff --git a/drivers/power/da9030_battery.c b/drivers/power/da9030_battery.c
index 1662bb0f23a5..3364198134a1 100644
--- a/drivers/power/da9030_battery.c
+++ b/drivers/power/da9030_battery.c
@@ -22,8 +22,6 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
-#define DA9030_STATUS_CHDET	(1 << 3)
-
 #define DA9030_FAULT_LOG		0x0a
 #define DA9030_FAULT_LOG_OVER_TEMP	(1 << 7)
 #define DA9030_FAULT_LOG_VBAT_OVER	(1 << 4)
@@ -244,6 +242,8 @@ static void da9030_set_charge(struct da9030_charger *charger, int on)
 	}
 
 	da903x_write(charger->master, DA9030_CHARGE_CONTROL, val);
+
+	power_supply_changed(&charger->psy);
 }
 
 static void da9030_charger_check_state(struct da9030_charger *charger)
@@ -258,6 +258,12 @@ static void da9030_charger_check_state(struct da9030_charger *charger)
 			da9030_set_charge(charger, 1);
 		}
 	} else {
+		/* Charger has been pulled out */
+		if (!charger->chdet) {
+			da9030_set_charge(charger, 0);
+			return;
+		}
+
 		if (charger->adc.vbat_res >=
 		    charger->thresholds.vbat_charge_stop) {
 			da9030_set_charge(charger, 0);
@@ -395,13 +401,11 @@ static int da9030_battery_event(struct notifier_block *nb, unsigned long event,
 {
 	struct da9030_charger *charger =
 		container_of(nb, struct da9030_charger, nb);
-	int status;
 
 	switch (event) {
 	case DA9030_EVENT_CHDET:
-		status = da903x_query_status(charger->master,
-					     DA9030_STATUS_CHDET);
-		da9030_set_charge(charger, status);
+		cancel_delayed_work_sync(&charger->work);
+		schedule_work(&charger->work.work);
 		break;
 	case DA9030_EVENT_VBATMON:
 		da9030_battery_vbat_event(charger);
@@ -565,7 +569,8 @@ static int da9030_battery_remove(struct platform_device *dev)
 	da903x_unregister_notifier(charger->master, &charger->nb,
 				   DA9030_EVENT_CHDET | DA9030_EVENT_VBATMON |
 				   DA9030_EVENT_CHIOVER | DA9030_EVENT_TBAT);
-	cancel_delayed_work(&charger->work);
+	cancel_delayed_work_sync(&charger->work);
+	da9030_set_charge(charger, 0);
 	power_supply_unregister(&charger->psy);
 
 	kfree(charger);
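The CHDET rework in this file moves charger-detect handling out of the
notifier body: rather than querying DA9030_STATUS_CHDET inline, the event
handler flushes any queued poll and runs the monitor work immediately, so
every charger state change funnels through the one work function. The
skeleton of the pattern, with our annotations (names are from this driver):

	case DA9030_EVENT_CHDET:
		/* wait out a poll that may already be running... */
		cancel_delayed_work_sync(&charger->work);
		/* ...then invoke the same work function right away,
		 * in process context rather than in the notifier */
		schedule_work(&charger->work.work);
		break;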
diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index a52d4a11652d..520b5c49ff30 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -62,6 +62,10 @@ static unsigned int cache_time = 1000;
 module_param(cache_time, uint, 0644);
 MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
 
+static unsigned int pmod_enabled;
+module_param(pmod_enabled, bool, 0644);
+MODULE_PARM_DESC(pmod_enabled, "PMOD enable bit");
+
 /* Some batteries have their rated capacity stored as N * 10 mAh, while
  * others use an index into this table. */
 static int rated_capacities[] = {
@@ -259,6 +263,17 @@ static void ds2760_battery_update_status(struct ds2760_device_info *di)
 		power_supply_changed(&di->bat);
 }
 
+static void ds2760_battery_write_status(struct ds2760_device_info *di,
+					char status)
+{
+	if (status == di->raw[DS2760_STATUS_REG])
+		return;
+
+	w1_ds2760_write(di->w1_dev, &status, DS2760_STATUS_WRITE_REG, 1);
+	w1_ds2760_store_eeprom(di->w1_dev, DS2760_EEPROM_BLOCK1);
+	w1_ds2760_recall_eeprom(di->w1_dev, DS2760_EEPROM_BLOCK1);
+}
+
 static void ds2760_battery_work(struct work_struct *work)
 {
 	struct ds2760_device_info *di = container_of(work,
@@ -342,9 +357,9 @@ static enum power_supply_property ds2760_battery_props[] = {
 
 static int ds2760_battery_probe(struct platform_device *pdev)
 {
+	char status;
 	int retval = 0;
 	struct ds2760_device_info *di;
-	struct ds2760_platform_data *pdata;
 
 	di = kzalloc(sizeof(*di), GFP_KERNEL);
 	if (!di) {
@@ -354,14 +369,13 @@ static int ds2760_battery_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, di);
 
-	pdata = pdev->dev.platform_data;
-	di->dev = &pdev->dev;
-	di->w1_dev = pdev->dev.parent;
-	di->bat.name = dev_name(&pdev->dev);
-	di->bat.type = POWER_SUPPLY_TYPE_BATTERY;
-	di->bat.properties = ds2760_battery_props;
-	di->bat.num_properties = ARRAY_SIZE(ds2760_battery_props);
-	di->bat.get_property = ds2760_battery_get_property;
+	di->dev			= &pdev->dev;
+	di->w1_dev		= pdev->dev.parent;
+	di->bat.name		= dev_name(&pdev->dev);
+	di->bat.type		= POWER_SUPPLY_TYPE_BATTERY;
+	di->bat.properties	= ds2760_battery_props;
+	di->bat.num_properties	= ARRAY_SIZE(ds2760_battery_props);
+	di->bat.get_property	= ds2760_battery_get_property;
 	di->bat.external_power_changed =
 			ds2760_battery_external_power_changed;
 
@@ -373,6 +387,16 @@ static int ds2760_battery_probe(struct platform_device *pdev)
 		goto batt_failed;
 	}
 
+	/* enable sleep mode feature */
+	ds2760_battery_read_status(di);
+	status = di->raw[DS2760_STATUS_REG];
+	if (pmod_enabled)
+		status |= DS2760_STATUS_PMOD;
+	else
+		status &= ~DS2760_STATUS_PMOD;
+
+	ds2760_battery_write_status(di, status);
+
 	INIT_DELAYED_WORK(&di->monitor_work, ds2760_battery_work);
 	di->monitor_wqueue = create_singlethread_workqueue(dev_name(&pdev->dev));
 	if (!di->monitor_wqueue) {
diff --git a/drivers/power/max17040_battery.c b/drivers/power/max17040_battery.c
new file mode 100644
index 000000000000..87b98bf27ae1
--- /dev/null
+++ b/drivers/power/max17040_battery.c
@@ -0,0 +1,309 @@
+/*
+ *  max17040_battery.c
+ *  fuel-gauge systems for lithium-ion (Li+) batteries
+ *
+ *  Copyright (C) 2009 Samsung Electronics
+ *  Minkyu Kang <mk7.kang@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include <linux/power_supply.h>
+#include <linux/max17040_battery.h>
+
+#define MAX17040_VCELL_MSB	0x02
+#define MAX17040_VCELL_LSB	0x03
+#define MAX17040_SOC_MSB	0x04
+#define MAX17040_SOC_LSB	0x05
+#define MAX17040_MODE_MSB	0x06
+#define MAX17040_MODE_LSB	0x07
+#define MAX17040_VER_MSB	0x08
+#define MAX17040_VER_LSB	0x09
+#define MAX17040_RCOMP_MSB	0x0C
+#define MAX17040_RCOMP_LSB	0x0D
+#define MAX17040_CMD_MSB	0xFE
+#define MAX17040_CMD_LSB	0xFF
+
+#define MAX17040_DELAY		1000
+#define MAX17040_BATTERY_FULL	95
+
+struct max17040_chip {
+	struct i2c_client		*client;
+	struct delayed_work		work;
+	struct power_supply		battery;
+	struct max17040_platform_data	*pdata;
+
+	/* State Of Connect */
+	int online;
+	/* battery voltage */
+	int vcell;
+	/* battery capacity (state of charge) */
+	int soc;
+	/* charging status */
+	int status;
+};
+
+static int max17040_get_property(struct power_supply *psy,
+				 enum power_supply_property psp,
+				 union power_supply_propval *val)
+{
+	struct max17040_chip *chip = container_of(psy,
+				struct max17040_chip, battery);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		val->intval = chip->status;
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = chip->online;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = chip->vcell;
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		val->intval = chip->soc;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int max17040_write_reg(struct i2c_client *client, int reg, u8 value)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, reg, value);
+
+	if (ret < 0)
+		dev_err(&client->dev, "%s: err %d\n", __func__, ret);
+
+	return ret;
+}
+
+static int max17040_read_reg(struct i2c_client *client, int reg)
+{
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, reg);
+
+	if (ret < 0)
+		dev_err(&client->dev, "%s: err %d\n", __func__, ret);
+
+	return ret;
+}
+
+static void max17040_reset(struct i2c_client *client)
+{
+	max17040_write_reg(client, MAX17040_CMD_MSB, 0x54);
+	max17040_write_reg(client, MAX17040_CMD_LSB, 0x00);
+}
+
+static void max17040_get_vcell(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+	u8 msb;
+	u8 lsb;
+
+	msb = max17040_read_reg(client, MAX17040_VCELL_MSB);
+	lsb = max17040_read_reg(client, MAX17040_VCELL_LSB);
+
+	chip->vcell = (msb << 4) + (lsb >> 4);
+}
+
+static void max17040_get_soc(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+	u8 msb;
+	u8 lsb;
+
+	msb = max17040_read_reg(client, MAX17040_SOC_MSB);
+	lsb = max17040_read_reg(client, MAX17040_SOC_LSB);
+
+	chip->soc = msb;
+}
+
+static void max17040_get_version(struct i2c_client *client)
+{
+	u8 msb;
+	u8 lsb;
+
+	msb = max17040_read_reg(client, MAX17040_VER_MSB);
+	lsb = max17040_read_reg(client, MAX17040_VER_LSB);
+
+	dev_info(&client->dev, "MAX17040 Fuel-Gauge Ver %d%d\n", msb, lsb);
+}
+
+static void max17040_get_online(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+
+	if (chip->pdata->battery_online)
+		chip->online = chip->pdata->battery_online();
+	else
+		chip->online = 1;
+}
+
+static void max17040_get_status(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+
+	if (!chip->pdata->charger_online || !chip->pdata->charger_enable) {
+		chip->status = POWER_SUPPLY_STATUS_UNKNOWN;
+		return;
+	}
+
+	if (chip->pdata->charger_online()) {
+		if (chip->pdata->charger_enable())
+			chip->status = POWER_SUPPLY_STATUS_CHARGING;
+		else
+			chip->status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+	} else {
+		chip->status = POWER_SUPPLY_STATUS_DISCHARGING;
+	}
+
+	if (chip->soc > MAX17040_BATTERY_FULL)
+		chip->status = POWER_SUPPLY_STATUS_FULL;
+}
+
+static void max17040_work(struct work_struct *work)
+{
+	struct max17040_chip *chip;
+
+	chip = container_of(work, struct max17040_chip, work.work);
+
+	max17040_get_vcell(chip->client);
+	max17040_get_soc(chip->client);
+	max17040_get_online(chip->client);
+	max17040_get_status(chip->client);
+
+	schedule_delayed_work(&chip->work, MAX17040_DELAY);
+}
+
+static enum power_supply_property max17040_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+};
+
+static int __devinit max17040_probe(struct i2c_client *client,
+				    const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct max17040_chip *chip;
+	int ret;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
+		return -EIO;
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->client = client;
+	chip->pdata = client->dev.platform_data;
+
+	i2c_set_clientdata(client, chip);
+
+	chip->battery.name		= "battery";
+	chip->battery.type		= POWER_SUPPLY_TYPE_BATTERY;
+	chip->battery.get_property	= max17040_get_property;
+	chip->battery.properties	= max17040_battery_props;
+	chip->battery.num_properties	= ARRAY_SIZE(max17040_battery_props);
+
+	ret = power_supply_register(&client->dev, &chip->battery);
+	if (ret) {
+		dev_err(&client->dev, "failed: power supply register\n");
+		i2c_set_clientdata(client, NULL);
+		kfree(chip);
+		return ret;
+	}
+
+	max17040_reset(client);
+	max17040_get_version(client);
+
+	INIT_DELAYED_WORK_DEFERRABLE(&chip->work, max17040_work);
+	schedule_delayed_work(&chip->work, MAX17040_DELAY);
+
+	return 0;
+}
+
+static int __devexit max17040_remove(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+
+	power_supply_unregister(&chip->battery);
+	cancel_delayed_work(&chip->work);
+	i2c_set_clientdata(client, NULL);
+	kfree(chip);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int max17040_suspend(struct i2c_client *client,
+			    pm_message_t state)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+
+	cancel_delayed_work(&chip->work);
+	return 0;
+}
+
+static int max17040_resume(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+
+	schedule_delayed_work(&chip->work, MAX17040_DELAY);
+	return 0;
+}
+
+#else
+
+#define max17040_suspend NULL
+#define max17040_resume NULL
+
+#endif /* CONFIG_PM */
+
+static const struct i2c_device_id max17040_id[] = {
+	{ "max17040", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max17040_id);
+
+static struct i2c_driver max17040_i2c_driver = {
+	.driver	= {
+		.name	= "max17040",
+	},
+	.probe		= max17040_probe,
+	.remove		= __devexit_p(max17040_remove),
+	.suspend	= max17040_suspend,
+	.resume		= max17040_resume,
+	.id_table	= max17040_id,
+};
+
+static int __init max17040_init(void)
+{
+	return i2c_add_driver(&max17040_i2c_driver);
+}
+module_init(max17040_init);
+
+static void __exit max17040_exit(void)
+{
+	i2c_del_driver(&max17040_i2c_driver);
+}
+module_exit(max17040_exit);
+
+MODULE_AUTHOR("Minkyu Kang <mk7.kang@samsung.com>");
+MODULE_DESCRIPTION("MAX17040 Fuel Gauge");
+MODULE_LICENSE("GPL");
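On the register arithmetic in this driver: VCELL is a 12-bit reading
left-justified across the MSB/LSB pair, hence (msb << 4) | (lsb >> 4), while
the SOC integer percentage lives entirely in the MSB. If memory serves, the
MAX17040 datasheet specifies 1.25 mV per VCELL step (treat that scale factor
as an assumption); under it, a caller could convert to microvolts with a
helper along these lines:

	/* hypothetical helper, not part of the driver above */
	static inline int max17040_vcell_to_uv(int vcell_raw)
	{
		return vcell_raw * 1250;	/* assumed 1.25 mV per LSB */
	}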
diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index e371a9c15341..a07015d646dd 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -398,8 +398,7 @@ static int sbs_init(struct pci_dev *dev)
 {
 	u8 __iomem *p;
 
-	p = ioremap_nocache(pci_resource_start(dev, 0),
-			    pci_resource_len(dev, 0));
+	p = pci_ioremap_bar(dev, 0);
 
 	if (p == NULL)
 		return -ENOMEM;
@@ -423,8 +422,7 @@ static void __devexit sbs_exit(struct pci_dev *dev)
 {
 	u8 __iomem *p;
 
-	p = ioremap_nocache(pci_resource_start(dev, 0),
-			    pci_resource_len(dev, 0));
+	p = pci_ioremap_bar(dev, 0);
 	/* FIXME: What if resource_len < OCT_REG_CR_OFF */
 	if (p != NULL)
 		writeb(0, p + OCT_REG_CR_OFF);
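pci_ioremap_bar(dev, bar) is a convenience wrapper that maps an entire BAR:
it performs the same ioremap_nocache(pci_resource_start(), pci_resource_len())
pair that is being deleted here, after first checking that the BAR really is
a memory resource. The conversions in this series should therefore be
behavior-preserving:

	/* before */
	p = ioremap_nocache(pci_resource_start(dev, 0),
			    pci_resource_len(dev, 0));
	/* after: same mapping, plus a sanity check on the BAR type */
	p = pci_ioremap_bar(dev, 0);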
diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c
index 9f2891c2c4a2..cd1b6a45bb82 100644
--- a/drivers/serial/icom.c
+++ b/drivers/serial/icom.c
@@ -1548,8 +1548,7 @@ static int __devinit icom_probe(struct pci_dev *dev,
 		goto probe_exit1;
 	}
 
-	icom_adapter->base_addr = ioremap(icom_adapter->base_addr_pci,
-					  pci_resource_len(dev, 0));
+	icom_adapter->base_addr = pci_ioremap_bar(dev, 0);
 
 	if (!icom_adapter->base_addr)
 		goto probe_exit1;
diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c
index 107ce2e187b8..00f4577d2f7f 100644
--- a/drivers/serial/jsm/jsm_tty.c
+++ b/drivers/serial/jsm/jsm_tty.c
@@ -467,7 +467,7 @@ int __devinit jsm_uart_port_init(struct jsm_board *brd)
 			printk(KERN_INFO "jsm: linemap is full, added device failed\n");
 			continue;
 		} else
-			set_bit((int)line, linemap);
+			set_bit(line, linemap);
 		brd->channels[i]->uart_port.line = line;
 		if (uart_add_one_port (&jsm_uart_driver, &brd->channels[i]->uart_port))
 			printk(KERN_INFO "jsm: add device failed\n");
@@ -503,7 +503,7 @@ int jsm_remove_uart_port(struct jsm_board *brd)
 
 		ch = brd->channels[i];
 
-		clear_bit((int)(ch->uart_port.line), linemap);
+		clear_bit(ch->uart_port.line, linemap);
 		uart_remove_one_port(&jsm_uart_driver, &brd->channels[i]->uart_port);
 	}
 
diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c
index 7313c2edcb83..54dd16d66a4b 100644
--- a/drivers/serial/serial_txx9.c
+++ b/drivers/serial/serial_txx9.c
@@ -461,6 +461,94 @@ static void serial_txx9_break_ctl(struct uart_port *port, int break_state)
 	spin_unlock_irqrestore(&up->port.lock, flags);
 }
 
+#if defined(CONFIG_SERIAL_TXX9_CONSOLE) || defined(CONFIG_CONSOLE_POLL)
+/*
+ *	Wait for transmitter & holding register to empty
+ */
+static void wait_for_xmitr(struct uart_txx9_port *up)
+{
+	unsigned int tmout = 10000;
+
+	/* Wait up to 10ms for the character(s) to be sent. */
+	while (--tmout &&
+	       !(sio_in(up, TXX9_SICISR) & TXX9_SICISR_TXALS))
+		udelay(1);
+
+	/* Wait up to 1s for flow control if necessary */
+	if (up->port.flags & UPF_CONS_FLOW) {
+		tmout = 1000000;
+		while (--tmout &&
+		       (sio_in(up, TXX9_SICISR) & TXX9_SICISR_CTSS))
+			udelay(1);
+	}
+}
+#endif
+
+#ifdef CONFIG_CONSOLE_POLL
+/*
+ * Console polling routines for writing and reading from the uart while
+ * in an interrupt or debug context.
+ */
+
+static int serial_txx9_get_poll_char(struct uart_port *port)
+{
+	unsigned int ier;
+	unsigned char c;
+	struct uart_txx9_port *up = (struct uart_txx9_port *)port;
+
+	/*
+	 *	First save the IER then disable the interrupts
+	 */
+	ier = sio_in(up, TXX9_SIDICR);
+	sio_out(up, TXX9_SIDICR, 0);
+
+	while (sio_in(up, TXX9_SIDISR) & TXX9_SIDISR_UVALID)
+		;
+
+	c = sio_in(up, TXX9_SIRFIFO);
+
+	/*
+	 *	Finally, clear RX interrupt status
+	 *	and restore the IER
+	 */
+	sio_mask(up, TXX9_SIDISR, TXX9_SIDISR_RDIS);
+	sio_out(up, TXX9_SIDICR, ier);
+	return c;
+}
+
+
+static void serial_txx9_put_poll_char(struct uart_port *port, unsigned char c)
+{
+	unsigned int ier;
+	struct uart_txx9_port *up = (struct uart_txx9_port *)port;
+
+	/*
+	 *	First save the IER then disable the interrupts
+	 */
+	ier = sio_in(up, TXX9_SIDICR);
+	sio_out(up, TXX9_SIDICR, 0);
+
+	wait_for_xmitr(up);
+	/*
+	 *	Send the character out.
+	 *	If a LF, also do CR...
+	 */
+	sio_out(up, TXX9_SITFIFO, c);
+	if (c == 10) {
+		wait_for_xmitr(up);
+		sio_out(up, TXX9_SITFIFO, 13);
+	}
+
+	/*
+	 *	Finally, wait for transmitter to become empty
+	 *	and restore the IER
+	 */
+	wait_for_xmitr(up);
+	sio_out(up, TXX9_SIDICR, ier);
+}
+
+#endif /* CONFIG_CONSOLE_POLL */
+
 static int serial_txx9_startup(struct uart_port *port)
 {
 	struct uart_txx9_port *up = (struct uart_txx9_port *)port;
@@ -781,6 +869,10 @@ static struct uart_ops serial_txx9_pops = {
 	.release_port	= serial_txx9_release_port,
 	.request_port	= serial_txx9_request_port,
 	.config_port	= serial_txx9_config_port,
+#ifdef CONFIG_CONSOLE_POLL
+	.poll_get_char	= serial_txx9_get_poll_char,
+	.poll_put_char	= serial_txx9_put_poll_char,
+#endif
 };
 
 static struct uart_txx9_port serial_txx9_ports[UART_NR];
@@ -803,27 +895,6 @@ static void __init serial_txx9_register_ports(struct uart_driver *drv,
 
 #ifdef CONFIG_SERIAL_TXX9_CONSOLE
 
-/*
- *	Wait for transmitter & holding register to empty
- */
-static inline void wait_for_xmitr(struct uart_txx9_port *up)
-{
-	unsigned int tmout = 10000;
-
-	/* Wait up to 10ms for the character(s) to be sent. */
-	while (--tmout &&
-	       !(sio_in(up, TXX9_SICISR) & TXX9_SICISR_TXALS))
-		udelay(1);
-
-	/* Wait up to 1s for flow control if necessary */
-	if (up->port.flags & UPF_CONS_FLOW) {
-		tmout = 1000000;
-		while (--tmout &&
-		       (sio_in(up, TXX9_SICISR) & TXX9_SICISR_CTSS))
-			udelay(1);
-	}
-}
-
 static void serial_txx9_console_putchar(struct uart_port *port, int ch)
 {
 	struct uart_txx9_port *up = (struct uart_txx9_port *)port;
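The two hooks wired into serial_txx9_pops above implement the uart_ops
polling contract relied on by debuggers such as KGDB: both mask the UART's
interrupts (IER saved, then cleared), busy-wait instead of sleeping, and
restore IER on the way out, so they can run from interrupt or debugger
context. A consumer would drive them roughly like this (illustrative only;
the real callers live in the serial core and kgdboc):

	/* echo one character through a polled port */
	int c = port->ops->poll_get_char(port);	/* spins until RX data */
	port->ops->poll_put_char(port, c);	/* spins until TX drains */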
diff --git a/drivers/staging/octeon/Makefile b/drivers/staging/octeon/Makefile
index 3c839e37d37f..c0a583cc2227 100644
--- a/drivers/staging/octeon/Makefile
+++ b/drivers/staging/octeon/Makefile
@@ -12,7 +12,6 @@
 obj-${CONFIG_OCTEON_ETHERNET} :=  octeon-ethernet.o
 
 octeon-ethernet-objs := ethernet.o
-octeon-ethernet-objs += ethernet-common.o
 octeon-ethernet-objs += ethernet-mdio.o
 octeon-ethernet-objs += ethernet-mem.o
 octeon-ethernet-objs += ethernet-proc.o
diff --git a/drivers/staging/octeon/ethernet-common.c b/drivers/staging/octeon/ethernet-common.c
deleted file mode 100644
index 3e6f5b8cc63d..000000000000
--- a/drivers/staging/octeon/ethernet-common.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/**********************************************************************
- * Author: Cavium Networks
- *
- * Contact: support@caviumnetworks.com
- * This file is part of the OCTEON SDK
- *
- * Copyright (c) 2003-2007 Cavium Networks
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, Version 2, as
- * published by the Free Software Foundation.
- *
- * This file is distributed in the hope that it will be useful, but
- * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this file; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- * or visit http://www.gnu.org/licenses/.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium Networks for more information
-**********************************************************************/
-#include <linux/kernel.h>
-#include <linux/mii.h>
-#include <net/dst.h>
-
-#include <asm/atomic.h>
-#include <asm/octeon/octeon.h>
-
-#include "ethernet-defines.h"
-#include "ethernet-tx.h"
-#include "ethernet-mdio.h"
-#include "ethernet-util.h"
-#include "octeon-ethernet.h"
-#include "ethernet-common.h"
-
-#include "cvmx-pip.h"
-#include "cvmx-pko.h"
-#include "cvmx-fau.h"
-#include "cvmx-helper.h"
-
-#include "cvmx-gmxx-defs.h"
-
-/**
- * Get the low level ethernet statistics
- *
- * @dev:    Device to get the statistics from
- * Returns Pointer to the statistics
- */
-static struct net_device_stats *cvm_oct_common_get_stats(struct net_device *dev)
-{
-	cvmx_pip_port_status_t rx_status;
-	cvmx_pko_port_status_t tx_status;
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
-	if (priv->port < CVMX_PIP_NUM_INPUT_PORTS) {
-		if (octeon_is_simulation()) {
-			/* The simulator doesn't support statistics */
-			memset(&rx_status, 0, sizeof(rx_status));
-			memset(&tx_status, 0, sizeof(tx_status));
-		} else {
-			cvmx_pip_get_port_status(priv->port, 1, &rx_status);
-			cvmx_pko_get_port_status(priv->port, 1, &tx_status);
-		}
-
-		priv->stats.rx_packets += rx_status.inb_packets;
-		priv->stats.tx_packets += tx_status.packets;
-		priv->stats.rx_bytes += rx_status.inb_octets;
-		priv->stats.tx_bytes += tx_status.octets;
-		priv->stats.multicast += rx_status.multicast_packets;
-		priv->stats.rx_crc_errors += rx_status.inb_errors;
-		priv->stats.rx_frame_errors += rx_status.fcs_align_err_packets;
-
-		/*
-		 * The drop counter must be incremented atomically
-		 * since the RX tasklet also increments it.
-		 */
-#ifdef CONFIG_64BIT
-		atomic64_add(rx_status.dropped_packets,
-			     (atomic64_t *)&priv->stats.rx_dropped);
-#else
-		atomic_add(rx_status.dropped_packets,
-			   (atomic_t *)&priv->stats.rx_dropped);
-#endif
-	}
-
-	return &priv->stats;
-}
-
-/**
- * Set the multicast list.
- *
- * @dev:    Device to work on
- */
-static void cvm_oct_common_set_multicast_list(struct net_device *dev)
-{
-	union cvmx_gmxx_prtx_cfg gmx_cfg;
-	struct octeon_ethernet *priv = netdev_priv(dev);
-	int interface = INTERFACE(priv->port);
-	int index = INDEX(priv->port);
-
-	if ((interface < 2)
-	    && (cvmx_helper_interface_get_mode(interface) !=
-		CVMX_HELPER_INTERFACE_MODE_SPI)) {
-		union cvmx_gmxx_rxx_adr_ctl control;
-		control.u64 = 0;
-		control.s.bcst = 1;	/* Allow broadcast MAC addresses */
-
-		if (dev->mc_list || (dev->flags & IFF_ALLMULTI) ||
-		    (dev->flags & IFF_PROMISC))
-			/* Force accept multicast packets */
-			control.s.mcst = 2;
-		else
-			/* Force reject multicast packets */
-			control.s.mcst = 1;
-
-		if (dev->flags & IFF_PROMISC)
-			/*
-			 * Reject matches if promisc. Since CAM is
-			 * shut off, should accept everything.
-			 */
-			control.s.cam_mode = 0;
-		else
-			/* Filter packets based on the CAM */
-			control.s.cam_mode = 1;
-
-		gmx_cfg.u64 =
-		    cvmx_read_csr(CVMX_GMXX_PRTX_CFG(index, interface));
-		cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
-			       gmx_cfg.u64 & ~1ull);
-
-		cvmx_write_csr(CVMX_GMXX_RXX_ADR_CTL(index, interface),
-			       control.u64);
-		if (dev->flags & IFF_PROMISC)
-			cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM_EN
-				       (index, interface), 0);
-		else
-			cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM_EN
-				       (index, interface), 1);
-
-		cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
-			       gmx_cfg.u64);
-	}
-}
-
-/**
- * Set the hardware MAC address for a device
- *
- * @dev:    Device to change the MAC address for
- * @addr:   Address structure to change it to. MAC address is addr + 2.
- * Returns Zero on success
- */
-static int cvm_oct_common_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct octeon_ethernet *priv = netdev_priv(dev);
-	union cvmx_gmxx_prtx_cfg gmx_cfg;
-	int interface = INTERFACE(priv->port);
-	int index = INDEX(priv->port);
-
-	memcpy(dev->dev_addr, addr + 2, 6);
-
-	if ((interface < 2)
-	    && (cvmx_helper_interface_get_mode(interface) !=
-		CVMX_HELPER_INTERFACE_MODE_SPI)) {
-		int i;
-		uint8_t *ptr = addr;
-		uint64_t mac = 0;
-		for (i = 0; i < 6; i++)
-			mac = (mac << 8) | (uint64_t) (ptr[i + 2]);
-
-		gmx_cfg.u64 =
-		    cvmx_read_csr(CVMX_GMXX_PRTX_CFG(index, interface));
-		cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
-			       gmx_cfg.u64 & ~1ull);
-
-		cvmx_write_csr(CVMX_GMXX_SMACX(index, interface), mac);
-		cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM0(index, interface),
-			       ptr[2]);
-		cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM1(index, interface),
-			       ptr[3]);
-		cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM2(index, interface),
-			       ptr[4]);
-		cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM3(index, interface),
-			       ptr[5]);
-		cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM4(index, interface),
-			       ptr[6]);
-		cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM5(index, interface),
-			       ptr[7]);
-		cvm_oct_common_set_multicast_list(dev);
-		cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
-			       gmx_cfg.u64);
-	}
-	return 0;
-}
-
-/**
- * Change the link MTU
- *
- * @dev:     Device to change
- * @new_mtu: The new MTU
- *
- * Returns Zero on success
- */
-static int cvm_oct_common_change_mtu(struct net_device *dev, int new_mtu)
-{
-	struct octeon_ethernet *priv = netdev_priv(dev);
-	int interface = INTERFACE(priv->port);
-	int index = INDEX(priv->port);
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
-	int vlan_bytes = 4;
-#else
-	int vlan_bytes = 0;
-#endif
-
-	/*
-	 * Limit the MTU to make sure the ethernet packets are between
-	 * 64 bytes and 65535 bytes.
-	 */
-	if ((new_mtu + 14 + 4 + vlan_bytes < 64)
-	    || (new_mtu + 14 + 4 + vlan_bytes > 65392)) {
-		pr_err("MTU must be between %d and %d.\n",
-		       64 - 14 - 4 - vlan_bytes, 65392 - 14 - 4 - vlan_bytes);
-		return -EINVAL;
-	}
-	dev->mtu = new_mtu;
-
-	if ((interface < 2)
-	    && (cvmx_helper_interface_get_mode(interface) !=
-		CVMX_HELPER_INTERFACE_MODE_SPI)) {
-		/* Add ethernet header and FCS, and VLAN if configured. */
-		int max_packet = new_mtu + 14 + 4 + vlan_bytes;
-
-		if (OCTEON_IS_MODEL(OCTEON_CN3XXX)
-		    || OCTEON_IS_MODEL(OCTEON_CN58XX)) {
-			/* Signal errors on packets larger than the MTU */
-			cvmx_write_csr(CVMX_GMXX_RXX_FRM_MAX(index, interface),
-				       max_packet);
-		} else {
-			/*
-			 * Set the hardware to truncate packets larger
-			 * than the MTU and smaller than 64 bytes.
-			 */
-			union cvmx_pip_frm_len_chkx frm_len_chk;
-			frm_len_chk.u64 = 0;
-			frm_len_chk.s.minlen = 64;
-			frm_len_chk.s.maxlen = max_packet;
-			cvmx_write_csr(CVMX_PIP_FRM_LEN_CHKX(interface),
-				       frm_len_chk.u64);
-		}
-		/*
-		 * Set the hardware to truncate packets larger than
-		 * the MTU. The jabber register must be set to a
-		 * multiple of 8 bytes, so round up.
-		 */
-		cvmx_write_csr(CVMX_GMXX_RXX_JABBER(index, interface),
-			       (max_packet + 7) & ~7u);
-	}
-	return 0;
-}
-
-/**
- * Per network device initialization
- *
- * @dev:    Device to initialize
- * Returns Zero on success
- */
-int cvm_oct_common_init(struct net_device *dev)
-{
-	static int count;
-	char mac[8] = { 0x00, 0x00,
-		octeon_bootinfo->mac_addr_base[0],
-		octeon_bootinfo->mac_addr_base[1],
-		octeon_bootinfo->mac_addr_base[2],
-		octeon_bootinfo->mac_addr_base[3],
-		octeon_bootinfo->mac_addr_base[4],
-		octeon_bootinfo->mac_addr_base[5] + count
-	};
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
-	/*
-	 * Force the interface to use the POW send if always_use_pow
-	 * was specified or it is in the pow send list.
-	 */
-	if ((pow_send_group != -1)
-	    && (always_use_pow || strstr(pow_send_list, dev->name)))
-		priv->queue = -1;
-
-	if (priv->queue != -1) {
-		dev->hard_start_xmit = cvm_oct_xmit;
-		if (USE_HW_TCPUDP_CHECKSUM)
-			dev->features |= NETIF_F_IP_CSUM;
-	} else
-		dev->hard_start_xmit = cvm_oct_xmit_pow;
-	count++;
-
-	dev->get_stats = cvm_oct_common_get_stats;
-	dev->set_mac_address = cvm_oct_common_set_mac_address;
-	dev->set_multicast_list = cvm_oct_common_set_multicast_list;
-	dev->change_mtu = cvm_oct_common_change_mtu;
-	dev->do_ioctl = cvm_oct_ioctl;
-	/* We do our own locking, Linux doesn't need to */
-	dev->features |= NETIF_F_LLTX;
-	SET_ETHTOOL_OPS(dev, &cvm_oct_ethtool_ops);
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	dev->poll_controller = cvm_oct_poll_controller;
-#endif
-
-	cvm_oct_mdio_setup_device(dev);
-	dev->set_mac_address(dev, mac);
-	dev->change_mtu(dev, dev->mtu);
-
-	/*
-	 * Zero out stats for port so we won't mistakenly show
-	 * counters from the bootloader.
-	 */
-	memset(dev->get_stats(dev), 0, sizeof(struct net_device_stats));
-
-	return 0;
-}
-
-void cvm_oct_common_uninit(struct net_device *dev)
-{
-	/* Currently nothing to do */
-}
diff --git a/drivers/staging/octeon/ethernet-common.h b/drivers/staging/octeon/ethernet-common.h
deleted file mode 100644
index 2bd9cd76a398..000000000000
--- a/drivers/staging/octeon/ethernet-common.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*********************************************************************
- * Author: Cavium Networks
- *
- * Contact: support@caviumnetworks.com
- * This file is part of the OCTEON SDK
- *
- * Copyright (c) 2003-2007 Cavium Networks
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, Version 2, as
- * published by the Free Software Foundation.
- *
- * This file is distributed in the hope that it will be useful, but
- * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this file; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- * or visit http://www.gnu.org/licenses/.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium Networks for more information
-*********************************************************************/
-
-int cvm_oct_common_init(struct net_device *dev);
-void cvm_oct_common_uninit(struct net_device *dev);
diff --git a/drivers/staging/octeon/ethernet-defines.h b/drivers/staging/octeon/ethernet-defines.h
index 8f7374e7664c..f13131b03c33 100644
--- a/drivers/staging/octeon/ethernet-defines.h
+++ b/drivers/staging/octeon/ethernet-defines.h
@@ -117,6 +117,8 @@
 
 /* Maximum number of packets to process per interrupt. */
 #define MAX_RX_PACKETS 120
+/* Maximum number of SKBs to try to free per xmit packet. */
+#define MAX_SKB_TO_FREE 10
 #define MAX_OUT_QUEUE_DEPTH 1000
 
 #ifndef CONFIG_SMP
diff --git a/drivers/staging/octeon/ethernet-rgmii.c b/drivers/staging/octeon/ethernet-rgmii.c
index 8579f1670d1e..8704133fe127 100644
--- a/drivers/staging/octeon/ethernet-rgmii.c
+++ b/drivers/staging/octeon/ethernet-rgmii.c
@@ -33,7 +33,6 @@
 
 #include "ethernet-defines.h"
 #include "octeon-ethernet.h"
-#include "ethernet-common.h"
 #include "ethernet-util.h"
 
 #include "cvmx-helper.h"
@@ -265,7 +264,7 @@ static irqreturn_t cvm_oct_rgmii_rml_interrupt(int cpl, void *dev_id)
 	return return_status;
 }
 
-static int cvm_oct_rgmii_open(struct net_device *dev)
+int cvm_oct_rgmii_open(struct net_device *dev)
 {
 	union cvmx_gmxx_prtx_cfg gmx_cfg;
 	struct octeon_ethernet *priv = netdev_priv(dev);
@@ -286,7 +285,7 @@ static int cvm_oct_rgmii_open(struct net_device *dev)
 	return 0;
 }
 
-static int cvm_oct_rgmii_stop(struct net_device *dev)
+int cvm_oct_rgmii_stop(struct net_device *dev)
 {
 	union cvmx_gmxx_prtx_cfg gmx_cfg;
 	struct octeon_ethernet *priv = netdev_priv(dev);
@@ -305,9 +304,7 @@ int cvm_oct_rgmii_init(struct net_device *dev)
 	int r;
 
 	cvm_oct_common_init(dev);
-	dev->open = cvm_oct_rgmii_open;
-	dev->stop = cvm_oct_rgmii_stop;
-	dev->stop(dev);
+	dev->netdev_ops->ndo_stop(dev);
 
 	/*
 	 * Due to GMX errata in CN3XXX series chips, it is necessary
diff --git a/drivers/staging/octeon/ethernet-sgmii.c b/drivers/staging/octeon/ethernet-sgmii.c
index 58fa39c1d675..2b54996bd85d 100644
--- a/drivers/staging/octeon/ethernet-sgmii.c
+++ b/drivers/staging/octeon/ethernet-sgmii.c
@@ -34,13 +34,12 @@
 #include "ethernet-defines.h"
 #include "octeon-ethernet.h"
 #include "ethernet-util.h"
-#include "ethernet-common.h"
 
 #include "cvmx-helper.h"
 
 #include "cvmx-gmxx-defs.h"
 
-static int cvm_oct_sgmii_open(struct net_device *dev)
+int cvm_oct_sgmii_open(struct net_device *dev)
 {
 	union cvmx_gmxx_prtx_cfg gmx_cfg;
 	struct octeon_ethernet *priv = netdev_priv(dev);
@@ -61,7 +60,7 @@ static int cvm_oct_sgmii_open(struct net_device *dev)
 	return 0;
 }
 
-static int cvm_oct_sgmii_stop(struct net_device *dev)
+int cvm_oct_sgmii_stop(struct net_device *dev)
 {
 	union cvmx_gmxx_prtx_cfg gmx_cfg;
 	struct octeon_ethernet *priv = netdev_priv(dev);
@@ -113,9 +112,7 @@ int cvm_oct_sgmii_init(struct net_device *dev)
 {
 	struct octeon_ethernet *priv = netdev_priv(dev);
 	cvm_oct_common_init(dev);
-	dev->open = cvm_oct_sgmii_open;
-	dev->stop = cvm_oct_sgmii_stop;
-	dev->stop(dev);
+	dev->netdev_ops->ndo_stop(dev);
 	if (!octeon_is_simulation())
 		priv->poll = cvm_oct_sgmii_poll;
 
diff --git a/drivers/staging/octeon/ethernet-spi.c b/drivers/staging/octeon/ethernet-spi.c
index e0971bbe4ddc..66190b0cb68f 100644
--- a/drivers/staging/octeon/ethernet-spi.c
+++ b/drivers/staging/octeon/ethernet-spi.c
@@ -33,7 +33,6 @@
 
 #include "ethernet-defines.h"
 #include "octeon-ethernet.h"
-#include "ethernet-common.h"
 #include "ethernet-util.h"
 
 #include "cvmx-spi.h"
diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c
index 77b7122c8fdb..81a851390f1b 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -47,6 +47,7 @@
 
 #include "ethernet-defines.h"
 #include "octeon-ethernet.h"
+#include "ethernet-tx.h"
 #include "ethernet-util.h"
 
 #include "cvmx-wqe.h"
@@ -82,8 +83,10 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 	uint64_t old_scratch2;
 	int dropped;
 	int qos;
+	int queue_it_up;
 	struct octeon_ethernet *priv = netdev_priv(dev);
-	int32_t in_use;
+	int32_t skb_to_free;
+	int32_t undo;
 	int32_t buffers_to_free;
 #if REUSE_SKBUFFS_WITHOUT_FREE
 	unsigned char *fpa_head;
@@ -120,15 +123,15 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 		old_scratch2 = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8);
 
 		/*
-		 * Assume we're going to be able to send this
-		 * packet. Fetch and increment the number of pending
-		 * packets for output.
+		 * Fetch and increment the number of packets to be
+		 * freed.
 		 */
 		cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH + 8,
 					       FAU_NUM_PACKET_BUFFERS_TO_FREE,
 					       0);
 		cvmx_fau_async_fetch_and_add32(CVMX_SCR_SCRATCH,
-					       priv->fau + qos * 4, 1);
+					       priv->fau + qos * 4,
+					       MAX_SKB_TO_FREE);
 	}
 
 	/*
@@ -253,10 +256,10 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/*
 	 * The skbuff will be reused without ever being freed. We must
-	 * cleanup a bunch of Linux stuff.
+	 * cleanup a bunch of core things.
 	 */
-	dst_release(skb->dst);
-	skb->dst = NULL;
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, NULL);
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
 	skb->sp = NULL;
@@ -286,16 +289,30 @@ dont_put_skbuff_in_hw:
286 if (USE_ASYNC_IOBDMA) { 289 if (USE_ASYNC_IOBDMA) {
287 /* Get the number of skbuffs in use by the hardware */ 290 /* Get the number of skbuffs in use by the hardware */
288 CVMX_SYNCIOBDMA; 291 CVMX_SYNCIOBDMA;
289 in_use = cvmx_scratch_read64(CVMX_SCR_SCRATCH); 292 skb_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
290 buffers_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8); 293 buffers_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8);
291 } else { 294 } else {
292 /* Get the number of skbuffs in use by the hardware */ 295 /* Get the number of skbuffs in use by the hardware */
293 in_use = cvmx_fau_fetch_and_add32(priv->fau + qos * 4, 1); 296 skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
297 MAX_SKB_TO_FREE);
294 buffers_to_free = 298 buffers_to_free =
295 cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0); 299 cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
296 } 300 }
297 301
298 /* 302 /*
303 * We try to claim MAX_SKB_TO_FREE buffers. If there were not
304 * that many available, we have to un-claim (undo) any that
305 * were in excess. If skb_to_free is positive we will free
306 * that many buffers.
307 */
308 undo = skb_to_free > 0 ?
309 MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
310 if (undo > 0)
311 cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
312 skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
313 MAX_SKB_TO_FREE : -skb_to_free;
314
315 /*
299 * If we're sending faster than the receive can free them then 316 * If we're sending faster than the receive can free them then
300 * don't do the HW free. 317 * don't do the HW free.
301 */ 318 */
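
The new claim/undo sequence is easiest to see in isolation. A standalone model, assuming (as the comment above implies) that the per-queue FAU counter counts down below zero as skbs become freeable, so -counter is the number eligible; the model is single-threaded where the driver's fetch-and-add is atomic:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_SKB_TO_FREE 10              /* illustrative batch size */

/* Returns how many skbs the caller may free; adjusts *fau the way the
 * driver's fetch-and-add plus the undo add do. */
static int claim_skbs_to_free(int32_t *fau)
{
        int32_t skb_to_free, undo;

        skb_to_free = *fau;             /* fetch... */
        *fau += MAX_SKB_TO_FREE;        /* ...and add: claim a batch */

        undo = skb_to_free > 0 ?
                MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
        if (undo > 0)
                *fau -= undo;           /* give back the over-claim */

        return -skb_to_free > MAX_SKB_TO_FREE ?
                MAX_SKB_TO_FREE : -skb_to_free;
}

int main(void)
{
        int32_t fau;

        fau = -3;                       /* 3 skbs eligible */
        assert(claim_skbs_to_free(&fau) == 3 && fau == 0);

        fau = -25;                      /* more than one batch eligible */
        assert(claim_skbs_to_free(&fau) == MAX_SKB_TO_FREE && fau == -15);

        fau = 2;                        /* nothing eligible */
        assert(claim_skbs_to_free(&fau) <= 0 && fau == 2);

        puts("claim/undo arithmetic behaves as described");
        return 0;
}
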
@@ -330,38 +347,31 @@ dont_put_skbuff_in_hw:
330 cvmx_scratch_write64(CVMX_SCR_SCRATCH + 8, old_scratch2); 347 cvmx_scratch_write64(CVMX_SCR_SCRATCH + 8, old_scratch2);
331 } 348 }
332 349
350 queue_it_up = 0;
333 if (unlikely(dropped)) { 351 if (unlikely(dropped)) {
334 dev_kfree_skb_any(skb); 352 dev_kfree_skb_any(skb);
335 cvmx_fau_atomic_add32(priv->fau + qos * 4, -1);
336 priv->stats.tx_dropped++; 353 priv->stats.tx_dropped++;
337 } else { 354 } else {
338 if (USE_SKBUFFS_IN_HW) { 355 if (USE_SKBUFFS_IN_HW) {
339 /* Put this packet on the queue to be freed later */ 356 /* Put this packet on the queue to be freed later */
340 if (pko_command.s.dontfree) 357 if (pko_command.s.dontfree)
341 skb_queue_tail(&priv->tx_free_list[qos], skb); 358 queue_it_up = 1;
342 else { 359 else
343 cvmx_fau_atomic_add32 360 cvmx_fau_atomic_add32
344 (FAU_NUM_PACKET_BUFFERS_TO_FREE, -1); 361 (FAU_NUM_PACKET_BUFFERS_TO_FREE, -1);
345 cvmx_fau_atomic_add32(priv->fau + qos * 4, -1);
346 }
347 } else { 362 } else {
348 /* Put this packet on the queue to be freed later */ 363 /* Put this packet on the queue to be freed later */
349 skb_queue_tail(&priv->tx_free_list[qos], skb); 364 queue_it_up = 1;
350 } 365 }
351 } 366 }
352 367
353 /* Free skbuffs not in use by the hardware, possibly two at a time */ 368 if (queue_it_up) {
354 if (skb_queue_len(&priv->tx_free_list[qos]) > in_use) {
355 spin_lock(&priv->tx_free_list[qos].lock); 369 spin_lock(&priv->tx_free_list[qos].lock);
356 /* 370 __skb_queue_tail(&priv->tx_free_list[qos], skb);
357 * Check again now that we have the lock. It might 371 cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 0);
358 * have changed.
359 */
360 if (skb_queue_len(&priv->tx_free_list[qos]) > in_use)
361 dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
362 if (skb_queue_len(&priv->tx_free_list[qos]) > in_use)
363 dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
364 spin_unlock(&priv->tx_free_list[qos].lock); 372 spin_unlock(&priv->tx_free_list[qos].lock);
373 } else {
374 cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
365 } 375 }
366 376
367 return 0; 377 return 0;
diff --git a/drivers/staging/octeon/ethernet-tx.h b/drivers/staging/octeon/ethernet-tx.h
index 5106236fe981..c0bebf750bc0 100644
--- a/drivers/staging/octeon/ethernet-tx.h
+++ b/drivers/staging/octeon/ethernet-tx.h
@@ -30,3 +30,28 @@ int cvm_oct_xmit_pow(struct sk_buff *skb, struct net_device *dev);
30int cvm_oct_transmit_qos(struct net_device *dev, void *work_queue_entry, 30int cvm_oct_transmit_qos(struct net_device *dev, void *work_queue_entry,
31 int do_free, int qos); 31 int do_free, int qos);
32void cvm_oct_tx_shutdown(struct net_device *dev); 32void cvm_oct_tx_shutdown(struct net_device *dev);
33
34/**
35 * Free dead transmit skbs.
36 *
37 * @priv: The driver data
38 * @skb_to_free: The number of SKBs to free (free none if negative).
39 * @qos: The queue to free from.
40 * @take_lock: If true, acquire the skb list lock.
41 */
42static inline void cvm_oct_free_tx_skbs(struct octeon_ethernet *priv,
43 int skb_to_free,
44 int qos, int take_lock)
45{
46 /* Free skbuffs not in use by the hardware. */
47 if (skb_to_free > 0) {
48 if (take_lock)
49 spin_lock(&priv->tx_free_list[qos].lock);
50 while (skb_to_free > 0) {
51 dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
52 skb_to_free--;
53 }
54 if (take_lock)
55 spin_unlock(&priv->tx_free_list[qos].lock);
56 }
57}
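
cvm_oct_free_tx_skbs() serves both the xmit path, which already holds the queue lock, and callers that do not; the take_lock flag selects between them. A userspace model of that pattern, with a pthread spinlock and a plain counter standing in for the kernel spinlock and sk_buff list (build with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_spinlock_t q_lock;
static int q_len = 5;                   /* stand-in for skb_queue_len() */

static void free_tx_skbs(int skb_to_free, int take_lock)
{
        if (skb_to_free <= 0)           /* free none if not positive */
                return;
        if (take_lock)
                pthread_spin_lock(&q_lock);
        while (skb_to_free-- > 0 && q_len > 0)
                q_len--;                /* dev_kfree_skb(__skb_dequeue()) */
        if (take_lock)
                pthread_spin_unlock(&q_lock);
}

int main(void)
{
        pthread_spin_init(&q_lock, PTHREAD_PROCESS_PRIVATE);
        free_tx_skbs(3, 1);             /* caller does not hold the lock */
        printf("queue length now %d\n", q_len);
        pthread_spin_destroy(&q_lock);
        return 0;
}
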
diff --git a/drivers/staging/octeon/ethernet-xaui.c b/drivers/staging/octeon/ethernet-xaui.c
index f08eb32e04fc..0c2e7cc40f35 100644
--- a/drivers/staging/octeon/ethernet-xaui.c
+++ b/drivers/staging/octeon/ethernet-xaui.c
@@ -33,14 +33,13 @@
33 33
34#include "ethernet-defines.h" 34#include "ethernet-defines.h"
35#include "octeon-ethernet.h" 35#include "octeon-ethernet.h"
36#include "ethernet-common.h"
37#include "ethernet-util.h" 36#include "ethernet-util.h"
38 37
39#include "cvmx-helper.h" 38#include "cvmx-helper.h"
40 39
41#include "cvmx-gmxx-defs.h" 40#include "cvmx-gmxx-defs.h"
42 41
43static int cvm_oct_xaui_open(struct net_device *dev) 42int cvm_oct_xaui_open(struct net_device *dev)
44{ 43{
45 union cvmx_gmxx_prtx_cfg gmx_cfg; 44 union cvmx_gmxx_prtx_cfg gmx_cfg;
46 struct octeon_ethernet *priv = netdev_priv(dev); 45 struct octeon_ethernet *priv = netdev_priv(dev);
@@ -60,7 +59,7 @@ static int cvm_oct_xaui_open(struct net_device *dev)
60 return 0; 59 return 0;
61} 60}
62 61
63static int cvm_oct_xaui_stop(struct net_device *dev) 62int cvm_oct_xaui_stop(struct net_device *dev)
64{ 63{
65 union cvmx_gmxx_prtx_cfg gmx_cfg; 64 union cvmx_gmxx_prtx_cfg gmx_cfg;
66 struct octeon_ethernet *priv = netdev_priv(dev); 65 struct octeon_ethernet *priv = netdev_priv(dev);
@@ -112,9 +111,7 @@ int cvm_oct_xaui_init(struct net_device *dev)
112{ 111{
113 struct octeon_ethernet *priv = netdev_priv(dev); 112 struct octeon_ethernet *priv = netdev_priv(dev);
114 cvm_oct_common_init(dev); 113 cvm_oct_common_init(dev);
115 dev->open = cvm_oct_xaui_open; 114 dev->netdev_ops->ndo_stop(dev);
116 dev->stop = cvm_oct_xaui_stop;
117 dev->stop(dev);
118 if (!octeon_is_simulation()) 115 if (!octeon_is_simulation())
119 priv->poll = cvm_oct_xaui_poll; 116 priv->poll = cvm_oct_xaui_poll;
120 117
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index e8ef9e0b791f..b8479517dce2 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -37,13 +37,14 @@
37#include <asm/octeon/octeon.h> 37#include <asm/octeon/octeon.h>
38 38
39#include "ethernet-defines.h" 39#include "ethernet-defines.h"
40#include "octeon-ethernet.h"
40#include "ethernet-mem.h" 41#include "ethernet-mem.h"
41#include "ethernet-rx.h" 42#include "ethernet-rx.h"
42#include "ethernet-tx.h" 43#include "ethernet-tx.h"
44#include "ethernet-mdio.h"
43#include "ethernet-util.h" 45#include "ethernet-util.h"
44#include "ethernet-proc.h" 46#include "ethernet-proc.h"
45#include "ethernet-common.h" 47
46#include "octeon-ethernet.h"
47 48
48#include "cvmx-pip.h" 49#include "cvmx-pip.h"
49#include "cvmx-pko.h" 50#include "cvmx-pko.h"
@@ -51,6 +52,7 @@
51#include "cvmx-ipd.h" 52#include "cvmx-ipd.h"
52#include "cvmx-helper.h" 53#include "cvmx-helper.h"
53 54
55#include "cvmx-gmxx-defs.h"
54#include "cvmx-smix-defs.h" 56#include "cvmx-smix-defs.h"
55 57
56#if defined(CONFIG_CAVIUM_OCTEON_NUM_PACKET_BUFFERS) \ 58#if defined(CONFIG_CAVIUM_OCTEON_NUM_PACKET_BUFFERS) \
@@ -129,53 +131,55 @@ extern struct semaphore mdio_sem;
129 */ 131 */
130static void cvm_do_timer(unsigned long arg) 132static void cvm_do_timer(unsigned long arg)
131{ 133{
134 int32_t skb_to_free, undo;
135 int queues_per_port;
136 int qos;
137 struct octeon_ethernet *priv;
132 static int port; 138 static int port;
133 if (port < CVMX_PIP_NUM_INPUT_PORTS) {
134 if (cvm_oct_device[port]) {
135 int queues_per_port;
136 int qos;
137 struct octeon_ethernet *priv =
138 netdev_priv(cvm_oct_device[port]);
139 if (priv->poll) {
140 /* skip polling if we don't get the lock */
141 if (!down_trylock(&mdio_sem)) {
142 priv->poll(cvm_oct_device[port]);
143 up(&mdio_sem);
144 }
145 }
146 139
147 queues_per_port = cvmx_pko_get_num_queues(port); 140 if (port >= CVMX_PIP_NUM_INPUT_PORTS) {
148 /* Drain any pending packets in the free list */ 141 /*
149 for (qos = 0; qos < queues_per_port; qos++) { 142 * All ports have been polled. Start the next
150 if (skb_queue_len(&priv->tx_free_list[qos])) { 143 * iteration through the ports in one second.
151 spin_lock(&priv->tx_free_list[qos]. 144 */
152 lock);
153 while (skb_queue_len
154 (&priv->tx_free_list[qos]) >
155 cvmx_fau_fetch_and_add32(priv->
156 fau +
157 qos * 4,
158 0))
159 dev_kfree_skb(__skb_dequeue
160 (&priv->
161 tx_free_list
162 [qos]));
163 spin_unlock(&priv->tx_free_list[qos].
164 lock);
165 }
166 }
167 cvm_oct_device[port]->get_stats(cvm_oct_device[port]);
168 }
169 port++;
170 /* Poll the next port in a 50th of a second.
171 This spreads the polling of ports out a little bit */
172 mod_timer(&cvm_oct_poll_timer, jiffies + HZ / 50);
173 } else {
174 port = 0; 145 port = 0;
175 /* All ports have been polled. Start the next iteration through
176 the ports in one second */
177 mod_timer(&cvm_oct_poll_timer, jiffies + HZ); 146 mod_timer(&cvm_oct_poll_timer, jiffies + HZ);
147 return;
148 }
149 if (!cvm_oct_device[port])
150 goto out;
151
152 priv = netdev_priv(cvm_oct_device[port]);
153 if (priv->poll) {
154 /* skip polling if we don't get the lock */
155 if (!down_trylock(&mdio_sem)) {
156 priv->poll(cvm_oct_device[port]);
157 up(&mdio_sem);
158 }
178 } 159 }
160
161 queues_per_port = cvmx_pko_get_num_queues(port);
162 /* Drain any pending packets in the free list */
163 for (qos = 0; qos < queues_per_port; qos++) {
164 if (skb_queue_len(&priv->tx_free_list[qos]) == 0)
165 continue;
166 skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
167 MAX_SKB_TO_FREE);
168 undo = skb_to_free > 0 ?
169 MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
170 if (undo > 0)
171 cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
172 skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
173 MAX_SKB_TO_FREE : -skb_to_free;
174 cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
175 }
176 cvm_oct_device[port]->netdev_ops->ndo_get_stats(cvm_oct_device[port]);
177
178out:
179 port++;
180 /* Poll the next port in a 50th of a second.
181 This spreads the polling of ports out a little bit */
182 mod_timer(&cvm_oct_poll_timer, jiffies + HZ / 50);
179} 183}
180 184
181/** 185/**
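
The restructured timer keeps the original cadence: one port per tick at HZ/50, then a one-second pause once the sweep wraps. A compilable model of just the scheduling decision (the port count and millisecond delays are illustrative):

#include <stdio.h>

#define NUM_PORTS 4
#define FAST_MS   20    /* ~HZ/50: next port in a 50th of a second */
#define SLOW_MS 1000    /* ~HZ: restart the sweep after one second  */

static int port;

/* Returns the delay until the next invocation, mimicking mod_timer(). */
static int do_timer_tick(void)
{
        if (port >= NUM_PORTS) {
                port = 0;               /* sweep done: rest, then restart */
                return SLOW_MS;
        }
        printf("poll port %d\n", port);
        port++;
        return FAST_MS;
}

int main(void)
{
        for (int i = 0; i < NUM_PORTS + 2; i++)
                printf("next tick in %d ms\n", do_timer_tick());
        return 0;
}
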
@@ -246,6 +250,362 @@ int cvm_oct_free_work(void *work_queue_entry)
246EXPORT_SYMBOL(cvm_oct_free_work); 250EXPORT_SYMBOL(cvm_oct_free_work);
247 251
248/** 252/**
253 * Get the low level ethernet statistics
254 *
255 * @dev: Device to get the statistics from
256 * Returns Pointer to the statistics
257 */
258static struct net_device_stats *cvm_oct_common_get_stats(struct net_device *dev)
259{
260 cvmx_pip_port_status_t rx_status;
261 cvmx_pko_port_status_t tx_status;
262 struct octeon_ethernet *priv = netdev_priv(dev);
263
264 if (priv->port < CVMX_PIP_NUM_INPUT_PORTS) {
265 if (octeon_is_simulation()) {
266 /* The simulator doesn't support statistics */
267 memset(&rx_status, 0, sizeof(rx_status));
268 memset(&tx_status, 0, sizeof(tx_status));
269 } else {
270 cvmx_pip_get_port_status(priv->port, 1, &rx_status);
271 cvmx_pko_get_port_status(priv->port, 1, &tx_status);
272 }
273
274 priv->stats.rx_packets += rx_status.inb_packets;
275 priv->stats.tx_packets += tx_status.packets;
276 priv->stats.rx_bytes += rx_status.inb_octets;
277 priv->stats.tx_bytes += tx_status.octets;
278 priv->stats.multicast += rx_status.multicast_packets;
279 priv->stats.rx_crc_errors += rx_status.inb_errors;
280 priv->stats.rx_frame_errors += rx_status.fcs_align_err_packets;
281
282 /*
283 * The drop counter must be incremented atomically
284 * since the RX tasklet also increments it.
285 */
286#ifdef CONFIG_64BIT
287 atomic64_add(rx_status.dropped_packets,
288 (atomic64_t *)&priv->stats.rx_dropped);
289#else
290 atomic_add(rx_status.dropped_packets,
291 (atomic_t *)&priv->stats.rx_dropped);
292#endif
293 }
294
295 return &priv->stats;
296}
297
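
The atomic accumulation above exists because two contexts write priv->stats.rx_dropped: the RX tasklet and this get_stats path. A standalone sketch with C11 atomics standing in for the kernel's atomic64_add():

#include <stdatomic.h>
#include <stdio.h>

static _Atomic long rx_dropped;

static void rx_path_drop(void)          /* e.g. RX tasklet context */
{
        atomic_fetch_add(&rx_dropped, 1);
}

static void fold_hw_counter(long hw_dropped)    /* get_stats context */
{
        atomic_fetch_add(&rx_dropped, hw_dropped);
}

int main(void)
{
        rx_path_drop();
        fold_hw_counter(41);
        printf("rx_dropped = %ld\n", atomic_load(&rx_dropped));
        return 0;
}
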
298/**
299 * Change the link MTU. Unimplemented
300 *
301 * @dev: Device to change
302 * @new_mtu: The new MTU
303 *
304 * Returns Zero on success
305 */
306static int cvm_oct_common_change_mtu(struct net_device *dev, int new_mtu)
307{
308 struct octeon_ethernet *priv = netdev_priv(dev);
309 int interface = INTERFACE(priv->port);
310 int index = INDEX(priv->port);
311#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
312 int vlan_bytes = 4;
313#else
314 int vlan_bytes = 0;
315#endif
316
317 /*
318 * Limit the MTU to make sure the ethernet packets are between
319 * 64 bytes and 65535 bytes.
320 */
321 if ((new_mtu + 14 + 4 + vlan_bytes < 64)
322 || (new_mtu + 14 + 4 + vlan_bytes > 65392)) {
323 pr_err("MTU must be between %d and %d.\n",
324 64 - 14 - 4 - vlan_bytes, 65392 - 14 - 4 - vlan_bytes);
325 return -EINVAL;
326 }
327 dev->mtu = new_mtu;
328
329 if ((interface < 2)
330 && (cvmx_helper_interface_get_mode(interface) !=
331 CVMX_HELPER_INTERFACE_MODE_SPI)) {
332 /* Add ethernet header and FCS, and VLAN if configured. */
333 int max_packet = new_mtu + 14 + 4 + vlan_bytes;
334
335 if (OCTEON_IS_MODEL(OCTEON_CN3XXX)
336 || OCTEON_IS_MODEL(OCTEON_CN58XX)) {
337 /* Signal errors on packets larger than the MTU */
338 cvmx_write_csr(CVMX_GMXX_RXX_FRM_MAX(index, interface),
339 max_packet);
340 } else {
341 /*
342 * Set the hardware to truncate packets larger
 343 * than the MTU and smaller than 64 bytes.
344 */
345 union cvmx_pip_frm_len_chkx frm_len_chk;
346 frm_len_chk.u64 = 0;
347 frm_len_chk.s.minlen = 64;
348 frm_len_chk.s.maxlen = max_packet;
349 cvmx_write_csr(CVMX_PIP_FRM_LEN_CHKX(interface),
350 frm_len_chk.u64);
351 }
352 /*
353 * Set the hardware to truncate packets larger than
354 * the MTU. The jabber register must be set to a
355 * multiple of 8 bytes, so round up.
356 */
357 cvmx_write_csr(CVMX_GMXX_RXX_JABBER(index, interface),
358 (max_packet + 7) & ~7u);
359 }
360 return 0;
361}
362
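
The recurring new_mtu + 14 + 4 + vlan_bytes is the on-wire frame size: payload plus the 14-byte Ethernet header, the 4-byte FCS, and an optional 4-byte VLAN tag; the jabber limit is that value rounded up to a multiple of 8. A worked check:

#include <assert.h>
#include <stdio.h>

#define ETH_HDR 14
#define FCS      4

static int max_packet(int mtu, int vlan_bytes)
{
        return mtu + ETH_HDR + FCS + vlan_bytes;
}

static unsigned int jabber(int max_pkt)
{
        return (max_pkt + 7) & ~7u;     /* round up to 8-byte multiple */
}

int main(void)
{
        assert(max_packet(1500, 0) == 1518);    /* classic Ethernet */
        assert(max_packet(1500, 4) == 1522);    /* with a VLAN tag */
        assert(jabber(1518) == 1520);
        printf("ok\n");
        return 0;
}
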
363/**
364 * Set the multicast list. Currently unimplemented.
365 *
366 * @dev: Device to work on
367 */
368static void cvm_oct_common_set_multicast_list(struct net_device *dev)
369{
370 union cvmx_gmxx_prtx_cfg gmx_cfg;
371 struct octeon_ethernet *priv = netdev_priv(dev);
372 int interface = INTERFACE(priv->port);
373 int index = INDEX(priv->port);
374
375 if ((interface < 2)
376 && (cvmx_helper_interface_get_mode(interface) !=
377 CVMX_HELPER_INTERFACE_MODE_SPI)) {
378 union cvmx_gmxx_rxx_adr_ctl control;
379 control.u64 = 0;
380 control.s.bcst = 1; /* Allow broadcast MAC addresses */
381
382 if (dev->mc_list || (dev->flags & IFF_ALLMULTI) ||
383 (dev->flags & IFF_PROMISC))
384 /* Force accept multicast packets */
385 control.s.mcst = 2;
386 else
 387 /* Force reject multicast packets */
388 control.s.mcst = 1;
389
390 if (dev->flags & IFF_PROMISC)
391 /*
392 * Reject matches if promisc. Since CAM is
393 * shut off, should accept everything.
394 */
395 control.s.cam_mode = 0;
396 else
397 /* Filter packets based on the CAM */
398 control.s.cam_mode = 1;
399
400 gmx_cfg.u64 =
401 cvmx_read_csr(CVMX_GMXX_PRTX_CFG(index, interface));
402 cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
403 gmx_cfg.u64 & ~1ull);
404
405 cvmx_write_csr(CVMX_GMXX_RXX_ADR_CTL(index, interface),
406 control.u64);
407 if (dev->flags & IFF_PROMISC)
408 cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM_EN
409 (index, interface), 0);
410 else
411 cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM_EN
412 (index, interface), 1);
413
414 cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
415 gmx_cfg.u64);
416 }
417}
418
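
The RX filter choice above reduces to a small decision table. The sketch below restates it outside the register-poking; the mcst/cam_mode encodings are taken from the comments in the patch, and the helper itself is hypothetical:

#include <stdio.h>

struct rx_filter { int mcst; int cam_mode; };

static struct rx_filter pick_filter(int promisc, int allmulti, int have_mc)
{
        struct rx_filter f;

        /* 2 = force accept multicast, 1 = force reject multicast */
        f.mcst = (have_mc || allmulti || promisc) ? 2 : 1;
        /* 0 = CAM off (accept everything), 1 = filter on the CAM */
        f.cam_mode = promisc ? 0 : 1;
        return f;
}

int main(void)
{
        struct rx_filter f = pick_filter(1, 0, 0);

        printf("promisc: mcst=%d cam_mode=%d\n", f.mcst, f.cam_mode);
        return 0;
}
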
419/**
420 * Set the hardware MAC address for a device
421 *
422 * @dev: Device to change the MAC address for
 423 * @addr: Address structure to change it to. MAC address is addr + 2.
424 * Returns Zero on success
425 */
426static int cvm_oct_common_set_mac_address(struct net_device *dev, void *addr)
427{
428 struct octeon_ethernet *priv = netdev_priv(dev);
429 union cvmx_gmxx_prtx_cfg gmx_cfg;
430 int interface = INTERFACE(priv->port);
431 int index = INDEX(priv->port);
432
433 memcpy(dev->dev_addr, addr + 2, 6);
434
435 if ((interface < 2)
436 && (cvmx_helper_interface_get_mode(interface) !=
437 CVMX_HELPER_INTERFACE_MODE_SPI)) {
438 int i;
439 uint8_t *ptr = addr;
440 uint64_t mac = 0;
441 for (i = 0; i < 6; i++)
442 mac = (mac << 8) | (uint64_t) (ptr[i + 2]);
443
444 gmx_cfg.u64 =
445 cvmx_read_csr(CVMX_GMXX_PRTX_CFG(index, interface));
446 cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
447 gmx_cfg.u64 & ~1ull);
448
449 cvmx_write_csr(CVMX_GMXX_SMACX(index, interface), mac);
450 cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM0(index, interface),
451 ptr[2]);
452 cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM1(index, interface),
453 ptr[3]);
454 cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM2(index, interface),
455 ptr[4]);
456 cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM3(index, interface),
457 ptr[5]);
458 cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM4(index, interface),
459 ptr[6]);
460 cvmx_write_csr(CVMX_GMXX_RXX_ADR_CAM5(index, interface),
461 ptr[7]);
462 cvm_oct_common_set_multicast_list(dev);
463 cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface),
464 gmx_cfg.u64);
465 }
466 return 0;
467}
468
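
The byte loop above packs the six MAC octets, which start at addr + 2 per the kerneldoc, big-endian into one 64-bit CSR value. A standalone check of that packing:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t pack_mac(const uint8_t *ptr)    /* ptr = addr */
{
        uint64_t mac = 0;

        for (int i = 0; i < 6; i++)
                mac = (mac << 8) | (uint64_t)ptr[i + 2];
        return mac;
}

int main(void)
{
        const uint8_t addr[8] = { 0, 0, 0x02, 0xDE, 0xAD, 0xBE, 0xEF, 0x01 };

        assert(pack_mac(addr) == 0x02DEADBEEF01ULL);
        printf("mac = %012llx\n", (unsigned long long)pack_mac(addr));
        return 0;
}
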
469/**
470 * Per network device initialization
471 *
472 * @dev: Device to initialize
473 * Returns Zero on success
474 */
475int cvm_oct_common_init(struct net_device *dev)
476{
477 static int count;
478 char mac[8] = { 0x00, 0x00,
479 octeon_bootinfo->mac_addr_base[0],
480 octeon_bootinfo->mac_addr_base[1],
481 octeon_bootinfo->mac_addr_base[2],
482 octeon_bootinfo->mac_addr_base[3],
483 octeon_bootinfo->mac_addr_base[4],
484 octeon_bootinfo->mac_addr_base[5] + count
485 };
486 struct octeon_ethernet *priv = netdev_priv(dev);
487
488 /*
489 * Force the interface to use the POW send if always_use_pow
490 * was specified or it is in the pow send list.
491 */
492 if ((pow_send_group != -1)
493 && (always_use_pow || strstr(pow_send_list, dev->name)))
494 priv->queue = -1;
495
496 if (priv->queue != -1 && USE_HW_TCPUDP_CHECKSUM)
497 dev->features |= NETIF_F_IP_CSUM;
498
499 count++;
500
501 /* We do our own locking, Linux doesn't need to */
502 dev->features |= NETIF_F_LLTX;
503 SET_ETHTOOL_OPS(dev, &cvm_oct_ethtool_ops);
504
505 cvm_oct_mdio_setup_device(dev);
506 dev->netdev_ops->ndo_set_mac_address(dev, mac);
507 dev->netdev_ops->ndo_change_mtu(dev, dev->mtu);
508
509 /*
510 * Zero out stats for port so we won't mistakenly show
511 * counters from the bootloader.
512 */
513 memset(dev->netdev_ops->ndo_get_stats(dev), 0,
514 sizeof(struct net_device_stats));
515
516 return 0;
517}
518
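
cvm_oct_common_init() derives each port's MAC from the bootloader's base address by adding a running count to the final octet. A sketch of that derivation; note that, like the driver, it does not carry into higher octets, so it assumes the base address leaves headroom in the last byte:

#include <stdio.h>

static void derive_mac(const unsigned char base[6], int count,
                       unsigned char out[6])
{
        for (int i = 0; i < 5; i++)
                out[i] = base[i];
        out[5] = base[5] + count;       /* wraps at 256, no carry */
}

int main(void)
{
        const unsigned char base[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0xf0 };
        unsigned char mac[6];

        derive_mac(base, 3, mac);
        printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
               mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
        return 0;
}
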
519void cvm_oct_common_uninit(struct net_device *dev)
520{
521 /* Currently nothing to do */
522}
523
524static const struct net_device_ops cvm_oct_npi_netdev_ops = {
525 .ndo_init = cvm_oct_common_init,
526 .ndo_uninit = cvm_oct_common_uninit,
527 .ndo_start_xmit = cvm_oct_xmit,
528 .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
529 .ndo_set_mac_address = cvm_oct_common_set_mac_address,
530 .ndo_do_ioctl = cvm_oct_ioctl,
531 .ndo_change_mtu = cvm_oct_common_change_mtu,
532 .ndo_get_stats = cvm_oct_common_get_stats,
533#ifdef CONFIG_NET_POLL_CONTROLLER
534 .ndo_poll_controller = cvm_oct_poll_controller,
535#endif
536};
537static const struct net_device_ops cvm_oct_xaui_netdev_ops = {
538 .ndo_init = cvm_oct_xaui_init,
539 .ndo_uninit = cvm_oct_xaui_uninit,
540 .ndo_open = cvm_oct_xaui_open,
541 .ndo_stop = cvm_oct_xaui_stop,
542 .ndo_start_xmit = cvm_oct_xmit,
543 .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
544 .ndo_set_mac_address = cvm_oct_common_set_mac_address,
545 .ndo_do_ioctl = cvm_oct_ioctl,
546 .ndo_change_mtu = cvm_oct_common_change_mtu,
547 .ndo_get_stats = cvm_oct_common_get_stats,
548#ifdef CONFIG_NET_POLL_CONTROLLER
549 .ndo_poll_controller = cvm_oct_poll_controller,
550#endif
551};
552static const struct net_device_ops cvm_oct_sgmii_netdev_ops = {
553 .ndo_init = cvm_oct_sgmii_init,
554 .ndo_uninit = cvm_oct_sgmii_uninit,
555 .ndo_open = cvm_oct_sgmii_open,
556 .ndo_stop = cvm_oct_sgmii_stop,
557 .ndo_start_xmit = cvm_oct_xmit,
558 .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
559 .ndo_set_mac_address = cvm_oct_common_set_mac_address,
560 .ndo_do_ioctl = cvm_oct_ioctl,
561 .ndo_change_mtu = cvm_oct_common_change_mtu,
562 .ndo_get_stats = cvm_oct_common_get_stats,
563#ifdef CONFIG_NET_POLL_CONTROLLER
564 .ndo_poll_controller = cvm_oct_poll_controller,
565#endif
566};
567static const struct net_device_ops cvm_oct_spi_netdev_ops = {
568 .ndo_init = cvm_oct_spi_init,
569 .ndo_uninit = cvm_oct_spi_uninit,
570 .ndo_start_xmit = cvm_oct_xmit,
571 .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
572 .ndo_set_mac_address = cvm_oct_common_set_mac_address,
573 .ndo_do_ioctl = cvm_oct_ioctl,
574 .ndo_change_mtu = cvm_oct_common_change_mtu,
575 .ndo_get_stats = cvm_oct_common_get_stats,
576#ifdef CONFIG_NET_POLL_CONTROLLER
577 .ndo_poll_controller = cvm_oct_poll_controller,
578#endif
579};
580static const struct net_device_ops cvm_oct_rgmii_netdev_ops = {
581 .ndo_init = cvm_oct_rgmii_init,
582 .ndo_uninit = cvm_oct_rgmii_uninit,
583 .ndo_open = cvm_oct_rgmii_open,
584 .ndo_stop = cvm_oct_rgmii_stop,
585 .ndo_start_xmit = cvm_oct_xmit,
586 .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
587 .ndo_set_mac_address = cvm_oct_common_set_mac_address,
588 .ndo_do_ioctl = cvm_oct_ioctl,
589 .ndo_change_mtu = cvm_oct_common_change_mtu,
590 .ndo_get_stats = cvm_oct_common_get_stats,
591#ifdef CONFIG_NET_POLL_CONTROLLER
592 .ndo_poll_controller = cvm_oct_poll_controller,
593#endif
594};
595static const struct net_device_ops cvm_oct_pow_netdev_ops = {
596 .ndo_init = cvm_oct_common_init,
597 .ndo_start_xmit = cvm_oct_xmit_pow,
598 .ndo_set_multicast_list = cvm_oct_common_set_multicast_list,
599 .ndo_set_mac_address = cvm_oct_common_set_mac_address,
600 .ndo_do_ioctl = cvm_oct_ioctl,
601 .ndo_change_mtu = cvm_oct_common_change_mtu,
602 .ndo_get_stats = cvm_oct_common_get_stats,
603#ifdef CONFIG_NET_POLL_CONTROLLER
604 .ndo_poll_controller = cvm_oct_poll_controller,
605#endif
606};
607
608/**
249 * Module/ driver initialization. Creates the linux network 609 * Module/ driver initialization. Creates the linux network
250 * devices. 610 * devices.
251 * 611 *
@@ -303,7 +663,7 @@ static int __init cvm_oct_init_module(void)
303 struct octeon_ethernet *priv = netdev_priv(dev); 663 struct octeon_ethernet *priv = netdev_priv(dev);
304 memset(priv, 0, sizeof(struct octeon_ethernet)); 664 memset(priv, 0, sizeof(struct octeon_ethernet));
305 665
306 dev->init = cvm_oct_common_init; 666 dev->netdev_ops = &cvm_oct_pow_netdev_ops;
307 priv->imode = CVMX_HELPER_INTERFACE_MODE_DISABLED; 667 priv->imode = CVMX_HELPER_INTERFACE_MODE_DISABLED;
308 priv->port = CVMX_PIP_NUM_INPUT_PORTS; 668 priv->port = CVMX_PIP_NUM_INPUT_PORTS;
309 priv->queue = -1; 669 priv->queue = -1;
@@ -372,44 +732,38 @@ static int __init cvm_oct_init_module(void)
372 break; 732 break;
373 733
374 case CVMX_HELPER_INTERFACE_MODE_NPI: 734 case CVMX_HELPER_INTERFACE_MODE_NPI:
375 dev->init = cvm_oct_common_init; 735 dev->netdev_ops = &cvm_oct_npi_netdev_ops;
376 dev->uninit = cvm_oct_common_uninit;
377 strcpy(dev->name, "npi%d"); 736 strcpy(dev->name, "npi%d");
378 break; 737 break;
379 738
380 case CVMX_HELPER_INTERFACE_MODE_XAUI: 739 case CVMX_HELPER_INTERFACE_MODE_XAUI:
381 dev->init = cvm_oct_xaui_init; 740 dev->netdev_ops = &cvm_oct_xaui_netdev_ops;
382 dev->uninit = cvm_oct_xaui_uninit;
383 strcpy(dev->name, "xaui%d"); 741 strcpy(dev->name, "xaui%d");
384 break; 742 break;
385 743
386 case CVMX_HELPER_INTERFACE_MODE_LOOP: 744 case CVMX_HELPER_INTERFACE_MODE_LOOP:
387 dev->init = cvm_oct_common_init; 745 dev->netdev_ops = &cvm_oct_npi_netdev_ops;
388 dev->uninit = cvm_oct_common_uninit;
389 strcpy(dev->name, "loop%d"); 746 strcpy(dev->name, "loop%d");
390 break; 747 break;
391 748
392 case CVMX_HELPER_INTERFACE_MODE_SGMII: 749 case CVMX_HELPER_INTERFACE_MODE_SGMII:
393 dev->init = cvm_oct_sgmii_init; 750 dev->netdev_ops = &cvm_oct_sgmii_netdev_ops;
394 dev->uninit = cvm_oct_sgmii_uninit;
395 strcpy(dev->name, "eth%d"); 751 strcpy(dev->name, "eth%d");
396 break; 752 break;
397 753
398 case CVMX_HELPER_INTERFACE_MODE_SPI: 754 case CVMX_HELPER_INTERFACE_MODE_SPI:
399 dev->init = cvm_oct_spi_init; 755 dev->netdev_ops = &cvm_oct_spi_netdev_ops;
400 dev->uninit = cvm_oct_spi_uninit;
401 strcpy(dev->name, "spi%d"); 756 strcpy(dev->name, "spi%d");
402 break; 757 break;
403 758
404 case CVMX_HELPER_INTERFACE_MODE_RGMII: 759 case CVMX_HELPER_INTERFACE_MODE_RGMII:
405 case CVMX_HELPER_INTERFACE_MODE_GMII: 760 case CVMX_HELPER_INTERFACE_MODE_GMII:
406 dev->init = cvm_oct_rgmii_init; 761 dev->netdev_ops = &cvm_oct_rgmii_netdev_ops;
407 dev->uninit = cvm_oct_rgmii_uninit;
408 strcpy(dev->name, "eth%d"); 762 strcpy(dev->name, "eth%d");
409 break; 763 break;
410 } 764 }
411 765
412 if (!dev->init) { 766 if (!dev->netdev_ops) {
413 kfree(dev); 767 kfree(dev);
414 } else if (register_netdev(dev) < 0) { 768 } else if (register_netdev(dev) < 0) {
415 pr_err("Failed to register ethernet device " 769 pr_err("Failed to register ethernet device "
diff --git a/drivers/staging/octeon/octeon-ethernet.h b/drivers/staging/octeon/octeon-ethernet.h
index b3199076ef5e..3aef9878fc0a 100644
--- a/drivers/staging/octeon/octeon-ethernet.h
+++ b/drivers/staging/octeon/octeon-ethernet.h
@@ -111,12 +111,23 @@ static inline int cvm_oct_transmit(struct net_device *dev,
111 111
112extern int cvm_oct_rgmii_init(struct net_device *dev); 112extern int cvm_oct_rgmii_init(struct net_device *dev);
113extern void cvm_oct_rgmii_uninit(struct net_device *dev); 113extern void cvm_oct_rgmii_uninit(struct net_device *dev);
114extern int cvm_oct_rgmii_open(struct net_device *dev);
115extern int cvm_oct_rgmii_stop(struct net_device *dev);
116
114extern int cvm_oct_sgmii_init(struct net_device *dev); 117extern int cvm_oct_sgmii_init(struct net_device *dev);
115extern void cvm_oct_sgmii_uninit(struct net_device *dev); 118extern void cvm_oct_sgmii_uninit(struct net_device *dev);
119extern int cvm_oct_sgmii_open(struct net_device *dev);
120extern int cvm_oct_sgmii_stop(struct net_device *dev);
121
116extern int cvm_oct_spi_init(struct net_device *dev); 122extern int cvm_oct_spi_init(struct net_device *dev);
117extern void cvm_oct_spi_uninit(struct net_device *dev); 123extern void cvm_oct_spi_uninit(struct net_device *dev);
118extern int cvm_oct_xaui_init(struct net_device *dev); 124extern int cvm_oct_xaui_init(struct net_device *dev);
119extern void cvm_oct_xaui_uninit(struct net_device *dev); 125extern void cvm_oct_xaui_uninit(struct net_device *dev);
126extern int cvm_oct_xaui_open(struct net_device *dev);
127extern int cvm_oct_xaui_stop(struct net_device *dev);
128
129extern int cvm_oct_common_init(struct net_device *dev);
130extern void cvm_oct_common_uninit(struct net_device *dev);
120 131
121extern int always_use_pow; 132extern int always_use_pow;
122extern int pow_send_group; 133extern int pow_send_group;
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 38bfdb0f6660..3f1045993474 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -550,7 +550,7 @@ static void acm_waker(struct work_struct *waker)
550static int acm_tty_open(struct tty_struct *tty, struct file *filp) 550static int acm_tty_open(struct tty_struct *tty, struct file *filp)
551{ 551{
552 struct acm *acm; 552 struct acm *acm;
553 int rv = -EINVAL; 553 int rv = -ENODEV;
554 int i; 554 int i;
555 dbg("Entering acm_tty_open."); 555 dbg("Entering acm_tty_open.");
556 556
@@ -677,7 +677,7 @@ static void acm_tty_close(struct tty_struct *tty, struct file *filp)
677 677
678 /* Perform the closing process and see if we need to do the hardware 678 /* Perform the closing process and see if we need to do the hardware
679 shutdown */ 679 shutdown */
680 if (tty_port_close_start(&acm->port, tty, filp) == 0) 680 if (!acm || tty_port_close_start(&acm->port, tty, filp) == 0)
681 return; 681 return;
682 acm_port_down(acm, 0); 682 acm_port_down(acm, 0);
683 tty_port_close_end(&acm->port, tty); 683 tty_port_close_end(&acm->port, tty);
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index d595aa5586a7..a84216464ca0 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -333,6 +333,9 @@ static void serial_close(struct tty_struct *tty, struct file *filp)
333{ 333{
334 struct usb_serial_port *port = tty->driver_data; 334 struct usb_serial_port *port = tty->driver_data;
335 335
336 if (!port)
337 return;
338
336 dbg("%s - port %d", __func__, port->number); 339 dbg("%s - port %d", __func__, port->number);
337 340
338 341
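
Both guards above (cdc-acm and usb-serial) close the same hole: after a failed open or a disconnect, the tty layer can still call close() with a NULL driver_data. A minimal reproduction of the pattern:

#include <stdio.h>

struct port { int number; };
struct tty { struct port *driver_data; };

static void serial_close(struct tty *tty)
{
        struct port *port = tty->driver_data;

        if (!port)                      /* open() failed or device gone */
                return;
        printf("closing port %d\n", port->number);
}

int main(void)
{
        struct tty dead = { .driver_data = NULL };

        serial_close(&dead);            /* would oops without the guard */
        puts("survived close on a dead tty");
        return 0;
}
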
diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c
index 559f8784acf3..9052bcb4f528 100644
--- a/drivers/uwb/hwa-rc.c
+++ b/drivers/uwb/hwa-rc.c
@@ -501,7 +501,7 @@ int hwarc_filter_event_WUSB_0100(struct uwb_rc *rc, struct uwb_rceb **header,
501 int result = -ENOANO; 501 int result = -ENOANO;
502 struct uwb_rceb *rceb = *header; 502 struct uwb_rceb *rceb = *header;
503 int event = le16_to_cpu(rceb->wEvent); 503 int event = le16_to_cpu(rceb->wEvent);
504 size_t event_size; 504 ssize_t event_size;
505 size_t core_size, offset; 505 size_t core_size, offset;
506 506
507 if (rceb->bEventType != UWB_RC_CET_GENERAL) 507 if (rceb->bEventType != UWB_RC_CET_GENERAL)
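
The size_t-to-ssize_t change matters because event_size can receive negative error codes, presumably from the event-size lookup; with an unsigned type, every subsequent "< 0" check is dead code. A standalone demonstration (compilers typically warn on the unsigned comparison, which is exactly the bug):

#include <stdio.h>
#include <sys/types.h>

static ssize_t lookup_event_size(int event)
{
        return event == 0 ? 32 : -1;    /* -1: unknown event */
}

int main(void)
{
        size_t  wrong = lookup_event_size(99);  /* -1 becomes SIZE_MAX */
        ssize_t right = lookup_event_size(99);

        if (!(wrong < 0))               /* always true for unsigned */
                puts("size_t silently swallows the error");
        if (right < 0)
                puts("ssize_t preserves it");
        return 0;
}
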
diff --git a/drivers/uwb/wlp/txrx.c b/drivers/uwb/wlp/txrx.c
index cd2035768b47..86a853b84119 100644
--- a/drivers/uwb/wlp/txrx.c
+++ b/drivers/uwb/wlp/txrx.c
@@ -326,7 +326,7 @@ int wlp_prepare_tx_frame(struct device *dev, struct wlp *wlp,
326 int result = -EINVAL; 326 int result = -EINVAL;
327 struct ethhdr *eth_hdr = (void *) skb->data; 327 struct ethhdr *eth_hdr = (void *) skb->data;
328 328
329 if (is_broadcast_ether_addr(eth_hdr->h_dest)) { 329 if (is_multicast_ether_addr(eth_hdr->h_dest)) {
330 result = wlp_eda_for_each(&wlp->eda, wlp_wss_send_copy, skb); 330 result = wlp_eda_for_each(&wlp->eda, wlp_wss_send_copy, skb);
331 if (result < 0) { 331 if (result < 0) {
332 if (printk_ratelimit()) 332 if (printk_ratelimit())
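
The predicate swap above widens the match from the single broadcast address ff:ff:ff:ff:ff:ff to every address with the group bit set, which is what the copy-to-all path actually needs. Standalone versions of the two predicates show that broadcast is a subset of multicast:

#include <assert.h>
#include <stdio.h>

static int is_multicast(const unsigned char *a)
{
        return a[0] & 0x01;             /* group bit of the first octet */
}

static int is_broadcast(const unsigned char *a)
{
        for (int i = 0; i < 6; i++)
                if (a[i] != 0xff)
                        return 0;
        return 1;
}

int main(void)
{
        const unsigned char bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
        const unsigned char mcast[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };

        assert(is_broadcast(bcast) && is_multicast(bcast));
        assert(!is_broadcast(mcast) && is_multicast(mcast));
        puts("broadcast is a subset of multicast");
        return 0;
}
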
diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c
index 1f09d4e4144c..59f708efe25f 100644
--- a/drivers/w1/slaves/w1_ds2760.c
+++ b/drivers/w1/slaves/w1_ds2760.c
@@ -68,6 +68,34 @@ int w1_ds2760_write(struct device *dev, char *buf, int addr, size_t count)
68 return w1_ds2760_io(dev, buf, addr, count, 1); 68 return w1_ds2760_io(dev, buf, addr, count, 1);
69} 69}
70 70
71static int w1_ds2760_eeprom_cmd(struct device *dev, int addr, int cmd)
72{
73 struct w1_slave *sl = container_of(dev, struct w1_slave, dev);
74
75 if (!dev)
76 return -EINVAL;
77
78 mutex_lock(&sl->master->mutex);
79
80 if (w1_reset_select_slave(sl) == 0) {
81 w1_write_8(sl->master, cmd);
82 w1_write_8(sl->master, addr);
83 }
84
85 mutex_unlock(&sl->master->mutex);
86 return 0;
87}
88
89int w1_ds2760_store_eeprom(struct device *dev, int addr)
90{
91 return w1_ds2760_eeprom_cmd(dev, addr, W1_DS2760_COPY_DATA);
92}
93
94int w1_ds2760_recall_eeprom(struct device *dev, int addr)
95{
96 return w1_ds2760_eeprom_cmd(dev, addr, W1_DS2760_RECALL_DATA);
97}
98
71static ssize_t w1_ds2760_read_bin(struct kobject *kobj, 99static ssize_t w1_ds2760_read_bin(struct kobject *kobj,
72 struct bin_attribute *bin_attr, 100 struct bin_attribute *bin_attr,
73 char *buf, loff_t off, size_t count) 101 char *buf, loff_t off, size_t count)
@@ -200,6 +228,8 @@ static void __exit w1_ds2760_exit(void)
200 228
201EXPORT_SYMBOL(w1_ds2760_read); 229EXPORT_SYMBOL(w1_ds2760_read);
202EXPORT_SYMBOL(w1_ds2760_write); 230EXPORT_SYMBOL(w1_ds2760_write);
231EXPORT_SYMBOL(w1_ds2760_store_eeprom);
232EXPORT_SYMBOL(w1_ds2760_recall_eeprom);
203 233
204module_init(w1_ds2760_init); 234module_init(w1_ds2760_init);
205module_exit(w1_ds2760_exit); 235module_exit(w1_ds2760_exit);
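
w1_ds2760_eeprom_cmd() serializes a fixed bus sequence: reset and select the slave, write the command byte, write the target address. A standalone model with stubbed bus I/O; the 0x48/0xb8 command values are stand-ins modeled on common Dallas 1-Wire commands and are not confirmed by this patch:

#include <stdio.h>

#define COPY_DATA   0x48        /* stand-in for W1_DS2760_COPY_DATA */
#define RECALL_DATA 0xb8        /* stand-in for W1_DS2760_RECALL_DATA */

static int reset_select_slave(void) { puts("reset + select"); return 0; }
static void write_8(int byte) { printf("write 0x%02x\n", byte); }

static int eeprom_cmd(int addr, int cmd)
{
        if (reset_select_slave() == 0) {
                write_8(cmd);           /* command byte first */
                write_8(addr);          /* then the block address */
        }
        return 0;
}

int main(void)
{
        eeprom_cmd(0x30, COPY_DATA);    /* store shadow-RAM block 1 */
        eeprom_cmd(0x30, RECALL_DATA);  /* recall it after power loss */
        return 0;
}
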
diff --git a/drivers/w1/slaves/w1_ds2760.h b/drivers/w1/slaves/w1_ds2760.h
index f1302429cb02..58e774141568 100644
--- a/drivers/w1/slaves/w1_ds2760.h
+++ b/drivers/w1/slaves/w1_ds2760.h
@@ -25,6 +25,10 @@
25 25
26#define DS2760_PROTECTION_REG 0x00 26#define DS2760_PROTECTION_REG 0x00
27#define DS2760_STATUS_REG 0x01 27#define DS2760_STATUS_REG 0x01
28 #define DS2760_STATUS_IE (1 << 2)
29 #define DS2760_STATUS_SWEN (1 << 3)
30 #define DS2760_STATUS_RNAOP (1 << 4)
31 #define DS2760_STATUS_PMOD (1 << 5)
28#define DS2760_EEPROM_REG 0x07 32#define DS2760_EEPROM_REG 0x07
29#define DS2760_SPECIAL_FEATURE_REG 0x08 33#define DS2760_SPECIAL_FEATURE_REG 0x08
30#define DS2760_VOLTAGE_MSB 0x0c 34#define DS2760_VOLTAGE_MSB 0x0c
@@ -38,6 +42,7 @@
38#define DS2760_EEPROM_BLOCK0 0x20 42#define DS2760_EEPROM_BLOCK0 0x20
39#define DS2760_ACTIVE_FULL 0x20 43#define DS2760_ACTIVE_FULL 0x20
40#define DS2760_EEPROM_BLOCK1 0x30 44#define DS2760_EEPROM_BLOCK1 0x30
45#define DS2760_STATUS_WRITE_REG 0x31
41#define DS2760_RATED_CAPACITY 0x32 46#define DS2760_RATED_CAPACITY 0x32
42#define DS2760_CURRENT_OFFSET_BIAS 0x33 47#define DS2760_CURRENT_OFFSET_BIAS 0x33
43#define DS2760_ACTIVE_EMPTY 0x3b 48#define DS2760_ACTIVE_EMPTY 0x3b
@@ -46,5 +51,7 @@ extern int w1_ds2760_read(struct device *dev, char *buf, int addr,
46 size_t count); 51 size_t count);
47extern int w1_ds2760_write(struct device *dev, char *buf, int addr, 52extern int w1_ds2760_write(struct device *dev, char *buf, int addr,
48 size_t count); 53 size_t count);
54extern int w1_ds2760_store_eeprom(struct device *dev, int addr);
55extern int w1_ds2760_recall_eeprom(struct device *dev, int addr);
49 56
50#endif /* !__w1_ds2760_h__ */ 57#endif /* !__w1_ds2760_h__ */
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 603972576f0f..f128427b995b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -29,51 +29,28 @@
29 29
30#ifdef CONFIG_FS_POSIX_ACL 30#ifdef CONFIG_FS_POSIX_ACL
31 31
32static void btrfs_update_cached_acl(struct inode *inode,
33 struct posix_acl **p_acl,
34 struct posix_acl *acl)
35{
36 spin_lock(&inode->i_lock);
37 if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
38 posix_acl_release(*p_acl);
39 *p_acl = posix_acl_dup(acl);
40 spin_unlock(&inode->i_lock);
41}
42
43static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) 32static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
44{ 33{
45 int size; 34 int size;
46 const char *name; 35 const char *name;
47 char *value = NULL; 36 char *value = NULL;
48 struct posix_acl *acl = NULL, **p_acl; 37 struct posix_acl *acl;
38
39 acl = get_cached_acl(inode, type);
40 if (acl != ACL_NOT_CACHED)
41 return acl;
49 42
50 switch (type) { 43 switch (type) {
51 case ACL_TYPE_ACCESS: 44 case ACL_TYPE_ACCESS:
52 name = POSIX_ACL_XATTR_ACCESS; 45 name = POSIX_ACL_XATTR_ACCESS;
53 p_acl = &BTRFS_I(inode)->i_acl;
54 break; 46 break;
55 case ACL_TYPE_DEFAULT: 47 case ACL_TYPE_DEFAULT:
56 name = POSIX_ACL_XATTR_DEFAULT; 48 name = POSIX_ACL_XATTR_DEFAULT;
57 p_acl = &BTRFS_I(inode)->i_default_acl;
58 break; 49 break;
59 default: 50 default:
60 return ERR_PTR(-EINVAL); 51 BUG();
61 } 52 }
62 53
63 /* Handle the cached NULL acl case without locking */
64 acl = ACCESS_ONCE(*p_acl);
65 if (!acl)
66 return acl;
67
68 spin_lock(&inode->i_lock);
69 acl = *p_acl;
70 if (acl != BTRFS_ACL_NOT_CACHED)
71 acl = posix_acl_dup(acl);
72 spin_unlock(&inode->i_lock);
73
74 if (acl != BTRFS_ACL_NOT_CACHED)
75 return acl;
76
77 size = __btrfs_getxattr(inode, name, "", 0); 54 size = __btrfs_getxattr(inode, name, "", 0);
78 if (size > 0) { 55 if (size > 0) {
79 value = kzalloc(size, GFP_NOFS); 56 value = kzalloc(size, GFP_NOFS);
@@ -82,13 +59,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
82 size = __btrfs_getxattr(inode, name, value, size); 59 size = __btrfs_getxattr(inode, name, value, size);
83 if (size > 0) { 60 if (size > 0) {
84 acl = posix_acl_from_xattr(value, size); 61 acl = posix_acl_from_xattr(value, size);
85 btrfs_update_cached_acl(inode, p_acl, acl); 62 set_cached_acl(inode, type, acl);
86 } 63 }
87 kfree(value); 64 kfree(value);
88 } else if (size == -ENOENT || size == -ENODATA || size == 0) { 65 } else if (size == -ENOENT || size == -ENODATA || size == 0) {
89 /* FIXME, who returns -ENOENT? I think nobody */ 66 /* FIXME, who returns -ENOENT? I think nobody */
90 acl = NULL; 67 acl = NULL;
91 btrfs_update_cached_acl(inode, p_acl, acl); 68 set_cached_acl(inode, type, acl);
92 } else { 69 } else {
93 acl = ERR_PTR(-EIO); 70 acl = ERR_PTR(-EIO);
94 } 71 }
@@ -121,7 +98,6 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
121{ 98{
122 int ret, size = 0; 99 int ret, size = 0;
123 const char *name; 100 const char *name;
124 struct posix_acl **p_acl;
125 char *value = NULL; 101 char *value = NULL;
126 mode_t mode; 102 mode_t mode;
127 103
@@ -141,13 +117,11 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
141 ret = 0; 117 ret = 0;
142 inode->i_mode = mode; 118 inode->i_mode = mode;
143 name = POSIX_ACL_XATTR_ACCESS; 119 name = POSIX_ACL_XATTR_ACCESS;
144 p_acl = &BTRFS_I(inode)->i_acl;
145 break; 120 break;
146 case ACL_TYPE_DEFAULT: 121 case ACL_TYPE_DEFAULT:
147 if (!S_ISDIR(inode->i_mode)) 122 if (!S_ISDIR(inode->i_mode))
148 return acl ? -EINVAL : 0; 123 return acl ? -EINVAL : 0;
149 name = POSIX_ACL_XATTR_DEFAULT; 124 name = POSIX_ACL_XATTR_DEFAULT;
150 p_acl = &BTRFS_I(inode)->i_default_acl;
151 break; 125 break;
152 default: 126 default:
153 return -EINVAL; 127 return -EINVAL;
@@ -172,7 +146,7 @@ out:
172 kfree(value); 146 kfree(value);
173 147
174 if (!ret) 148 if (!ret)
175 btrfs_update_cached_acl(inode, p_acl, acl); 149 set_cached_acl(inode, type, acl);
176 150
177 return ret; 151 return ret;
178} 152}
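
This hunk and the ext2/ext3 hunks below replace three private per-filesystem ACL caches with the generic get_cached_acl()/set_cached_acl() helpers. Their contract hinges on a sentinel that distinguishes "not yet cached" from a cached NULL ACL. A simplified single-inode model of that contract; the real helpers live in the VFS, take inode->i_lock, and manage references properly:

#include <stdio.h>

#define ACL_NOT_CACHED ((struct posix_acl *)-1)

struct posix_acl { int refcount; };

static struct posix_acl *cached = ACL_NOT_CACHED;

static struct posix_acl *acl_dup(struct posix_acl *acl)
{
        if (acl)
                acl->refcount++;
        return acl;
}

static struct posix_acl *get_cached(void)
{
        if (cached == ACL_NOT_CACHED)
                return ACL_NOT_CACHED;  /* miss: caller reads the xattr */
        return acl_dup(cached);         /* may be NULL: "no ACL" cached */
}

static void set_cached(struct posix_acl *acl)
{
        cached = acl_dup(acl);          /* sketch: old value not released */
}

int main(void)
{
        struct posix_acl acl = { .refcount = 1 };

        if (get_cached() == ACL_NOT_CACHED)
                puts("miss: would call __btrfs_getxattr()");
        set_cached(&acl);
        if (get_cached() != ACL_NOT_CACHED)
                printf("hit: refcount now %d\n", acl.refcount);
        return 0;
}
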
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index acb4f3517582..ea1ea0af8c0e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -53,10 +53,6 @@ struct btrfs_inode {
53 /* used to order data wrt metadata */ 53 /* used to order data wrt metadata */
54 struct btrfs_ordered_inode_tree ordered_tree; 54 struct btrfs_ordered_inode_tree ordered_tree;
55 55
56 /* standard acl pointers */
57 struct posix_acl *i_acl;
58 struct posix_acl *i_default_acl;
59
60 /* for keeping track of orphaned inodes */ 56 /* for keeping track of orphaned inodes */
61 struct list_head i_orphan; 57 struct list_head i_orphan;
62 58
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 03441a99ea38..2779c2f5360a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -41,8 +41,6 @@ struct btrfs_ordered_sum;
41 41
42#define BTRFS_MAGIC "_BHRfS_M" 42#define BTRFS_MAGIC "_BHRfS_M"
43 43
44#define BTRFS_ACL_NOT_CACHED ((void *)-1)
45
46#define BTRFS_MAX_LEVEL 8 44#define BTRFS_MAX_LEVEL 8
47 45
48#define BTRFS_COMPAT_EXTENT_TREE_V0 46#define BTRFS_COMPAT_EXTENT_TREE_V0
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8612b3a09811..dbe1aabf96cd 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2122,10 +2122,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
2122 * any xattrs or acls 2122 * any xattrs or acls
2123 */ 2123 */
2124 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); 2124 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
2125 if (!maybe_acls) { 2125 if (!maybe_acls)
2126 BTRFS_I(inode)->i_acl = NULL; 2126 cache_no_acl(inode);
2127 BTRFS_I(inode)->i_default_acl = NULL;
2128 }
2129 2127
2130 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2128 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
2131 alloc_group_block, 0); 2129 alloc_group_block, 0);
@@ -3141,9 +3139,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3141{ 3139{
3142 struct btrfs_inode *bi = BTRFS_I(inode); 3140 struct btrfs_inode *bi = BTRFS_I(inode);
3143 3141
3144 bi->i_acl = BTRFS_ACL_NOT_CACHED;
3145 bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
3146
3147 bi->generation = 0; 3142 bi->generation = 0;
3148 bi->sequence = 0; 3143 bi->sequence = 0;
3149 bi->last_trans = 0; 3144 bi->last_trans = 0;
@@ -4640,8 +4635,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
4640 ei->last_trans = 0; 4635 ei->last_trans = 0;
4641 ei->logged_trans = 0; 4636 ei->logged_trans = 0;
4642 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 4637 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
4643 ei->i_acl = BTRFS_ACL_NOT_CACHED;
4644 ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
4645 INIT_LIST_HEAD(&ei->i_orphan); 4638 INIT_LIST_HEAD(&ei->i_orphan);
4646 INIT_LIST_HEAD(&ei->ordered_operations); 4639 INIT_LIST_HEAD(&ei->ordered_operations);
4647 return &ei->vfs_inode; 4640 return &ei->vfs_inode;
@@ -4655,13 +4648,6 @@ void btrfs_destroy_inode(struct inode *inode)
4655 WARN_ON(!list_empty(&inode->i_dentry)); 4648 WARN_ON(!list_empty(&inode->i_dentry));
4656 WARN_ON(inode->i_data.nrpages); 4649 WARN_ON(inode->i_data.nrpages);
4657 4650
4658 if (BTRFS_I(inode)->i_acl &&
4659 BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
4660 posix_acl_release(BTRFS_I(inode)->i_acl);
4661 if (BTRFS_I(inode)->i_default_acl &&
4662 BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
4663 posix_acl_release(BTRFS_I(inode)->i_default_acl);
4664
4665 /* 4651 /*
4666 * Make sure we're properly removed from the ordered operation 4652 * Make sure we're properly removed from the ordered operation
4667 * lists. 4653 * lists.
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c135202c38b3..626c7483b4de 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -31,6 +31,7 @@
31#include <linux/skbuff.h> 31#include <linux/skbuff.h>
32#include <linux/netlink.h> 32#include <linux/netlink.h>
33#include <linux/vt.h> 33#include <linux/vt.h>
34#include <linux/falloc.h>
34#include <linux/fs.h> 35#include <linux/fs.h>
35#include <linux/file.h> 36#include <linux/file.h>
36#include <linux/ppp_defs.h> 37#include <linux/ppp_defs.h>
@@ -1779,6 +1780,41 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1779 return sys_ioctl(fd, cmd, (unsigned long)tn); 1780 return sys_ioctl(fd, cmd, (unsigned long)tn);
1780} 1781}
1781 1782
1783/* on ia32 l_start is on a 32-bit boundary */
1784#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
1785struct space_resv_32 {
1786 __s16 l_type;
1787 __s16 l_whence;
1788 __s64 l_start __attribute__((packed));
1789 /* len == 0 means until end of file */
1790 __s64 l_len __attribute__((packed));
1791 __s32 l_sysid;
1792 __u32 l_pid;
1793 __s32 l_pad[4]; /* reserve area */
1794};
1795
1796#define FS_IOC_RESVSP_32 _IOW ('X', 40, struct space_resv_32)
1797#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32)
1798
1799/* just account for different alignment */
1800static int compat_ioctl_preallocate(struct file *file, unsigned long arg)
1801{
1802 struct space_resv_32 __user *p32 = (void __user *)arg;
1803 struct space_resv __user *p = compat_alloc_user_space(sizeof(*p));
1804
1805 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) ||
1806 copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) ||
1807 copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) ||
1808 copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) ||
1809 copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) ||
1810 copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) ||
1811 copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32)))
1812 return -EFAULT;
1813
1814 return ioctl_preallocate(file, p);
1815}
1816#endif
1817
1782 1818
1783typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int, 1819typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int,
1784 unsigned long, struct file *); 1820 unsigned long, struct file *);
@@ -2756,6 +2792,18 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2756 case FIOQSIZE: 2792 case FIOQSIZE:
2757 break; 2793 break;
2758 2794
2795#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
2796 case FS_IOC_RESVSP_32:
2797 case FS_IOC_RESVSP64_32:
2798 error = compat_ioctl_preallocate(filp, arg);
2799 goto out_fput;
2800#else
2801 case FS_IOC_RESVSP:
2802 case FS_IOC_RESVSP64:
2803 error = ioctl_preallocate(filp, (void __user *)arg);
2804 goto out_fput;
2805#endif
2806
2759 case FIBMAP: 2807 case FIBMAP:
2760 case FIGETBSZ: 2808 case FIGETBSZ:
2761 case FIONREAD: 2809 case FIONREAD:
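
The compat shim above exists because i386 aligns 64-bit members to 4 bytes while the 64-bit kernel aligns them to 8, so the same struct has two layouts. Reproducing both layouts side by side makes the size difference visible:

#include <stdio.h>

typedef short     s16;
typedef int       s32;
typedef long long s64;
typedef unsigned int u32;

struct space_resv_native {              /* 64-bit kernel layout */
        s16 l_type;
        s16 l_whence;
        s64 l_start;                    /* 8-byte aligned: 4 pad bytes */
        s64 l_len;
        s32 l_sysid;
        u32 l_pid;
        s32 l_pad[4];
};

struct space_resv_ia32 {                /* ia32 layout, as in the patch */
        s16 l_type;
        s16 l_whence;
        s64 l_start __attribute__((packed));
        s64 l_len __attribute__((packed));
        s32 l_sysid;
        u32 l_pid;
        s32 l_pad[4];
};

int main(void)
{
        printf("native: %zu bytes, ia32: %zu bytes\n",
               sizeof(struct space_resv_native),
               sizeof(struct space_resv_ia32));      /* 48 vs 44 */
        return 0;
}
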
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 9b1d285f9fe6..75efb028974b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -423,7 +423,6 @@ static void devpts_kill_sb(struct super_block *sb)
423} 423}
424 424
425static struct file_system_type devpts_fs_type = { 425static struct file_system_type devpts_fs_type = {
426 .owner = THIS_MODULE,
427 .name = "devpts", 426 .name = "devpts",
428 .get_sb = devpts_get_sb, 427 .get_sb = devpts_get_sb,
429 .kill_sb = devpts_kill_sb, 428 .kill_sb = devpts_kill_sb,
@@ -564,13 +563,4 @@ static int __init init_devpts_fs(void)
564 } 563 }
565 return err; 564 return err;
566} 565}
567
568static void __exit exit_devpts_fs(void)
569{
570 unregister_filesystem(&devpts_fs_type);
571 mntput(devpts_mnt);
572}
573
574module_init(init_devpts_fs) 566module_init(init_devpts_fs)
575module_exit(exit_devpts_fs)
576MODULE_LICENSE("GPL");
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d46e38cb85c5..d636e1297cad 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -125,37 +125,12 @@ fail:
125 return ERR_PTR(-EINVAL); 125 return ERR_PTR(-EINVAL);
126} 126}
127 127
128static inline struct posix_acl *
129ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
130{
131 struct posix_acl *acl = EXT2_ACL_NOT_CACHED;
132
133 spin_lock(&inode->i_lock);
134 if (*i_acl != EXT2_ACL_NOT_CACHED)
135 acl = posix_acl_dup(*i_acl);
136 spin_unlock(&inode->i_lock);
137
138 return acl;
139}
140
141static inline void
142ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
143 struct posix_acl *acl)
144{
145 spin_lock(&inode->i_lock);
146 if (*i_acl != EXT2_ACL_NOT_CACHED)
147 posix_acl_release(*i_acl);
148 *i_acl = posix_acl_dup(acl);
149 spin_unlock(&inode->i_lock);
150}
151
152/* 128/*
153 * inode->i_mutex: don't care 129 * inode->i_mutex: don't care
154 */ 130 */
155static struct posix_acl * 131static struct posix_acl *
156ext2_get_acl(struct inode *inode, int type) 132ext2_get_acl(struct inode *inode, int type)
157{ 133{
158 struct ext2_inode_info *ei = EXT2_I(inode);
159 int name_index; 134 int name_index;
160 char *value = NULL; 135 char *value = NULL;
161 struct posix_acl *acl; 136 struct posix_acl *acl;
@@ -164,23 +139,19 @@ ext2_get_acl(struct inode *inode, int type)
164 if (!test_opt(inode->i_sb, POSIX_ACL)) 139 if (!test_opt(inode->i_sb, POSIX_ACL))
165 return NULL; 140 return NULL;
166 141
167 switch(type) { 142 acl = get_cached_acl(inode, type);
168 case ACL_TYPE_ACCESS: 143 if (acl != ACL_NOT_CACHED)
169 acl = ext2_iget_acl(inode, &ei->i_acl); 144 return acl;
170 if (acl != EXT2_ACL_NOT_CACHED) 145
171 return acl; 146 switch (type) {
172 name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; 147 case ACL_TYPE_ACCESS:
173 break; 148 name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
174 149 break;
175 case ACL_TYPE_DEFAULT: 150 case ACL_TYPE_DEFAULT:
176 acl = ext2_iget_acl(inode, &ei->i_default_acl); 151 name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
177 if (acl != EXT2_ACL_NOT_CACHED) 152 break;
178 return acl; 153 default:
179 name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; 154 BUG();
180 break;
181
182 default:
183 return ERR_PTR(-EINVAL);
184 } 155 }
185 retval = ext2_xattr_get(inode, name_index, "", NULL, 0); 156 retval = ext2_xattr_get(inode, name_index, "", NULL, 0);
186 if (retval > 0) { 157 if (retval > 0) {
@@ -197,17 +168,9 @@ ext2_get_acl(struct inode *inode, int type)
197 acl = ERR_PTR(retval); 168 acl = ERR_PTR(retval);
198 kfree(value); 169 kfree(value);
199 170
200 if (!IS_ERR(acl)) { 171 if (!IS_ERR(acl))
201 switch(type) { 172 set_cached_acl(inode, type, acl);
202 case ACL_TYPE_ACCESS:
203 ext2_iset_acl(inode, &ei->i_acl, acl);
204 break;
205 173
206 case ACL_TYPE_DEFAULT:
207 ext2_iset_acl(inode, &ei->i_default_acl, acl);
208 break;
209 }
210 }
211 return acl; 174 return acl;
212} 175}
213 176
@@ -217,7 +180,6 @@ ext2_get_acl(struct inode *inode, int type)
217static int 180static int
218ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) 181ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
219{ 182{
220 struct ext2_inode_info *ei = EXT2_I(inode);
221 int name_index; 183 int name_index;
222 void *value = NULL; 184 void *value = NULL;
223 size_t size = 0; 185 size_t size = 0;
@@ -263,17 +225,8 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
263 error = ext2_xattr_set(inode, name_index, "", value, size, 0); 225 error = ext2_xattr_set(inode, name_index, "", value, size, 0);
264 226
265 kfree(value); 227 kfree(value);
266 if (!error) { 228 if (!error)
267 switch(type) { 229 set_cached_acl(inode, type, acl);
268 case ACL_TYPE_ACCESS:
269 ext2_iset_acl(inode, &ei->i_acl, acl);
270 break;
271
272 case ACL_TYPE_DEFAULT:
273 ext2_iset_acl(inode, &ei->i_default_acl, acl);
274 break;
275 }
276 }
277 return error; 230 return error;
278} 231}
279 232
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index b42cf578554b..ecefe478898f 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -53,10 +53,6 @@ static inline int ext2_acl_count(size_t size)
53 53
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl
57 if the ACL has not been cached */
58#define EXT2_ACL_NOT_CACHED ((void *)-1)
59
60/* acl.c */ 56/* acl.c */
61extern int ext2_permission (struct inode *, int); 57extern int ext2_permission (struct inode *, int);
62extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index d988a718aedb..9a8a8e27a063 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -47,10 +47,6 @@ struct ext2_inode_info {
47 */ 47 */
48 struct rw_semaphore xattr_sem; 48 struct rw_semaphore xattr_sem;
49#endif 49#endif
50#ifdef CONFIG_EXT2_FS_POSIX_ACL
51 struct posix_acl *i_acl;
52 struct posix_acl *i_default_acl;
53#endif
54 rwlock_t i_meta_lock; 50 rwlock_t i_meta_lock;
55 51
56 /* 52 /*
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 29ed682061f6..e27130341d4f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1224,10 +1224,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1224 return inode; 1224 return inode;
1225 1225
1226 ei = EXT2_I(inode); 1226 ei = EXT2_I(inode);
1227#ifdef CONFIG_EXT2_FS_POSIX_ACL
1228 ei->i_acl = EXT2_ACL_NOT_CACHED;
1229 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
1230#endif
1231 ei->i_block_alloc_info = NULL; 1227 ei->i_block_alloc_info = NULL;
1232 1228
1233 raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); 1229 raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 458999638c3d..1a9ffee47d56 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -152,10 +152,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
152 ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); 152 ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
153 if (!ei) 153 if (!ei)
154 return NULL; 154 return NULL;
155#ifdef CONFIG_EXT2_FS_POSIX_ACL
156 ei->i_acl = EXT2_ACL_NOT_CACHED;
157 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
158#endif
159 ei->i_block_alloc_info = NULL; 155 ei->i_block_alloc_info = NULL;
160 ei->vfs_inode.i_version = 1; 156 ei->vfs_inode.i_version = 1;
161 return &ei->vfs_inode; 157 return &ei->vfs_inode;
@@ -198,18 +194,6 @@ static void destroy_inodecache(void)
198static void ext2_clear_inode(struct inode *inode) 194static void ext2_clear_inode(struct inode *inode)
199{ 195{
200 struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; 196 struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
201#ifdef CONFIG_EXT2_FS_POSIX_ACL
202 struct ext2_inode_info *ei = EXT2_I(inode);
203
204 if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) {
205 posix_acl_release(ei->i_acl);
206 ei->i_acl = EXT2_ACL_NOT_CACHED;
207 }
208 if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) {
209 posix_acl_release(ei->i_default_acl);
210 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
211 }
212#endif
213 ext2_discard_reservation(inode); 197 ext2_discard_reservation(inode);
214 EXT2_I(inode)->i_block_alloc_info = NULL; 198 EXT2_I(inode)->i_block_alloc_info = NULL;
215 if (unlikely(rsv)) 199 if (unlikely(rsv))
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e0c745451715..e167bae37ef0 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -126,33 +126,6 @@ fail:
126 return ERR_PTR(-EINVAL); 126 return ERR_PTR(-EINVAL);
127} 127}
128 128
129static inline struct posix_acl *
130ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
131{
132 struct posix_acl *acl = ACCESS_ONCE(*i_acl);
133
134 if (acl) {
135 spin_lock(&inode->i_lock);
136 acl = *i_acl;
137 if (acl != EXT3_ACL_NOT_CACHED)
138 acl = posix_acl_dup(acl);
139 spin_unlock(&inode->i_lock);
140 }
141
142 return acl;
143}
144
145static inline void
146ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
147 struct posix_acl *acl)
148{
149 spin_lock(&inode->i_lock);
150 if (*i_acl != EXT3_ACL_NOT_CACHED)
151 posix_acl_release(*i_acl);
152 *i_acl = posix_acl_dup(acl);
153 spin_unlock(&inode->i_lock);
154}
155
156/* 129/*
157 * Inode operation get_posix_acl(). 130 * Inode operation get_posix_acl().
158 * 131 *
@@ -161,7 +134,6 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
161static struct posix_acl * 134static struct posix_acl *
162ext3_get_acl(struct inode *inode, int type) 135ext3_get_acl(struct inode *inode, int type)
163{ 136{
164 struct ext3_inode_info *ei = EXT3_I(inode);
165 int name_index; 137 int name_index;
166 char *value = NULL; 138 char *value = NULL;
167 struct posix_acl *acl; 139 struct posix_acl *acl;
@@ -170,24 +142,21 @@ ext3_get_acl(struct inode *inode, int type)
170 if (!test_opt(inode->i_sb, POSIX_ACL)) 142 if (!test_opt(inode->i_sb, POSIX_ACL))
171 return NULL; 143 return NULL;
172 144
173 switch(type) { 145 acl = get_cached_acl(inode, type);
174 case ACL_TYPE_ACCESS: 146 if (acl != ACL_NOT_CACHED)
175 acl = ext3_iget_acl(inode, &ei->i_acl); 147 return acl;
176 if (acl != EXT3_ACL_NOT_CACHED) 148
177 return acl; 149 switch (type) {
178 name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; 150 case ACL_TYPE_ACCESS:
179 break; 151 name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
180 152 break;
181 case ACL_TYPE_DEFAULT: 153 case ACL_TYPE_DEFAULT:
182 acl = ext3_iget_acl(inode, &ei->i_default_acl); 154 name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
183 if (acl != EXT3_ACL_NOT_CACHED) 155 break;
184 return acl; 156 default:
185 name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; 157 BUG();
186 break;
187
188 default:
189 return ERR_PTR(-EINVAL);
190 } 158 }
159
191 retval = ext3_xattr_get(inode, name_index, "", NULL, 0); 160 retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
192 if (retval > 0) { 161 if (retval > 0) {
193 value = kmalloc(retval, GFP_NOFS); 162 value = kmalloc(retval, GFP_NOFS);
@@ -203,17 +172,9 @@ ext3_get_acl(struct inode *inode, int type)
203 acl = ERR_PTR(retval); 172 acl = ERR_PTR(retval);
204 kfree(value); 173 kfree(value);
205 174
206 if (!IS_ERR(acl)) { 175 if (!IS_ERR(acl))
207 switch(type) { 176 set_cached_acl(inode, type, acl);
208 case ACL_TYPE_ACCESS:
209 ext3_iset_acl(inode, &ei->i_acl, acl);
210 break;
211 177
212 case ACL_TYPE_DEFAULT:
213 ext3_iset_acl(inode, &ei->i_default_acl, acl);
214 break;
215 }
216 }
217 return acl; 178 return acl;
218} 179}
219 180
@@ -226,7 +187,6 @@ static int
226ext3_set_acl(handle_t *handle, struct inode *inode, int type, 187ext3_set_acl(handle_t *handle, struct inode *inode, int type,
227 struct posix_acl *acl) 188 struct posix_acl *acl)
228{ 189{
229 struct ext3_inode_info *ei = EXT3_I(inode);
230 int name_index; 190 int name_index;
231 void *value = NULL; 191 void *value = NULL;
232 size_t size = 0; 192 size_t size = 0;
@@ -271,17 +231,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
271 value, size, 0); 231 value, size, 0);
272 232
273 kfree(value); 233 kfree(value);
274 if (!error) {
275 switch(type) {
276 case ACL_TYPE_ACCESS:
277 ext3_iset_acl(inode, &ei->i_acl, acl);
278 break;
279 234
280 case ACL_TYPE_DEFAULT: 235 if (!error)
281 ext3_iset_acl(inode, &ei->i_default_acl, acl); 236 set_cached_acl(inode, type, acl);
282 break; 237
283 }
284 }
285 return error; 238 return error;
286} 239}
287 240
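A subtlety that keeps the converted ext3_set_acl() correct: the deleted ext3_iset_acl() took its own reference with posix_acl_dup(), so its generic replacement has to do the same or the caller's refcounting would change. A sketch of the store side, matching the deleted helper:

        static inline void set_cached_acl(struct inode *inode, int type,
                                          struct posix_acl *acl)
        {
                struct posix_acl **p = (type == ACL_TYPE_ACCESS) ?
                                &inode->i_acl : &inode->i_default_acl;
                struct posix_acl *old;

                spin_lock(&inode->i_lock);
                old = *p;
                *p = posix_acl_dup(acl);        /* the cache holds its own ref */
                spin_unlock(&inode->i_lock);
                if (old != ACL_NOT_CACHED)
                        posix_acl_release(old); /* NULL-safe */
        }

The switch to BUG() in the converted default case reflects that only the two valid ACL_TYPE_* values ever reach these functions.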
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 42da16b8cac0..07d15a3a5969 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -53,10 +53,6 @@ static inline int ext3_acl_count(size_t size)
53 53
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl
57 if the ACL has not been cached */
58#define EXT3_ACL_NOT_CACHED ((void *)-1)
59
60/* acl.c */ 56/* acl.c */
61extern int ext3_permission (struct inode *, int); 57extern int ext3_permission (struct inode *, int);
62extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 05dea8132fc0..5f51fed5c750 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2752,10 +2752,6 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2752 return inode; 2752 return inode;
2753 2753
2754 ei = EXT3_I(inode); 2754 ei = EXT3_I(inode);
2755#ifdef CONFIG_EXT3_FS_POSIX_ACL
2756 ei->i_acl = EXT3_ACL_NOT_CACHED;
2757 ei->i_default_acl = EXT3_ACL_NOT_CACHED;
2758#endif
2759 ei->i_block_alloc_info = NULL; 2755 ei->i_block_alloc_info = NULL;
2760 2756
2761 ret = __ext3_get_inode_loc(inode, &iloc, 0); 2757 ret = __ext3_get_inode_loc(inode, &iloc, 0);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 601e881e6105..524b349c6299 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -464,10 +464,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
464 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); 464 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
465 if (!ei) 465 if (!ei)
466 return NULL; 466 return NULL;
467#ifdef CONFIG_EXT3_FS_POSIX_ACL
468 ei->i_acl = EXT3_ACL_NOT_CACHED;
469 ei->i_default_acl = EXT3_ACL_NOT_CACHED;
470#endif
471 ei->i_block_alloc_info = NULL; 467 ei->i_block_alloc_info = NULL;
472 ei->vfs_inode.i_version = 1; 468 ei->vfs_inode.i_version = 1;
473 return &ei->vfs_inode; 469 return &ei->vfs_inode;
@@ -518,18 +514,6 @@ static void destroy_inodecache(void)
518static void ext3_clear_inode(struct inode *inode) 514static void ext3_clear_inode(struct inode *inode)
519{ 515{
520 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; 516 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
521#ifdef CONFIG_EXT3_FS_POSIX_ACL
522 if (EXT3_I(inode)->i_acl &&
523 EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
524 posix_acl_release(EXT3_I(inode)->i_acl);
525 EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
526 }
527 if (EXT3_I(inode)->i_default_acl &&
528 EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
529 posix_acl_release(EXT3_I(inode)->i_default_acl);
530 EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
531 }
532#endif
533 ext3_discard_reservation(inode); 517 ext3_discard_reservation(inode);
534 EXT3_I(inode)->i_block_alloc_info = NULL; 518 EXT3_I(inode)->i_block_alloc_info = NULL;
535 if (unlikely(rsv)) 519 if (unlikely(rsv))
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 605aeed96d68..f6d8967149ca 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -126,33 +126,6 @@ fail:
126 return ERR_PTR(-EINVAL); 126 return ERR_PTR(-EINVAL);
127} 127}
128 128
129static inline struct posix_acl *
130ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
131{
132 struct posix_acl *acl = ACCESS_ONCE(*i_acl);
133
134 if (acl) {
135 spin_lock(&inode->i_lock);
136 acl = *i_acl;
137 if (acl != EXT4_ACL_NOT_CACHED)
138 acl = posix_acl_dup(acl);
139 spin_unlock(&inode->i_lock);
140 }
141
142 return acl;
143}
144
145static inline void
146ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
147 struct posix_acl *acl)
148{
149 spin_lock(&inode->i_lock);
150 if (*i_acl != EXT4_ACL_NOT_CACHED)
151 posix_acl_release(*i_acl);
152 *i_acl = posix_acl_dup(acl);
153 spin_unlock(&inode->i_lock);
154}
155
156/* 129/*
157 * Inode operation get_posix_acl(). 130 * Inode operation get_posix_acl().
158 * 131 *
@@ -161,7 +134,6 @@ ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
161static struct posix_acl * 134static struct posix_acl *
162ext4_get_acl(struct inode *inode, int type) 135ext4_get_acl(struct inode *inode, int type)
163{ 136{
164 struct ext4_inode_info *ei = EXT4_I(inode);
165 int name_index; 137 int name_index;
166 char *value = NULL; 138 char *value = NULL;
167 struct posix_acl *acl; 139 struct posix_acl *acl;
@@ -170,23 +142,19 @@ ext4_get_acl(struct inode *inode, int type)
170 if (!test_opt(inode->i_sb, POSIX_ACL)) 142 if (!test_opt(inode->i_sb, POSIX_ACL))
171 return NULL; 143 return NULL;
172 144
145 acl = get_cached_acl(inode, type);
146 if (acl != ACL_NOT_CACHED)
147 return acl;
148
173 switch (type) { 149 switch (type) {
174 case ACL_TYPE_ACCESS: 150 case ACL_TYPE_ACCESS:
175 acl = ext4_iget_acl(inode, &ei->i_acl);
176 if (acl != EXT4_ACL_NOT_CACHED)
177 return acl;
178 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 151 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
179 break; 152 break;
180
181 case ACL_TYPE_DEFAULT: 153 case ACL_TYPE_DEFAULT:
182 acl = ext4_iget_acl(inode, &ei->i_default_acl);
183 if (acl != EXT4_ACL_NOT_CACHED)
184 return acl;
185 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 154 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
186 break; 155 break;
187
188 default: 156 default:
189 return ERR_PTR(-EINVAL); 157 BUG();
190 } 158 }
191 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 159 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
192 if (retval > 0) { 160 if (retval > 0) {
@@ -203,17 +171,9 @@ ext4_get_acl(struct inode *inode, int type)
203 acl = ERR_PTR(retval); 171 acl = ERR_PTR(retval);
204 kfree(value); 172 kfree(value);
205 173
206 if (!IS_ERR(acl)) { 174 if (!IS_ERR(acl))
207 switch (type) { 175 set_cached_acl(inode, type, acl);
208 case ACL_TYPE_ACCESS:
209 ext4_iset_acl(inode, &ei->i_acl, acl);
210 break;
211 176
212 case ACL_TYPE_DEFAULT:
213 ext4_iset_acl(inode, &ei->i_default_acl, acl);
214 break;
215 }
216 }
217 return acl; 177 return acl;
218} 178}
219 179
@@ -226,7 +186,6 @@ static int
226ext4_set_acl(handle_t *handle, struct inode *inode, int type, 186ext4_set_acl(handle_t *handle, struct inode *inode, int type,
227 struct posix_acl *acl) 187 struct posix_acl *acl)
228{ 188{
229 struct ext4_inode_info *ei = EXT4_I(inode);
230 int name_index; 189 int name_index;
231 void *value = NULL; 190 void *value = NULL;
232 size_t size = 0; 191 size_t size = 0;
@@ -271,17 +230,9 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
271 value, size, 0); 230 value, size, 0);
272 231
273 kfree(value); 232 kfree(value);
274 if (!error) { 233 if (!error)
275 switch (type) { 234 set_cached_acl(inode, type, acl);
276 case ACL_TYPE_ACCESS:
277 ext4_iset_acl(inode, &ei->i_acl, acl);
278 break;
279 235
280 case ACL_TYPE_DEFAULT:
281 ext4_iset_acl(inode, &ei->i_default_acl, acl);
282 break;
283 }
284 }
285 return error; 236 return error;
286} 237}
287 238
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index cb45257a246e..949789d2bba6 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -53,10 +53,6 @@ static inline int ext4_acl_count(size_t size)
53 53
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
57 if the ACL has not been cached */
58#define EXT4_ACL_NOT_CACHED ((void *)-1)
59
60/* acl.c */ 56/* acl.c */
61extern int ext4_permission(struct inode *, int); 57extern int ext4_permission(struct inode *, int);
62extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 17b9998680e3..0ddf7e55abe1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -595,10 +595,6 @@ struct ext4_inode_info {
595 */ 595 */
596 struct rw_semaphore xattr_sem; 596 struct rw_semaphore xattr_sem;
597#endif 597#endif
598#ifdef CONFIG_EXT4_FS_POSIX_ACL
599 struct posix_acl *i_acl;
600 struct posix_acl *i_default_acl;
601#endif
602 598
603 struct list_head i_orphan; /* unlinked but open inodes */ 599 struct list_head i_orphan; /* unlinked but open inodes */
604 600
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7c17ae275af4..60a26f3a6f8b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4453,10 +4453,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4453 return inode; 4453 return inode;
4454 4454
4455 ei = EXT4_I(inode); 4455 ei = EXT4_I(inode);
4456#ifdef CONFIG_EXT4_FS_POSIX_ACL
4457 ei->i_acl = EXT4_ACL_NOT_CACHED;
4458 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
4459#endif
4460 4456
4461 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4457 ret = __ext4_get_inode_loc(inode, &iloc, 0);
4462 if (ret < 0) 4458 if (ret < 0)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8bb9e2d3e4b8..8f4f079e6b9a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -666,10 +666,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
666 if (!ei) 666 if (!ei)
667 return NULL; 667 return NULL;
668 668
669#ifdef CONFIG_EXT4_FS_POSIX_ACL
670 ei->i_acl = EXT4_ACL_NOT_CACHED;
671 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
672#endif
673 ei->vfs_inode.i_version = 1; 669 ei->vfs_inode.i_version = 1;
674 ei->vfs_inode.i_data.writeback_index = 0; 670 ei->vfs_inode.i_data.writeback_index = 0;
675 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 671 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -735,18 +731,6 @@ static void destroy_inodecache(void)
735 731
736static void ext4_clear_inode(struct inode *inode) 732static void ext4_clear_inode(struct inode *inode)
737{ 733{
738#ifdef CONFIG_EXT4_FS_POSIX_ACL
739 if (EXT4_I(inode)->i_acl &&
740 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
741 posix_acl_release(EXT4_I(inode)->i_acl);
742 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
743 }
744 if (EXT4_I(inode)->i_default_acl &&
745 EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
746 posix_acl_release(EXT4_I(inode)->i_default_acl);
747 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
748 }
749#endif
750 ext4_discard_preallocations(inode); 734 ext4_discard_preallocations(inode);
751 if (EXT4_JOURNAL(inode)) 735 if (EXT4_JOURNAL(inode))
752 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 736 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index caf049146ca2..c54226be5294 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -278,7 +278,26 @@ int sb_has_dirty_inodes(struct super_block *sb)
278EXPORT_SYMBOL(sb_has_dirty_inodes); 278EXPORT_SYMBOL(sb_has_dirty_inodes);
279 279
280/* 280/*
281 * Write a single inode's dirty pages and inode data out to disk. 281 * Wait for writeback on an inode to complete.
282 */
283static void inode_wait_for_writeback(struct inode *inode)
284{
285 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
286 wait_queue_head_t *wqh;
287
288 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
289 do {
290 spin_unlock(&inode_lock);
291 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
292 spin_lock(&inode_lock);
293 } while (inode->i_state & I_SYNC);
294}
295
296/*
297 * Write out an inode's dirty pages. Called under inode_lock. Either the
 298 * caller has a ref on the inode (either via __iget or via syscall against an fd)
299 * or the inode has I_WILL_FREE set (via generic_forget_inode)
300 *
282 * If `wait' is set, wait on the writeout. 301 * If `wait' is set, wait on the writeout.
283 * 302 *
284 * The whole writeout design is quite complex and fragile. We want to avoid 303 * The whole writeout design is quite complex and fragile. We want to avoid
@@ -288,13 +307,38 @@ EXPORT_SYMBOL(sb_has_dirty_inodes);
288 * Called under inode_lock. 307 * Called under inode_lock.
289 */ 308 */
290static int 309static int
291__sync_single_inode(struct inode *inode, struct writeback_control *wbc) 310writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
292{ 311{
293 unsigned dirty;
294 struct address_space *mapping = inode->i_mapping; 312 struct address_space *mapping = inode->i_mapping;
295 int wait = wbc->sync_mode == WB_SYNC_ALL; 313 int wait = wbc->sync_mode == WB_SYNC_ALL;
314 unsigned dirty;
296 int ret; 315 int ret;
297 316
317 if (!atomic_read(&inode->i_count))
318 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
319 else
320 WARN_ON(inode->i_state & I_WILL_FREE);
321
322 if (inode->i_state & I_SYNC) {
323 /*
324 * If this inode is locked for writeback and we are not doing
325 * writeback-for-data-integrity, move it to s_more_io so that
326 * writeback can proceed with the other inodes on s_io.
327 *
 328	 * We'll have another go at writing back this inode when we
 329	 * have completed a full scan of s_io.
330 */
331 if (!wait) {
332 requeue_io(inode);
333 return 0;
334 }
335
336 /*
337 * It's a data-integrity sync. We must wait.
338 */
339 inode_wait_for_writeback(inode);
340 }
341
298 BUG_ON(inode->i_state & I_SYNC); 342 BUG_ON(inode->i_state & I_SYNC);
299 343
300 /* Set I_SYNC, reset I_DIRTY */ 344 /* Set I_SYNC, reset I_DIRTY */
@@ -390,50 +434,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
390} 434}
391 435
392/* 436/*
393 * Write out an inode's dirty pages. Called under inode_lock. Either the
394 * caller has ref on the inode (either via __iget or via syscall against an fd)
395 * or the inode has I_WILL_FREE set (via generic_forget_inode)
396 */
397static int
398__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
399{
400 wait_queue_head_t *wqh;
401
402 if (!atomic_read(&inode->i_count))
403 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
404 else
405 WARN_ON(inode->i_state & I_WILL_FREE);
406
407 if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
408 /*
409 * We're skipping this inode because it's locked, and we're not
410 * doing writeback-for-data-integrity. Move it to s_more_io so
411 * that writeback can proceed with the other inodes on s_io.
412 * We'll have another go at writing back this inode when we
413 * completed a full scan of s_io.
414 */
415 requeue_io(inode);
416 return 0;
417 }
418
419 /*
420 * It's a data-integrity sync. We must wait.
421 */
422 if (inode->i_state & I_SYNC) {
423 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
424
425 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
426 do {
427 spin_unlock(&inode_lock);
428 __wait_on_bit(wqh, &wq, inode_wait,
429 TASK_UNINTERRUPTIBLE);
430 spin_lock(&inode_lock);
431 } while (inode->i_state & I_SYNC);
432 }
433 return __sync_single_inode(inode, wbc);
434}
435
436/*
437 * Write out a superblock's list of dirty inodes. A wait will be performed 437 * Write out a superblock's list of dirty inodes. A wait will be performed
438 * upon no inodes, all inodes or the final one, depending upon sync_mode. 438 * upon no inodes, all inodes or the final one, depending upon sync_mode.
439 * 439 *
@@ -526,7 +526,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
526 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); 526 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
527 __iget(inode); 527 __iget(inode);
528 pages_skipped = wbc->pages_skipped; 528 pages_skipped = wbc->pages_skipped;
529 __writeback_single_inode(inode, wbc); 529 writeback_single_inode(inode, wbc);
530 if (current_is_pdflush()) 530 if (current_is_pdflush())
531 writeback_release(bdi); 531 writeback_release(bdi);
532 if (wbc->pages_skipped != pages_skipped) { 532 if (wbc->pages_skipped != pages_skipped) {
@@ -708,7 +708,7 @@ int write_inode_now(struct inode *inode, int sync)
708 708
709 might_sleep(); 709 might_sleep();
710 spin_lock(&inode_lock); 710 spin_lock(&inode_lock);
711 ret = __writeback_single_inode(inode, &wbc); 711 ret = writeback_single_inode(inode, &wbc);
712 spin_unlock(&inode_lock); 712 spin_unlock(&inode_lock);
713 if (sync) 713 if (sync)
714 inode_sync_wait(inode); 714 inode_sync_wait(inode);
@@ -732,7 +732,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
732 int ret; 732 int ret;
733 733
734 spin_lock(&inode_lock); 734 spin_lock(&inode_lock);
735 ret = __writeback_single_inode(inode, wbc); 735 ret = writeback_single_inode(inode, wbc);
736 spin_unlock(&inode_lock); 736 spin_unlock(&inode_lock);
737 return ret; 737 return ret;
738} 738}
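The fs-writeback change is a pure fold: __writeback_single_inode()'s requeue-if-busy and wait-for-I_SYNC logic moves to the top of __sync_single_inode(), which takes over the writeback_single_inode() name, so one function now reads top to bottom. The calling convention is unchanged; a minimal sketch of a data-integrity flush of one inode, mirroring the write_inode_now() caller above:

        struct writeback_control wbc = {
                .sync_mode      = WB_SYNC_ALL,  /* take the wait-on-I_SYNC branch */
                .nr_to_write    = LONG_MAX,
                .range_start    = 0,
                .range_end      = LLONG_MAX,
        };
        int ret;

        spin_lock(&inode_lock);         /* writeback_single_inode() expects inode_lock */
        ret = writeback_single_inode(inode, &wbc);
        spin_unlock(&inode_lock);

With WB_SYNC_NONE instead, a busy inode is simply requeued to s_more_io and the function returns 0.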
diff --git a/fs/inode.c b/fs/inode.c
index f643be565df8..901bad1e5f12 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,7 @@
25#include <linux/fsnotify.h> 25#include <linux/fsnotify.h>
26#include <linux/mount.h> 26#include <linux/mount.h>
27#include <linux/async.h> 27#include <linux/async.h>
28#include <linux/posix_acl.h>
28 29
29/* 30/*
30 * This is needed for the following functions: 31 * This is needed for the following functions:
@@ -189,6 +190,9 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
189 } 190 }
190 inode->i_private = NULL; 191 inode->i_private = NULL;
191 inode->i_mapping = mapping; 192 inode->i_mapping = mapping;
193#ifdef CONFIG_FS_POSIX_ACL
194 inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
195#endif
192 196
193#ifdef CONFIG_FSNOTIFY 197#ifdef CONFIG_FSNOTIFY
194 inode->i_fsnotify_mask = 0; 198 inode->i_fsnotify_mask = 0;
@@ -227,6 +231,12 @@ void destroy_inode(struct inode *inode)
227 ima_inode_free(inode); 231 ima_inode_free(inode);
228 security_inode_free(inode); 232 security_inode_free(inode);
229 fsnotify_inode_delete(inode); 233 fsnotify_inode_delete(inode);
234#ifdef CONFIG_FS_POSIX_ACL
235 if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
236 posix_acl_release(inode->i_acl);
237 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
238 posix_acl_release(inode->i_default_acl);
239#endif
230 if (inode->i_sb->s_op->destroy_inode) 240 if (inode->i_sb->s_op->destroy_inode)
231 inode->i_sb->s_op->destroy_inode(inode); 241 inode->i_sb->s_op->destroy_inode(inode);
232 else 242 else
@@ -665,12 +675,17 @@ void unlock_new_inode(struct inode *inode)
665 if (inode->i_mode & S_IFDIR) { 675 if (inode->i_mode & S_IFDIR) {
666 struct file_system_type *type = inode->i_sb->s_type; 676 struct file_system_type *type = inode->i_sb->s_type;
667 677
668 /* 678 /* Set new key only if filesystem hasn't already changed it */
669 * ensure nobody is actually holding i_mutex 679 if (!lockdep_match_class(&inode->i_mutex,
670 */ 680 &type->i_mutex_key)) {
671 mutex_destroy(&inode->i_mutex); 681 /*
672 mutex_init(&inode->i_mutex); 682 * ensure nobody is actually holding i_mutex
673 lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); 683 */
684 mutex_destroy(&inode->i_mutex);
685 mutex_init(&inode->i_mutex);
686 lockdep_set_class(&inode->i_mutex,
687 &type->i_mutex_dir_key);
688 }
674 } 689 }
675#endif 690#endif
676 /* 691 /*
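Taken together with the destroy_inode() hunk above, this is what lets every filesystem delete its clear_inode/destroy_inode ACL release code: the generic inode now owns the cached references. Each slot has three states: ACL_NOT_CACHED ((void *)-1, never looked up), NULL (looked up, no ACL), or a counted posix_acl pointer. A filesystem that knows at create time there is no ACL can pre-seed the "no ACL" state; the jffs2 hunk below calls this helper cache_no_acl(), presumably along these lines:

        static inline void cache_no_acl(struct inode *inode)
        {
                inode->i_acl = NULL;            /* "no ACL", not "unknown" */
                inode->i_default_acl = NULL;
        }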
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 001f8d3118f2..5612880fcbe7 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/writeback.h> 16#include <linux/writeback.h>
17#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
18#include <linux/falloc.h>
18 19
19#include <asm/ioctls.h> 20#include <asm/ioctls.h>
20 21
@@ -403,6 +404,37 @@ EXPORT_SYMBOL(generic_block_fiemap);
403 404
404#endif /* CONFIG_BLOCK */ 405#endif /* CONFIG_BLOCK */
405 406
407/*
408 * This provides compatibility with legacy XFS pre-allocation ioctls
409 * which predate the fallocate syscall.
410 *
411 * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
 412 * are used here; the rest are ignored.
413 */
414int ioctl_preallocate(struct file *filp, void __user *argp)
415{
416 struct inode *inode = filp->f_path.dentry->d_inode;
417 struct space_resv sr;
418
419 if (copy_from_user(&sr, argp, sizeof(sr)))
420 return -EFAULT;
421
422 switch (sr.l_whence) {
423 case SEEK_SET:
424 break;
425 case SEEK_CUR:
426 sr.l_start += filp->f_pos;
427 break;
428 case SEEK_END:
429 sr.l_start += i_size_read(inode);
430 break;
431 default:
432 return -EINVAL;
433 }
434
435 return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
436}
437
406static int file_ioctl(struct file *filp, unsigned int cmd, 438static int file_ioctl(struct file *filp, unsigned int cmd,
407 unsigned long arg) 439 unsigned long arg)
408{ 440{
@@ -414,6 +446,9 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
414 return ioctl_fibmap(filp, p); 446 return ioctl_fibmap(filp, p);
415 case FIONREAD: 447 case FIONREAD:
416 return put_user(i_size_read(inode) - filp->f_pos, p); 448 return put_user(i_size_read(inode) - filp->f_pos, p);
449 case FS_IOC_RESVSP:
450 case FS_IOC_RESVSP64:
451 return ioctl_preallocate(filp, p);
417 } 452 }
418 453
419 return vfs_ioctl(filp, cmd, arg); 454 return vfs_ioctl(filp, cmd, arg);
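From userspace the two ioctls now act like fallocate() with FALLOC_FL_KEEP_SIZE: blocks are reserved but i_size is left alone. An illustrative caller; the header carrying FS_IOC_RESVSP64 and struct space_resv is an assumption here, not something this hunk shows:

        #include <stdio.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <linux/fs.h>           /* assumed home of FS_IOC_RESVSP64 */

        static int reserve_16m(int fd)
        {
                struct space_resv sr = {
                        .l_whence = SEEK_SET,
                        .l_start  = 0,
                        .l_len    = 16 << 20,   /* 16 MiB; i_size unchanged */
                };

                if (ioctl(fd, FS_IOC_RESVSP64, &sr) < 0) {
                        perror("FS_IOC_RESVSP64");
                        return -1;
                }
                return 0;
        }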
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 043740dde20c..8fcb6239218e 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -156,48 +156,25 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
156 return ERR_PTR(-EINVAL); 156 return ERR_PTR(-EINVAL);
157} 157}
158 158
159static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
160{
161 struct posix_acl *acl = JFFS2_ACL_NOT_CACHED;
162
163 spin_lock(&inode->i_lock);
164 if (*i_acl != JFFS2_ACL_NOT_CACHED)
165 acl = posix_acl_dup(*i_acl);
166 spin_unlock(&inode->i_lock);
167 return acl;
168}
169
170static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl)
171{
172 spin_lock(&inode->i_lock);
173 if (*i_acl != JFFS2_ACL_NOT_CACHED)
174 posix_acl_release(*i_acl);
175 *i_acl = posix_acl_dup(acl);
176 spin_unlock(&inode->i_lock);
177}
178
179static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) 159static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
180{ 160{
181 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
182 struct posix_acl *acl; 161 struct posix_acl *acl;
183 char *value = NULL; 162 char *value = NULL;
184 int rc, xprefix; 163 int rc, xprefix;
185 164
165 acl = get_cached_acl(inode, type);
166 if (acl != ACL_NOT_CACHED)
167 return acl;
168
186 switch (type) { 169 switch (type) {
187 case ACL_TYPE_ACCESS: 170 case ACL_TYPE_ACCESS:
188 acl = jffs2_iget_acl(inode, &f->i_acl_access);
189 if (acl != JFFS2_ACL_NOT_CACHED)
190 return acl;
191 xprefix = JFFS2_XPREFIX_ACL_ACCESS; 171 xprefix = JFFS2_XPREFIX_ACL_ACCESS;
192 break; 172 break;
193 case ACL_TYPE_DEFAULT: 173 case ACL_TYPE_DEFAULT:
194 acl = jffs2_iget_acl(inode, &f->i_acl_default);
195 if (acl != JFFS2_ACL_NOT_CACHED)
196 return acl;
197 xprefix = JFFS2_XPREFIX_ACL_DEFAULT; 174 xprefix = JFFS2_XPREFIX_ACL_DEFAULT;
198 break; 175 break;
199 default: 176 default:
200 return ERR_PTR(-EINVAL); 177 BUG();
201 } 178 }
202 rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0); 179 rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0);
203 if (rc > 0) { 180 if (rc > 0) {
@@ -215,16 +192,8 @@ static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
215 } 192 }
216 if (value) 193 if (value)
217 kfree(value); 194 kfree(value);
218 if (!IS_ERR(acl)) { 195 if (!IS_ERR(acl))
219 switch (type) { 196 set_cached_acl(inode, type, acl);
220 case ACL_TYPE_ACCESS:
221 jffs2_iset_acl(inode, &f->i_acl_access, acl);
222 break;
223 case ACL_TYPE_DEFAULT:
224 jffs2_iset_acl(inode, &f->i_acl_default, acl);
225 break;
226 }
227 }
228 return acl; 197 return acl;
229} 198}
230 199
@@ -249,7 +218,6 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a
249 218
250static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) 219static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
251{ 220{
252 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
253 int rc, xprefix; 221 int rc, xprefix;
254 222
255 if (S_ISLNK(inode->i_mode)) 223 if (S_ISLNK(inode->i_mode))
@@ -285,16 +253,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
285 return -EINVAL; 253 return -EINVAL;
286 } 254 }
287 rc = __jffs2_set_acl(inode, xprefix, acl); 255 rc = __jffs2_set_acl(inode, xprefix, acl);
288 if (!rc) { 256 if (!rc)
289 switch(type) { 257 set_cached_acl(inode, type, acl);
290 case ACL_TYPE_ACCESS:
291 jffs2_iset_acl(inode, &f->i_acl_access, acl);
292 break;
293 case ACL_TYPE_DEFAULT:
294 jffs2_iset_acl(inode, &f->i_acl_default, acl);
295 break;
296 }
297 }
298 return rc; 258 return rc;
299} 259}
300 260
@@ -321,12 +281,10 @@ int jffs2_permission(struct inode *inode, int mask)
321 281
322int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) 282int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
323{ 283{
324 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
325 struct posix_acl *acl, *clone; 284 struct posix_acl *acl, *clone;
326 int rc; 285 int rc;
327 286
328 f->i_acl_default = NULL; 287 cache_no_acl(inode);
329 f->i_acl_access = NULL;
330 288
331 if (S_ISLNK(*i_mode)) 289 if (S_ISLNK(*i_mode))
332 return 0; /* Symlink always has no-ACL */ 290 return 0; /* Symlink always has no-ACL */
@@ -339,7 +297,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
339 *i_mode &= ~current_umask(); 297 *i_mode &= ~current_umask();
340 } else { 298 } else {
341 if (S_ISDIR(*i_mode)) 299 if (S_ISDIR(*i_mode))
342 jffs2_iset_acl(inode, &f->i_acl_default, acl); 300 set_cached_acl(inode, ACL_TYPE_DEFAULT, acl);
343 301
344 clone = posix_acl_clone(acl, GFP_KERNEL); 302 clone = posix_acl_clone(acl, GFP_KERNEL);
345 if (!clone) 303 if (!clone)
@@ -350,7 +308,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
350 return rc; 308 return rc;
351 } 309 }
352 if (rc > 0) 310 if (rc > 0)
353 jffs2_iset_acl(inode, &f->i_acl_access, clone); 311 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
354 312
355 posix_acl_release(clone); 313 posix_acl_release(clone);
356 } 314 }
@@ -359,17 +317,16 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
359 317
360int jffs2_init_acl_post(struct inode *inode) 318int jffs2_init_acl_post(struct inode *inode)
361{ 319{
362 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
363 int rc; 320 int rc;
364 321
365 if (f->i_acl_default) { 322 if (inode->i_default_acl) {
366 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, f->i_acl_default); 323 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, inode->i_default_acl);
367 if (rc) 324 if (rc)
368 return rc; 325 return rc;
369 } 326 }
370 327
371 if (f->i_acl_access) { 328 if (inode->i_acl) {
372 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, f->i_acl_access); 329 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, inode->i_acl);
373 if (rc) 330 if (rc)
374 return rc; 331 return rc;
375 } 332 }
@@ -377,18 +334,6 @@ int jffs2_init_acl_post(struct inode *inode)
377 return 0; 334 return 0;
378} 335}
379 336
380void jffs2_clear_acl(struct jffs2_inode_info *f)
381{
382 if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {
383 posix_acl_release(f->i_acl_access);
384 f->i_acl_access = JFFS2_ACL_NOT_CACHED;
385 }
386 if (f->i_acl_default && f->i_acl_default != JFFS2_ACL_NOT_CACHED) {
387 posix_acl_release(f->i_acl_default);
388 f->i_acl_default = JFFS2_ACL_NOT_CACHED;
389 }
390}
391
392int jffs2_acl_chmod(struct inode *inode) 337int jffs2_acl_chmod(struct inode *inode)
393{ 338{
394 struct posix_acl *acl, *clone; 339 struct posix_acl *acl, *clone;
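jffs2_init_acl_pre() above is the usual create-time inheritance sequence, and after the conversion it doubles as a demonstration of the ownership rule: set_cached_acl() keeps its own reference, so the caller still releases the clone it made. The core of the pattern, assuming the stock posix_acl helpers of this era:

        struct posix_acl *clone;
        mode_t mode = *i_mode;          /* the create mode being refined */
        int rc;

        clone = posix_acl_clone(acl, GFP_KERNEL);
        if (!clone)
                return -ENOMEM;
        rc = posix_acl_create_masq(clone, &mode);  /* >0: ACL exceeds mode bits */
        if (rc < 0) {
                posix_acl_release(clone);
                return rc;
        }
        if (rc > 0)
                set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
        posix_acl_release(clone);       /* the cache took its own reference */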
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 8ca058aed384..fc929f2a14f6 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,13 +26,10 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29#define JFFS2_ACL_NOT_CACHED ((void *)-1)
30
31extern int jffs2_permission(struct inode *, int); 29extern int jffs2_permission(struct inode *, int);
32extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
33extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
34extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
35extern void jffs2_clear_acl(struct jffs2_inode_info *);
36 33
37extern struct xattr_handler jffs2_acl_access_xattr_handler; 34extern struct xattr_handler jffs2_acl_access_xattr_handler;
38extern struct xattr_handler jffs2_acl_default_xattr_handler; 35extern struct xattr_handler jffs2_acl_default_xattr_handler;
@@ -43,6 +40,5 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
43#define jffs2_acl_chmod(inode) (0) 40#define jffs2_acl_chmod(inode) (0)
44#define jffs2_init_acl_pre(dir_i,inode,mode) (0) 41#define jffs2_init_acl_pre(dir_i,inode,mode) (0)
45#define jffs2_init_acl_post(inode) (0) 42#define jffs2_init_acl_post(inode) (0)
46#define jffs2_clear_acl(f)
47 43
48#endif /* CONFIG_JFFS2_FS_POSIX_ACL */ 44#endif /* CONFIG_JFFS2_FS_POSIX_ACL */
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 4c41db91eaa4..c6923da98263 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -50,10 +50,6 @@ struct jffs2_inode_info {
50 uint16_t flags; 50 uint16_t flags;
51 uint8_t usercompr; 51 uint8_t usercompr;
52 struct inode vfs_inode; 52 struct inode vfs_inode;
53#ifdef CONFIG_JFFS2_FS_POSIX_ACL
54 struct posix_acl *i_acl_access;
55 struct posix_acl *i_acl_default;
56#endif
57}; 53};
58 54
59#endif /* _JFFS2_FS_I */ 55#endif /* _JFFS2_FS_I */
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 2228380c47b9..a7f03b7ebcb3 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -56,10 +56,6 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
56 f->target = NULL; 56 f->target = NULL;
57 f->flags = 0; 57 f->flags = 0;
58 f->usercompr = 0; 58 f->usercompr = 0;
59#ifdef CONFIG_JFFS2_FS_POSIX_ACL
60 f->i_acl_access = JFFS2_ACL_NOT_CACHED;
61 f->i_acl_default = JFFS2_ACL_NOT_CACHED;
62#endif
63} 59}
64 60
65 61
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1fc1e92356ee..1a80301004b8 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1424,7 +1424,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
1424 struct jffs2_full_dirent *fd, *fds; 1424 struct jffs2_full_dirent *fd, *fds;
1425 int deleted; 1425 int deleted;
1426 1426
1427 jffs2_clear_acl(f);
1428 jffs2_xattr_delete_inode(c, f->inocache); 1427 jffs2_xattr_delete_inode(c, f->inocache);
1429 mutex_lock(&f->sem); 1428 mutex_lock(&f->sem);
1430 deleted = f->inocache && !f->inocache->pino_nlink; 1429 deleted = f->inocache && !f->inocache->pino_nlink;
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 06ca1b8d2054..91fa3ad6e8c2 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -31,27 +31,24 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
31{ 31{
32 struct posix_acl *acl; 32 struct posix_acl *acl;
33 char *ea_name; 33 char *ea_name;
34 struct jfs_inode_info *ji = JFS_IP(inode);
35 struct posix_acl **p_acl;
36 int size; 34 int size;
37 char *value = NULL; 35 char *value = NULL;
38 36
37 acl = get_cached_acl(inode, type);
38 if (acl != ACL_NOT_CACHED)
39 return acl;
40
39 switch(type) { 41 switch(type) {
40 case ACL_TYPE_ACCESS: 42 case ACL_TYPE_ACCESS:
41 ea_name = POSIX_ACL_XATTR_ACCESS; 43 ea_name = POSIX_ACL_XATTR_ACCESS;
42 p_acl = &ji->i_acl;
43 break; 44 break;
44 case ACL_TYPE_DEFAULT: 45 case ACL_TYPE_DEFAULT:
45 ea_name = POSIX_ACL_XATTR_DEFAULT; 46 ea_name = POSIX_ACL_XATTR_DEFAULT;
46 p_acl = &ji->i_default_acl;
47 break; 47 break;
48 default: 48 default:
49 return ERR_PTR(-EINVAL); 49 return ERR_PTR(-EINVAL);
50 } 50 }
51 51
52 if (*p_acl != JFS_ACL_NOT_CACHED)
53 return posix_acl_dup(*p_acl);
54
55 size = __jfs_getxattr(inode, ea_name, NULL, 0); 52 size = __jfs_getxattr(inode, ea_name, NULL, 0);
56 53
57 if (size > 0) { 54 if (size > 0) {
@@ -62,17 +59,18 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
62 } 59 }
63 60
64 if (size < 0) { 61 if (size < 0) {
65 if (size == -ENODATA) { 62 if (size == -ENODATA)
66 *p_acl = NULL;
67 acl = NULL; 63 acl = NULL;
68 } else 64 else
69 acl = ERR_PTR(size); 65 acl = ERR_PTR(size);
70 } else { 66 } else {
71 acl = posix_acl_from_xattr(value, size); 67 acl = posix_acl_from_xattr(value, size);
72 if (!IS_ERR(acl))
73 *p_acl = posix_acl_dup(acl);
74 } 68 }
75 kfree(value); 69 kfree(value);
70 if (!IS_ERR(acl)) {
71 set_cached_acl(inode, type, acl);
72 posix_acl_release(acl);
73 }
76 return acl; 74 return acl;
77} 75}
78 76
@@ -80,8 +78,6 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
80 struct posix_acl *acl) 78 struct posix_acl *acl)
81{ 79{
82 char *ea_name; 80 char *ea_name;
83 struct jfs_inode_info *ji = JFS_IP(inode);
84 struct posix_acl **p_acl;
85 int rc; 81 int rc;
86 int size = 0; 82 int size = 0;
87 char *value = NULL; 83 char *value = NULL;
@@ -92,11 +88,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
92 switch(type) { 88 switch(type) {
93 case ACL_TYPE_ACCESS: 89 case ACL_TYPE_ACCESS:
94 ea_name = POSIX_ACL_XATTR_ACCESS; 90 ea_name = POSIX_ACL_XATTR_ACCESS;
95 p_acl = &ji->i_acl;
96 break; 91 break;
97 case ACL_TYPE_DEFAULT: 92 case ACL_TYPE_DEFAULT:
98 ea_name = POSIX_ACL_XATTR_DEFAULT; 93 ea_name = POSIX_ACL_XATTR_DEFAULT;
99 p_acl = &ji->i_default_acl;
100 if (!S_ISDIR(inode->i_mode)) 94 if (!S_ISDIR(inode->i_mode))
101 return acl ? -EACCES : 0; 95 return acl ? -EACCES : 0;
102 break; 96 break;
@@ -116,27 +110,24 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
116out: 110out:
117 kfree(value); 111 kfree(value);
118 112
119 if (!rc) { 113 if (!rc)
120 if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED)) 114 set_cached_acl(inode, type, acl);
121 posix_acl_release(*p_acl); 115
122 *p_acl = posix_acl_dup(acl);
123 }
124 return rc; 116 return rc;
125} 117}
126 118
127static int jfs_check_acl(struct inode *inode, int mask) 119static int jfs_check_acl(struct inode *inode, int mask)
128{ 120{
129 struct jfs_inode_info *ji = JFS_IP(inode); 121 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
130 122
131 if (ji->i_acl == JFS_ACL_NOT_CACHED) { 123 if (IS_ERR(acl))
132 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 124 return PTR_ERR(acl);
133 if (IS_ERR(acl)) 125 if (acl) {
134 return PTR_ERR(acl); 126 int error = posix_acl_permission(inode, acl, mask);
135 posix_acl_release(acl); 127 posix_acl_release(acl);
128 return error;
136 } 129 }
137 130
138 if (ji->i_acl)
139 return posix_acl_permission(inode, ji->i_acl, mask);
140 return -EAGAIN; 131 return -EAGAIN;
141} 132}
142 133
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 439901d205fe..1439f119ec83 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -74,10 +74,6 @@ struct jfs_inode_info {
74 /* xattr_sem allows us to access the xattrs without taking i_mutex */ 74 /* xattr_sem allows us to access the xattrs without taking i_mutex */
75 struct rw_semaphore xattr_sem; 75 struct rw_semaphore xattr_sem;
76 lid_t xtlid; /* lid of xtree lock on directory */ 76 lid_t xtlid; /* lid of xtree lock on directory */
77#ifdef CONFIG_JFS_POSIX_ACL
78 struct posix_acl *i_acl;
79 struct posix_acl *i_default_acl;
80#endif
81 union { 77 union {
82 struct { 78 struct {
83 xtpage_t _xtroot; /* 288: xtree root */ 79 xtpage_t _xtroot; /* 288: xtree root */
@@ -107,8 +103,6 @@ struct jfs_inode_info {
107#define i_inline u.link._inline 103#define i_inline u.link._inline
108#define i_inline_ea u.link._inline_ea 104#define i_inline_ea u.link._inline_ea
109 105
110#define JFS_ACL_NOT_CACHED ((void *)-1)
111
112#define IREAD_LOCK(ip, subclass) \ 106#define IREAD_LOCK(ip, subclass) \
113 down_read_nested(&JFS_IP(ip)->rdwrlock, subclass) 107 down_read_nested(&JFS_IP(ip)->rdwrlock, subclass)
114#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock) 108#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 09b1b6ee2186..37e6dcda8fc8 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -128,18 +128,6 @@ static void jfs_destroy_inode(struct inode *inode)
128 ji->active_ag = -1; 128 ji->active_ag = -1;
129 } 129 }
130 spin_unlock_irq(&ji->ag_lock); 130 spin_unlock_irq(&ji->ag_lock);
131
132#ifdef CONFIG_JFS_POSIX_ACL
133 if (ji->i_acl != JFS_ACL_NOT_CACHED) {
134 posix_acl_release(ji->i_acl);
135 ji->i_acl = JFS_ACL_NOT_CACHED;
136 }
137 if (ji->i_default_acl != JFS_ACL_NOT_CACHED) {
138 posix_acl_release(ji->i_default_acl);
139 ji->i_default_acl = JFS_ACL_NOT_CACHED;
140 }
141#endif
142
143 kmem_cache_free(jfs_inode_cachep, ji); 131 kmem_cache_free(jfs_inode_cachep, ji);
144} 132}
145 133
@@ -798,10 +786,6 @@ static void init_once(void *foo)
798 init_rwsem(&jfs_ip->xattr_sem); 786 init_rwsem(&jfs_ip->xattr_sem);
799 spin_lock_init(&jfs_ip->ag_lock); 787 spin_lock_init(&jfs_ip->ag_lock);
800 jfs_ip->active_ag = -1; 788 jfs_ip->active_ag = -1;
801#ifdef CONFIG_JFS_POSIX_ACL
802 jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
803 jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED;
804#endif
805 inode_init_once(&jfs_ip->vfs_inode); 789 inode_init_once(&jfs_ip->vfs_inode);
806} 790}
807 791
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 61dfa8173ebc..fad364548bc9 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -727,10 +727,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
727 /* 727 /*
728 * We're changing the ACL. Get rid of the cached one 728 * We're changing the ACL. Get rid of the cached one
729 */ 729 */
730 acl =JFS_IP(inode)->i_acl; 730 forget_cached_acl(inode, ACL_TYPE_ACCESS);
731 if (acl != JFS_ACL_NOT_CACHED)
732 posix_acl_release(acl);
733 JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
734 731
735 return 0; 732 return 0;
736 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { 733 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
@@ -746,10 +743,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
746 /* 743 /*
747 * We're changing the default ACL. Get rid of the cached one 744 * We're changing the default ACL. Get rid of the cached one
748 */ 745 */
749 acl =JFS_IP(inode)->i_default_acl; 746 forget_cached_acl(inode, ACL_TYPE_DEFAULT);
750 if (acl && (acl != JFS_ACL_NOT_CACHED))
751 posix_acl_release(acl);
752 JFS_IP(inode)->i_default_acl = JFS_ACL_NOT_CACHED;
753 747
754 return 0; 748 return 0;
755 } 749 }
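The jfs xattr path above is the one spot where a cached ACL must be invalidated rather than replaced: the value is being written directly as a raw xattr, so the cache has to drop back to "unknown" and force a re-read. The helper is presumably the inverse of set_cached_acl():

        static inline void forget_cached_acl(struct inode *inode, int type)
        {
                struct posix_acl *old;

                spin_lock(&inode->i_lock);
                if (type == ACL_TYPE_ACCESS) {
                        old = inode->i_acl;
                        inode->i_acl = ACL_NOT_CACHED;
                } else {
                        old = inode->i_default_acl;
                        inode->i_default_acl = ACL_NOT_CACHED;
                }
                spin_unlock(&inode->i_lock);
                if (old != ACL_NOT_CACHED)
                        posix_acl_release(old); /* NULL-safe */
        }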
diff --git a/fs/namei.c b/fs/namei.c
index 527119afb6a5..5b961eb71cbf 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1698,8 +1698,11 @@ struct file *do_filp_open(int dfd, const char *pathname,
1698 if (error) 1698 if (error)
1699 return ERR_PTR(error); 1699 return ERR_PTR(error);
1700 error = path_walk(pathname, &nd); 1700 error = path_walk(pathname, &nd);
1701 if (error) 1701 if (error) {
1702 if (nd.root.mnt)
1703 path_put(&nd.root);
1702 return ERR_PTR(error); 1704 return ERR_PTR(error);
1705 }
1703 if (unlikely(!audit_dummy_context())) 1706 if (unlikely(!audit_dummy_context()))
1704 audit_inode(pathname, nd.path.dentry); 1707 audit_inode(pathname, nd.path.dentry);
1705 1708
@@ -1759,6 +1762,8 @@ do_last:
1759 } 1762 }
1760 filp = nameidata_to_filp(&nd, open_flag); 1763 filp = nameidata_to_filp(&nd, open_flag);
1761 mnt_drop_write(nd.path.mnt); 1764 mnt_drop_write(nd.path.mnt);
1765 if (nd.root.mnt)
1766 path_put(&nd.root);
1762 return filp; 1767 return filp;
1763 } 1768 }
1764 1769
@@ -1819,6 +1824,8 @@ ok:
1819 */ 1824 */
1820 if (will_write) 1825 if (will_write)
1821 mnt_drop_write(nd.path.mnt); 1826 mnt_drop_write(nd.path.mnt);
1827 if (nd.root.mnt)
1828 path_put(&nd.root);
1822 return filp; 1829 return filp;
1823 1830
1824exit_mutex_unlock: 1831exit_mutex_unlock:
@@ -1859,6 +1866,8 @@ do_link:
1859 * with "intent.open". 1866 * with "intent.open".
1860 */ 1867 */
1861 release_open_intent(&nd); 1868 release_open_intent(&nd);
1869 if (nd.root.mnt)
1870 path_put(&nd.root);
1862 return ERR_PTR(error); 1871 return ERR_PTR(error);
1863 } 1872 }
1864 nd.flags &= ~LOOKUP_PARENT; 1873 nd.flags &= ~LOOKUP_PARENT;
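The namei.c hunks plug a reference leak: once do_filp_open() has pinned the walk's root in nd.root, every early return has to drop it, and several did not. The patch open-codes the cleanup at each exit; a hypothetical helper (not in the patch) makes the invariant explicit:

        static inline void put_nd_root(struct nameidata *nd)
        {
                if (nd->root.mnt) {
                        path_put(&nd->root);
                        nd->root.mnt = NULL;
                }
        }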
diff --git a/fs/namespace.c b/fs/namespace.c
index a7bea8c8bd46..3dc283fd4716 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -42,6 +42,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
42static int event; 42static int event;
43static DEFINE_IDA(mnt_id_ida); 43static DEFINE_IDA(mnt_id_ida);
44static DEFINE_IDA(mnt_group_ida); 44static DEFINE_IDA(mnt_group_ida);
45static int mnt_id_start = 0;
46static int mnt_group_start = 1;
45 47
46static struct list_head *mount_hashtable __read_mostly; 48static struct list_head *mount_hashtable __read_mostly;
47static struct kmem_cache *mnt_cache __read_mostly; 49static struct kmem_cache *mnt_cache __read_mostly;
@@ -69,7 +71,9 @@ static int mnt_alloc_id(struct vfsmount *mnt)
69retry: 71retry:
70 ida_pre_get(&mnt_id_ida, GFP_KERNEL); 72 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
71 spin_lock(&vfsmount_lock); 73 spin_lock(&vfsmount_lock);
72 res = ida_get_new(&mnt_id_ida, &mnt->mnt_id); 74 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
75 if (!res)
76 mnt_id_start = mnt->mnt_id + 1;
73 spin_unlock(&vfsmount_lock); 77 spin_unlock(&vfsmount_lock);
74 if (res == -EAGAIN) 78 if (res == -EAGAIN)
75 goto retry; 79 goto retry;
@@ -79,8 +83,11 @@ retry:
79 83
80static void mnt_free_id(struct vfsmount *mnt) 84static void mnt_free_id(struct vfsmount *mnt)
81{ 85{
86 int id = mnt->mnt_id;
82 spin_lock(&vfsmount_lock); 87 spin_lock(&vfsmount_lock);
83 ida_remove(&mnt_id_ida, mnt->mnt_id); 88 ida_remove(&mnt_id_ida, id);
89 if (mnt_id_start > id)
90 mnt_id_start = id;
84 spin_unlock(&vfsmount_lock); 91 spin_unlock(&vfsmount_lock);
85} 92}
86 93
@@ -91,10 +98,18 @@ static void mnt_free_id(struct vfsmount *mnt)
91 */ 98 */
92static int mnt_alloc_group_id(struct vfsmount *mnt) 99static int mnt_alloc_group_id(struct vfsmount *mnt)
93{ 100{
101 int res;
102
94 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) 103 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
95 return -ENOMEM; 104 return -ENOMEM;
96 105
97 return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id); 106 res = ida_get_new_above(&mnt_group_ida,
107 mnt_group_start,
108 &mnt->mnt_group_id);
109 if (!res)
110 mnt_group_start = mnt->mnt_group_id + 1;
111
112 return res;
98} 113}
99 114
100/* 115/*
@@ -102,7 +117,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
102 */ 117 */
103void mnt_release_group_id(struct vfsmount *mnt) 118void mnt_release_group_id(struct vfsmount *mnt)
104{ 119{
105 ida_remove(&mnt_group_ida, mnt->mnt_group_id); 120 int id = mnt->mnt_group_id;
121 ida_remove(&mnt_group_ida, id);
122 if (mnt_group_start > id)
123 mnt_group_start = id;
106 mnt->mnt_group_id = 0; 124 mnt->mnt_group_id = 0;
107} 125}
108 126
@@ -2222,16 +2240,9 @@ static void __init init_mount_tree(void)
2222 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2240 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
2223 if (IS_ERR(mnt)) 2241 if (IS_ERR(mnt))
2224 panic("Can't create rootfs"); 2242 panic("Can't create rootfs");
2225 ns = kmalloc(sizeof(*ns), GFP_KERNEL); 2243 ns = create_mnt_ns(mnt);
2226 if (!ns) 2244 if (IS_ERR(ns))
2227 panic("Can't allocate initial namespace"); 2245 panic("Can't allocate initial namespace");
2228 atomic_set(&ns->count, 1);
2229 INIT_LIST_HEAD(&ns->list);
2230 init_waitqueue_head(&ns->poll);
2231 ns->event = 0;
2232 list_add(&mnt->mnt_list, &ns->list);
2233 ns->root = mnt;
2234 mnt->mnt_ns = ns;
2235 2246
2236 init_task.nsproxy->mnt_ns = ns; 2247 init_task.nsproxy->mnt_ns = ns;
2237 get_mnt_ns(ns); 2248 get_mnt_ns(ns);
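The two new statics are low-watermarks: ida_get_new_above() is told the lowest id that could be free, and freeing an id lowers the watermark again, so mount and peer-group ids are recycled compactly instead of growing without bound. The pattern in isolation, using the IDA API of this era:

        static DEFINE_IDA(example_ida);
        static DEFINE_SPINLOCK(example_lock);
        static int example_start;               /* no id below this is free */

        static int example_alloc_id(void)
        {
                int id, res;
        retry:
                if (!ida_pre_get(&example_ida, GFP_KERNEL))
                        return -ENOMEM;
                spin_lock(&example_lock);
                res = ida_get_new_above(&example_ida, example_start, &id);
                if (!res)
                        example_start = id + 1;
                spin_unlock(&example_lock);
                if (res == -EAGAIN)
                        goto retry;             /* preload raced, try again */
                return res ? res : id;
        }

        static void example_free_id(int id)
        {
                spin_lock(&example_lock);
                ida_remove(&example_ida, id);
                if (example_start > id)
                        example_start = id;     /* slot is free again */
                spin_unlock(&example_lock);
        }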
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2696d6b513b7..fe9d8f2a13f8 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -309,10 +309,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
309 /* ii->i_file_acl = 0; */ 309 /* ii->i_file_acl = 0; */
310 /* ii->i_dir_acl = 0; */ 310 /* ii->i_dir_acl = 0; */
311 ii->i_dir_start_lookup = 0; 311 ii->i_dir_start_lookup = 0;
312#ifdef CONFIG_NILFS_FS_POSIX_ACL
313 ii->i_acl = NULL;
314 ii->i_default_acl = NULL;
315#endif
316 ii->i_cno = 0; 312 ii->i_cno = 0;
317 nilfs_set_inode_flags(inode); 313 nilfs_set_inode_flags(inode);
318 spin_lock(&sbi->s_next_gen_lock); 314 spin_lock(&sbi->s_next_gen_lock);
@@ -434,10 +430,6 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
434 430
435 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); 431 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh);
436 432
437#ifdef CONFIG_NILFS_FS_POSIX_ACL
438 ii->i_acl = NILFS_ACL_NOT_CACHED;
439 ii->i_default_acl = NILFS_ACL_NOT_CACHED;
440#endif
441 if (nilfs_read_inode_common(inode, raw_inode)) 433 if (nilfs_read_inode_common(inode, raw_inode))
442 goto failed_unmap; 434 goto failed_unmap;
443 435
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index edf6a59d9f2a..724c63766e82 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -58,10 +58,6 @@ struct nilfs_inode_info {
58 */ 58 */
59 struct rw_semaphore xattr_sem; 59 struct rw_semaphore xattr_sem;
60#endif 60#endif
61#ifdef CONFIG_NILFS_POSIX_ACL
62 struct posix_acl *i_acl;
63 struct posix_acl *i_default_acl;
64#endif
65 struct buffer_head *i_bh; /* i_bh contains a new or dirty 61 struct buffer_head *i_bh; /* i_bh contains a new or dirty
66 disk inode */ 62 disk inode */
67 struct inode vfs_inode; 63 struct inode vfs_inode;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index ab785f85aa50..8e2ec43b18f4 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -189,16 +189,6 @@ static void nilfs_clear_inode(struct inode *inode)
189{ 189{
190 struct nilfs_inode_info *ii = NILFS_I(inode); 190 struct nilfs_inode_info *ii = NILFS_I(inode);
191 191
192#ifdef CONFIG_NILFS_POSIX_ACL
193 if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) {
194 posix_acl_release(ii->i_acl);
195 ii->i_acl = NILFS_ACL_NOT_CACHED;
196 }
197 if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) {
198 posix_acl_release(ii->i_default_acl);
199 ii->i_default_acl = NILFS_ACL_NOT_CACHED;
200 }
201#endif
202 /* 192 /*
203 * Free resources allocated in nilfs_read_inode(), here. 193 * Free resources allocated in nilfs_read_inode(), here.
204 */ 194 */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 6cdeaa76f27f..110bb57c46ab 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -92,6 +92,9 @@ struct ocfs2_unblock_ctl {
92 enum ocfs2_unblock_action unblock_action; 92 enum ocfs2_unblock_action unblock_action;
93}; 93};
94 94
95/* Lockdep class keys */
96struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
97
95static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 98static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
96 int new_level); 99 int new_level);
97static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 100static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
@@ -317,9 +320,16 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
317 u32 dlm_flags); 320 u32 dlm_flags);
318static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 321static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
319 int wanted); 322 int wanted);
320static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 323static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
321 struct ocfs2_lock_res *lockres, 324 struct ocfs2_lock_res *lockres,
322 int level); 325 int level, unsigned long caller_ip);
326static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
327 struct ocfs2_lock_res *lockres,
328 int level)
329{
330 __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
331}
332
323static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 333static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
324static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 334static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
325static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 335static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
@@ -489,6 +499,13 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
489 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 499 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
490 500
491 ocfs2_init_lock_stats(res); 501 ocfs2_init_lock_stats(res);
502#ifdef CONFIG_DEBUG_LOCK_ALLOC
503 if (type != OCFS2_LOCK_TYPE_OPEN)
504 lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
505 &lockdep_keys[type], 0);
506 else
507 res->l_lockdep_map.key = NULL;
508#endif
492} 509}
493 510
494void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 511void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
@@ -644,14 +661,10 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
644static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 661static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
645 struct ocfs2_super *osb) 662 struct ocfs2_super *osb)
646{ 663{
647 struct ocfs2_orphan_scan_lvb *lvb;
648
649 ocfs2_lock_res_init_once(res); 664 ocfs2_lock_res_init_once(res);
650 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 665 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
651 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 666 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
652 &ocfs2_orphan_scan_lops, osb); 667 &ocfs2_orphan_scan_lops, osb);
653 lvb = ocfs2_dlm_lvb(&res->l_lksb);
654 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
655} 668}
656 669
657void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 670void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
@@ -1256,11 +1269,13 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1256 return ret; 1269 return ret;
1257} 1270}
1258 1271
1259static int ocfs2_cluster_lock(struct ocfs2_super *osb, 1272static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1260 struct ocfs2_lock_res *lockres, 1273 struct ocfs2_lock_res *lockres,
1261 int level, 1274 int level,
1262 u32 lkm_flags, 1275 u32 lkm_flags,
1263 int arg_flags) 1276 int arg_flags,
1277 int l_subclass,
1278 unsigned long caller_ip)
1264{ 1279{
1265 struct ocfs2_mask_waiter mw; 1280 struct ocfs2_mask_waiter mw;
1266 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1281 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
@@ -1403,13 +1418,37 @@ out:
1403 } 1418 }
1404 ocfs2_update_lock_stats(lockres, level, &mw, ret); 1419 ocfs2_update_lock_stats(lockres, level, &mw, ret);
1405 1420
1421#ifdef CONFIG_DEBUG_LOCK_ALLOC
1422 if (!ret && lockres->l_lockdep_map.key != NULL) {
1423 if (level == DLM_LOCK_PR)
1424 rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1425 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1426 caller_ip);
1427 else
1428 rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1429 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1430 caller_ip);
1431 }
1432#endif
1406 mlog_exit(ret); 1433 mlog_exit(ret);
1407 return ret; 1434 return ret;
1408} 1435}
1409 1436
1410static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 1437static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1411 struct ocfs2_lock_res *lockres, 1438 struct ocfs2_lock_res *lockres,
1412 int level) 1439 int level,
1440 u32 lkm_flags,
1441 int arg_flags)
1442{
1443 return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1444 0, _RET_IP_);
1445}
1446
1447
1448static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1449 struct ocfs2_lock_res *lockres,
1450 int level,
1451 unsigned long caller_ip)
1413{ 1452{
1414 unsigned long flags; 1453 unsigned long flags;
1415 1454
@@ -1418,6 +1457,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
1418 ocfs2_dec_holders(lockres, level); 1457 ocfs2_dec_holders(lockres, level);
1419 ocfs2_downconvert_on_unlock(osb, lockres); 1458 ocfs2_downconvert_on_unlock(osb, lockres);
1420 spin_unlock_irqrestore(&lockres->l_lock, flags); 1459 spin_unlock_irqrestore(&lockres->l_lock, flags);
1460#ifdef CONFIG_DEBUG_LOCK_ALLOC
1461 if (lockres->l_lockdep_map.key != NULL)
1462 rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1463#endif
1421 mlog_exit_void(); 1464 mlog_exit_void();
1422} 1465}
1423 1466
@@ -1989,7 +2032,8 @@ static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1989{ 2032{
1990 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2033 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1991 2034
1992 if (lvb->lvb_version == OCFS2_LVB_VERSION 2035 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
2036 && lvb->lvb_version == OCFS2_LVB_VERSION
1993 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 2037 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
1994 return 1; 2038 return 1;
1995 return 0; 2039 return 0;
@@ -2162,10 +2206,11 @@ static int ocfs2_assign_bh(struct inode *inode,
2162 * returns < 0 error if the callback will never be called, otherwise 2206 * returns < 0 error if the callback will never be called, otherwise
2163 * the result of the lock will be communicated via the callback. 2207 * the result of the lock will be communicated via the callback.
2164 */ 2208 */
2165int ocfs2_inode_lock_full(struct inode *inode, 2209int ocfs2_inode_lock_full_nested(struct inode *inode,
2166 struct buffer_head **ret_bh, 2210 struct buffer_head **ret_bh,
2167 int ex, 2211 int ex,
2168 int arg_flags) 2212 int arg_flags,
2213 int subclass)
2169{ 2214{
2170 int status, level, acquired; 2215 int status, level, acquired;
2171 u32 dlm_flags; 2216 u32 dlm_flags;
@@ -2203,7 +2248,8 @@ int ocfs2_inode_lock_full(struct inode *inode,
2203 if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2248 if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2204 dlm_flags |= DLM_LKF_NOQUEUE; 2249 dlm_flags |= DLM_LKF_NOQUEUE;
2205 2250
2206 status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); 2251 status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2252 arg_flags, subclass, _RET_IP_);
2207 if (status < 0) { 2253 if (status < 0) {
2208 if (status != -EAGAIN && status != -EIOCBRETRY) 2254 if (status != -EAGAIN && status != -EIOCBRETRY)
2209 mlog_errno(status); 2255 mlog_errno(status);
@@ -2369,35 +2415,45 @@ void ocfs2_inode_unlock(struct inode *inode,
2369 mlog_exit_void(); 2415 mlog_exit_void();
2370} 2416}
2371 2417
2372int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) 2418int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
2373{ 2419{
2374 struct ocfs2_lock_res *lockres; 2420 struct ocfs2_lock_res *lockres;
2375 struct ocfs2_orphan_scan_lvb *lvb; 2421 struct ocfs2_orphan_scan_lvb *lvb;
2376 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2377 int status = 0; 2422 int status = 0;
2378 2423
2424 if (ocfs2_is_hard_readonly(osb))
2425 return -EROFS;
2426
2427 if (ocfs2_mount_local(osb))
2428 return 0;
2429
2379 lockres = &osb->osb_orphan_scan.os_lockres; 2430 lockres = &osb->osb_orphan_scan.os_lockres;
2380 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2431 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2381 if (status < 0) 2432 if (status < 0)
2382 return status; 2433 return status;
2383 2434
2384 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2435 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2385 if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 2436 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
2437 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
2386 *seqno = be32_to_cpu(lvb->lvb_os_seqno); 2438 *seqno = be32_to_cpu(lvb->lvb_os_seqno);
2439 else
2440 *seqno = osb->osb_orphan_scan.os_seqno + 1;
2441
2387 return status; 2442 return status;
2388} 2443}
2389 2444
2390void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) 2445void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
2391{ 2446{
2392 struct ocfs2_lock_res *lockres; 2447 struct ocfs2_lock_res *lockres;
2393 struct ocfs2_orphan_scan_lvb *lvb; 2448 struct ocfs2_orphan_scan_lvb *lvb;
2394 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2395 2449
2396 lockres = &osb->osb_orphan_scan.os_lockres; 2450 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
2397 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2451 lockres = &osb->osb_orphan_scan.os_lockres;
2398 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 2452 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2399 lvb->lvb_os_seqno = cpu_to_be32(seqno); 2453 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
2400 ocfs2_cluster_unlock(osb, lockres, level); 2454 lvb->lvb_os_seqno = cpu_to_be32(seqno);
2455 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2456 }
2401} 2457}
2402 2458
2403int ocfs2_super_lock(struct ocfs2_super *osb, 2459int ocfs2_super_lock(struct ocfs2_super *osb,
@@ -3627,7 +3683,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
3627 struct ocfs2_global_disk_dqinfo *gdinfo; 3683 struct ocfs2_global_disk_dqinfo *gdinfo;
3628 int status = 0; 3684 int status = 0;
3629 3685
3630 if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 3686 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
3687 lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
3631 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 3688 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
3632 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 3689 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
3633 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 3690 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 31b90d7b8f51..7553836931de 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -78,6 +78,14 @@ struct ocfs2_orphan_scan_lvb {
78/* don't block waiting for the downconvert thread, instead return -EAGAIN */ 78/* don't block waiting for the downconvert thread, instead return -EAGAIN */
79#define OCFS2_LOCK_NONBLOCK (0x04) 79#define OCFS2_LOCK_NONBLOCK (0x04)
80 80
81/* Locking subclasses of inode cluster lock */
82enum {
83 OI_LS_NORMAL = 0,
84 OI_LS_PARENT,
85 OI_LS_RENAME1,
86 OI_LS_RENAME2,
87};
88
81int ocfs2_dlm_init(struct ocfs2_super *osb); 89int ocfs2_dlm_init(struct ocfs2_super *osb);
82void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending); 90void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending);
83void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); 91void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
@@ -104,25 +112,31 @@ void ocfs2_open_unlock(struct inode *inode);
104int ocfs2_inode_lock_atime(struct inode *inode, 112int ocfs2_inode_lock_atime(struct inode *inode,
105 struct vfsmount *vfsmnt, 113 struct vfsmount *vfsmnt,
106 int *level); 114 int *level);
107int ocfs2_inode_lock_full(struct inode *inode, 115int ocfs2_inode_lock_full_nested(struct inode *inode,
108 struct buffer_head **ret_bh, 116 struct buffer_head **ret_bh,
109 int ex, 117 int ex,
110 int arg_flags); 118 int arg_flags,
119 int subclass);
111int ocfs2_inode_lock_with_page(struct inode *inode, 120int ocfs2_inode_lock_with_page(struct inode *inode,
112 struct buffer_head **ret_bh, 121 struct buffer_head **ret_bh,
113 int ex, 122 int ex,
114 struct page *page); 123 struct page *page);
124/* Variants without special locking class or flags */
125#define ocfs2_inode_lock_full(i, r, e, f)\
126 ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL)
127#define ocfs2_inode_lock_nested(i, b, e, s)\
128 ocfs2_inode_lock_full_nested(i, b, e, 0, s)
115/* 99% of the time we don't want to supply any additional flags -- 129/* 99% of the time we don't want to supply any additional flags --
116 * those are for very specific cases only. */ 130 * those are for very specific cases only. */
117#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0) 131#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL)
118void ocfs2_inode_unlock(struct inode *inode, 132void ocfs2_inode_unlock(struct inode *inode,
119 int ex); 133 int ex);
120int ocfs2_super_lock(struct ocfs2_super *osb, 134int ocfs2_super_lock(struct ocfs2_super *osb,
121 int ex); 135 int ex);
122void ocfs2_super_unlock(struct ocfs2_super *osb, 136void ocfs2_super_unlock(struct ocfs2_super *osb,
123 int ex); 137 int ex);
124int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex); 138int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno);
125void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex); 139void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno);
126 140
127int ocfs2_rename_lock(struct ocfs2_super *osb); 141int ocfs2_rename_lock(struct ocfs2_super *osb);
128void ocfs2_rename_unlock(struct ocfs2_super *osb); 142void ocfs2_rename_unlock(struct ocfs2_super *osb);
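
The OI_LS_* subclasses above exist so lockdep can tell apart multiple simultaneous holders of the single inode cluster-lock class. A minimal sketch of the underlying kernel idiom, using mutex_lock_nested() on a hypothetical parent/child pair (all names here are illustrative, not from the patch, and assume every node's mutex was initialized at one call site so they share a class):

	#include <linux/mutex.h>

	enum { LS_PARENT, LS_CHILD };	/* hypothetical subclasses */

	struct node {
		struct mutex lock;	/* one lockdep class for all nodes */
		struct node *parent;
	};

	static void lock_parent_then_child(struct node *n)
	{
		/* Two plain mutex_lock() calls on the same class would trip
		 * lockdep's recursive-locking check; distinct subclasses
		 * declare the nesting as intentional. */
		mutex_lock_nested(&n->parent->lock, LS_PARENT);
		mutex_lock_nested(&n->lock, LS_CHILD);
		/* ... operate on both nodes ... */
		mutex_unlock(&n->lock);
		mutex_unlock(&n->parent->lock);
	}
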
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 07267e0da909..62442e413a00 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2026,7 +2026,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2026 size_t len, 2026 size_t len,
2027 unsigned int flags) 2027 unsigned int flags)
2028{ 2028{
2029 int ret = 0; 2029 int ret = 0, lock_level = 0;
2030 struct inode *inode = in->f_path.dentry->d_inode; 2030 struct inode *inode = in->f_path.dentry->d_inode;
2031 2031
2032 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, 2032 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
@@ -2037,12 +2037,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2037 /* 2037 /*
2038 * See the comment in ocfs2_file_aio_read() 2038 * See the comment in ocfs2_file_aio_read()
2039 */ 2039 */
2040 ret = ocfs2_inode_lock(inode, NULL, 0); 2040 ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level);
2041 if (ret < 0) { 2041 if (ret < 0) {
2042 mlog_errno(ret); 2042 mlog_errno(ret);
2043 goto bail; 2043 goto bail;
2044 } 2044 }
2045 ocfs2_inode_unlock(inode, 0); 2045 ocfs2_inode_unlock(inode, lock_level);
2046 2046
2047 ret = generic_file_splice_read(in, ppos, pipe, len, flags); 2047 ret = generic_file_splice_read(in, ppos, pipe, len, flags);
2048 2048
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 10e1fa87396a..4dc8890ba316 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -215,6 +215,8 @@ bail:
215static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) 215static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
216{ 216{
217 struct ocfs2_find_inode_args *args = opaque; 217 struct ocfs2_find_inode_args *args = opaque;
218 static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
219 ocfs2_file_ip_alloc_sem_key;
218 220
219 mlog_entry("inode = %p, opaque = %p\n", inode, opaque); 221 mlog_entry("inode = %p, opaque = %p\n", inode, opaque);
220 222
@@ -223,6 +225,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
223 if (args->fi_sysfile_type != 0) 225 if (args->fi_sysfile_type != 0)
224 lockdep_set_class(&inode->i_mutex, 226 lockdep_set_class(&inode->i_mutex,
225 &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); 227 &ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
228 if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
229 args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
230 args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
231 args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE)
232 lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
233 &ocfs2_quota_ip_alloc_sem_key);
234 else
235 lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
236 &ocfs2_file_ip_alloc_sem_key);
226 237
227 mlog_exit(0); 238 mlog_exit(0);
228 return 0; 239 return 0;
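
The two static keys split OCFS2_I(inode)->ip_alloc_sem into separate lockdep classes, since quota system inodes and regular files take the semaphore in different orders. A sketch of the general re-keying idiom with an rwsem (the helper and key names are illustrative):

	#include <linux/rwsem.h>
	#include <linux/lockdep.h>

	static struct lock_class_key quota_key, file_key;

	static void init_alloc_sem(struct rw_semaphore *sem, bool quota)
	{
		init_rwsem(sem);
		/* Same code path, two lockdep classes: dependency chains
		 * recorded for quota inodes no longer mix with those of
		 * regular files. */
		lockdep_set_class(sem, quota ? &quota_key : &file_key);
	}
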
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4a3b9e6b31ad..f033760ecbea 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1880,13 +1880,20 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1880 1880
1881 os = &osb->osb_orphan_scan; 1881 os = &osb->osb_orphan_scan;
1882 1882
1883 status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX); 1883 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1884 goto out;
1885
1886 status = ocfs2_orphan_scan_lock(osb, &seqno);
1884 if (status < 0) { 1887 if (status < 0) {
1885 if (status != -EAGAIN) 1888 if (status != -EAGAIN)
1886 mlog_errno(status); 1889 mlog_errno(status);
1887 goto out; 1890 goto out;
1888 } 1891 }
1889 1892
 1893 /* Do not queue the tasks if the volume is being umounted */
1894 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1895 goto unlock;
1896
1890 if (os->os_seqno != seqno) { 1897 if (os->os_seqno != seqno) {
1891 os->os_seqno = seqno; 1898 os->os_seqno = seqno;
1892 goto unlock; 1899 goto unlock;
@@ -1903,7 +1910,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1903 os->os_count++; 1910 os->os_count++;
1904 os->os_scantime = CURRENT_TIME; 1911 os->os_scantime = CURRENT_TIME;
1905unlock: 1912unlock:
1906 ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX); 1913 ocfs2_orphan_scan_unlock(osb, seqno);
1907out: 1914out:
1908 return; 1915 return;
1909} 1916}
@@ -1920,8 +1927,9 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
1920 1927
1921 mutex_lock(&os->os_lock); 1928 mutex_lock(&os->os_lock);
1922 ocfs2_queue_orphan_scan(osb); 1929 ocfs2_queue_orphan_scan(osb);
1923 schedule_delayed_work(&os->os_orphan_scan_work, 1930 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1924 ocfs2_orphan_scan_timeout()); 1931 schedule_delayed_work(&os->os_orphan_scan_work,
1932 ocfs2_orphan_scan_timeout());
1925 mutex_unlock(&os->os_lock); 1933 mutex_unlock(&os->os_lock);
1926} 1934}
1927 1935
@@ -1930,26 +1938,33 @@ void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1930 struct ocfs2_orphan_scan *os; 1938 struct ocfs2_orphan_scan *os;
1931 1939
1932 os = &osb->osb_orphan_scan; 1940 os = &osb->osb_orphan_scan;
1933 mutex_lock(&os->os_lock); 1941 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
1934 cancel_delayed_work(&os->os_orphan_scan_work); 1942 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1935 mutex_unlock(&os->os_lock); 1943 mutex_lock(&os->os_lock);
1944 cancel_delayed_work(&os->os_orphan_scan_work);
1945 mutex_unlock(&os->os_lock);
1946 }
1936} 1947}
1937 1948
1938int ocfs2_orphan_scan_init(struct ocfs2_super *osb) 1949void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1939{ 1950{
1940 struct ocfs2_orphan_scan *os; 1951 struct ocfs2_orphan_scan *os;
1941 1952
1942 os = &osb->osb_orphan_scan; 1953 os = &osb->osb_orphan_scan;
1943 os->os_osb = osb; 1954 os->os_osb = osb;
1944 os->os_count = 0; 1955 os->os_count = 0;
1956 os->os_seqno = 0;
1945 os->os_scantime = CURRENT_TIME; 1957 os->os_scantime = CURRENT_TIME;
1946 mutex_init(&os->os_lock); 1958 mutex_init(&os->os_lock);
1947 1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
1948 INIT_DELAYED_WORK(&os->os_orphan_scan_work, 1960
1949 ocfs2_orphan_scan_work); 1961 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
1950 schedule_delayed_work(&os->os_orphan_scan_work, 1962 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1951 ocfs2_orphan_scan_timeout()); 1963 else {
1952 return 0; 1964 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
1965 schedule_delayed_work(&os->os_orphan_scan_work,
1966 ocfs2_orphan_scan_timeout());
1967 }
1953} 1968}
1954 1969
1955struct ocfs2_orphan_filldir_priv { 1970struct ocfs2_orphan_filldir_priv {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 61045eeb3f6e..5432c7f79cc6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -144,7 +144,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
144} 144}
145 145
146/* Exported only for the journal struct init code in super.c. Do not call. */ 146/* Exported only for the journal struct init code in super.c. Do not call. */
147int ocfs2_orphan_scan_init(struct ocfs2_super *osb); 147void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); 148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); 149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
150 150
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 33464c6b60a2..8601f934010b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -118,7 +118,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
118 mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, 118 mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
119 dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); 119 dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
120 120
121 status = ocfs2_inode_lock(dir, NULL, 0); 121 status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT);
122 if (status < 0) { 122 if (status < 0) {
123 if (status != -ENOENT) 123 if (status != -ENOENT)
124 mlog_errno(status); 124 mlog_errno(status);
@@ -636,7 +636,7 @@ static int ocfs2_link(struct dentry *old_dentry,
636 if (S_ISDIR(inode->i_mode)) 636 if (S_ISDIR(inode->i_mode))
637 return -EPERM; 637 return -EPERM;
638 638
639 err = ocfs2_inode_lock(dir, &parent_fe_bh, 1); 639 err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
640 if (err < 0) { 640 if (err < 0) {
641 if (err != -ENOENT) 641 if (err != -ENOENT)
642 mlog_errno(err); 642 mlog_errno(err);
@@ -800,7 +800,8 @@ static int ocfs2_unlink(struct inode *dir,
800 return -EPERM; 800 return -EPERM;
801 } 801 }
802 802
803 status = ocfs2_inode_lock(dir, &parent_node_bh, 1); 803 status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1,
804 OI_LS_PARENT);
804 if (status < 0) { 805 if (status < 0) {
805 if (status != -ENOENT) 806 if (status != -ENOENT)
806 mlog_errno(status); 807 mlog_errno(status);
@@ -978,7 +979,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
978 inode1 = tmpinode; 979 inode1 = tmpinode;
979 } 980 }
980 /* lock id2 */ 981 /* lock id2 */
981 status = ocfs2_inode_lock(inode2, bh2, 1); 982 status = ocfs2_inode_lock_nested(inode2, bh2, 1,
983 OI_LS_RENAME1);
982 if (status < 0) { 984 if (status < 0) {
983 if (status != -ENOENT) 985 if (status != -ENOENT)
984 mlog_errno(status); 986 mlog_errno(status);
@@ -987,7 +989,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
987 } 989 }
988 990
989 /* lock id1 */ 991 /* lock id1 */
990 status = ocfs2_inode_lock(inode1, bh1, 1); 992 status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
991 if (status < 0) { 993 if (status < 0) {
992 /* 994 /*
993 * An error return must mean that no cluster locks 995 * An error return must mean that no cluster locks
@@ -1103,7 +1105,8 @@ static int ocfs2_rename(struct inode *old_dir,
1103 * won't have to concurrently downconvert the inode and the 1105 * won't have to concurrently downconvert the inode and the
1104 * dentry locks. 1106 * dentry locks.
1105 */ 1107 */
1106 status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1); 1108 status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1,
1109 OI_LS_PARENT);
1107 if (status < 0) { 1110 if (status < 0) {
1108 if (status != -ENOENT) 1111 if (status != -ENOENT)
1109 mlog_errno(status); 1112 mlog_errno(status);
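
ocfs2_double_lock() (partially shown above) is why RENAME1/RENAME2 exist: rename must hold two inode locks of the same class at once, so it locks them in a stable order and tags each acquisition with its own subclass. A distilled version of the idiom, assuming the mutex-based node type from the earlier sketch:

	static void double_lock(struct node *a, struct node *b)
	{
		if (a > b)		/* stable order prevents ABBA deadlock */
			swap(a, b);	/* swap() from <linux/kernel.h> */
		mutex_lock_nested(&a->lock, 0);
		if (b != a)
			mutex_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
	}
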
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 18c1d9ec1c93..c9345ebb8493 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -34,6 +34,7 @@
34#include <linux/workqueue.h> 34#include <linux/workqueue.h>
35#include <linux/kref.h> 35#include <linux/kref.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/lockdep.h>
37#ifndef CONFIG_OCFS2_COMPAT_JBD 38#ifndef CONFIG_OCFS2_COMPAT_JBD
38# include <linux/jbd2.h> 39# include <linux/jbd2.h>
39#else 40#else
@@ -152,6 +153,14 @@ struct ocfs2_lock_res {
152 unsigned int l_lock_max_exmode; /* Max wait for EX */ 153 unsigned int l_lock_max_exmode; /* Max wait for EX */
153 unsigned int l_lock_refresh; /* Disk refreshes */ 154 unsigned int l_lock_refresh; /* Disk refreshes */
154#endif 155#endif
156#ifdef CONFIG_DEBUG_LOCK_ALLOC
157 struct lockdep_map l_lockdep_map;
158#endif
159};
160
161enum ocfs2_orphan_scan_state {
162 ORPHAN_SCAN_ACTIVE,
163 ORPHAN_SCAN_INACTIVE
155}; 164};
156 165
157struct ocfs2_orphan_scan { 166struct ocfs2_orphan_scan {
@@ -162,6 +171,7 @@ struct ocfs2_orphan_scan {
162 struct timespec os_scantime; /* time this node ran the scan */ 171 struct timespec os_scantime; /* time this node ran the scan */
163 u32 os_count; /* tracks node specific scans */ 172 u32 os_count; /* tracks node specific scans */
164 u32 os_seqno; /* tracks cluster wide scans */ 173 u32 os_seqno; /* tracks cluster wide scans */
174 atomic_t os_state; /* ACTIVE or INACTIVE */
165}; 175};
166 176
167struct ocfs2_dlm_debug { 177struct ocfs2_dlm_debug {
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index fcd120f1493a..3f661376a2de 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -236,6 +236,16 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
236 return dlm_status_to_errno(lksb->lksb_o2dlm.status); 236 return dlm_status_to_errno(lksb->lksb_o2dlm.status);
237} 237}
238 238
239/*
 240 * o2dlm always has a "valid" LVB. If the dlm loses track of the LVB
241 * contents, it will zero out the LVB. Thus the caller can always trust
242 * the contents.
243 */
244static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
245{
246 return 1;
247}
248
239static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) 249static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb)
240{ 250{
241 return (void *)(lksb->lksb_o2dlm.lvb); 251 return (void *)(lksb->lksb_o2dlm.lvb);
@@ -354,6 +364,7 @@ static struct ocfs2_stack_operations o2cb_stack_ops = {
354 .dlm_lock = o2cb_dlm_lock, 364 .dlm_lock = o2cb_dlm_lock,
355 .dlm_unlock = o2cb_dlm_unlock, 365 .dlm_unlock = o2cb_dlm_unlock,
356 .lock_status = o2cb_dlm_lock_status, 366 .lock_status = o2cb_dlm_lock_status,
367 .lvb_valid = o2cb_dlm_lvb_valid,
357 .lock_lvb = o2cb_dlm_lvb, 368 .lock_lvb = o2cb_dlm_lvb,
358 .dump_lksb = o2cb_dump_lksb, 369 .dump_lksb = o2cb_dump_lksb,
359}; 370};
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 9b76d41a8ac6..ff4c798a5635 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -738,6 +738,13 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
738 return lksb->lksb_fsdlm.sb_status; 738 return lksb->lksb_fsdlm.sb_status;
739} 739}
740 740
741static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
742{
743 int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
744
745 return !invalid;
746}
747
741static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) 748static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
742{ 749{
743 if (!lksb->lksb_fsdlm.sb_lvbptr) 750 if (!lksb->lksb_fsdlm.sb_lvbptr)
@@ -873,6 +880,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
873 .dlm_lock = user_dlm_lock, 880 .dlm_lock = user_dlm_lock,
874 .dlm_unlock = user_dlm_unlock, 881 .dlm_unlock = user_dlm_unlock,
875 .lock_status = user_dlm_lock_status, 882 .lock_status = user_dlm_lock_status,
883 .lvb_valid = user_dlm_lvb_valid,
876 .lock_lvb = user_dlm_lvb, 884 .lock_lvb = user_dlm_lvb,
877 .plock = user_plock, 885 .plock = user_plock,
878 .dump_lksb = user_dlm_dump_lksb, 886 .dump_lksb = user_dlm_dump_lksb,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 68b668b0e60a..3f2f1c45b7b6 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -6,7 +6,7 @@
6 * Code which implements an OCFS2 specific interface to underlying 6 * Code which implements an OCFS2 specific interface to underlying
7 * cluster stacks. 7 * cluster stacks.
8 * 8 *
9 * Copyright (C) 2007 Oracle. All rights reserved. 9 * Copyright (C) 2007, 2009 Oracle. All rights reserved.
10 * 10 *
11 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public 12 * modify it under the terms of the GNU General Public
@@ -271,11 +271,12 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
271} 271}
272EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); 272EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
273 273
274/* 274int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
275 * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we 275{
276 * don't cast at the glue level. The real answer is that the header 276 return active_stack->sp_ops->lvb_valid(lksb);
277 * ordering is nigh impossible. 277}
278 */ 278EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid);
279
279void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) 280void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
280{ 281{
281 return active_stack->sp_ops->lock_lvb(lksb); 282 return active_stack->sp_ops->lock_lvb(lksb);
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index c571af375ef8..03a44d60eac9 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -186,6 +186,11 @@ struct ocfs2_stack_operations {
186 int (*lock_status)(union ocfs2_dlm_lksb *lksb); 186 int (*lock_status)(union ocfs2_dlm_lksb *lksb);
187 187
188 /* 188 /*
189 * Return non-zero if the LVB is valid.
190 */
191 int (*lvb_valid)(union ocfs2_dlm_lksb *lksb);
192
193 /*
189 * Pull the lvb pointer off of the stack-specific lksb. 194 * Pull the lvb pointer off of the stack-specific lksb.
190 */ 195 */
191 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); 196 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb);
@@ -252,6 +257,7 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
252 struct ocfs2_lock_res *astarg); 257 struct ocfs2_lock_res *astarg);
253 258
254int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); 259int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
260int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb);
255void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); 261void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
256void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); 262void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
257 263
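
The new lvb_valid stack operation matters because fs/dlm, unlike o2dlm, can hand a lock back with DLM_SBF_VALNOTVALID set instead of zeroing the LVB. Every consumer therefore gates its reads as in the dlmglue.c hunks above; distilled into one helper (the fallback argument is illustrative):

	static u32 read_scan_seqno(struct ocfs2_lock_res *lockres, u32 fallback)
	{
		struct ocfs2_orphan_scan_lvb *lvb =
			ocfs2_dlm_lvb(&lockres->l_lksb);

		if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
		    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
			return be32_to_cpu(lvb->lvb_os_seqno);
		return fallback;	/* LVB stale or cleared: recompute */
	}
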
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 8439f6b324b9..73a16d4666dc 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -923,14 +923,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
923 int nr) 923 int nr)
924{ 924{
925 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 925 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
926 int ret;
926 927
927 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) 928 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
928 return 0; 929 return 0;
929 if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) 930
931 if (!buffer_jbd(bg_bh))
930 return 1; 932 return 1;
931 933
934 jbd_lock_bh_state(bg_bh);
932 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; 935 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
933 return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); 936 if (bg)
937 ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
938 else
939 ret = 1;
940 jbd_unlock_bh_state(bg_bh);
941
942 return ret;
934} 943}
935 944
936static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 945static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
@@ -1885,6 +1894,7 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1885 unsigned int tmp; 1894 unsigned int tmp;
1886 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 1895 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1887 struct ocfs2_group_desc *undo_bg = NULL; 1896 struct ocfs2_group_desc *undo_bg = NULL;
1897 int cluster_bitmap = 0;
1888 1898
1889 mlog_entry_void(); 1899 mlog_entry_void();
1890 1900
@@ -1905,18 +1915,28 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1905 } 1915 }
1906 1916
1907 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1917 if (ocfs2_is_cluster_bitmap(alloc_inode))
1908 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; 1918 cluster_bitmap = 1;
1919
1920 if (cluster_bitmap) {
1921 jbd_lock_bh_state(group_bh);
1922 undo_bg = (struct ocfs2_group_desc *)
1923 bh2jh(group_bh)->b_committed_data;
1924 BUG_ON(!undo_bg);
1925 }
1909 1926
1910 tmp = num_bits; 1927 tmp = num_bits;
1911 while(tmp--) { 1928 while(tmp--) {
1912 ocfs2_clear_bit((bit_off + tmp), 1929 ocfs2_clear_bit((bit_off + tmp),
1913 (unsigned long *) bg->bg_bitmap); 1930 (unsigned long *) bg->bg_bitmap);
1914 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1931 if (cluster_bitmap)
1915 ocfs2_set_bit(bit_off + tmp, 1932 ocfs2_set_bit(bit_off + tmp,
1916 (unsigned long *) undo_bg->bg_bitmap); 1933 (unsigned long *) undo_bg->bg_bitmap);
1917 } 1934 }
1918 le16_add_cpu(&bg->bg_free_bits_count, num_bits); 1935 le16_add_cpu(&bg->bg_free_bits_count, num_bits);
1919 1936
1937 if (cluster_bitmap)
1938 jbd_unlock_bh_state(group_bh);
1939
1920 status = ocfs2_journal_dirty(handle, group_bh); 1940 status = ocfs2_journal_dirty(handle, group_bh);
1921 if (status < 0) 1941 if (status < 0)
1922 mlog_errno(status); 1942 mlog_errno(status);
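
Both suballoc.c hunks follow the same rule: b_committed_data is owned by the journal and can be detached at commit time, so it is only dereferenced under the buffer-state lock, and a NULL pointer now means "no committed copy, treat the bit as free" rather than an oops. The bracketing pattern, distilled (helper name is illustrative):

	static int bit_allocatable(struct buffer_head *bh, unsigned int nr)
	{
		struct ocfs2_group_desc *bg;
		int ret;

		jbd_lock_bh_state(bh);
		bg = (struct ocfs2_group_desc *)bh2jh(bh)->b_committed_data;
		/* NULL: nothing committed yet, so the bit is free */
		ret = bg ? !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap) : 1;
		jbd_unlock_bh_state(bh);

		return ret;
	}
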
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0d3ed7407a04..7efb349fb9bd 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -205,11 +205,10 @@ static const match_table_t tokens = {
205#ifdef CONFIG_DEBUG_FS 205#ifdef CONFIG_DEBUG_FS
206static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) 206static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
207{ 207{
208 int out = 0;
209 int i;
210 struct ocfs2_cluster_connection *cconn = osb->cconn; 208 struct ocfs2_cluster_connection *cconn = osb->cconn;
211 struct ocfs2_recovery_map *rm = osb->recovery_map; 209 struct ocfs2_recovery_map *rm = osb->recovery_map;
212 struct ocfs2_orphan_scan *os; 210 struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan;
211 int i, out = 0;
213 212
214 out += snprintf(buf + out, len - out, 213 out += snprintf(buf + out, len - out,
215 "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", 214 "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n",
@@ -234,20 +233,24 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
234 "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", 233 "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount",
235 osb->s_mount_opt, osb->s_atime_quantum); 234 osb->s_mount_opt, osb->s_atime_quantum);
236 235
237 out += snprintf(buf + out, len - out, 236 if (cconn) {
238 "%10s => Stack: %s Name: %*s Version: %d.%d\n", 237 out += snprintf(buf + out, len - out,
239 "Cluster", 238 "%10s => Stack: %s Name: %*s "
240 (*osb->osb_cluster_stack == '\0' ? 239 "Version: %d.%d\n", "Cluster",
241 "o2cb" : osb->osb_cluster_stack), 240 (*osb->osb_cluster_stack == '\0' ?
242 cconn->cc_namelen, cconn->cc_name, 241 "o2cb" : osb->osb_cluster_stack),
243 cconn->cc_version.pv_major, cconn->cc_version.pv_minor); 242 cconn->cc_namelen, cconn->cc_name,
243 cconn->cc_version.pv_major,
244 cconn->cc_version.pv_minor);
245 }
244 246
245 spin_lock(&osb->dc_task_lock); 247 spin_lock(&osb->dc_task_lock);
246 out += snprintf(buf + out, len - out, 248 out += snprintf(buf + out, len - out,
247 "%10s => Pid: %d Count: %lu WakeSeq: %lu " 249 "%10s => Pid: %d Count: %lu WakeSeq: %lu "
248 "WorkSeq: %lu\n", "DownCnvt", 250 "WorkSeq: %lu\n", "DownCnvt",
249 task_pid_nr(osb->dc_task), osb->blocked_lock_count, 251 (osb->dc_task ? task_pid_nr(osb->dc_task) : -1),
250 osb->dc_wake_sequence, osb->dc_work_sequence); 252 osb->blocked_lock_count, osb->dc_wake_sequence,
253 osb->dc_work_sequence);
251 spin_unlock(&osb->dc_task_lock); 254 spin_unlock(&osb->dc_task_lock);
252 255
253 spin_lock(&osb->osb_lock); 256 spin_lock(&osb->osb_lock);
@@ -267,14 +270,15 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
267 270
268 out += snprintf(buf + out, len - out, 271 out += snprintf(buf + out, len - out,
269 "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", 272 "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit",
270 task_pid_nr(osb->commit_task), osb->osb_commit_interval, 273 (osb->commit_task ? task_pid_nr(osb->commit_task) : -1),
274 osb->osb_commit_interval,
271 atomic_read(&osb->needs_checkpoint)); 275 atomic_read(&osb->needs_checkpoint));
272 276
273 out += snprintf(buf + out, len - out, 277 out += snprintf(buf + out, len - out,
274 "%10s => State: %d NumTxns: %d TxnId: %lu\n", 278 "%10s => State: %d TxnId: %lu NumTxns: %d\n",
275 "Journal", osb->journal->j_state, 279 "Journal", osb->journal->j_state,
276 atomic_read(&osb->journal->j_num_trans), 280 osb->journal->j_trans_id,
277 osb->journal->j_trans_id); 281 atomic_read(&osb->journal->j_num_trans));
278 282
279 out += snprintf(buf + out, len - out, 283 out += snprintf(buf + out, len - out,
280 "%10s => GlobalAllocs: %d LocalAllocs: %d " 284 "%10s => GlobalAllocs: %d LocalAllocs: %d "
@@ -300,9 +304,18 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
300 atomic_read(&osb->s_num_inodes_stolen)); 304 atomic_read(&osb->s_num_inodes_stolen));
301 spin_unlock(&osb->osb_lock); 305 spin_unlock(&osb->osb_lock);
302 306
307 out += snprintf(buf + out, len - out, "OrphanScan => ");
308 out += snprintf(buf + out, len - out, "Local: %u Global: %u ",
309 os->os_count, os->os_seqno);
310 out += snprintf(buf + out, len - out, " Last Scan: ");
311 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
312 out += snprintf(buf + out, len - out, "Disabled\n");
313 else
314 out += snprintf(buf + out, len - out, "%lu seconds ago\n",
315 (get_seconds() - os->os_scantime.tv_sec));
316
303 out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", 317 out += snprintf(buf + out, len - out, "%10s => %3s %10s\n",
304 "Slots", "Num", "RecoGen"); 318 "Slots", "Num", "RecoGen");
305
306 for (i = 0; i < osb->max_slots; ++i) { 319 for (i = 0; i < osb->max_slots; ++i) {
307 out += snprintf(buf + out, len - out, 320 out += snprintf(buf + out, len - out,
308 "%10s %c %3d %10d\n", 321 "%10s %c %3d %10d\n",
@@ -311,13 +324,6 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
311 i, osb->slot_recovery_generations[i]); 324 i, osb->slot_recovery_generations[i]);
312 } 325 }
313 326
314 os = &osb->osb_orphan_scan;
315 out += snprintf(buf + out, len - out, "Orphan Scan=> ");
316 out += snprintf(buf + out, len - out, "Local: %u Global: %u ",
317 os->os_count, os->os_seqno);
318 out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n",
319 (get_seconds() - os->os_scantime.tv_sec));
320
321 return out; 327 return out;
322} 328}
323 329
@@ -1175,6 +1181,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1175 atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS); 1181 atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
1176 wake_up(&osb->osb_mount_event); 1182 wake_up(&osb->osb_mount_event);
1177 1183
 1184 /* Start this when the mount is almost certain to succeed */
1185 ocfs2_orphan_scan_init(osb);
1186
1178 mlog_exit(status); 1187 mlog_exit(status);
1179 return status; 1188 return status;
1180 1189
@@ -1810,14 +1819,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1810 1819
1811 debugfs_remove(osb->osb_ctxt); 1820 debugfs_remove(osb->osb_ctxt);
1812 1821
1822 /* Orphan scan should be stopped as early as possible */
1823 ocfs2_orphan_scan_stop(osb);
1824
1813 ocfs2_disable_quotas(osb); 1825 ocfs2_disable_quotas(osb);
1814 1826
1815 ocfs2_shutdown_local_alloc(osb); 1827 ocfs2_shutdown_local_alloc(osb);
1816 1828
1817 ocfs2_truncate_log_shutdown(osb); 1829 ocfs2_truncate_log_shutdown(osb);
1818 1830
1819 ocfs2_orphan_scan_stop(osb);
1820
1821 /* This will disable recovery and flush any recovery work. */ 1831 /* This will disable recovery and flush any recovery work. */
1822 ocfs2_recovery_exit(osb); 1832 ocfs2_recovery_exit(osb);
1823 1833
@@ -1978,13 +1988,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
1978 goto bail; 1988 goto bail;
1979 } 1989 }
1980 1990
1981 status = ocfs2_orphan_scan_init(osb);
1982 if (status) {
1983 mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n");
1984 mlog_errno(status);
1985 goto bail;
1986 }
1987
1988 init_waitqueue_head(&osb->checkpoint_event); 1991 init_waitqueue_head(&osb->checkpoint_event);
1989 atomic_set(&osb->needs_checkpoint, 0); 1992 atomic_set(&osb->needs_checkpoint, 0);
1990 1993
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index ab713ebdd546..40e53702948c 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -50,6 +50,10 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb,
50 int type, 50 int type,
51 u32 slot); 51 u32 slot);
52 52
53#ifdef CONFIG_DEBUG_LOCK_ALLOC
54static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES];
55#endif
56
53static inline int is_global_system_inode(int type) 57static inline int is_global_system_inode(int type)
54{ 58{
55 return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE && 59 return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE &&
@@ -118,6 +122,21 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
118 inode = NULL; 122 inode = NULL;
119 goto bail; 123 goto bail;
120 } 124 }
125#ifdef CONFIG_DEBUG_LOCK_ALLOC
126 if (type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
127 type == LOCAL_GROUP_QUOTA_SYSTEM_INODE ||
128 type == JOURNAL_SYSTEM_INODE) {
129 /* Ignore inode lock on these inodes as the lock does not
130 * really belong to any process and lockdep cannot handle
131 * that */
132 OCFS2_I(inode)->ip_inode_lockres.l_lockdep_map.key = NULL;
133 } else {
134 lockdep_init_map(&OCFS2_I(inode)->ip_inode_lockres.
135 l_lockdep_map,
136 ocfs2_system_inodes[type].si_name,
137 &ocfs2_sysfile_cluster_lock_key[type], 0);
138 }
139#endif
121bail: 140bail:
122 141
123 return inode; 142 return inode;
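
System-file cluster locks are created once per mount and handed between processes, which lockdep cannot model, so the journal and local-quota inodes opt out by NULLing the map key while every other type gets its own per-type class. Sketched below; the predicate and key-array names are illustrative, the lockdep calls are the real API:

	static struct lock_class_key sysfile_keys[NUM_SYSTEM_INODES];

	if (held_across_processes(type))	/* hypothetical predicate */
		res->l_lockdep_map.key = NULL;	/* lockdep ignores this lock */
	else
		lockdep_init_map(&res->l_lockdep_map,
				 ocfs2_system_inodes[type].si_name,
				 &sysfile_keys[type], 0);
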
diff --git a/fs/open.c b/fs/open.c
index 7200e23d9258..dd98e8076024 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -378,63 +378,63 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
378#endif 378#endif
379#endif /* BITS_PER_LONG == 32 */ 379#endif /* BITS_PER_LONG == 32 */
380 380
381SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) 381
382int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
382{ 383{
383 struct file *file; 384 struct inode *inode = file->f_path.dentry->d_inode;
384 struct inode *inode; 385 long ret;
385 long ret = -EINVAL;
386 386
387 if (offset < 0 || len <= 0) 387 if (offset < 0 || len <= 0)
388 goto out; 388 return -EINVAL;
389 389
390 /* Return error if mode is not supported */ 390 /* Return error if mode is not supported */
391 ret = -EOPNOTSUPP;
392 if (mode && !(mode & FALLOC_FL_KEEP_SIZE)) 391 if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
393 goto out; 392 return -EOPNOTSUPP;
394 393
395 ret = -EBADF;
396 file = fget(fd);
397 if (!file)
398 goto out;
399 if (!(file->f_mode & FMODE_WRITE)) 394 if (!(file->f_mode & FMODE_WRITE))
400 goto out_fput; 395 return -EBADF;
401 /* 396 /*
402 * Revalidate the write permissions, in case security policy has 397 * Revalidate the write permissions, in case security policy has
403 * changed since the files were opened. 398 * changed since the files were opened.
404 */ 399 */
405 ret = security_file_permission(file, MAY_WRITE); 400 ret = security_file_permission(file, MAY_WRITE);
406 if (ret) 401 if (ret)
407 goto out_fput; 402 return ret;
408 403
409 inode = file->f_path.dentry->d_inode;
410
411 ret = -ESPIPE;
412 if (S_ISFIFO(inode->i_mode)) 404 if (S_ISFIFO(inode->i_mode))
413 goto out_fput; 405 return -ESPIPE;
414 406
415 ret = -ENODEV;
416 /* 407 /*
417 * Let individual file system decide if it supports preallocation 408 * Let individual file system decide if it supports preallocation
418 * for directories or not. 409 * for directories or not.
419 */ 410 */
420 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 411 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
421 goto out_fput; 412 return -ENODEV;
422 413
423 ret = -EFBIG;
424 /* Check for wrap through zero too */ 414 /* Check for wrap through zero too */
425 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 415 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
426 goto out_fput; 416 return -EFBIG;
427 417
428 if (inode->i_op->fallocate) 418 if (!inode->i_op->fallocate)
429 ret = inode->i_op->fallocate(inode, mode, offset, len); 419 return -EOPNOTSUPP;
430 else
431 ret = -EOPNOTSUPP;
432 420
433out_fput: 421 return inode->i_op->fallocate(inode, mode, offset, len);
434 fput(file);
435out:
436 return ret;
437} 422}
423
424SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
425{
426 struct file *file;
427 int error = -EBADF;
428
429 file = fget(fd);
430 if (file) {
431 error = do_fallocate(file, mode, offset, len);
432 fput(file);
433 }
434
435 return error;
436}
437
438#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 438#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
439asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len) 439asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
440{ 440{
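
With the fd handling peeled off into the syscall wrapper, in-kernel callers that already hold a struct file can preallocate directly instead of round-tripping through a file descriptor. A hedged usage sketch (the function and its purpose are illustrative):

	/* mode 0 behaves like posix_fallocate(): allocate and extend i_size */
	static int prealloc_tail(struct file *filp, loff_t pos, loff_t bytes)
	{
		return do_fallocate(filp, 0, pos, bytes);
	}
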
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6fd0f47e45db..a14d6cd9eeda 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1131,8 +1131,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
1131 REISERFS_I(inode)->i_trans_id = 0; 1131 REISERFS_I(inode)->i_trans_id = 0;
1132 REISERFS_I(inode)->i_jl = NULL; 1132 REISERFS_I(inode)->i_jl = NULL;
1133 mutex_init(&(REISERFS_I(inode)->i_mmap)); 1133 mutex_init(&(REISERFS_I(inode)->i_mmap));
1134 reiserfs_init_acl_access(inode);
1135 reiserfs_init_acl_default(inode);
1136 reiserfs_init_xattr_rwsem(inode); 1134 reiserfs_init_xattr_rwsem(inode);
1137 1135
1138 if (stat_data_v1(ih)) { 1136 if (stat_data_v1(ih)) {
@@ -1834,8 +1832,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1834 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; 1832 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1835 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); 1833 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1836 mutex_init(&(REISERFS_I(inode)->i_mmap)); 1834 mutex_init(&(REISERFS_I(inode)->i_mmap));
1837 reiserfs_init_acl_access(inode);
1838 reiserfs_init_acl_default(inode);
1839 reiserfs_init_xattr_rwsem(inode); 1835 reiserfs_init_xattr_rwsem(inode);
1840 1836
1841 /* key to search for correct place for new stat data */ 1837 /* key to search for correct place for new stat data */
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 238e9d9b31e0..18b315d3d104 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -82,7 +82,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
82 if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { 82 if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) {
83 printk 83 printk
84 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); 84 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
85 unlock_super(s);
86 return -ENOMEM; 85 return -ENOMEM;
87 } 86 }
88 /* the new journal bitmaps are zero filled, now we copy in the bitmap 87 /* the new journal bitmaps are zero filled, now we copy in the bitmap
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 2969773cfc22..d3aeb061612b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -529,10 +529,6 @@ static void init_once(void *foo)
529 529
530 INIT_LIST_HEAD(&ei->i_prealloc_list); 530 INIT_LIST_HEAD(&ei->i_prealloc_list);
531 inode_init_once(&ei->vfs_inode); 531 inode_init_once(&ei->vfs_inode);
532#ifdef CONFIG_REISERFS_FS_POSIX_ACL
533 ei->i_acl_access = NULL;
534 ei->i_acl_default = NULL;
535#endif
536} 532}
537 533
538static int init_inodecache(void) 534static int init_inodecache(void)
@@ -580,25 +576,6 @@ static void reiserfs_dirty_inode(struct inode *inode)
580 reiserfs_write_unlock(inode->i_sb); 576 reiserfs_write_unlock(inode->i_sb);
581} 577}
582 578
583#ifdef CONFIG_REISERFS_FS_POSIX_ACL
584static void reiserfs_clear_inode(struct inode *inode)
585{
586 struct posix_acl *acl;
587
588 acl = REISERFS_I(inode)->i_acl_access;
589 if (acl && !IS_ERR(acl))
590 posix_acl_release(acl);
591 REISERFS_I(inode)->i_acl_access = NULL;
592
593 acl = REISERFS_I(inode)->i_acl_default;
594 if (acl && !IS_ERR(acl))
595 posix_acl_release(acl);
596 REISERFS_I(inode)->i_acl_default = NULL;
597}
598#else
599#define reiserfs_clear_inode NULL
600#endif
601
602#ifdef CONFIG_QUOTA 579#ifdef CONFIG_QUOTA
603static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, 580static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
604 size_t, loff_t); 581 size_t, loff_t);
@@ -612,7 +589,6 @@ static const struct super_operations reiserfs_sops = {
612 .write_inode = reiserfs_write_inode, 589 .write_inode = reiserfs_write_inode,
613 .dirty_inode = reiserfs_dirty_inode, 590 .dirty_inode = reiserfs_dirty_inode,
614 .delete_inode = reiserfs_delete_inode, 591 .delete_inode = reiserfs_delete_inode,
615 .clear_inode = reiserfs_clear_inode,
616 .put_super = reiserfs_put_super, 592 .put_super = reiserfs_put_super,
617 .write_super = reiserfs_write_super, 593 .write_super = reiserfs_write_super,
618 .sync_fs = reiserfs_sync_fs, 594 .sync_fs = reiserfs_sync_fs,
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index c303c426fe2b..35d6e672a279 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -188,29 +188,6 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
188 return ERR_PTR(-EINVAL); 188 return ERR_PTR(-EINVAL);
189} 189}
190 190
191static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
192 struct posix_acl *acl)
193{
194 spin_lock(&inode->i_lock);
195 if (*i_acl != ERR_PTR(-ENODATA))
196 posix_acl_release(*i_acl);
197 *i_acl = posix_acl_dup(acl);
198 spin_unlock(&inode->i_lock);
199}
200
201static inline struct posix_acl *iget_acl(struct inode *inode,
202 struct posix_acl **i_acl)
203{
204 struct posix_acl *acl = ERR_PTR(-ENODATA);
205
206 spin_lock(&inode->i_lock);
207 if (*i_acl != ERR_PTR(-ENODATA))
208 acl = posix_acl_dup(*i_acl);
209 spin_unlock(&inode->i_lock);
210
211 return acl;
212}
213
214/* 191/*
215 * Inode operation get_posix_acl(). 192 * Inode operation get_posix_acl().
216 * 193 *
@@ -220,34 +197,29 @@ static inline struct posix_acl *iget_acl(struct inode *inode,
220struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) 197struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
221{ 198{
222 char *name, *value; 199 char *name, *value;
223 struct posix_acl *acl, **p_acl; 200 struct posix_acl *acl;
224 int size; 201 int size;
225 int retval; 202 int retval;
226 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 203
204 acl = get_cached_acl(inode, type);
205 if (acl != ACL_NOT_CACHED)
206 return acl;
227 207
228 switch (type) { 208 switch (type) {
229 case ACL_TYPE_ACCESS: 209 case ACL_TYPE_ACCESS:
230 name = POSIX_ACL_XATTR_ACCESS; 210 name = POSIX_ACL_XATTR_ACCESS;
231 p_acl = &reiserfs_i->i_acl_access;
232 break; 211 break;
233 case ACL_TYPE_DEFAULT: 212 case ACL_TYPE_DEFAULT:
234 name = POSIX_ACL_XATTR_DEFAULT; 213 name = POSIX_ACL_XATTR_DEFAULT;
235 p_acl = &reiserfs_i->i_acl_default;
236 break; 214 break;
237 default: 215 default:
238 return ERR_PTR(-EINVAL); 216 BUG();
239 } 217 }
240 218
241 acl = iget_acl(inode, p_acl);
242 if (acl && !IS_ERR(acl))
243 return acl;
244 else if (PTR_ERR(acl) == -ENODATA)
245 return NULL;
246
247 size = reiserfs_xattr_get(inode, name, NULL, 0); 219 size = reiserfs_xattr_get(inode, name, NULL, 0);
248 if (size < 0) { 220 if (size < 0) {
249 if (size == -ENODATA || size == -ENOSYS) { 221 if (size == -ENODATA || size == -ENOSYS) {
250 *p_acl = ERR_PTR(-ENODATA); 222 set_cached_acl(inode, type, NULL);
251 return NULL; 223 return NULL;
252 } 224 }
253 return ERR_PTR(size); 225 return ERR_PTR(size);
@@ -262,14 +234,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
262 /* This shouldn't actually happen as it should have 234 /* This shouldn't actually happen as it should have
 263 been caught above... but just in case */ 235 been caught above... but just in case */
264 acl = NULL; 236 acl = NULL;
265 *p_acl = ERR_PTR(-ENODATA);
266 } else if (retval < 0) { 237 } else if (retval < 0) {
267 acl = ERR_PTR(retval); 238 acl = ERR_PTR(retval);
268 } else { 239 } else {
269 acl = posix_acl_from_disk(value, retval); 240 acl = posix_acl_from_disk(value, retval);
270 if (!IS_ERR(acl))
271 iset_acl(inode, p_acl, acl);
272 } 241 }
242 if (!IS_ERR(acl))
243 set_cached_acl(inode, type, acl);
273 244
274 kfree(value); 245 kfree(value);
275 return acl; 246 return acl;
@@ -287,10 +258,8 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
287{ 258{
288 char *name; 259 char *name;
289 void *value = NULL; 260 void *value = NULL;
290 struct posix_acl **p_acl;
291 size_t size = 0; 261 size_t size = 0;
292 int error; 262 int error;
293 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
294 263
295 if (S_ISLNK(inode->i_mode)) 264 if (S_ISLNK(inode->i_mode))
296 return -EOPNOTSUPP; 265 return -EOPNOTSUPP;
@@ -298,7 +267,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
298 switch (type) { 267 switch (type) {
299 case ACL_TYPE_ACCESS: 268 case ACL_TYPE_ACCESS:
300 name = POSIX_ACL_XATTR_ACCESS; 269 name = POSIX_ACL_XATTR_ACCESS;
301 p_acl = &reiserfs_i->i_acl_access;
302 if (acl) { 270 if (acl) {
303 mode_t mode = inode->i_mode; 271 mode_t mode = inode->i_mode;
304 error = posix_acl_equiv_mode(acl, &mode); 272 error = posix_acl_equiv_mode(acl, &mode);
@@ -313,7 +281,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
313 break; 281 break;
314 case ACL_TYPE_DEFAULT: 282 case ACL_TYPE_DEFAULT:
315 name = POSIX_ACL_XATTR_DEFAULT; 283 name = POSIX_ACL_XATTR_DEFAULT;
316 p_acl = &reiserfs_i->i_acl_default;
317 if (!S_ISDIR(inode->i_mode)) 284 if (!S_ISDIR(inode->i_mode))
318 return acl ? -EACCES : 0; 285 return acl ? -EACCES : 0;
319 break; 286 break;
@@ -346,7 +313,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
346 kfree(value); 313 kfree(value);
347 314
348 if (!error) 315 if (!error)
349 iset_acl(inode, p_acl, acl); 316 set_cached_acl(inode, type, acl);
350 317
351 return error; 318 return error;
352} 319}
@@ -379,11 +346,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
379 } 346 }
380 347
381 acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT); 348 acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT);
382 if (IS_ERR(acl)) { 349 if (IS_ERR(acl))
383 if (PTR_ERR(acl) == -ENODATA)
384 goto apply_umask;
385 return PTR_ERR(acl); 350 return PTR_ERR(acl);
386 }
387 351
388 if (acl) { 352 if (acl) {
389 struct posix_acl *acl_copy; 353 struct posix_acl *acl_copy;
diff --git a/fs/super.c b/fs/super.c
index d40d53a22fb5..2761d3e22ed9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -608,6 +608,7 @@ void emergency_remount(void)
608 608
609static DEFINE_IDA(unnamed_dev_ida); 609static DEFINE_IDA(unnamed_dev_ida);
610static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 610static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
611static int unnamed_dev_start = 0; /* don't bother trying below it */
611 612
612int set_anon_super(struct super_block *s, void *data) 613int set_anon_super(struct super_block *s, void *data)
613{ 614{
@@ -618,7 +619,9 @@ int set_anon_super(struct super_block *s, void *data)
618 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) 619 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
619 return -ENOMEM; 620 return -ENOMEM;
620 spin_lock(&unnamed_dev_lock); 621 spin_lock(&unnamed_dev_lock);
621 error = ida_get_new(&unnamed_dev_ida, &dev); 622 error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
623 if (!error)
624 unnamed_dev_start = dev + 1;
622 spin_unlock(&unnamed_dev_lock); 625 spin_unlock(&unnamed_dev_lock);
623 if (error == -EAGAIN) 626 if (error == -EAGAIN)
624 /* We raced and lost with another CPU. */ 627 /* We raced and lost with another CPU. */
@@ -629,6 +632,8 @@ int set_anon_super(struct super_block *s, void *data)
629 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { 632 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
630 spin_lock(&unnamed_dev_lock); 633 spin_lock(&unnamed_dev_lock);
631 ida_remove(&unnamed_dev_ida, dev); 634 ida_remove(&unnamed_dev_ida, dev);
635 if (unnamed_dev_start > dev)
636 unnamed_dev_start = dev;
632 spin_unlock(&unnamed_dev_lock); 637 spin_unlock(&unnamed_dev_lock);
633 return -EMFILE; 638 return -EMFILE;
634 } 639 }
@@ -645,6 +650,8 @@ void kill_anon_super(struct super_block *sb)
645 generic_shutdown_super(sb); 650 generic_shutdown_super(sb);
646 spin_lock(&unnamed_dev_lock); 651 spin_lock(&unnamed_dev_lock);
647 ida_remove(&unnamed_dev_ida, slot); 652 ida_remove(&unnamed_dev_ida, slot);
653 if (slot < unnamed_dev_start)
654 unnamed_dev_start = slot;
648 spin_unlock(&unnamed_dev_lock); 655 spin_unlock(&unnamed_dev_lock);
649} 656}
 650 657
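
The start hint keeps anonymous dev_t minors dense: allocation resumes from the lowest slot known to be free instead of scanning from zero, and every free pulls the floor back down. The paired idiom from the three hunks, wrapped into illustrative helpers:

	static int get_anon_dev(int *dev)
	{
		int error;

		spin_lock(&unnamed_dev_lock);
		error = ida_get_new_above(&unnamed_dev_ida,
					  unnamed_dev_start, dev);
		if (!error)
			unnamed_dev_start = *dev + 1;	/* next search here */
		spin_unlock(&unnamed_dev_lock);

		return error;
	}

	static void put_anon_dev(int dev)
	{
		spin_lock(&unnamed_dev_lock);
		ida_remove(&unnamed_dev_ida, dev);
		if (dev < unnamed_dev_start)
			unnamed_dev_start = dev;	/* reuse the hole */
		spin_unlock(&unnamed_dev_lock);
	}
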
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index cfd31e229c89..adafcf556531 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -55,9 +55,9 @@
55 * ACL support is not implemented. 55 * ACL support is not implemented.
56 */ 56 */
57 57
58#include "ubifs.h"
58#include <linux/xattr.h> 59#include <linux/xattr.h>
59#include <linux/posix_acl_xattr.h> 60#include <linux/posix_acl_xattr.h>
60#include "ubifs.h"
61 61
62/* 62/*
63 * Limit the number of extended attributes per inode so that the total size 63 * Limit the number of extended attributes per inode so that the total size
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index e48e9a3af763..1e068535b58b 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -238,7 +238,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
238 238
239 mutex_lock(&sbi->s_alloc_mutex); 239 mutex_lock(&sbi->s_alloc_mutex);
240 part_len = sbi->s_partmaps[partition].s_partition_len; 240 part_len = sbi->s_partmaps[partition].s_partition_len;
241 if (first_block < 0 || first_block >= part_len) 241 if (first_block >= part_len)
242 goto out; 242 goto out;
243 243
244 if (first_block + block_count > part_len) 244 if (first_block + block_count > part_len)
@@ -297,7 +297,7 @@ static int udf_bitmap_new_block(struct super_block *sb,
297 mutex_lock(&sbi->s_alloc_mutex); 297 mutex_lock(&sbi->s_alloc_mutex);
298 298
299repeat: 299repeat:
300 if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) 300 if (goal >= sbi->s_partmaps[partition].s_partition_len)
301 goal = 0; 301 goal = 0;
302 302
303 nr_groups = bitmap->s_nr_groups; 303 nr_groups = bitmap->s_nr_groups;
@@ -666,8 +666,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
666 int8_t etype = -1; 666 int8_t etype = -1;
667 struct udf_inode_info *iinfo; 667 struct udf_inode_info *iinfo;
668 668
669 if (first_block < 0 || 669 if (first_block >= sbi->s_partmaps[partition].s_partition_len)
670 first_block >= sbi->s_partmaps[partition].s_partition_len)
671 return 0; 670 return 0;
672 671
673 iinfo = UDF_I(table); 672 iinfo = UDF_I(table);
@@ -743,7 +742,7 @@ static int udf_table_new_block(struct super_block *sb,
743 return newblock; 742 return newblock;
744 743
745 mutex_lock(&sbi->s_alloc_mutex); 744 mutex_lock(&sbi->s_alloc_mutex);
746 if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) 745 if (goal >= sbi->s_partmaps[partition].s_partition_len)
747 goal = 0; 746 goal = 0;
748 747
749 /* We search for the closest matching block to goal. If we find 748 /* We search for the closest matching block to goal. If we find
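
All four balloc.c deletions are the same fix: first_block and goal are unsigned, so a "< 0" test can never be true and gcc's -Wtype-limits flags it as dead code. Only the upper-bound check can actually fire:

	uint32_t goal = get_goal();	/* get_goal() is illustrative */

	if (goal < 0)			/* dead code: never true for unsigned */
		goal = 0;
	if (goal >= part_len)		/* the only check that can fire */
		goal = 0;
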
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 703843f30ffd..1b88fd5df05d 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -56,7 +56,12 @@ unsigned long udf_get_last_block(struct super_block *sb)
56 struct block_device *bdev = sb->s_bdev; 56 struct block_device *bdev = sb->s_bdev;
57 unsigned long lblock = 0; 57 unsigned long lblock = 0;
58 58
59 if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock)) 59 /*
60 * ioctl failed or returned obviously bogus value?
61 * Try using the device size...
62 */
63 if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) ||
64 lblock == 0)
60 lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits; 65 lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
61 66
62 if (lblock) 67 if (lblock)
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 1e9d1246eebc..b23a54506446 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -25,14 +25,10 @@
25#include <linux/posix_acl_xattr.h> 25#include <linux/posix_acl_xattr.h>
26 26
27 27
28#define XFS_ACL_NOT_CACHED ((void *)-1)
29
30/* 28/*
31 * Locking scheme: 29 * Locking scheme:
32 * - all ACL updates are protected by inode->i_mutex, which is taken before 30 * - all ACL updates are protected by inode->i_mutex, which is taken before
33 * calling into this file. 31 * calling into this file.
34 * - access and updates to the ip->i_acl and ip->i_default_acl pointers are
35 * protected by inode->i_lock.
36 */ 32 */
37 33
38STATIC struct posix_acl * 34STATIC struct posix_acl *
@@ -102,59 +98,35 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
102 } 98 }
103} 99}
104 100
105/*
106 * Update the cached ACL pointer in the inode.
107 *
108 * Because we don't hold any locks while reading/writing the attribute
109 * from/to disk another thread could have raced and updated the cached
110 * ACL value before us. In that case we release the previous cached value
111 * and update it with our new value.
112 */
113STATIC void
114xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
115 struct posix_acl *acl)
116{
117 spin_lock(&inode->i_lock);
118 if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
119 posix_acl_release(*p_acl);
120 *p_acl = posix_acl_dup(acl);
121 spin_unlock(&inode->i_lock);
122}
123
124struct posix_acl * 101struct posix_acl *
125xfs_get_acl(struct inode *inode, int type) 102xfs_get_acl(struct inode *inode, int type)
126{ 103{
127 struct xfs_inode *ip = XFS_I(inode); 104 struct xfs_inode *ip = XFS_I(inode);
128 struct posix_acl *acl = NULL, **p_acl; 105 struct posix_acl *acl;
129 struct xfs_acl *xfs_acl; 106 struct xfs_acl *xfs_acl;
130 int len = sizeof(struct xfs_acl); 107 int len = sizeof(struct xfs_acl);
131 char *ea_name; 108 char *ea_name;
132 int error; 109 int error;
133 110
111 acl = get_cached_acl(inode, type);
112 if (acl != ACL_NOT_CACHED)
113 return acl;
114
134 switch (type) { 115 switch (type) {
135 case ACL_TYPE_ACCESS: 116 case ACL_TYPE_ACCESS:
136 ea_name = SGI_ACL_FILE; 117 ea_name = SGI_ACL_FILE;
137 p_acl = &ip->i_acl;
138 break; 118 break;
139 case ACL_TYPE_DEFAULT: 119 case ACL_TYPE_DEFAULT:
140 ea_name = SGI_ACL_DEFAULT; 120 ea_name = SGI_ACL_DEFAULT;
141 p_acl = &ip->i_default_acl;
142 break; 121 break;
143 default: 122 default:
144 return ERR_PTR(-EINVAL); 123 BUG();
145 } 124 }
146 125
147 spin_lock(&inode->i_lock);
148 if (*p_acl != XFS_ACL_NOT_CACHED)
149 acl = posix_acl_dup(*p_acl);
150 spin_unlock(&inode->i_lock);
151
152 /* 126 /*
 153 * If we have a cached ACL value just return it, no need to 127
154 * go out to the disk. 128 * go out to the disk.
155 */ 129 */
156 if (acl)
157 return acl;
158 130
159 xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); 131 xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
160 if (!xfs_acl) 132 if (!xfs_acl)
@@ -165,7 +137,7 @@ xfs_get_acl(struct inode *inode, int type)
165 /* 137 /*
166 * If the attribute doesn't exist make sure we have a negative 138 * If the attribute doesn't exist make sure we have a negative
167 * cache entry, for any other error assume it is transient and 139 * cache entry, for any other error assume it is transient and
168 * leave the cache entry as XFS_ACL_NOT_CACHED. 140 * leave the cache entry as ACL_NOT_CACHED.
169 */ 141 */
170 if (error == -ENOATTR) { 142 if (error == -ENOATTR) {
171 acl = NULL; 143 acl = NULL;
@@ -179,7 +151,7 @@ xfs_get_acl(struct inode *inode, int type)
179 goto out; 151 goto out;
180 152
181 out_update_cache: 153 out_update_cache:
182 xfs_update_cached_acl(inode, p_acl, acl); 154 set_cached_acl(inode, type, acl);
183 out: 155 out:
184 kfree(xfs_acl); 156 kfree(xfs_acl);
185 return acl; 157 return acl;
@@ -189,7 +161,6 @@ STATIC int
189xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) 161xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
190{ 162{
191 struct xfs_inode *ip = XFS_I(inode); 163 struct xfs_inode *ip = XFS_I(inode);
192 struct posix_acl **p_acl;
193 char *ea_name; 164 char *ea_name;
194 int error; 165 int error;
195 166
@@ -199,13 +170,11 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
199 switch (type) { 170 switch (type) {
200 case ACL_TYPE_ACCESS: 171 case ACL_TYPE_ACCESS:
201 ea_name = SGI_ACL_FILE; 172 ea_name = SGI_ACL_FILE;
202 p_acl = &ip->i_acl;
203 break; 173 break;
204 case ACL_TYPE_DEFAULT: 174 case ACL_TYPE_DEFAULT:
205 if (!S_ISDIR(inode->i_mode)) 175 if (!S_ISDIR(inode->i_mode))
206 return acl ? -EACCES : 0; 176 return acl ? -EACCES : 0;
207 ea_name = SGI_ACL_DEFAULT; 177 ea_name = SGI_ACL_DEFAULT;
208 p_acl = &ip->i_default_acl;
209 break; 178 break;
210 default: 179 default:
211 return -EINVAL; 180 return -EINVAL;
@@ -242,7 +211,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
242 } 211 }
243 212
244 if (!error) 213 if (!error)
245 xfs_update_cached_acl(inode, p_acl, acl); 214 set_cached_acl(inode, type, acl);
246 return error; 215 return error;
247} 216}
248 217
@@ -384,30 +353,6 @@ xfs_acl_chmod(struct inode *inode)
384 return error; 353 return error;
385} 354}
386 355
387void
388xfs_inode_init_acls(struct xfs_inode *ip)
389{
390 /*
391 * No need for locking, inode is not live yet.
392 */
393 ip->i_acl = XFS_ACL_NOT_CACHED;
394 ip->i_default_acl = XFS_ACL_NOT_CACHED;
395}
396
397void
398xfs_inode_clear_acls(struct xfs_inode *ip)
399{
400 /*
401 * No need for locking here, the inode is not live anymore
402 * and just about to be freed.
403 */
404 if (ip->i_acl != XFS_ACL_NOT_CACHED)
405 posix_acl_release(ip->i_acl);
406 if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
407 posix_acl_release(ip->i_default_acl);
408}
409
410
411/* 356/*
412 * System xattr handlers. 357 * System xattr handlers.
413 * 358 *
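
For reference, the reworked lookup path reduces to the generic pattern
below. This is a simplified sketch only: error handling and the
xfs_acl-to-posix_acl conversion are elided, and read_acl_from_disk() is a
hypothetical stand-in for the XFS attr read.

	struct posix_acl *example_get_acl(struct inode *inode, int type)
	{
		struct posix_acl *acl;

		/* Fast path: the generic per-inode cache already has it. */
		acl = get_cached_acl(inode, type);
		if (acl != ACL_NOT_CACHED)
			return acl;

		/* Slow path: fetch the ACL from the on-disk attribute. */
		acl = read_acl_from_disk(inode, type);	/* hypothetical helper */

		/* Cache the result, including NULL as a negative entry. */
		if (!IS_ERR(acl))
			set_cached_acl(inode, type, acl);

		return acl;
	}
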
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 63dc1f2efad5..947b150df8ed 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,8 +46,6 @@ extern int xfs_check_acl(struct inode *inode, int mask);
46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); 46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); 47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
48extern int xfs_acl_chmod(struct inode *inode); 48extern int xfs_acl_chmod(struct inode *inode);
49extern void xfs_inode_init_acls(struct xfs_inode *ip);
50extern void xfs_inode_clear_acls(struct xfs_inode *ip);
51extern int posix_acl_access_exists(struct inode *inode); 49extern int posix_acl_access_exists(struct inode *inode);
52extern int posix_acl_default_exists(struct inode *inode); 50extern int posix_acl_default_exists(struct inode *inode);
53 51
@@ -57,8 +55,6 @@ extern struct xattr_handler xfs_xattr_system_handler;
57# define xfs_get_acl(inode, type) NULL 55# define xfs_get_acl(inode, type) NULL
58# define xfs_inherit_acl(inode, default_acl) 0 56# define xfs_inherit_acl(inode, default_acl) 0
59# define xfs_acl_chmod(inode) 0 57# define xfs_acl_chmod(inode) 0
60# define xfs_inode_init_acls(ip)
61# define xfs_inode_clear_acls(ip)
62# define posix_acl_access_exists(inode) 0 58# define posix_acl_access_exists(inode) 0
63# define posix_acl_default_exists(inode) 0 59# define posix_acl_default_exists(inode) 0
64#endif /* CONFIG_XFS_POSIX_ACL */ 60#endif /* CONFIG_XFS_POSIX_ACL */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 76c540f719e4..5fcec6f020a7 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -83,7 +83,6 @@ xfs_inode_alloc(
83 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 83 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
84 ip->i_size = 0; 84 ip->i_size = 0;
85 ip->i_new_size = 0; 85 ip->i_new_size = 0;
86 xfs_inode_init_acls(ip);
87 86
88 /* 87 /*
89 * Initialize inode's trace buffers. 88 * Initialize inode's trace buffers.
@@ -560,7 +559,6 @@ xfs_ireclaim(
560 ASSERT(atomic_read(&ip->i_pincount) == 0); 559 ASSERT(atomic_read(&ip->i_pincount) == 0);
561 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 560 ASSERT(!spin_is_locked(&ip->i_flags_lock));
562 ASSERT(completion_done(&ip->i_flush)); 561 ASSERT(completion_done(&ip->i_flush));
563 xfs_inode_clear_acls(ip);
564 kmem_zone_free(xfs_inode_zone, ip); 562 kmem_zone_free(xfs_inode_zone, ip);
565} 563}
566 564
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 77016702938b..1804f866a71d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -273,11 +273,6 @@ typedef struct xfs_inode {
273 /* VFS inode */ 273 /* VFS inode */
274 struct inode i_vnode; /* embedded VFS inode */ 274 struct inode i_vnode; /* embedded VFS inode */
275 275
276#ifdef CONFIG_XFS_POSIX_ACL
277 struct posix_acl *i_acl;
278 struct posix_acl *i_default_acl;
279#endif
280
281 /* Trace buffers per inode. */ 276 /* Trace buffers per inode. */
282#ifdef XFS_INODE_TRACE 277#ifdef XFS_INODE_TRACE
283 struct ktrace *i_trace; /* general inode trace */ 278 struct ktrace *i_trace; /* general inode trace */
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c34b11022908..c65e4ce6c3af 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -114,10 +114,13 @@ struct acpi_device_ops {
114 acpi_op_notify notify; 114 acpi_op_notify notify;
115}; 115};
116 116
117#define ACPI_DRIVER_ALL_NOTIFY_EVENTS 0x1 /* system AND device events */
118
117struct acpi_driver { 119struct acpi_driver {
118 char name[80]; 120 char name[80];
119 char class[80]; 121 char class[80];
120 const struct acpi_device_id *ids; /* Supported Hardware IDs */ 122 const struct acpi_device_id *ids; /* Supported Hardware IDs */
123 unsigned int flags;
121 struct acpi_device_ops ops; 124 struct acpi_device_ops ops;
122 struct device_driver drv; 125 struct device_driver drv;
123 struct module *owner; 126 struct module *owner;
@@ -168,7 +171,7 @@ struct acpi_device_dir {
168 171
169/* Plug and Play */ 172/* Plug and Play */
170 173
171typedef char acpi_bus_id[5]; 174typedef char acpi_bus_id[8];
172typedef unsigned long acpi_bus_address; 175typedef unsigned long acpi_bus_address;
173typedef char acpi_hardware_id[15]; 176typedef char acpi_hardware_id[15];
174typedef char acpi_unique_id[9]; 177typedef char acpi_unique_id[9];
@@ -365,10 +368,10 @@ struct acpi_bus_type {
365int register_acpi_bus_type(struct acpi_bus_type *); 368int register_acpi_bus_type(struct acpi_bus_type *);
366int unregister_acpi_bus_type(struct acpi_bus_type *); 369int unregister_acpi_bus_type(struct acpi_bus_type *);
367struct device *acpi_get_physical_device(acpi_handle); 370struct device *acpi_get_physical_device(acpi_handle);
368struct device *acpi_get_physical_pci_device(acpi_handle);
369 371
370/* helper */ 372/* helper */
371acpi_handle acpi_get_child(acpi_handle, acpi_integer); 373acpi_handle acpi_get_child(acpi_handle, acpi_integer);
374int acpi_is_root_bridge(acpi_handle);
372acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int); 375acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int);
373#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle)) 376#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle))
374 377
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 0352c8f0b05b..f4906f6568d4 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -57,8 +57,7 @@
57 */ 57 */
58 58
59#define ACPI_POWER_HID "LNXPOWER" 59#define ACPI_POWER_HID "LNXPOWER"
60#define ACPI_PROCESSOR_OBJECT_HID "ACPI_CPU" 60#define ACPI_PROCESSOR_OBJECT_HID "LNXCPU"
61#define ACPI_PROCESSOR_HID "ACPI0007"
62#define ACPI_SYSTEM_HID "LNXSYSTM" 61#define ACPI_SYSTEM_HID "LNXSYSTM"
63#define ACPI_THERMAL_HID "LNXTHERM" 62#define ACPI_THERMAL_HID "LNXTHERM"
64#define ACPI_BUTTON_HID_POWERF "LNXPWRBN" 63#define ACPI_BUTTON_HID_POWERF "LNXPWRBN"
@@ -91,17 +90,15 @@ int acpi_pci_link_free_irq(acpi_handle handle);
91 90
92/* ACPI PCI Interrupt Routing (pci_irq.c) */ 91/* ACPI PCI Interrupt Routing (pci_irq.c) */
93 92
94int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus); 93int acpi_pci_irq_add_prt(acpi_handle handle, struct pci_bus *bus);
95void acpi_pci_irq_del_prt(int segment, int bus); 94void acpi_pci_irq_del_prt(struct pci_bus *bus);
96 95
97/* ACPI PCI Device Binding (pci_bind.c) */ 96/* ACPI PCI Device Binding (pci_bind.c) */
98 97
99struct pci_bus; 98struct pci_bus;
100 99
101acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id); 100struct pci_dev *acpi_get_pci_dev(acpi_handle);
102int acpi_pci_bind(struct acpi_device *device); 101int acpi_pci_bind_root(struct acpi_device *device);
103int acpi_pci_bind_root(struct acpi_device *device, struct acpi_pci_id *id,
104 struct pci_bus *bus);
105 102
106/* Arch-defined function to add a bus to the system */ 103/* Arch-defined function to add a bus to the system */
107 104
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 4927c063347c..baf1e0a9a7ee 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -258,6 +258,7 @@ DECLARE_PER_CPU(struct acpi_processor *, processors);
258extern struct acpi_processor_errata errata; 258extern struct acpi_processor_errata errata;
259 259
260void arch_acpi_processor_init_pdc(struct acpi_processor *pr); 260void arch_acpi_processor_init_pdc(struct acpi_processor *pr);
261void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr);
261 262
262#ifdef ARCH_HAS_POWER_INIT 263#ifdef ARCH_HAS_POWER_INIT
263void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, 264void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
diff --git a/include/acpi/video.h b/include/acpi/video.h
index af6fe95fd3d0..cf7be3dd157b 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -3,10 +3,10 @@
3 3
4#if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE) 4#if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE)
5extern int acpi_video_register(void); 5extern int acpi_video_register(void);
6extern int acpi_video_exit(void); 6extern void acpi_video_unregister(void);
7#else 7#else
8static inline int acpi_video_register(void) { return 0; } 8static inline int acpi_video_register(void) { return 0; }
9static inline void acpi_video_exit(void) { return; } 9static inline void acpi_video_unregister(void) { return; }
10#endif 10#endif
11 11
12#endif 12#endif
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 03f22076381f..334a3593cdfd 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -57,6 +57,7 @@ header-y += dlmconstants.h
57header-y += dlm_device.h 57header-y += dlm_device.h
58header-y += dlm_netlink.h 58header-y += dlm_netlink.h
59header-y += dm-ioctl.h 59header-y += dm-ioctl.h
60header-y += dm-log-userspace.h
60header-y += dn.h 61header-y += dn.h
61header-y += dqblk_xfs.h 62header-y += dqblk_xfs.h
62header-y += efs_fs_sb.h 63header-y += efs_fs_sb.h
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 51b4b0a5ce8c..34321cfffeab 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -113,9 +113,6 @@ void acpi_irq_stats_init(void);
113extern u32 acpi_irq_handled; 113extern u32 acpi_irq_handled;
114extern u32 acpi_irq_not_handled; 114extern u32 acpi_irq_not_handled;
115 115
116extern struct acpi_mcfg_allocation *pci_mmcfg_config;
117extern int pci_mmcfg_config_num;
118
119extern int sbf_port; 116extern int sbf_port;
120extern unsigned long acpi_realmode_flags; 117extern unsigned long acpi_realmode_flags;
121 118
@@ -293,7 +290,10 @@ void __init acpi_s4_no_nvs(void);
293 OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) 290 OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
294 291
295extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); 292extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags);
293extern void acpi_early_init(void);
294
296#else /* CONFIG_ACPI */ 295#else /* CONFIG_ACPI */
296static inline void acpi_early_init(void) { }
297 297
298static inline int early_acpi_boot_init(void) 298static inline int early_acpi_boot_init(void)
299{ 299{
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 4fa2810b675e..3c7a358241a7 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -599,6 +599,8 @@ extern void audit_log_untrustedstring(struct audit_buffer *ab,
599extern void audit_log_d_path(struct audit_buffer *ab, 599extern void audit_log_d_path(struct audit_buffer *ab,
600 const char *prefix, 600 const char *prefix,
601 struct path *path); 601 struct path *path);
602extern void audit_log_key(struct audit_buffer *ab,
603 char *key);
602extern void audit_log_lost(const char *message); 604extern void audit_log_lost(const char *message);
603extern int audit_update_lsm_rules(void); 605extern int audit_update_lsm_rules(void);
604 606
@@ -621,6 +623,7 @@ extern int audit_enabled;
621#define audit_log_n_untrustedstring(a,n,s) do { ; } while (0) 623#define audit_log_n_untrustedstring(a,n,s) do { ; } while (0)
622#define audit_log_untrustedstring(a,s) do { ; } while (0) 624#define audit_log_untrustedstring(a,s) do { ; } while (0)
623#define audit_log_d_path(b, p, d) do { ; } while (0) 625#define audit_log_d_path(b, p, d) do { ; } while (0)
626#define audit_log_key(b, k) do { ; } while (0)
624#define audit_enabled 0 627#define audit_enabled 0
625#endif 628#endif
626#endif 629#endif
diff --git a/include/linux/connector.h b/include/linux/connector.h
index b9966e64604e..b68d27850d51 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -41,8 +41,10 @@
41#define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ 41#define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */
42#define CN_DST_IDX 0x6 42#define CN_DST_IDX 0x6
43#define CN_DST_VAL 0x1 43#define CN_DST_VAL 0x1
44#define CN_IDX_DM 0x7 /* Device Mapper */
45#define CN_VAL_DM_USERSPACE_LOG 0x1
44 46
45#define CN_NETLINK_USERS 7 47#define CN_NETLINK_USERS 8
46 48
47/* 49/*
48 * Maximum connector's message size. 50 * Maximum connector's message size.
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 49c2362977fd..0d6310657f32 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -11,6 +11,7 @@
11#include <linux/bio.h> 11#include <linux/bio.h>
12#include <linux/blkdev.h> 12#include <linux/blkdev.h>
13 13
14struct dm_dev;
14struct dm_target; 15struct dm_target;
15struct dm_table; 16struct dm_table;
16struct mapped_device; 17struct mapped_device;
@@ -21,6 +22,7 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
21union map_info { 22union map_info {
22 void *ptr; 23 void *ptr;
23 unsigned long long ll; 24 unsigned long long ll;
25 unsigned flush_request;
24}; 26};
25 27
26/* 28/*
@@ -80,6 +82,15 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
80typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, 82typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
81 struct bio_vec *biovec, int max_size); 83 struct bio_vec *biovec, int max_size);
82 84
85typedef int (*iterate_devices_callout_fn) (struct dm_target *ti,
86 struct dm_dev *dev,
87 sector_t physical_start,
88 void *data);
89
90typedef int (*dm_iterate_devices_fn) (struct dm_target *ti,
91 iterate_devices_callout_fn fn,
92 void *data);
93
83/* 94/*
84 * Returns: 95 * Returns:
85 * 0: The target can handle the next I/O immediately. 96 * 0: The target can handle the next I/O immediately.
@@ -92,7 +103,8 @@ void dm_error(const char *message);
92/* 103/*
93 * Combine device limits. 104 * Combine device limits.
94 */ 105 */
95void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev); 106int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
107 sector_t start, void *data);
96 108
97struct dm_dev { 109struct dm_dev {
98 struct block_device *bdev; 110 struct block_device *bdev;
@@ -138,23 +150,12 @@ struct target_type {
138 dm_ioctl_fn ioctl; 150 dm_ioctl_fn ioctl;
139 dm_merge_fn merge; 151 dm_merge_fn merge;
140 dm_busy_fn busy; 152 dm_busy_fn busy;
153 dm_iterate_devices_fn iterate_devices;
141 154
142 /* For internal device-mapper use. */ 155 /* For internal device-mapper use. */
143 struct list_head list; 156 struct list_head list;
144}; 157};
145 158
146struct io_restrictions {
147 unsigned long bounce_pfn;
148 unsigned long seg_boundary_mask;
149 unsigned max_hw_sectors;
150 unsigned max_sectors;
151 unsigned max_segment_size;
152 unsigned short logical_block_size;
153 unsigned short max_hw_segments;
154 unsigned short max_phys_segments;
155 unsigned char no_cluster; /* inverted so that 0 is default */
156};
157
158struct dm_target { 159struct dm_target {
159 struct dm_table *table; 160 struct dm_table *table;
160 struct target_type *type; 161 struct target_type *type;
@@ -163,15 +164,18 @@ struct dm_target {
163 sector_t begin; 164 sector_t begin;
164 sector_t len; 165 sector_t len;
165 166
166 /* FIXME: turn this into a mask, and merge with io_restrictions */
167 /* Always a power of 2 */ 167 /* Always a power of 2 */
168 sector_t split_io; 168 sector_t split_io;
169 169
170 /* 170 /*
171 * These are automatically filled in by 171 * The number of zero-length barrier requests that will be submitted
172 * dm_table_get_device. 172 * to the target for the purpose of flushing the cache.
173 *
174 * The request number will be placed in union map_info->flush_request.
175 * It is the responsibility of the target driver to remap these requests
176 * to the real underlying devices.
173 */ 177 */
174 struct io_restrictions limits; 178 unsigned num_flush_requests;
175 179
176 /* target specific data */ 180 /* target specific data */
177 void *private; 181 void *private;
@@ -230,6 +234,7 @@ struct gendisk *dm_disk(struct mapped_device *md);
230int dm_suspended(struct mapped_device *md); 234int dm_suspended(struct mapped_device *md);
231int dm_noflush_suspending(struct dm_target *ti); 235int dm_noflush_suspending(struct dm_target *ti);
232union map_info *dm_get_mapinfo(struct bio *bio); 236union map_info *dm_get_mapinfo(struct bio *bio);
237union map_info *dm_get_rq_mapinfo(struct request *rq);
233 238
234/* 239/*
235 * Geometry functions. 240 * Geometry functions.
@@ -392,4 +397,12 @@ static inline unsigned long to_bytes(sector_t n)
392 return (n << SECTOR_SHIFT); 397 return (n << SECTOR_SHIFT);
393} 398}
394 399
400/*-----------------------------------------------------------------
401 * Helper for block layer and dm core operations
402 *---------------------------------------------------------------*/
403void dm_dispatch_request(struct request *rq);
404void dm_requeue_unmapped_request(struct request *rq);
405void dm_kill_unmapped_request(struct request *rq, int error);
406int dm_underlying_device_busy(struct request_queue *q);
407
395#endif /* _LINUX_DEVICE_MAPPER_H */ 408#endif /* _LINUX_DEVICE_MAPPER_H */
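
The new iterate_devices hook replaces the per-target io_restrictions:
the core now walks each target's underlying devices and combines their
queue limits, typically by passing dm_set_device_limits() as the
callout. A minimal sketch for a single-device target follows; struct
example_target and its fields are illustrative, not a real dm target.

	struct example_target {		/* hypothetical per-target context */
		struct dm_dev *dev;
		sector_t start;
	};

	static int example_iterate_devices(struct dm_target *ti,
					   iterate_devices_callout_fn fn,
					   void *data)
	{
		struct example_target *et = ti->private;

		/* One underlying device: hand it to the callout once. */
		return fn(ti, et->dev, et->start, data);
	}

A striped or multi-device target would instead loop over all of its
devices and return the first non-zero value from fn().
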
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 48e44ee2b466..2ab84c83c31a 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -123,6 +123,16 @@ struct dm_ioctl {
123 __u32 target_count; /* in/out */ 123 __u32 target_count; /* in/out */
124 __s32 open_count; /* out */ 124 __s32 open_count; /* out */
125 __u32 flags; /* in/out */ 125 __u32 flags; /* in/out */
126
127 /*
128 * event_nr holds either the event number (input and output) or the
129 * udev cookie value (input only).
130 * The DM_DEV_WAIT ioctl takes an event number as input.
131 * The DM_DEV_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
132 * use the field as a cookie to return in the DM_COOKIE
133 * variable with the uevents they issue.
134 * For output, the ioctls return the event number, not the cookie.
135 */
126 __u32 event_nr; /* in/out */ 136 __u32 event_nr; /* in/out */
127 __u32 padding; 137 __u32 padding;
128 138
@@ -256,9 +266,9 @@ enum {
256#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) 266#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
257 267
258#define DM_VERSION_MAJOR 4 268#define DM_VERSION_MAJOR 4
259#define DM_VERSION_MINOR 14 269#define DM_VERSION_MINOR 15
260#define DM_VERSION_PATCHLEVEL 0 270#define DM_VERSION_PATCHLEVEL 0
261#define DM_VERSION_EXTRA "-ioctl (2008-04-23)" 271#define DM_VERSION_EXTRA "-ioctl (2009-04-01)"
262 272
263/* Status bits */ 273/* Status bits */
264#define DM_READONLY_FLAG (1 << 0) /* In/Out */ 274#define DM_READONLY_FLAG (1 << 0) /* In/Out */
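
As a rough user-space illustration of the cookie convention: the caller
places the udev cookie in event_nr before issuing one of the three
ioctls named above. A minimal sketch, assuming control_fd is the open
/dev/mapper/control node and omitting error handling and payload setup:

	struct dm_ioctl io;

	memset(&io, 0, sizeof(io));
	io.version[0] = DM_VERSION_MAJOR;
	io.version[1] = DM_VERSION_MINOR;
	io.version[2] = DM_VERSION_PATCHLEVEL;
	io.data_size = sizeof(io);
	strncpy(io.name, "example-dev", sizeof(io.name) - 1);
	io.event_nr = cookie;	/* cookie in; the event number comes back out */

	ioctl(control_fd, DM_DEV_SUSPEND, &io);
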
diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h
new file mode 100644
index 000000000000..642e3017b51f
--- /dev/null
+++ b/include/linux/dm-log-userspace.h
@@ -0,0 +1,386 @@
1/*
2 * Copyright (C) 2006-2009 Red Hat, Inc.
3 *
4 * This file is released under the LGPL.
5 */
6
7#ifndef __DM_LOG_USERSPACE_H__
8#define __DM_LOG_USERSPACE_H__
9
10#include <linux/dm-ioctl.h> /* For DM_UUID_LEN */
11
12/*
13 * The device-mapper userspace log module consists of a kernel component and
14 * a user-space component. The kernel component implements the API defined
15 * in dm-dirty-log.h. Its purpose is simply to pass the parameters and
16 * return values of those API functions between kernel and user-space.
17 *
18 * Below are defined the 'request_types' - DM_ULOG_CTR, DM_ULOG_DTR, etc.
19 * These request types represent the different functions in the device-mapper
20 * dirty log API. Each of these is described in more detail below.
21 *
22 * The user-space program must listen for requests from the kernel (representing
23 * the various API functions) and process them.
24 *
25 * User-space begins by setting up the communication link (error checking
26 * removed for clarity):
27 * fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
28 * addr.nl_family = AF_NETLINK;
29 * addr.nl_groups = CN_IDX_DM;
30 * addr.nl_pid = 0;
31 * r = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
32 * opt = addr.nl_groups;
33 * setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
34 *
35 * User-space will then wait to receive requests from the kernel, which it
36 * will process as described below. The requests are received in the form,
37 * ((struct dm_ulog_request) + (additional data)). Depending on the request
38 * type, there may or may not be 'additional data'. In the descriptions below,
39 * you will see 'Payload-to-userspace' and 'Payload-to-kernel'. The
40 * 'Payload-to-userspace' is what the kernel sends in 'additional data' as
41 * necessary parameters to complete the request. The 'Payload-to-kernel' is
42 * the 'additional data' returned to the kernel that contains the necessary
43 * results of the request. The 'data_size' field in the dm_ulog_request
44 * structure denotes the availability and amount of payload data.
45 */
46
47/*
48 * DM_ULOG_CTR corresponds to (found in dm-dirty-log.h):
49 * int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
50 * unsigned argc, char **argv);
51 *
52 * Payload-to-userspace:
53 * A single string containing all the argv arguments separated by ' 's
54 * Payload-to-kernel:
55 * None. ('data_size' in the dm_ulog_request struct should be 0.)
56 *
57 * The UUID contained in the dm_ulog_request structure is the reference that
58 * will be used by all request types to a specific log. The constructor must
59 * record this association with the instance created.
60 *
61 * When the request has been processed, user-space must return the
62 * dm_ulog_request to the kernel - setting the 'error' field and
63 * 'data_size' appropriately.
64 */
65#define DM_ULOG_CTR 1
66
67/*
68 * DM_ULOG_DTR corresponds to (found in dm-dirty-log.h):
69 * void (*dtr)(struct dm_dirty_log *log);
70 *
71 * Payload-to-userspace:
72 * None.
73 * Payload-to-kernel:
74 * None. ('data_size' in the dm_ulog_request struct should be 0.)
75 *
76 * The UUID contained in the dm_ulog_request structure is all that is
77 * necessary to identify the log instance being destroyed. There is no
78 * payload data.
79 *
80 * When the request has been processed, user-space must return the
81 * dm_ulog_request to the kernel - setting the 'error' field and clearing
82 * 'data_size' appropriately.
83 */
84#define DM_ULOG_DTR 2
85
86/*
87 * DM_ULOG_PRESUSPEND corresponds to (found in dm-dirty-log.h):
88 * int (*presuspend)(struct dm_dirty_log *log);
89 *
90 * Payload-to-userspace:
91 * None.
92 * Payload-to-kernel:
93 * None.
94 *
95 * The UUID contained in the dm_ulog_request structure is all that is
96 * necessary to identify the log instance being presuspended. There is no
97 * payload data.
98 *
99 * When the request has been processed, user-space must return the
100 * dm_ulog_request to the kernel - setting the 'error' field and
101 * 'data_size' appropriately.
102 */
103#define DM_ULOG_PRESUSPEND 3
104
105/*
106 * DM_ULOG_POSTSUSPEND corresponds to (found in dm-dirty-log.h):
107 * int (*postsuspend)(struct dm_dirty_log *log);
108 *
109 * Payload-to-userspace:
110 * None.
111 * Payload-to-kernel:
112 * None.
113 *
114 * The UUID contained in the dm_ulog_request structure is all that is
115 * necessary to identify the log instance being postsuspended. There is no
116 * payload data.
117 *
118 * When the request has been processed, user-space must return the
119 * dm_ulog_request to the kernel - setting the 'error' field and
120 * 'data_size' appropriately.
121 */
122#define DM_ULOG_POSTSUSPEND 4
123
124/*
125 * DM_ULOG_RESUME corresponds to (found in dm-dirty-log.h):
126 * int (*resume)(struct dm_dirty_log *log);
127 *
128 * Payload-to-userspace:
129 * None.
130 * Payload-to-kernel:
131 * None.
132 *
133 * The UUID contained in the dm_ulog_request structure is all that is
134 * necessary to identify the log instance being resumed. There is no
135 * payload data.
136 *
137 * When the request has been processed, user-space must return the
138 * dm_ulog_request to the kernel - setting the 'error' field and
139 * 'data_size' appropriately.
140 */
141#define DM_ULOG_RESUME 5
142
143/*
144 * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h):
145 * uint32_t (*get_region_size)(struct dm_dirty_log *log);
146 *
147 * Payload-to-userspace:
148 * None.
149 * Payload-to-kernel:
150 * uint64_t - contains the region size
151 *
152 * The region size is something that was determined at constructor time.
153 * It is returned in the payload area and 'data_size' is set to
154 * reflect this.
155 *
156 * When the request has been processed, user-space must return the
157 * dm_ulog_request to the kernel - setting the 'error' field appropriately.
158 */
159#define DM_ULOG_GET_REGION_SIZE 6
160
161/*
162 * DM_ULOG_IS_CLEAN corresponds to (found in dm-dirty-log.h):
163 * int (*is_clean)(struct dm_dirty_log *log, region_t region);
164 *
165 * Payload-to-userspace:
166 * uint64_t - the region to get clean status on
167 * Payload-to-kernel:
168 * int64_t - 1 if clean, 0 otherwise
169 *
170 * Payload is sizeof(uint64_t) and contains the region for which the clean
171 * status is being requested.
172 *
173 * When the request has been processed, user-space must return the
174 * dm_ulog_request to the kernel - filling the payload with 0 (not clean) or
175 * 1 (clean), setting 'data_size' and 'error' appropriately.
176 */
177#define DM_ULOG_IS_CLEAN 7
178
179/*
180 * DM_ULOG_IN_SYNC corresponds to (found in dm-dirty-log.h):
181 * int (*in_sync)(struct dm_dirty_log *log, region_t region,
182 * int can_block);
183 *
184 * Payload-to-userspace:
185 * uint64_t - the region to get sync status on
186 * Payload-to-kernel:
187 * int64_t - 1 if in-sync, 0 otherwise
188 *
189 * Exactly the same as 'is_clean' above, except this time asking "has the
190 * region been recovered?" vs. "is the region not being modified?"
191 */
192#define DM_ULOG_IN_SYNC 8
193
194/*
195 * DM_ULOG_FLUSH corresponds to (found in dm-dirty-log.h):
196 * int (*flush)(struct dm_dirty_log *log);
197 *
198 * Payload-to-userspace:
199 * None.
200 * Payload-to-kernel:
201 * None.
202 *
203 * No incoming or outgoing payload. Simply flush log state to disk.
204 *
205 * When the request has been processed, user-space must return the
206 * dm_ulog_request to the kernel - setting the 'error' field and clearing
207 * 'data_size' appropriately.
208 */
209#define DM_ULOG_FLUSH 9
210
211/*
212 * DM_ULOG_MARK_REGION corresponds to (found in dm-dirty-log.h):
213 * void (*mark_region)(struct dm_dirty_log *log, region_t region);
214 *
215 * Payload-to-userspace:
216 * uint64_t [] - region(s) to mark
217 * Payload-to-kernel:
218 * None.
219 *
220 * Incoming payload contains one or more regions to mark dirty.
221 * The number of regions contained in the payload can be determined from
222 * 'data_size/sizeof(uint64_t)'.
223 *
224 * When the request has been processed, user-space must return the
225 * dm_ulog_request to the kernel - setting the 'error' field and clearing
226 * 'data_size' appropriately.
227 */
228#define DM_ULOG_MARK_REGION 10
229
230/*
231 * DM_ULOG_CLEAR_REGION corresponds to (found in dm-dirty-log.h):
232 * void (*clear_region)(struct dm_dirty_log *log, region_t region);
233 *
234 * Payload-to-userspace:
235 * uint64_t [] - region(s) to clear
236 * Payload-to-kernel:
237 * None.
238 *
239 * Incoming payload contains one or more regions to mark clean.
240 * The number of regions contained in the payload can be determined from
241 * 'data_size/sizeof(uint64_t)'.
242 *
243 * When the request has been processed, user-space must return the
244 * dm_ulog_request to the kernel - setting the 'error' field and clearing
245 * 'data_size' appropriately.
246 */
247#define DM_ULOG_CLEAR_REGION 11
248
249/*
250 * DM_ULOG_GET_RESYNC_WORK corresponds to (found in dm-dirty-log.h):
251 * int (*get_resync_work)(struct dm_dirty_log *log, region_t *region);
252 *
253 * Payload-to-userspace:
254 * None.
255 * Payload-to-kernel:
256 * {
257 * int64_t i; -- 1 if recovery necessary, 0 otherwise
258 * uint64_t r; -- The region to recover if i=1
259 * }
260 * 'data_size' should be set appropriately.
261 *
262 * When the request has been processed, user-space must return the
263 * dm_ulog_request to the kernel - setting the 'error' field appropriately.
264 */
265#define DM_ULOG_GET_RESYNC_WORK 12
266
267/*
268 * DM_ULOG_SET_REGION_SYNC corresponds to (found in dm-dirty-log.h):
269 * void (*set_region_sync)(struct dm_dirty_log *log,
270 * region_t region, int in_sync);
271 *
272 * Payload-to-userspace:
273 * {
274 * uint64_t - region to set sync state on
275 * int64_t - 0 if not-in-sync, 1 if in-sync
276 * }
277 * Payload-to-kernel:
278 * None.
279 *
280 * When the request has been processed, user-space must return the
281 * dm_ulog_request to the kernel - setting the 'error' field and clearing
282 * 'data_size' appropriately.
283 */
284#define DM_ULOG_SET_REGION_SYNC 13
285
286/*
287 * DM_ULOG_GET_SYNC_COUNT corresponds to (found in dm-dirty-log.h):
288 * region_t (*get_sync_count)(struct dm_dirty_log *log);
289 *
290 * Payload-to-userspace:
291 * None.
292 * Payload-to-kernel:
293 * uint64_t - the number of in-sync regions
294 *
295 * No incoming payload. Kernel-bound payload contains the number of
296 * regions that are in-sync (as a uint64_t).
297 *
298 * When the request has been processed, user-space must return the
299 * dm_ulog_request to the kernel - setting the 'error' field and
300 * 'data_size' appropriately.
301 */
302#define DM_ULOG_GET_SYNC_COUNT 14
303
304/*
305 * DM_ULOG_STATUS_INFO corresponds to (found in dm-dirty-log.h):
306 * int (*status)(struct dm_dirty_log *log, STATUSTYPE_INFO,
307 * char *result, unsigned maxlen);
308 *
309 * Payload-to-userspace:
310 * None.
311 * Payload-to-kernel:
312 * Character string containing STATUSTYPE_INFO
313 *
314 * When the request has been processed, user-space must return the
315 * dm_ulog_request to the kernel - setting the 'error' field and
316 * 'data_size' appropriately.
317 */
318#define DM_ULOG_STATUS_INFO 15
319
320/*
321 * DM_ULOG_STATUS_TABLE corresponds to (found in dm-dirty-log.h):
322 * int (*status)(struct dm_dirty_log *log, STATUSTYPE_TABLE,
323 * char *result, unsigned maxlen);
324 *
325 * Payload-to-userspace:
326 * None.
327 * Payload-to-kernel:
328 * Character string containing STATUSTYPE_TABLE
329 *
330 * When the request has been processed, user-space must return the
331 * dm_ulog_request to the kernel - setting the 'error' field and
332 * 'data_size' appropriately.
333 */
334#define DM_ULOG_STATUS_TABLE 16
335
336/*
337 * DM_ULOG_IS_REMOTE_RECOVERING corresponds to (found in dm-dirty-log.h):
338 * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
339 *
340 * Payload-to-userspace:
341 * uint64_t - region to determine recovery status on
342 * Payload-to-kernel:
343 * {
344 * int64_t is_recovering; -- 0 if no, 1 if yes
345 * uint64_t in_sync_hint; -- lowest region still needing resync
346 * }
347 *
348 * When the request has been processed, user-space must return the
349 * dm_ulog_request to the kernel - setting the 'error' field and
350 * 'data_size' appropriately.
351 */
352#define DM_ULOG_IS_REMOTE_RECOVERING 17
353
354/*
355 * (DM_ULOG_REQUEST_MASK & request_type) to get the request type
356 *
357 * (The 'Payload-to-userspace'/'Payload-to-kernel' conventions above
358 * do not apply here; DM_ULOG_REQUEST_MASK and DM_ULOG_REQUEST_TYPE
359 * are simply helpers for decoding the 'request_type' field, as
360 * described below.)
361 *
362 * We are reserving 8 bits of the 32-bit 'request_type' field for the
363 * various request types above. The remaining 24 bits are currently
364 * set to zero and are reserved for future use and compatibility concerns.
365 *
366 * User-space should always use DM_ULOG_REQUEST_TYPE to acquire the
367 * request type from the 'request_type' field to maintain forward compatibility.
368 */
369#define DM_ULOG_REQUEST_MASK 0xFF
370#define DM_ULOG_REQUEST_TYPE(request_type) \
371 (DM_ULOG_REQUEST_MASK & (request_type))
372
373struct dm_ulog_request {
374 char uuid[DM_UUID_LEN]; /* Ties a request to a specific mirror log */
375 char padding[7]; /* Padding because DM_UUID_LEN = 129 */
376
377 int32_t error; /* Used to report back processing errors */
378
379 uint32_t seq; /* Sequence number for request */
380 uint32_t request_type; /* DM_ULOG_* defined above */
381 uint32_t data_size; /* How much data (not including this struct) */
382
383 char data[0];
384};
385
386#endif /* __DM_LOG_USERSPACE_H__ */
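
To make the request/response flow above concrete, a skeletal user-space
dispatch loop might look like the sketch below. It assumes fd is the
connector socket set up as shown earlier; real code must also validate
message lengths and rebuild the netlink/cn_msg envelope (seq, ack)
before replying.

	#include <linux/connector.h>
	#include <linux/dm-log-userspace.h>
	#include <linux/netlink.h>
	#include <sys/socket.h>

	static void service_requests(int fd)
	{
		char buf[8192];
		ssize_t n;

		while ((n = recv(fd, buf, sizeof(buf), 0)) > 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
			struct cn_msg *msg = NLMSG_DATA(nlh);
			struct dm_ulog_request *rq = (void *)msg->data;

			switch (DM_ULOG_REQUEST_TYPE(rq->request_type)) {
			case DM_ULOG_CTR:
				/* parse the argv string in rq->data and
				   record rq->uuid for this log instance */
				rq->error = 0;
				rq->data_size = 0;
				break;
			/* ... the remaining DM_ULOG_* types ... */
			default:
				rq->error = -1;
				rq->data_size = 0;
				break;
			}
			/* send the updated request back over the socket */
		}
	}
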
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 1731fb5fd775..4a2b162c256a 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -126,6 +126,8 @@ extern int free_irte(int irq);
126extern int irq_remapped(int irq); 126extern int irq_remapped(int irq);
127extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); 127extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
128extern struct intel_iommu *map_ioapic_to_ir(int apic); 128extern struct intel_iommu *map_ioapic_to_ir(int apic);
129extern int set_ioapic_sid(struct irte *irte, int apic);
130extern int set_msi_sid(struct irte *irte, struct pci_dev *dev);
129#else 131#else
130static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) 132static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
131{ 133{
@@ -156,6 +158,15 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic)
156{ 158{
157 return NULL; 159 return NULL;
158} 160}
161static inline int set_ioapic_sid(struct irte *irte, int apic)
162{
163 return 0;
164}
165static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev)
166{
167 return 0;
168}
169
159#define irq_remapped(irq) (0) 170#define irq_remapped(irq) (0)
160#define enable_intr_remapping(mode) (-1) 171#define enable_intr_remapping(mode) (-1)
161#define disable_intr_remapping() (0) 172#define disable_intr_remapping() (0)
diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h
index 7894dd0f3b77..ca1bfe90004f 100644
--- a/include/linux/ext3_fs_i.h
+++ b/include/linux/ext3_fs_i.h
@@ -103,10 +103,6 @@ struct ext3_inode_info {
103 */ 103 */
104 struct rw_semaphore xattr_sem; 104 struct rw_semaphore xattr_sem;
105#endif 105#endif
106#ifdef CONFIG_EXT3_FS_POSIX_ACL
107 struct posix_acl *i_acl;
108 struct posix_acl *i_default_acl;
109#endif
110 106
111 struct list_head i_orphan; /* unlinked but open inodes */ 107 struct list_head i_orphan; /* unlinked but open inodes */
112 108
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
index 8e912ab6a072..3c155107d61f 100644
--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -3,4 +3,25 @@
3 3
4#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */ 4#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
5 5
6#ifdef __KERNEL__
7
8/*
9 * Space reservation ioctls and argument structure
10 * are designed to be compatible with the legacy XFS ioctls.
11 */
12struct space_resv {
13 __s16 l_type;
14 __s16 l_whence;
15 __s64 l_start;
16 __s64 l_len; /* len == 0 means until end of file */
17 __s32 l_sysid;
18 __u32 l_pid;
19 __s32 l_pad[4]; /* reserved area */
20};
21
22#define FS_IOC_RESVSP _IOW('X', 40, struct space_resv)
23#define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv)
24
25#endif /* __KERNEL__ */
26
6#endif /* _FALLOC_H_ */ 27#endif /* _FALLOC_H_ */
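
The structure mirrors the flock-style argument of the legacy XFS
reservation ioctls. The generic path can translate such a request into
do_fallocate() roughly as below; this is a sketch of the idea behind
ioctl_preallocate(), not the exact fs/ioctl.c code.

	static int example_preallocate(struct file *filp, struct space_resv *sr)
	{
		/* normalise l_start against the requested origin */
		switch (sr->l_whence) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			sr->l_start += filp->f_pos;
			break;
		case SEEK_END:
			sr->l_start += i_size_read(filp->f_path.dentry->d_inode);
			break;
		default:
			return -EINVAL;
		}

		/* reserve the range without changing i_size */
		return do_fallocate(filp, FALLOC_FL_KEEP_SIZE,
				    sr->l_start, sr->l_len);
	}
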
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ff5e4e01952..0872372184fe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -710,6 +710,9 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
710#define i_size_ordered_init(inode) do { } while (0) 710#define i_size_ordered_init(inode) do { } while (0)
711#endif 711#endif
712 712
713struct posix_acl;
714#define ACL_NOT_CACHED ((void *)(-1))
715
713struct inode { 716struct inode {
714 struct hlist_node i_hash; 717 struct hlist_node i_hash;
715 struct list_head i_list; 718 struct list_head i_list;
@@ -773,6 +776,10 @@ struct inode {
773#ifdef CONFIG_SECURITY 776#ifdef CONFIG_SECURITY
774 void *i_security; 777 void *i_security;
775#endif 778#endif
779#ifdef CONFIG_FS_POSIX_ACL
780 struct posix_acl *i_acl;
781 struct posix_acl *i_default_acl;
782#endif
776 void *i_private; /* fs or device private pointer */ 783 void *i_private; /* fs or device private pointer */
777}; 784};
778 785
@@ -1906,6 +1913,8 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
1906 1913
1907extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, 1914extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
1908 struct file *filp); 1915 struct file *filp);
1916extern int do_fallocate(struct file *file, int mode, loff_t offset,
1917 loff_t len);
1909extern long do_sys_open(int dfd, const char __user *filename, int flags, 1918extern long do_sys_open(int dfd, const char __user *filename, int flags,
1910 int mode); 1919 int mode);
1911extern struct file *filp_open(const char *, int, int); 1920extern struct file *filp_open(const char *, int, int);
@@ -1914,6 +1923,10 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
1914extern int filp_close(struct file *, fl_owner_t id); 1923extern int filp_close(struct file *, fl_owner_t id);
1915extern char * getname(const char __user *); 1924extern char * getname(const char __user *);
1916 1925
1926/* fs/ioctl.c */
1927
1928extern int ioctl_preallocate(struct file *filp, void __user *argp);
1929
1917/* fs/dcache.c */ 1930/* fs/dcache.c */
1918extern void __init vfs_caches_init_early(void); 1931extern void __init vfs_caches_init_early(void);
1919extern void __init vfs_caches_init(unsigned long); 1932extern void __init vfs_caches_init(unsigned long);
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index 10d701eec484..b6a85183c333 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -175,16 +175,16 @@ struct icmp6_filter {
175 175
176 176
177extern void icmpv6_send(struct sk_buff *skb, 177extern void icmpv6_send(struct sk_buff *skb,
178 int type, int code, 178 u8 type, u8 code,
179 __u32 info, 179 __u32 info,
180 struct net_device *dev); 180 struct net_device *dev);
181 181
182extern int icmpv6_init(void); 182extern int icmpv6_init(void);
183extern int icmpv6_err_convert(int type, int code, 183extern int icmpv6_err_convert(u8 type, u8 code,
184 int *err); 184 int *err);
185extern void icmpv6_cleanup(void); 185extern void icmpv6_cleanup(void);
186extern void icmpv6_param_prob(struct sk_buff *skb, 186extern void icmpv6_param_prob(struct sk_buff *skb,
187 int code, int pos); 187 u8 code, int pos);
188 188
189struct flowi; 189struct flowi;
190struct in6_addr; 190struct in6_addr;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 95c6e00a72e8..cf1f3888067c 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1361,7 +1361,6 @@ int ide_in_drive_list(u16 *, const struct drive_list_entry *);
1361#ifdef CONFIG_BLK_DEV_IDEDMA 1361#ifdef CONFIG_BLK_DEV_IDEDMA
1362int ide_dma_good_drive(ide_drive_t *); 1362int ide_dma_good_drive(ide_drive_t *);
1363int __ide_dma_bad_drive(ide_drive_t *); 1363int __ide_dma_bad_drive(ide_drive_t *);
1364int ide_id_dma_bug(ide_drive_t *);
1365 1364
1366u8 ide_find_dma_mode(ide_drive_t *, u8); 1365u8 ide_find_dma_mode(ide_drive_t *, u8);
1367 1366
@@ -1402,7 +1401,6 @@ void ide_dma_lost_irq(ide_drive_t *);
1402ide_startstop_t ide_dma_timeout_retry(ide_drive_t *, int); 1401ide_startstop_t ide_dma_timeout_retry(ide_drive_t *, int);
1403 1402
1404#else 1403#else
1405static inline int ide_id_dma_bug(ide_drive_t *drive) { return 0; }
1406static inline u8 ide_find_dma_mode(ide_drive_t *drive, u8 speed) { return 0; } 1404static inline u8 ide_find_dma_mode(ide_drive_t *drive, u8 speed) { return 0; }
1407static inline u8 ide_max_dma_mode(ide_drive_t *drive) { return 0; } 1405static inline u8 ide_max_dma_mode(ide_drive_t *drive) { return 0; }
1408static inline void ide_dma_off_quietly(ide_drive_t *drive) { ; } 1406static inline void ide_dma_off_quietly(ide_drive_t *drive) { ; }
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index da5a5a1f4cd2..b25d1b53df0d 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -258,6 +258,16 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
258#define lockdep_set_subclass(lock, sub) \ 258#define lockdep_set_subclass(lock, sub) \
259 lockdep_init_map(&(lock)->dep_map, #lock, \ 259 lockdep_init_map(&(lock)->dep_map, #lock, \
260 (lock)->dep_map.key, sub) 260 (lock)->dep_map.key, sub)
261/*
262 * Compare locking classes
263 */
264#define lockdep_match_class(lock, key) lockdep_match_key(&(lock)->dep_map, key)
265
266static inline int lockdep_match_key(struct lockdep_map *lock,
267 struct lock_class_key *key)
268{
269 return lock->key == key;
270}
261 271
262/* 272/*
263 * Acquire a lock. 273 * Acquire a lock.
@@ -326,6 +336,11 @@ static inline void lockdep_on(void)
326#define lockdep_set_class_and_subclass(lock, key, sub) \ 336#define lockdep_set_class_and_subclass(lock, key, sub) \
327 do { (void)(key); } while (0) 337 do { (void)(key); } while (0)
328#define lockdep_set_subclass(lock, sub) do { } while (0) 338#define lockdep_set_subclass(lock, sub) do { } while (0)
339/*
340 * We don't define lockdep_match_class() and lockdep_match_key() for the
341 * !LOCKDEP case since the result is not well defined and the caller should
342 * rather #ifdef the call themselves.
343 */
329 344
330# define INIT_LOCKDEP 345# define INIT_LOCKDEP
331# define lockdep_reset() do { debug_locks = 1; } while (0) 346# define lockdep_reset() do { debug_locks = 1; } while (0)
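
With lockdep enabled, the new helper lets code assert that a lock was
initialised with the expected class key, e.g. (illustrative names):

	static struct lock_class_key example_key;

	static void example_assert_class(spinlock_t *lock)
	{
		/* true only if 'lock' was set up with example_key */
		WARN_ON(!lockdep_match_class(lock, &example_key));
	}
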
diff --git a/include/linux/max17040_battery.h b/include/linux/max17040_battery.h
new file mode 100644
index 000000000000..ad97b06cf930
--- /dev/null
+++ b/include/linux/max17040_battery.h
@@ -0,0 +1,19 @@
1/*
2 * Copyright (C) 2009 Samsung Electronics
3 * Minkyu Kang <mk7.kang@samsung.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#ifndef __MAX17040_BATTERY_H_
11#define __MAX17040_BATTERY_H_
12
13struct max17040_platform_data {
14 int (*battery_online)(void);
15 int (*charger_online)(void);
16 int (*charger_enable)(void);
17};
18
19#endif
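
A board file would typically hand these callbacks to the driver through
I2C platform data, along the lines of the sketch below (all example_*
names are illustrative; 0x36 is the usual MAX17040 slave address):

	static int example_battery_online(void) { return 1; }
	static int example_charger_online(void) { return 0; }
	static int example_charger_enable(void) { return 0; }

	static struct max17040_platform_data example_max17040_pdata = {
		.battery_online	= example_battery_online,
		.charger_online	= example_charger_online,
		.charger_enable	= example_charger_enable,
	};

	static struct i2c_board_info example_i2c_devs[] __initdata = {
		{
			I2C_BOARD_INFO("max17040", 0x36),
			.platform_data = &example_max17040_pdata,
		},
	};
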
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d006e93d5c93..ba3a7cb1eaa0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -826,7 +826,7 @@ extern int make_pages_present(unsigned long addr, unsigned long end);
826extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); 826extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
827 827
828int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 828int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
829 unsigned long start, int len, int write, int force, 829 unsigned long start, int nr_pages, int write, int force,
830 struct page **pages, struct vm_area_struct **vmas); 830 struct page **pages, struct vm_area_struct **vmas);
831int get_user_pages_fast(unsigned long start, int nr_pages, int write, 831int get_user_pages_fast(unsigned long start, int nr_pages, int write,
832 struct page **pages); 832 struct page **pages);
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index b3646cd7fd5a..4391741b99dc 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -229,7 +229,6 @@ struct hotplug_params {
229extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, 229extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
230 struct hotplug_params *hpp); 230 struct hotplug_params *hpp);
231int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); 231int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags);
232int acpi_root_bridge(acpi_handle handle);
233int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle); 232int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle);
234int acpi_pci_detect_ejectable(struct pci_bus *pbus); 233int acpi_pci_detect_ejectable(struct pci_bus *pbus);
235#endif 234#endif
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 4bc241290c24..065a3652a3ea 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -83,4 +83,78 @@ extern int posix_acl_chmod_masq(struct posix_acl *, mode_t);
83extern struct posix_acl *get_posix_acl(struct inode *, int); 83extern struct posix_acl *get_posix_acl(struct inode *, int);
84extern int set_posix_acl(struct inode *, int, struct posix_acl *); 84extern int set_posix_acl(struct inode *, int, struct posix_acl *);
85 85
86#ifdef CONFIG_FS_POSIX_ACL
87static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
88{
89 struct posix_acl **p, *acl;
90 switch (type) {
91 case ACL_TYPE_ACCESS:
92 p = &inode->i_acl;
93 break;
94 case ACL_TYPE_DEFAULT:
95 p = &inode->i_default_acl;
96 break;
97 default:
98 return ERR_PTR(-EINVAL);
99 }
100 acl = ACCESS_ONCE(*p);
101 if (acl) {
102 spin_lock(&inode->i_lock);
103 acl = *p;
104 if (acl != ACL_NOT_CACHED)
105 acl = posix_acl_dup(acl);
106 spin_unlock(&inode->i_lock);
107 }
108 return acl;
109}
110
111static inline void set_cached_acl(struct inode *inode,
112 int type,
113 struct posix_acl *acl)
114{
115 struct posix_acl *old = NULL;
116 spin_lock(&inode->i_lock);
117 switch (type) {
118 case ACL_TYPE_ACCESS:
119 old = inode->i_acl;
120 inode->i_acl = posix_acl_dup(acl);
121 break;
122 case ACL_TYPE_DEFAULT:
123 old = inode->i_default_acl;
124 inode->i_default_acl = posix_acl_dup(acl);
125 break;
126 }
127 spin_unlock(&inode->i_lock);
128 if (old != ACL_NOT_CACHED)
129 posix_acl_release(old);
130}
131
132static inline void forget_cached_acl(struct inode *inode, int type)
133{
134 struct posix_acl *old = NULL;
135 spin_lock(&inode->i_lock);
136 switch (type) {
137 case ACL_TYPE_ACCESS:
138 old = inode->i_acl;
139 inode->i_acl = ACL_NOT_CACHED;
140 break;
141 case ACL_TYPE_DEFAULT:
142 old = inode->i_default_acl;
143 inode->i_default_acl = ACL_NOT_CACHED;
144 break;
145 }
146 spin_unlock(&inode->i_lock);
147 if (old != ACL_NOT_CACHED)
148 posix_acl_release(old);
149}
150#endif
151
152static inline void cache_no_acl(struct inode *inode)
153{
154#ifdef CONFIG_FS_POSIX_ACL
155 inode->i_acl = NULL;
156 inode->i_default_acl = NULL;
157#endif
158}
159
86#endif /* __LINUX_POSIX_ACL_H */ 160#endif /* __LINUX_POSIX_ACL_H */
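
Filesystems touch this cache at a few points beyond the lookup path. A
condensed sketch, with the fs-specific pieces (example_write_acl_ea()
in particular) as placeholders:

	/* inode creation: no ACLs possible, never go to disk for them */
	static void example_init_inode(struct inode *inode)
	{
		cache_no_acl(inode);
	}

	/* ACL changed behind the VFS (e.g. raw xattr write): drop our copy */
	static int example_acl_xattr_set(struct inode *inode, int type,
					 const void *value, size_t size)
	{
		int error = example_write_acl_ea(inode, type, value, size);

		if (!error)
			forget_cached_acl(inode, type);
		return error;
	}
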
diff --git a/include/linux/reiserfs_acl.h b/include/linux/reiserfs_acl.h
index 8cc65757e47a..b4448853900e 100644
--- a/include/linux/reiserfs_acl.h
+++ b/include/linux/reiserfs_acl.h
@@ -56,15 +56,6 @@ int reiserfs_cache_default_acl(struct inode *dir);
56extern struct xattr_handler reiserfs_posix_acl_default_handler; 56extern struct xattr_handler reiserfs_posix_acl_default_handler;
57extern struct xattr_handler reiserfs_posix_acl_access_handler; 57extern struct xattr_handler reiserfs_posix_acl_access_handler;
58 58
59static inline void reiserfs_init_acl_access(struct inode *inode)
60{
61 REISERFS_I(inode)->i_acl_access = NULL;
62}
63
64static inline void reiserfs_init_acl_default(struct inode *inode)
65{
66 REISERFS_I(inode)->i_acl_default = NULL;
67}
68#else 59#else
69 60
70#define reiserfs_cache_default_acl(inode) 0 61#define reiserfs_cache_default_acl(inode) 0
@@ -86,12 +77,4 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
86{ 77{
87 return 0; 78 return 0;
88} 79}
89
90static inline void reiserfs_init_acl_access(struct inode *inode)
91{
92}
93
94static inline void reiserfs_init_acl_default(struct inode *inode)
95{
96}
97#endif 80#endif
diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h
index 76360b36ac33..89f4d3abbf5a 100644
--- a/include/linux/reiserfs_fs_i.h
+++ b/include/linux/reiserfs_fs_i.h
@@ -54,10 +54,6 @@ struct reiserfs_inode_info {
54 unsigned int i_trans_id; 54 unsigned int i_trans_id;
55 struct reiserfs_journal_list *i_jl; 55 struct reiserfs_journal_list *i_jl;
56 struct mutex i_mmap; 56 struct mutex i_mmap;
57#ifdef CONFIG_REISERFS_FS_POSIX_ACL
58 struct posix_acl *i_acl_access;
59 struct posix_acl *i_acl_default;
60#endif
61#ifdef CONFIG_REISERFS_FS_XATTR 57#ifdef CONFIG_REISERFS_FS_XATTR
62 struct rw_semaphore i_xattr_sem; 58 struct rw_semaphore i_xattr_sem;
63#endif 59#endif
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index fd83f2584b15..abff6c9b413c 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -19,10 +19,6 @@ struct shmem_inode_info {
19 swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ 19 swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */
20 struct list_head swaplist; /* chain of maybes on swap */ 20 struct list_head swaplist; /* chain of maybes on swap */
21 struct inode vfs_inode; 21 struct inode vfs_inode;
22#ifdef CONFIG_TMPFS_POSIX_ACL
23 struct posix_acl *i_acl;
24 struct posix_acl *i_default_acl;
25#endif
26}; 22};
27 23
28struct shmem_sb_info { 24struct shmem_sb_info {
@@ -45,7 +41,6 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
45#ifdef CONFIG_TMPFS_POSIX_ACL 41#ifdef CONFIG_TMPFS_POSIX_ACL
46int shmem_permission(struct inode *, int); 42int shmem_permission(struct inode *, int);
47int shmem_acl_init(struct inode *, struct inode *); 43int shmem_acl_init(struct inode *, struct inode *);
48void shmem_acl_destroy_inode(struct inode *);
49 44
50extern struct xattr_handler shmem_xattr_acl_access_handler; 45extern struct xattr_handler shmem_xattr_acl_access_handler;
51extern struct xattr_handler shmem_xattr_acl_default_handler; 46extern struct xattr_handler shmem_xattr_acl_default_handler;
@@ -57,9 +52,6 @@ static inline int shmem_acl_init(struct inode *inode, struct inode *dir)
57{ 52{
58 return 0; 53 return 0;
59} 54}
60static inline void shmem_acl_destroy_inode(struct inode *inode)
61{
62}
63#endif /* CONFIG_TMPFS_POSIX_ACL */ 55#endif /* CONFIG_TMPFS_POSIX_ACL */
64 56
65#endif 57#endif
diff --git a/include/net/protocol.h b/include/net/protocol.h
index ffa5b8b1f1df..1089d5aabd49 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -53,7 +53,7 @@ struct inet6_protocol
53 53
54 void (*err_handler)(struct sk_buff *skb, 54 void (*err_handler)(struct sk_buff *skb,
55 struct inet6_skb_parm *opt, 55 struct inet6_skb_parm *opt,
56 int type, int code, int offset, 56 u8 type, u8 code, int offset,
57 __be32 info); 57 __be32 info);
58 58
59 int (*gso_send_check)(struct sk_buff *skb); 59 int (*gso_send_check)(struct sk_buff *skb);
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index 8a22599f26ba..f6b9b830df8c 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -6,7 +6,7 @@
6#include <net/protocol.h> 6#include <net/protocol.h>
7 7
8void raw6_icmp_error(struct sk_buff *, int nexthdr, 8void raw6_icmp_error(struct sk_buff *, int nexthdr,
9 int type, int code, int inner_offset, __be32); 9 u8 type, u8 code, int inner_offset, __be32);
10int raw6_local_deliver(struct sk_buff *, int); 10int raw6_local_deliver(struct sk_buff *, int);
11 11
12extern int rawv6_rcv(struct sock *sk, 12extern int rawv6_rcv(struct sock *sk,
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 9f80a7668289..d16a304cbed4 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -448,6 +448,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
448{ 448{
449 struct sctp_ulpevent *event = sctp_skb2event(skb); 449 struct sctp_ulpevent *event = sctp_skb2event(skb);
450 450
451 skb_orphan(skb);
451 skb->sk = sk; 452 skb->sk = sk;
452 skb->destructor = sctp_sock_rfree; 453 skb->destructor = sctp_sock_rfree;
453 atomic_add(event->rmem_len, &sk->sk_rmem_alloc); 454 atomic_add(event->rmem_len, &sk->sk_rmem_alloc);
diff --git a/include/net/sock.h b/include/net/sock.h
index 07133c5e9868..352f06bbd7a9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1252,6 +1252,7 @@ static inline int sk_has_allocations(const struct sock *sk)
1252 1252
1253static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) 1253static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1254{ 1254{
1255 skb_orphan(skb);
1255 skb->sk = sk; 1256 skb->sk = sk;
1256 skb->destructor = sock_wfree; 1257 skb->destructor = sock_wfree;
1257 /* 1258 /*
@@ -1264,6 +1265,7 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1264 1265
1265static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) 1266static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
1266{ 1267{
1268 skb_orphan(skb);
1267 skb->sk = sk; 1269 skb->sk = sk;
1268 skb->destructor = sock_rfree; 1270 skb->destructor = sock_rfree;
1269 atomic_add(skb->truesize, &sk->sk_rmem_alloc); 1271 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 736bca450886..9e3a3f4c1f60 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1274,7 +1274,7 @@ struct xfrm_tunnel {
1274struct xfrm6_tunnel { 1274struct xfrm6_tunnel {
1275 int (*handler)(struct sk_buff *skb); 1275 int (*handler)(struct sk_buff *skb);
1276 int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, 1276 int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
1277 int type, int code, int offset, __be32 info); 1277 u8 type, u8 code, int offset, __be32 info);
1278 struct xfrm6_tunnel *next; 1278 struct xfrm6_tunnel *next;
1279 int priority; 1279 int priority;
1280}; 1280};
diff --git a/init/main.c b/init/main.c
index 4870dfeb9ee5..2c5ade79eb81 100644
--- a/init/main.c
+++ b/init/main.c
@@ -24,6 +24,7 @@
24#include <linux/smp_lock.h> 24#include <linux/smp_lock.h>
25#include <linux/initrd.h> 25#include <linux/initrd.h>
26#include <linux/bootmem.h> 26#include <linux/bootmem.h>
27#include <linux/acpi.h>
27#include <linux/tty.h> 28#include <linux/tty.h>
28#include <linux/gfp.h> 29#include <linux/gfp.h>
29#include <linux/percpu.h> 30#include <linux/percpu.h>
@@ -88,11 +89,6 @@ extern void sbus_init(void);
88extern void prio_tree_init(void); 89extern void prio_tree_init(void);
89extern void radix_tree_init(void); 90extern void radix_tree_init(void);
90extern void free_initmem(void); 91extern void free_initmem(void);
91#ifdef CONFIG_ACPI
92extern void acpi_early_init(void);
93#else
94static inline void acpi_early_init(void) { }
95#endif
96#ifndef CONFIG_DEBUG_RODATA 92#ifndef CONFIG_DEBUG_RODATA
97static inline void mark_rodata_ro(void) { } 93static inline void mark_rodata_ro(void) { }
98#endif 94#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 0a32cb21ec97..780c8dcf4516 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -69,7 +69,7 @@ obj-$(CONFIG_IKCONFIG) += configs.o
69obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o 69obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
70obj-$(CONFIG_STOP_MACHINE) += stop_machine.o 70obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
71obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o 71obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
72obj-$(CONFIG_AUDIT) += audit.o auditfilter.o 72obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o
73obj-$(CONFIG_AUDITSYSCALL) += auditsc.o 73obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
74obj-$(CONFIG_GCOV_KERNEL) += gcov/ 74obj-$(CONFIG_GCOV_KERNEL) += gcov/
75obj-$(CONFIG_AUDIT_TREE) += audit_tree.o 75obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
diff --git a/kernel/audit.c b/kernel/audit.c
index 9442c3533ba9..defc2e6f1e3b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -115,9 +115,6 @@ static atomic_t audit_lost = ATOMIC_INIT(0);
 /* The netlink socket. */
 static struct sock *audit_sock;
 
-/* Inotify handle. */
-struct inotify_handle *audit_ih;
-
 /* Hash for inode-based rules */
 struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
 
@@ -136,7 +133,7 @@ static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
 static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
 
 /* Serialize requests from userspace. */
-static DEFINE_MUTEX(audit_cmd_mutex);
+DEFINE_MUTEX(audit_cmd_mutex);
 
 /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting
  * audit records.  Since printk uses a 1024 byte buffer, this buffer
@@ -375,6 +372,25 @@ static void audit_hold_skb(struct sk_buff *skb)
 		kfree_skb(skb);
 }
 
+/*
+ * For one reason or another this nlh isn't getting delivered to the userspace
+ * audit daemon, just send it to printk.
+ */
+static void audit_printk_skb(struct sk_buff *skb)
+{
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
+	char *data = NLMSG_DATA(nlh);
+
+	if (nlh->nlmsg_type != AUDIT_EOE) {
+		if (printk_ratelimit())
+			printk(KERN_NOTICE "type=%d %s\n", nlh->nlmsg_type, data);
+		else
+			audit_log_lost("printk limit exceeded\n");
+	}
+
+	audit_hold_skb(skb);
+}
+
 static void kauditd_send_skb(struct sk_buff *skb)
 {
 	int err;
@@ -427,14 +443,8 @@ static int kauditd_thread(void *dummy)
 		if (skb) {
 			if (audit_pid)
 				kauditd_send_skb(skb);
-			else {
-				if (printk_ratelimit())
-					printk(KERN_NOTICE "%s\n", skb->data + NLMSG_SPACE(0));
-				else
-					audit_log_lost("printk limit exceeded\n");
-
-				audit_hold_skb(skb);
-			}
+			else
+				audit_printk_skb(skb);
 		} else {
 			DECLARE_WAITQUEUE(wait, current);
 			set_current_state(TASK_INTERRUPTIBLE);
@@ -495,42 +505,25 @@ int audit_send_list(void *_dest)
 	return 0;
 }
 
-#ifdef CONFIG_AUDIT_TREE
-static int prune_tree_thread(void *unused)
-{
-	mutex_lock(&audit_cmd_mutex);
-	audit_prune_trees();
-	mutex_unlock(&audit_cmd_mutex);
-	return 0;
-}
-
-void audit_schedule_prune(void)
-{
-	kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
-}
-#endif
-
 struct sk_buff *audit_make_reply(int pid, int seq, int type, int done,
 				 int multi, void *payload, int size)
 {
 	struct sk_buff *skb;
 	struct nlmsghdr *nlh;
-	int len = NLMSG_SPACE(size);
 	void *data;
 	int flags = multi ? NLM_F_MULTI : 0;
 	int t = done ? NLMSG_DONE : type;
 
-	skb = alloc_skb(len, GFP_KERNEL);
+	skb = nlmsg_new(size, GFP_KERNEL);
 	if (!skb)
 		return NULL;
 
-	nlh = NLMSG_PUT(skb, pid, seq, t, size);
-	nlh->nlmsg_flags = flags;
-	data = NLMSG_DATA(nlh);
+	nlh = NLMSG_NEW(skb, pid, seq, t, size, flags);
+	data = NLMSG_DATA(nlh);
 	memcpy(data, payload, size);
 	return skb;
 
-nlmsg_failure:			/* Used by NLMSG_PUT */
+nlmsg_failure:			/* Used by NLMSG_NEW */
 	if (skb)
 		kfree_skb(skb);
 	return NULL;
@@ -926,28 +919,29 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 }
 
 /*
- * Get message from skb (based on rtnetlink_rcv_skb).  Each message is
- * processed by audit_receive_msg.  Malformed skbs with wrong length are
- * discarded silently.
+ * Get message from skb.  Each message is processed by audit_receive_msg.
+ * Malformed skbs with wrong length are discarded silently.
  */
 static void audit_receive_skb(struct sk_buff *skb)
 {
-	int err;
-	struct nlmsghdr *nlh;
-	u32 rlen;
+	struct nlmsghdr *nlh;
+	/*
+	 * len MUST be signed for NLMSG_NEXT to be able to dec it below 0
+	 * if the nlmsg_len was not aligned
+	 */
+	int len;
+	int err;
 
-	while (skb->len >= NLMSG_SPACE(0)) {
-		nlh = nlmsg_hdr(skb);
-		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
-			return;
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		if ((err = audit_receive_msg(skb, nlh))) {
+	nlh = nlmsg_hdr(skb);
+	len = skb->len;
+
+	while (NLMSG_OK(nlh, len)) {
+		err = audit_receive_msg(skb, nlh);
+		/* if err or if this message says it wants a response */
+		if (err || (nlh->nlmsg_flags & NLM_F_ACK))
 			netlink_ack(skb, nlh, err);
-		} else if (nlh->nlmsg_flags & NLM_F_ACK)
-			netlink_ack(skb, nlh, 0);
-		skb_pull(skb, rlen);
+
+		nlh = NLMSG_NEXT(nlh, len);
 	}
 }
 
@@ -959,13 +953,6 @@ static void audit_receive(struct sk_buff *skb)
 	mutex_unlock(&audit_cmd_mutex);
 }
 
-#ifdef CONFIG_AUDITSYSCALL
-static const struct inotify_operations audit_inotify_ops = {
-	.handle_event	= audit_handle_ievent,
-	.destroy_watch	= audit_free_parent,
-};
-#endif
-
 /* Initialize audit support at boot time. */
 static int __init audit_init(void)
 {
@@ -991,12 +978,6 @@ static int __init audit_init(void)
 
 	audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
 
-#ifdef CONFIG_AUDITSYSCALL
-	audit_ih = inotify_init(&audit_inotify_ops);
-	if (IS_ERR(audit_ih))
-		audit_panic("cannot initialize inotify handle");
-#endif
-
 	for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
 		INIT_LIST_HEAD(&audit_inode_hash[i]);
 
@@ -1070,18 +1051,20 @@ static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx,
 		goto err;
 	}
 
-	ab->skb = alloc_skb(AUDIT_BUFSIZ, gfp_mask);
-	if (!ab->skb)
-		goto err;
-
 	ab->ctx = ctx;
 	ab->gfp_mask = gfp_mask;
-	nlh = (struct nlmsghdr *)skb_put(ab->skb, NLMSG_SPACE(0));
-	nlh->nlmsg_type = type;
-	nlh->nlmsg_flags = 0;
-	nlh->nlmsg_pid = 0;
-	nlh->nlmsg_seq = 0;
+
+	ab->skb = nlmsg_new(AUDIT_BUFSIZ, gfp_mask);
+	if (!ab->skb)
+		goto nlmsg_failure;
+
+	nlh = NLMSG_NEW(ab->skb, 0, 0, type, 0, 0);
+
 	return ab;
+
+nlmsg_failure:			/* Used by NLMSG_NEW */
+	kfree_skb(ab->skb);
+	ab->skb = NULL;
 err:
 	audit_buffer_free(ab);
 	return NULL;
@@ -1452,6 +1435,15 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
 	kfree(pathname);
 }
 
+void audit_log_key(struct audit_buffer *ab, char *key)
+{
+	audit_log_format(ab, " key=");
+	if (key)
+		audit_log_untrustedstring(ab, key);
+	else
+		audit_log_format(ab, "(null)");
+}
+
 /**
  * audit_log_end - end one audit record
  * @ab: the audit_buffer
@@ -1475,15 +1467,7 @@ void audit_log_end(struct audit_buffer *ab)
 			skb_queue_tail(&audit_skb_queue, ab->skb);
 			wake_up_interruptible(&kauditd_wait);
 		} else {
-			if (nlh->nlmsg_type != AUDIT_EOE) {
-				if (printk_ratelimit()) {
-					printk(KERN_NOTICE "type=%d %s\n",
-						nlh->nlmsg_type,
-						ab->skb->data + NLMSG_SPACE(0));
-				} else
-					audit_log_lost("printk limit exceeded\n");
-			}
-			audit_hold_skb(ab->skb);
+			audit_printk_skb(ab->skb);
 		}
 		ab->skb = NULL;
 	}
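The reworked audit_receive_skb() above walks the datagram with the standard NLMSG_OK()/NLMSG_NEXT() helpers rather than open-coded length arithmetic, and the new comment about a signed len is the crux: NLMSG_NEXT() subtracts the aligned message length and can drive len negative on a malformed tail, which NLMSG_OK() then rejects. A minimal userspace sketch of the same walk, assuming only the uapi netlink macros (the message types and payload sizes here are fabricated):

```c
#include <stdio.h>
#include <string.h>
#include <linux/netlink.h>

int main(void)
{
	char buf[256];
	memset(buf, 0, sizeof(buf));

	/* Build two back-to-back messages in one buffer. */
	struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
	nlh->nlmsg_len = NLMSG_LENGTH(4);
	nlh->nlmsg_type = 1000;
	struct nlmsghdr *nlh2 = (struct nlmsghdr *)(buf + NLMSG_ALIGN(nlh->nlmsg_len));
	nlh2->nlmsg_len = NLMSG_LENGTH(8);
	nlh2->nlmsg_type = 1001;

	/* len must be signed: NLMSG_NEXT() decrements it and can go below 0
	 * when nlmsg_len is not aligned, which NLMSG_OK() then rejects. */
	int len = NLMSG_ALIGN(nlh->nlmsg_len) + NLMSG_ALIGN(nlh2->nlmsg_len);
	for (nlh = (struct nlmsghdr *)buf; NLMSG_OK(nlh, len);
	     nlh = NLMSG_NEXT(nlh, len))
		printf("type=%u len=%u\n", nlh->nlmsg_type, nlh->nlmsg_len);

	return 0;
}
```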
diff --git a/kernel/audit.h b/kernel/audit.h
index 16f18cac661b..208687be4f30 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -53,18 +53,7 @@ enum audit_state {
 };
 
 /* Rule lists */
-struct audit_parent;
-
-struct audit_watch {
-	atomic_t		count;	/* reference count */
-	char			*path;	/* insertion path */
-	dev_t			dev;	/* associated superblock device */
-	unsigned long		ino;	/* associated inode number */
-	struct audit_parent	*parent; /* associated parent */
-	struct list_head	wlist;	/* entry in parent->watches list */
-	struct list_head	rules;	/* associated rules */
-};
-
+struct audit_watch;
 struct audit_tree;
 struct audit_chunk;
 
@@ -108,19 +97,28 @@ struct audit_netlink_list {
 
 int audit_send_list(void *);
 
-struct inotify_watch;
-/* Inotify handle */
-extern struct inotify_handle *audit_ih;
-
-extern void audit_free_parent(struct inotify_watch *);
-extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32,
-				const char *, struct inode *);
 extern int selinux_audit_rule_update(void);
 
 extern struct mutex audit_filter_mutex;
 extern void audit_free_rule_rcu(struct rcu_head *);
 extern struct list_head audit_filter_list[];
 
+/* audit watch functions */
+extern unsigned long audit_watch_inode(struct audit_watch *watch);
+extern dev_t audit_watch_dev(struct audit_watch *watch);
+extern void audit_put_watch(struct audit_watch *watch);
+extern void audit_get_watch(struct audit_watch *watch);
+extern int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op);
+extern int audit_add_watch(struct audit_krule *krule);
+extern void audit_remove_watch(struct audit_watch *watch);
+extern void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list);
+extern void audit_inotify_unregister(struct list_head *in_list);
+extern char *audit_watch_path(struct audit_watch *watch);
+extern struct list_head *audit_watch_rules(struct audit_watch *watch);
+
+extern struct audit_entry *audit_dupe_rule(struct audit_krule *old,
+					   struct audit_watch *watch);
+
 #ifdef CONFIG_AUDIT_TREE
 extern struct audit_chunk *audit_tree_lookup(const struct inode *);
 extern void audit_put_chunk(struct audit_chunk *);
@@ -130,10 +128,9 @@ extern int audit_add_tree_rule(struct audit_krule *);
 extern int audit_remove_tree_rule(struct audit_krule *);
 extern void audit_trim_trees(void);
 extern int audit_tag_tree(char *old, char *new);
-extern void audit_schedule_prune(void);
-extern void audit_prune_trees(void);
 extern const char *audit_tree_path(struct audit_tree *);
 extern void audit_put_tree(struct audit_tree *);
+extern void audit_kill_trees(struct list_head *);
 #else
 #define audit_remove_tree_rule(rule) BUG()
 #define audit_add_tree_rule(rule) -EINVAL
@@ -142,6 +139,7 @@ extern void audit_put_tree(struct audit_tree *);
 #define audit_put_tree(tree) (void)0
 #define audit_tag_tree(old, new) -EINVAL
 #define audit_tree_path(rule) ""	/* never called */
+#define audit_kill_trees(list) BUG()
 #endif
 
 extern char *audit_unpack_string(void **, size_t *, size_t);
@@ -160,7 +158,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
 	return 0;
 }
 extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
+extern struct list_head *audit_killed_trees(void);
 #else
 #define audit_signal_info(s,t) AUDIT_DISABLED
 #define audit_filter_inodes(t,c) AUDIT_DISABLED
 #endif
+
+extern struct mutex audit_cmd_mutex;
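The header now exports struct audit_watch only as a forward declaration plus accessors such as audit_watch_path() and audit_watch_inode(); the layout itself moves into kernel/audit_watch.c. A compact sketch of that opaque-handle idiom, collapsed into one translation unit for readability (the widget type and all names are invented):

```c
#include <stdio.h>

/* "Header" part: users see only a forward declaration and accessors. */
struct widget;
const char *widget_name(const struct widget *w);

/* "Implementation" part: the full layout stays private to this unit. */
struct widget {
	const char *name;
	int refcount;
};

const char *widget_name(const struct widget *w)
{
	return w->name;
}

int main(void)
{
	struct widget w = { "disk0", 1 };
	/* Callers outside the implementation never touch w.name directly;
	 * they use the accessor, so the layout can change freely. */
	printf("%s\n", widget_name(&w));
	return 0;
}
```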
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 1f6396d76687..2451dc6f3282 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -2,6 +2,7 @@
 #include <linux/inotify.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/kthread.h>
 
 struct audit_tree;
 struct audit_chunk;
@@ -441,13 +442,11 @@ static void kill_rules(struct audit_tree *tree)
 		if (rule->tree) {
 			/* not a half-baked one */
 			ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-			audit_log_format(ab, "op=remove rule dir=");
+			audit_log_format(ab, "op=");
+			audit_log_string(ab, "remove rule");
+			audit_log_format(ab, " dir=");
 			audit_log_untrustedstring(ab, rule->tree->pathname);
-			if (rule->filterkey) {
-				audit_log_format(ab, " key=");
-				audit_log_untrustedstring(ab, rule->filterkey);
-			} else
-				audit_log_format(ab, " key=(null)");
+			audit_log_key(ab, rule->filterkey);
 			audit_log_format(ab, " list=%d res=1", rule->listnr);
 			audit_log_end(ab);
 			rule->tree = NULL;
@@ -519,6 +518,8 @@ static void trim_marked(struct audit_tree *tree)
 	}
 }
 
+static void audit_schedule_prune(void);
+
 /* called with audit_filter_mutex */
 int audit_remove_tree_rule(struct audit_krule *rule)
 {
@@ -824,10 +825,11 @@ int audit_tag_tree(char *old, char *new)
 
 /*
  * That gets run when evict_chunk() ends up needing to kill audit_tree.
- * Runs from a separate thread, with audit_cmd_mutex held.
+ * Runs from a separate thread.
  */
-void audit_prune_trees(void)
+static int prune_tree_thread(void *unused)
 {
+	mutex_lock(&audit_cmd_mutex);
 	mutex_lock(&audit_filter_mutex);
 
 	while (!list_empty(&prune_list)) {
@@ -844,6 +846,40 @@ void audit_prune_trees(void)
 	}
 
 	mutex_unlock(&audit_filter_mutex);
+	mutex_unlock(&audit_cmd_mutex);
+	return 0;
+}
+
+static void audit_schedule_prune(void)
+{
+	kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
+}
+
+/*
+ * ... and that one is done if evict_chunk() decides to delay until the end
+ * of syscall.  Runs synchronously.
+ */
+void audit_kill_trees(struct list_head *list)
+{
+	mutex_lock(&audit_cmd_mutex);
+	mutex_lock(&audit_filter_mutex);
+
+	while (!list_empty(list)) {
+		struct audit_tree *victim;
+
+		victim = list_entry(list->next, struct audit_tree, list);
+		kill_rules(victim);
+		list_del_init(&victim->list);
+
+		mutex_unlock(&audit_filter_mutex);
+
+		prune_one(victim);
+
+		mutex_lock(&audit_filter_mutex);
+	}
+
+	mutex_unlock(&audit_filter_mutex);
+	mutex_unlock(&audit_cmd_mutex);
 }
 
 /*
@@ -854,6 +890,8 @@ void audit_prune_trees(void)
 static void evict_chunk(struct audit_chunk *chunk)
 {
 	struct audit_tree *owner;
+	struct list_head *postponed = audit_killed_trees();
+	int need_prune = 0;
 	int n;
 
 	if (chunk->dead)
@@ -869,15 +907,21 @@ static void evict_chunk(struct audit_chunk *chunk)
 		owner->root = NULL;
 		list_del_init(&owner->same_root);
 		spin_unlock(&hash_lock);
-		kill_rules(owner);
-		list_move(&owner->list, &prune_list);
-		audit_schedule_prune();
+		if (!postponed) {
+			kill_rules(owner);
+			list_move(&owner->list, &prune_list);
+			need_prune = 1;
+		} else {
+			list_move(&owner->list, postponed);
+		}
 		spin_lock(&hash_lock);
 	}
 	list_del_rcu(&chunk->hash);
 	for (n = 0; n < chunk->count; n++)
 		list_del_init(&chunk->owners[n].list);
 	spin_unlock(&hash_lock);
+	if (need_prune)
+		audit_schedule_prune();
 	mutex_unlock(&audit_filter_mutex);
 }
 
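prune_tree_thread() plus audit_schedule_prune() above form a fire-and-forget worker: a one-shot thread takes audit_cmd_mutex and audit_filter_mutex, drains prune_list, and exits, while audit_kill_trees() does the same work synchronously for trees postponed to syscall exit. A userspace sketch of the scheduled variant using pthreads (the list, lock, and function names are invented; build with -pthread):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t filter_mutex = PTHREAD_MUTEX_INITIALIZER;
static int prune_list[8] = { 3, 1, 4 };
static int prune_count = 3;

/* One-shot worker: drain the list under the lock, then exit. */
static void *prune_thread(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&filter_mutex);
	while (prune_count > 0)
		printf("pruning %d\n", prune_list[--prune_count]);
	pthread_mutex_unlock(&filter_mutex);
	return NULL;
}

/* Analog of audit_schedule_prune(): kick the worker and don't wait. */
static void schedule_prune(void)
{
	pthread_t t;
	pthread_create(&t, NULL, prune_thread, NULL);
	pthread_detach(t);
}

int main(void)
{
	schedule_prune();
	pthread_exit(NULL);   /* let the detached worker finish */
}
```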
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
new file mode 100644
index 000000000000..0e96dbc60ea9
--- /dev/null
+++ b/kernel/audit_watch.c
@@ -0,0 +1,543 @@
+/* audit_watch.c -- watching inodes
+ *
+ * Copyright 2003-2009 Red Hat, Inc.
+ * Copyright 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright 2005 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/audit.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/netlink.h>
+#include <linux/sched.h>
+#include <linux/inotify.h>
+#include <linux/security.h>
+#include "audit.h"
+
+/*
+ * Reference counting:
+ *
+ * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
+ * 	event.  Each audit_watch holds a reference to its associated parent.
+ *
+ * audit_watch: if added to lists, lifetime is from audit_init_watch() to
+ * 	audit_remove_watch().  Additionally, an audit_watch may exist
+ * 	temporarily to assist in searching existing filter data.  Each
+ * 	audit_krule holds a reference to its associated watch.
+ */
+
+struct audit_watch {
+	atomic_t		count;	/* reference count */
+	char			*path;	/* insertion path */
+	dev_t			dev;	/* associated superblock device */
+	unsigned long		ino;	/* associated inode number */
+	struct audit_parent	*parent; /* associated parent */
+	struct list_head	wlist;	/* entry in parent->watches list */
+	struct list_head	rules;	/* associated rules */
+};
+
+struct audit_parent {
+	struct list_head	ilist;	/* entry in inotify registration list */
+	struct list_head	watches; /* associated watches */
+	struct inotify_watch	wdata;	/* inotify watch data */
+	unsigned		flags;	/* status flags */
+};
+
+/* Inotify handle. */
+struct inotify_handle *audit_ih;
+
+/*
+ * audit_parent status flags:
+ *
+ * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
+ * a filesystem event to ensure we're adding audit watches to a valid parent.
+ * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
+ * receive them while we have nameidata, but must be used for IN_MOVE_SELF which
+ * we can receive while holding nameidata.
+ */
+#define AUDIT_PARENT_INVALID	0x001
+
+/* Inotify events we care about. */
+#define AUDIT_IN_WATCH	IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
+
+static void audit_free_parent(struct inotify_watch *i_watch)
+{
+	struct audit_parent *parent;
+
+	parent = container_of(i_watch, struct audit_parent, wdata);
+	WARN_ON(!list_empty(&parent->watches));
+	kfree(parent);
+}
+
+void audit_get_watch(struct audit_watch *watch)
+{
+	atomic_inc(&watch->count);
+}
+
+void audit_put_watch(struct audit_watch *watch)
+{
+	if (atomic_dec_and_test(&watch->count)) {
+		WARN_ON(watch->parent);
+		WARN_ON(!list_empty(&watch->rules));
+		kfree(watch->path);
+		kfree(watch);
+	}
+}
+
+void audit_remove_watch(struct audit_watch *watch)
+{
+	list_del(&watch->wlist);
+	put_inotify_watch(&watch->parent->wdata);
+	watch->parent = NULL;
+	audit_put_watch(watch); /* match initial get */
+}
+
+char *audit_watch_path(struct audit_watch *watch)
+{
+	return watch->path;
+}
+
+struct list_head *audit_watch_rules(struct audit_watch *watch)
+{
+	return &watch->rules;
+}
+
+unsigned long audit_watch_inode(struct audit_watch *watch)
+{
+	return watch->ino;
+}
+
+dev_t audit_watch_dev(struct audit_watch *watch)
+{
+	return watch->dev;
+}
+
+/* Initialize a parent watch entry. */
+static struct audit_parent *audit_init_parent(struct nameidata *ndp)
+{
+	struct audit_parent *parent;
+	s32 wd;
+
+	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
+	if (unlikely(!parent))
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&parent->watches);
+	parent->flags = 0;
+
+	inotify_init_watch(&parent->wdata);
+	/* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
+	get_inotify_watch(&parent->wdata);
+	wd = inotify_add_watch(audit_ih, &parent->wdata,
+			       ndp->path.dentry->d_inode, AUDIT_IN_WATCH);
+	if (wd < 0) {
+		audit_free_parent(&parent->wdata);
+		return ERR_PTR(wd);
+	}
+
+	return parent;
+}
+
+/* Initialize a watch entry. */
+static struct audit_watch *audit_init_watch(char *path)
+{
+	struct audit_watch *watch;
+
+	watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+	if (unlikely(!watch))
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&watch->rules);
+	atomic_set(&watch->count, 1);
+	watch->path = path;
+	watch->dev = (dev_t)-1;
+	watch->ino = (unsigned long)-1;
+
+	return watch;
+}
+
+/* Translate a watch string to kernel respresentation. */
+int audit_to_watch(struct audit_krule *krule, char *path, int len, u32 op)
+{
+	struct audit_watch *watch;
+
+	if (!audit_ih)
+		return -EOPNOTSUPP;
+
+	if (path[0] != '/' || path[len-1] == '/' ||
+	    krule->listnr != AUDIT_FILTER_EXIT ||
+	    op != Audit_equal ||
+	    krule->inode_f || krule->watch || krule->tree)
+		return -EINVAL;
+
+	watch = audit_init_watch(path);
+	if (IS_ERR(watch))
+		return PTR_ERR(watch);
+
+	audit_get_watch(watch);
+	krule->watch = watch;
+
+	return 0;
+}
+
+/* Duplicate the given audit watch.  The new watch's rules list is initialized
+ * to an empty list and wlist is undefined. */
+static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
+{
+	char *path;
+	struct audit_watch *new;
+
+	path = kstrdup(old->path, GFP_KERNEL);
+	if (unlikely(!path))
+		return ERR_PTR(-ENOMEM);
+
+	new = audit_init_watch(path);
+	if (IS_ERR(new)) {
+		kfree(path);
+		goto out;
+	}
+
+	new->dev = old->dev;
+	new->ino = old->ino;
+	get_inotify_watch(&old->parent->wdata);
+	new->parent = old->parent;
+
+out:
+	return new;
+}
+
+static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watch *w, char *op)
+{
+	if (audit_enabled) {
+		struct audit_buffer *ab;
+		ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+		audit_log_format(ab, "auid=%u ses=%u op=",
+				 audit_get_loginuid(current),
+				 audit_get_sessionid(current));
+		audit_log_string(ab, op);
+		audit_log_format(ab, " path=");
+		audit_log_untrustedstring(ab, w->path);
+		audit_log_key(ab, r->filterkey);
+		audit_log_format(ab, " list=%d res=1", r->listnr);
+		audit_log_end(ab);
+	}
+}
+
+/* Update inode info in audit rules based on filesystem event. */
+static void audit_update_watch(struct audit_parent *parent,
+			       const char *dname, dev_t dev,
+			       unsigned long ino, unsigned invalidating)
+{
+	struct audit_watch *owatch, *nwatch, *nextw;
+	struct audit_krule *r, *nextr;
+	struct audit_entry *oentry, *nentry;
+
+	mutex_lock(&audit_filter_mutex);
+	list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
+		if (audit_compare_dname_path(dname, owatch->path, NULL))
+			continue;
+
+		/* If the update involves invalidating rules, do the inode-based
+		 * filtering now, so we don't omit records. */
+		if (invalidating && current->audit_context)
+			audit_filter_inodes(current, current->audit_context);
+
+		nwatch = audit_dupe_watch(owatch);
+		if (IS_ERR(nwatch)) {
+			mutex_unlock(&audit_filter_mutex);
+			audit_panic("error updating watch, skipping");
+			return;
+		}
+		nwatch->dev = dev;
+		nwatch->ino = ino;
+
+		list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
+
+			oentry = container_of(r, struct audit_entry, rule);
+			list_del(&oentry->rule.rlist);
+			list_del_rcu(&oentry->list);
+
+			nentry = audit_dupe_rule(&oentry->rule, nwatch);
+			if (IS_ERR(nentry)) {
+				list_del(&oentry->rule.list);
+				audit_panic("error updating watch, removing");
+			} else {
+				int h = audit_hash_ino((u32)ino);
+				list_add(&nentry->rule.rlist, &nwatch->rules);
+				list_add_rcu(&nentry->list, &audit_inode_hash[h]);
+				list_replace(&oentry->rule.list,
+					     &nentry->rule.list);
+			}
+
+			audit_watch_log_rule_change(r, owatch, "updated rules");
+
+			call_rcu(&oentry->rcu, audit_free_rule_rcu);
+		}
+
+		audit_remove_watch(owatch);
+		goto add_watch_to_parent; /* event applies to a single watch */
+	}
+	mutex_unlock(&audit_filter_mutex);
+	return;
+
+add_watch_to_parent:
+	list_add(&nwatch->wlist, &parent->watches);
+	mutex_unlock(&audit_filter_mutex);
+	return;
+}
+
+/* Remove all watches & rules associated with a parent that is going away. */
+static void audit_remove_parent_watches(struct audit_parent *parent)
+{
+	struct audit_watch *w, *nextw;
+	struct audit_krule *r, *nextr;
+	struct audit_entry *e;
+
+	mutex_lock(&audit_filter_mutex);
+	parent->flags |= AUDIT_PARENT_INVALID;
+	list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
+		list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
+			e = container_of(r, struct audit_entry, rule);
+			audit_watch_log_rule_change(r, w, "remove rule");
+			list_del(&r->rlist);
+			list_del(&r->list);
+			list_del_rcu(&e->list);
+			call_rcu(&e->rcu, audit_free_rule_rcu);
+		}
+		audit_remove_watch(w);
+	}
+	mutex_unlock(&audit_filter_mutex);
+}
+
+/* Unregister inotify watches for parents on in_list.
+ * Generates an IN_IGNORED event. */
+void audit_inotify_unregister(struct list_head *in_list)
+{
+	struct audit_parent *p, *n;
+
+	list_for_each_entry_safe(p, n, in_list, ilist) {
+		list_del(&p->ilist);
+		inotify_rm_watch(audit_ih, &p->wdata);
+		/* the unpin matching the pin in audit_do_del_rule() */
+		unpin_inotify_watch(&p->wdata);
+	}
+}
+
+/* Get path information necessary for adding watches. */
+static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw)
+{
+	struct nameidata *ndparent, *ndwatch;
+	int err;
+
+	ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
+	if (unlikely(!ndparent))
+		return -ENOMEM;
+
+	ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
+	if (unlikely(!ndwatch)) {
+		kfree(ndparent);
+		return -ENOMEM;
+	}
+
+	err = path_lookup(path, LOOKUP_PARENT, ndparent);
+	if (err) {
+		kfree(ndparent);
+		kfree(ndwatch);
+		return err;
+	}
+
+	err = path_lookup(path, 0, ndwatch);
+	if (err) {
+		kfree(ndwatch);
+		ndwatch = NULL;
+	}
+
+	*ndp = ndparent;
+	*ndw = ndwatch;
+
+	return 0;
+}
+
+/* Release resources used for watch path information. */
+static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
+{
+	if (ndp) {
+		path_put(&ndp->path);
+		kfree(ndp);
+	}
+	if (ndw) {
+		path_put(&ndw->path);
+		kfree(ndw);
+	}
+}
+
+/* Associate the given rule with an existing parent inotify_watch.
+ * Caller must hold audit_filter_mutex. */
+static void audit_add_to_parent(struct audit_krule *krule,
+				struct audit_parent *parent)
+{
+	struct audit_watch *w, *watch = krule->watch;
+	int watch_found = 0;
+
+	list_for_each_entry(w, &parent->watches, wlist) {
+		if (strcmp(watch->path, w->path))
+			continue;
+
+		watch_found = 1;
+
+		/* put krule's and initial refs to temporary watch */
+		audit_put_watch(watch);
+		audit_put_watch(watch);
+
+		audit_get_watch(w);
+		krule->watch = watch = w;
+		break;
+	}
+
+	if (!watch_found) {
+		get_inotify_watch(&parent->wdata);
+		watch->parent = parent;
+
+		list_add(&watch->wlist, &parent->watches);
+	}
+	list_add(&krule->rlist, &watch->rules);
+}
+
+/* Find a matching watch entry, or add this one.
+ * Caller must hold audit_filter_mutex. */
+int audit_add_watch(struct audit_krule *krule)
+{
+	struct audit_watch *watch = krule->watch;
+	struct inotify_watch *i_watch;
+	struct audit_parent *parent;
+	struct nameidata *ndp = NULL, *ndw = NULL;
+	int ret = 0;
+
+	mutex_unlock(&audit_filter_mutex);
+
+	/* Avoid calling path_lookup under audit_filter_mutex. */
+	ret = audit_get_nd(watch->path, &ndp, &ndw);
+	if (ret) {
+		/* caller expects mutex locked */
+		mutex_lock(&audit_filter_mutex);
+		goto error;
+	}
+
+	/* update watch filter fields */
+	if (ndw) {
+		watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
+		watch->ino = ndw->path.dentry->d_inode->i_ino;
+	}
+
+	/* The audit_filter_mutex must not be held during inotify calls because
+	 * we hold it during inotify event callback processing.  If an existing
+	 * inotify watch is found, inotify_find_watch() grabs a reference before
+	 * returning.
+	 */
+	if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode,
+			       &i_watch) < 0) {
+		parent = audit_init_parent(ndp);
+		if (IS_ERR(parent)) {
+			/* caller expects mutex locked */
+			mutex_lock(&audit_filter_mutex);
+			ret = PTR_ERR(parent);
+			goto error;
+		}
+	} else
+		parent = container_of(i_watch, struct audit_parent, wdata);
+
+	mutex_lock(&audit_filter_mutex);
+
+	/* parent was moved before we took audit_filter_mutex */
+	if (parent->flags & AUDIT_PARENT_INVALID)
+		ret = -ENOENT;
+	else
+		audit_add_to_parent(krule, parent);
+
+	/* match get in audit_init_parent or inotify_find_watch */
+	put_inotify_watch(&parent->wdata);
+
+error:
+	audit_put_nd(ndp, ndw);		/* NULL args OK */
+	return ret;
+
+}
+
+void audit_remove_watch_rule(struct audit_krule *krule, struct list_head *list)
+{
+	struct audit_watch *watch = krule->watch;
+	struct audit_parent *parent = watch->parent;
+
+	list_del(&krule->rlist);
+
+	if (list_empty(&watch->rules)) {
+		audit_remove_watch(watch);
+
+		if (list_empty(&parent->watches)) {
+			/* Put parent on the inotify un-registration
+			 * list.  Grab a reference before releasing
+			 * audit_filter_mutex, to be released in
+			 * audit_inotify_unregister().
+			 * If filesystem is going away, just leave
+			 * the sucker alone, eviction will take
+			 * care of it. */
+			if (pin_inotify_watch(&parent->wdata))
+				list_add(&parent->ilist, list);
+		}
+	}
+}
+
+/* Update watch data in audit rules based on inotify events. */
+static void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
+				u32 cookie, const char *dname, struct inode *inode)
+{
+	struct audit_parent *parent;
+
+	parent = container_of(i_watch, struct audit_parent, wdata);
+
+	if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
+		audit_update_watch(parent, dname, inode->i_sb->s_dev,
+				   inode->i_ino, 0);
+	else if (mask & (IN_DELETE|IN_MOVED_FROM))
+		audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
+	/* inotify automatically removes the watch and sends IN_IGNORED */
+	else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
+		audit_remove_parent_watches(parent);
+	/* inotify does not remove the watch, so remove it manually */
+	else if(mask & IN_MOVE_SELF) {
+		audit_remove_parent_watches(parent);
+		inotify_remove_watch_locked(audit_ih, i_watch);
+	} else if (mask & IN_IGNORED)
+		put_inotify_watch(i_watch);
+}
+
+static const struct inotify_operations audit_inotify_ops = {
+	.handle_event	= audit_handle_ievent,
+	.destroy_watch	= audit_free_parent,
+};
+
+static int __init audit_watch_init(void)
+{
+	audit_ih = inotify_init(&audit_inotify_ops);
+	if (IS_ERR(audit_ih))
+		audit_panic("cannot initialize inotify handle");
+	return 0;
+}
+subsys_initcall(audit_watch_init);
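The new file's behaviour hangs on the reference-counting rules spelled out in its opening comment: audit_get_watch() bumps an atomic count, audit_put_watch() frees the watch when the last reference drops, and each krule holds its own reference. A self-contained userspace rendering of that get/put discipline with C11 atomics (the type and field names are invented):

```c
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

struct watch {
	atomic_int count;
	char *path;
};

static struct watch *watch_new(const char *path)
{
	struct watch *w = calloc(1, sizeof(*w));
	atomic_init(&w->count, 1);   /* the creator holds the first reference */
	w->path = strdup(path);
	return w;
}

static void watch_get(struct watch *w)
{
	atomic_fetch_add(&w->count, 1);
}

static void watch_put(struct watch *w)
{
	/* atomic_fetch_sub returns the old value: free on the last put. */
	if (atomic_fetch_sub(&w->count, 1) == 1) {
		free(w->path);
		free(w);
	}
}

int main(void)
{
	struct watch *w = watch_new("/etc/passwd");
	watch_get(w);   /* a rule takes its own reference */
	watch_put(w);   /* the rule drops it */
	watch_put(w);   /* initial reference: the object is freed here */
	return 0;
}
```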
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 713098ee5a02..a70604047f3c 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -27,7 +27,6 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/netlink.h> 28#include <linux/netlink.h>
29#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/inotify.h>
31#include <linux/security.h> 30#include <linux/security.h>
32#include "audit.h" 31#include "audit.h"
33 32
@@ -44,36 +43,6 @@
44 * be written directly provided audit_filter_mutex is held. 43 * be written directly provided audit_filter_mutex is held.
45 */ 44 */
46 45
47/*
48 * Reference counting:
49 *
50 * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
51 * event. Each audit_watch holds a reference to its associated parent.
52 *
53 * audit_watch: if added to lists, lifetime is from audit_init_watch() to
54 * audit_remove_watch(). Additionally, an audit_watch may exist
55 * temporarily to assist in searching existing filter data. Each
56 * audit_krule holds a reference to its associated watch.
57 */
58
59struct audit_parent {
60 struct list_head ilist; /* entry in inotify registration list */
61 struct list_head watches; /* associated watches */
62 struct inotify_watch wdata; /* inotify watch data */
63 unsigned flags; /* status flags */
64};
65
66/*
67 * audit_parent status flags:
68 *
69 * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
70 * a filesystem event to ensure we're adding audit watches to a valid parent.
71 * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
72 * receive them while we have nameidata, but must be used for IN_MOVE_SELF which
73 * we can receive while holding nameidata.
74 */
75#define AUDIT_PARENT_INVALID 0x001
76
77/* Audit filter lists, defined in <linux/audit.h> */ 46/* Audit filter lists, defined in <linux/audit.h> */
78struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { 47struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
79 LIST_HEAD_INIT(audit_filter_list[0]), 48 LIST_HEAD_INIT(audit_filter_list[0]),
@@ -97,41 +66,6 @@ static struct list_head audit_rules_list[AUDIT_NR_FILTERS] = {
97 66
98DEFINE_MUTEX(audit_filter_mutex); 67DEFINE_MUTEX(audit_filter_mutex);
99 68
100/* Inotify events we care about. */
101#define AUDIT_IN_WATCH IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF
102
103void audit_free_parent(struct inotify_watch *i_watch)
104{
105 struct audit_parent *parent;
106
107 parent = container_of(i_watch, struct audit_parent, wdata);
108 WARN_ON(!list_empty(&parent->watches));
109 kfree(parent);
110}
111
112static inline void audit_get_watch(struct audit_watch *watch)
113{
114 atomic_inc(&watch->count);
115}
116
117static void audit_put_watch(struct audit_watch *watch)
118{
119 if (atomic_dec_and_test(&watch->count)) {
120 WARN_ON(watch->parent);
121 WARN_ON(!list_empty(&watch->rules));
122 kfree(watch->path);
123 kfree(watch);
124 }
125}
126
127static void audit_remove_watch(struct audit_watch *watch)
128{
129 list_del(&watch->wlist);
130 put_inotify_watch(&watch->parent->wdata);
131 watch->parent = NULL;
132 audit_put_watch(watch); /* match initial get */
133}
134
135static inline void audit_free_rule(struct audit_entry *e) 69static inline void audit_free_rule(struct audit_entry *e)
136{ 70{
137 int i; 71 int i;
@@ -156,50 +90,6 @@ void audit_free_rule_rcu(struct rcu_head *head)
156 audit_free_rule(e); 90 audit_free_rule(e);
157} 91}
158 92
159/* Initialize a parent watch entry. */
160static struct audit_parent *audit_init_parent(struct nameidata *ndp)
161{
162 struct audit_parent *parent;
163 s32 wd;
164
165 parent = kzalloc(sizeof(*parent), GFP_KERNEL);
166 if (unlikely(!parent))
167 return ERR_PTR(-ENOMEM);
168
169 INIT_LIST_HEAD(&parent->watches);
170 parent->flags = 0;
171
172 inotify_init_watch(&parent->wdata);
173 /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
174 get_inotify_watch(&parent->wdata);
175 wd = inotify_add_watch(audit_ih, &parent->wdata,
176 ndp->path.dentry->d_inode, AUDIT_IN_WATCH);
177 if (wd < 0) {
178 audit_free_parent(&parent->wdata);
179 return ERR_PTR(wd);
180 }
181
182 return parent;
183}
184
185/* Initialize a watch entry. */
186static struct audit_watch *audit_init_watch(char *path)
187{
188 struct audit_watch *watch;
189
190 watch = kzalloc(sizeof(*watch), GFP_KERNEL);
191 if (unlikely(!watch))
192 return ERR_PTR(-ENOMEM);
193
194 INIT_LIST_HEAD(&watch->rules);
195 atomic_set(&watch->count, 1);
196 watch->path = path;
197 watch->dev = (dev_t)-1;
198 watch->ino = (unsigned long)-1;
199
200 return watch;
201}
202
203/* Initialize an audit filterlist entry. */ 93/* Initialize an audit filterlist entry. */
204static inline struct audit_entry *audit_init_entry(u32 field_count) 94static inline struct audit_entry *audit_init_entry(u32 field_count)
205{ 95{
@@ -260,31 +150,6 @@ static inline int audit_to_inode(struct audit_krule *krule,
260 return 0; 150 return 0;
261} 151}
262 152
263/* Translate a watch string to kernel respresentation. */
264static int audit_to_watch(struct audit_krule *krule, char *path, int len,
265 u32 op)
266{
267 struct audit_watch *watch;
268
269 if (!audit_ih)
270 return -EOPNOTSUPP;
271
272 if (path[0] != '/' || path[len-1] == '/' ||
273 krule->listnr != AUDIT_FILTER_EXIT ||
274 op != Audit_equal ||
275 krule->inode_f || krule->watch || krule->tree)
276 return -EINVAL;
277
278 watch = audit_init_watch(path);
279 if (IS_ERR(watch))
280 return PTR_ERR(watch);
281
282 audit_get_watch(watch);
283 krule->watch = watch;
284
285 return 0;
286}
287
288static __u32 *classes[AUDIT_SYSCALL_CLASSES]; 153static __u32 *classes[AUDIT_SYSCALL_CLASSES];
289 154
290int __init audit_register_class(int class, unsigned *list) 155int __init audit_register_class(int class, unsigned *list)
@@ -766,7 +631,8 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
766 break; 631 break;
767 case AUDIT_WATCH: 632 case AUDIT_WATCH:
768 data->buflen += data->values[i] = 633 data->buflen += data->values[i] =
769 audit_pack_string(&bufp, krule->watch->path); 634 audit_pack_string(&bufp,
635 audit_watch_path(krule->watch));
770 break; 636 break;
771 case AUDIT_DIR: 637 case AUDIT_DIR:
772 data->buflen += data->values[i] = 638 data->buflen += data->values[i] =
@@ -818,7 +684,8 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
818 return 1; 684 return 1;
819 break; 685 break;
820 case AUDIT_WATCH: 686 case AUDIT_WATCH:
821 if (strcmp(a->watch->path, b->watch->path)) 687 if (strcmp(audit_watch_path(a->watch),
688 audit_watch_path(b->watch)))
822 return 1; 689 return 1;
823 break; 690 break;
824 case AUDIT_DIR: 691 case AUDIT_DIR:
@@ -844,32 +711,6 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
844 return 0; 711 return 0;
845} 712}
846 713
847/* Duplicate the given audit watch. The new watch's rules list is initialized
848 * to an empty list and wlist is undefined. */
849static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
850{
851 char *path;
852 struct audit_watch *new;
853
854 path = kstrdup(old->path, GFP_KERNEL);
855 if (unlikely(!path))
856 return ERR_PTR(-ENOMEM);
857
858 new = audit_init_watch(path);
859 if (IS_ERR(new)) {
860 kfree(path);
861 goto out;
862 }
863
864 new->dev = old->dev;
865 new->ino = old->ino;
866 get_inotify_watch(&old->parent->wdata);
867 new->parent = old->parent;
868
869out:
870 return new;
871}
872
873/* Duplicate LSM field information. The lsm_rule is opaque, so must be 714/* Duplicate LSM field information. The lsm_rule is opaque, so must be
874 * re-initialized. */ 715 * re-initialized. */
875static inline int audit_dupe_lsm_field(struct audit_field *df, 716static inline int audit_dupe_lsm_field(struct audit_field *df,
@@ -904,8 +745,8 @@ static inline int audit_dupe_lsm_field(struct audit_field *df,
904 * rule with the new rule in the filterlist, then free the old rule. 745 * rule with the new rule in the filterlist, then free the old rule.
905 * The rlist element is undefined; list manipulations are handled apart from 746 * The rlist element is undefined; list manipulations are handled apart from
906 * the initial copy. */ 747 * the initial copy. */
907static struct audit_entry *audit_dupe_rule(struct audit_krule *old, 748struct audit_entry *audit_dupe_rule(struct audit_krule *old,
908 struct audit_watch *watch) 749 struct audit_watch *watch)
909{ 750{
910 u32 fcount = old->field_count; 751 u32 fcount = old->field_count;
911 struct audit_entry *entry; 752 struct audit_entry *entry;
@@ -977,137 +818,6 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
977 return entry; 818 return entry;
978} 819}
979 820
980/* Update inode info in audit rules based on filesystem event. */
981static void audit_update_watch(struct audit_parent *parent,
982 const char *dname, dev_t dev,
983 unsigned long ino, unsigned invalidating)
984{
985 struct audit_watch *owatch, *nwatch, *nextw;
986 struct audit_krule *r, *nextr;
987 struct audit_entry *oentry, *nentry;
988
989 mutex_lock(&audit_filter_mutex);
990 list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
991 if (audit_compare_dname_path(dname, owatch->path, NULL))
992 continue;
993
994 /* If the update involves invalidating rules, do the inode-based
995 * filtering now, so we don't omit records. */
996 if (invalidating && current->audit_context)
997 audit_filter_inodes(current, current->audit_context);
998
999 nwatch = audit_dupe_watch(owatch);
1000 if (IS_ERR(nwatch)) {
1001 mutex_unlock(&audit_filter_mutex);
1002 audit_panic("error updating watch, skipping");
1003 return;
1004 }
1005 nwatch->dev = dev;
1006 nwatch->ino = ino;
1007
1008 list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
1009
1010 oentry = container_of(r, struct audit_entry, rule);
1011 list_del(&oentry->rule.rlist);
1012 list_del_rcu(&oentry->list);
1013
1014 nentry = audit_dupe_rule(&oentry->rule, nwatch);
1015 if (IS_ERR(nentry)) {
1016 list_del(&oentry->rule.list);
1017 audit_panic("error updating watch, removing");
1018 } else {
1019 int h = audit_hash_ino((u32)ino);
1020 list_add(&nentry->rule.rlist, &nwatch->rules);
1021 list_add_rcu(&nentry->list, &audit_inode_hash[h]);
1022 list_replace(&oentry->rule.list,
1023 &nentry->rule.list);
1024 }
1025
1026 call_rcu(&oentry->rcu, audit_free_rule_rcu);
1027 }
1028
1029 if (audit_enabled) {
1030 struct audit_buffer *ab;
1031 ab = audit_log_start(NULL, GFP_NOFS,
1032 AUDIT_CONFIG_CHANGE);
1033 audit_log_format(ab, "auid=%u ses=%u",
1034 audit_get_loginuid(current),
1035 audit_get_sessionid(current));
1036 audit_log_format(ab,
1037 " op=updated rules specifying path=");
1038 audit_log_untrustedstring(ab, owatch->path);
1039 audit_log_format(ab, " with dev=%u ino=%lu\n",
1040 dev, ino);
1041 audit_log_format(ab, " list=%d res=1", r->listnr);
1042 audit_log_end(ab);
1043 }
1044 audit_remove_watch(owatch);
1045 goto add_watch_to_parent; /* event applies to a single watch */
1046 }
1047 mutex_unlock(&audit_filter_mutex);
1048 return;
1049
1050add_watch_to_parent:
1051 list_add(&nwatch->wlist, &parent->watches);
1052 mutex_unlock(&audit_filter_mutex);
1053 return;
1054}
1055
1056/* Remove all watches & rules associated with a parent that is going away. */
1057static void audit_remove_parent_watches(struct audit_parent *parent)
1058{
1059 struct audit_watch *w, *nextw;
1060 struct audit_krule *r, *nextr;
1061 struct audit_entry *e;
1062
1063 mutex_lock(&audit_filter_mutex);
1064 parent->flags |= AUDIT_PARENT_INVALID;
1065 list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
1066 list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
1067 e = container_of(r, struct audit_entry, rule);
1068 if (audit_enabled) {
1069 struct audit_buffer *ab;
1070 ab = audit_log_start(NULL, GFP_NOFS,
1071 AUDIT_CONFIG_CHANGE);
1072 audit_log_format(ab, "auid=%u ses=%u",
1073 audit_get_loginuid(current),
1074 audit_get_sessionid(current));
1075 audit_log_format(ab, " op=remove rule path=");
1076 audit_log_untrustedstring(ab, w->path);
1077 if (r->filterkey) {
1078 audit_log_format(ab, " key=");
1079 audit_log_untrustedstring(ab,
1080 r->filterkey);
1081 } else
1082 audit_log_format(ab, " key=(null)");
1083 audit_log_format(ab, " list=%d res=1",
1084 r->listnr);
1085 audit_log_end(ab);
1086 }
1087 list_del(&r->rlist);
1088 list_del(&r->list);
1089 list_del_rcu(&e->list);
1090 call_rcu(&e->rcu, audit_free_rule_rcu);
1091 }
1092 audit_remove_watch(w);
1093 }
1094 mutex_unlock(&audit_filter_mutex);
1095}
1096
1097/* Unregister inotify watches for parents on in_list.
1098 * Generates an IN_IGNORED event. */
1099static void audit_inotify_unregister(struct list_head *in_list)
1100{
1101 struct audit_parent *p, *n;
1102
1103 list_for_each_entry_safe(p, n, in_list, ilist) {
1104 list_del(&p->ilist);
1105 inotify_rm_watch(audit_ih, &p->wdata);
1106 /* the unpin matching the pin in audit_do_del_rule() */
1107 unpin_inotify_watch(&p->wdata);
1108 }
1109}
1110
1111/* Find an existing audit rule. 821/* Find an existing audit rule.
1112 * Caller must hold audit_filter_mutex to prevent stale rule data. */ 822 * Caller must hold audit_filter_mutex to prevent stale rule data. */
1113static struct audit_entry *audit_find_rule(struct audit_entry *entry, 823static struct audit_entry *audit_find_rule(struct audit_entry *entry,
@@ -1145,134 +855,6 @@ out:
1145 return found; 855 return found;
1146} 856}
1147 857
1148/* Get path information necessary for adding watches. */
1149static int audit_get_nd(char *path, struct nameidata **ndp,
1150 struct nameidata **ndw)
1151{
1152 struct nameidata *ndparent, *ndwatch;
1153 int err;
1154
1155 ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
1156 if (unlikely(!ndparent))
1157 return -ENOMEM;
1158
1159 ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
1160 if (unlikely(!ndwatch)) {
1161 kfree(ndparent);
1162 return -ENOMEM;
1163 }
1164
1165 err = path_lookup(path, LOOKUP_PARENT, ndparent);
1166 if (err) {
1167 kfree(ndparent);
1168 kfree(ndwatch);
1169 return err;
1170 }
1171
1172 err = path_lookup(path, 0, ndwatch);
1173 if (err) {
1174 kfree(ndwatch);
1175 ndwatch = NULL;
1176 }
1177
1178 *ndp = ndparent;
1179 *ndw = ndwatch;
1180
1181 return 0;
1182}
1183
1184/* Release resources used for watch path information. */
1185static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
1186{
1187 if (ndp) {
1188 path_put(&ndp->path);
1189 kfree(ndp);
1190 }
1191 if (ndw) {
1192 path_put(&ndw->path);
1193 kfree(ndw);
1194 }
1195}
1196
1197/* Associate the given rule with an existing parent inotify_watch.
1198 * Caller must hold audit_filter_mutex. */
1199static void audit_add_to_parent(struct audit_krule *krule,
1200 struct audit_parent *parent)
1201{
1202 struct audit_watch *w, *watch = krule->watch;
1203 int watch_found = 0;
1204
1205 list_for_each_entry(w, &parent->watches, wlist) {
1206 if (strcmp(watch->path, w->path))
1207 continue;
1208
1209 watch_found = 1;
1210
1211 /* put krule's and initial refs to temporary watch */
1212 audit_put_watch(watch);
1213 audit_put_watch(watch);
1214
1215 audit_get_watch(w);
1216 krule->watch = watch = w;
1217 break;
1218 }
1219
1220 if (!watch_found) {
1221 get_inotify_watch(&parent->wdata);
1222 watch->parent = parent;
1223
1224 list_add(&watch->wlist, &parent->watches);
1225 }
1226 list_add(&krule->rlist, &watch->rules);
1227}
1228
1229/* Find a matching watch entry, or add this one.
1230 * Caller must hold audit_filter_mutex. */
1231static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
1232 struct nameidata *ndw)
1233{
1234 struct audit_watch *watch = krule->watch;
1235 struct inotify_watch *i_watch;
1236 struct audit_parent *parent;
1237 int ret = 0;
1238
1239 /* update watch filter fields */
1240 if (ndw) {
1241 watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
1242 watch->ino = ndw->path.dentry->d_inode->i_ino;
1243 }
1244
1245 /* The audit_filter_mutex must not be held during inotify calls because
1246 * we hold it during inotify event callback processing. If an existing
1247 * inotify watch is found, inotify_find_watch() grabs a reference before
1248 * returning.
1249 */
1250 mutex_unlock(&audit_filter_mutex);
1251
1252 if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode,
1253 &i_watch) < 0) {
1254 parent = audit_init_parent(ndp);
1255 if (IS_ERR(parent)) {
1256 /* caller expects mutex locked */
1257 mutex_lock(&audit_filter_mutex);
1258 return PTR_ERR(parent);
1259 }
1260 } else
1261 parent = container_of(i_watch, struct audit_parent, wdata);
1262
1263 mutex_lock(&audit_filter_mutex);
1264
1265 /* parent was moved before we took audit_filter_mutex */
1266 if (parent->flags & AUDIT_PARENT_INVALID)
1267 ret = -ENOENT;
1268 else
1269 audit_add_to_parent(krule, parent);
1270
1271 /* match get in audit_init_parent or inotify_find_watch */
1272 put_inotify_watch(&parent->wdata);
1273 return ret;
1274}
1275
1276static u64 prio_low = ~0ULL/2; 858static u64 prio_low = ~0ULL/2;
1277static u64 prio_high = ~0ULL/2 - 1; 859static u64 prio_high = ~0ULL/2 - 1;
1278 860
@@ -1282,7 +864,6 @@ static inline int audit_add_rule(struct audit_entry *entry)
1282 struct audit_entry *e; 864 struct audit_entry *e;
1283 struct audit_watch *watch = entry->rule.watch; 865 struct audit_watch *watch = entry->rule.watch;
1284 struct audit_tree *tree = entry->rule.tree; 866 struct audit_tree *tree = entry->rule.tree;
1285 struct nameidata *ndp = NULL, *ndw = NULL;
1286 struct list_head *list; 867 struct list_head *list;
1287 int h, err; 868 int h, err;
1288#ifdef CONFIG_AUDITSYSCALL 869#ifdef CONFIG_AUDITSYSCALL
@@ -1296,8 +877,8 @@ static inline int audit_add_rule(struct audit_entry *entry)
1296 877
1297 mutex_lock(&audit_filter_mutex); 878 mutex_lock(&audit_filter_mutex);
1298 e = audit_find_rule(entry, &list); 879 e = audit_find_rule(entry, &list);
1299 mutex_unlock(&audit_filter_mutex);
1300 if (e) { 880 if (e) {
881 mutex_unlock(&audit_filter_mutex);
1301 err = -EEXIST; 882 err = -EEXIST;
1302 /* normally audit_add_tree_rule() will free it on failure */ 883 /* normally audit_add_tree_rule() will free it on failure */
1303 if (tree) 884 if (tree)
@@ -1305,22 +886,16 @@ static inline int audit_add_rule(struct audit_entry *entry)
1305 goto error; 886 goto error;
1306 } 887 }
1307 888
1308 /* Avoid calling path_lookup under audit_filter_mutex. */
1309 if (watch) {
1310 err = audit_get_nd(watch->path, &ndp, &ndw);
1311 if (err)
1312 goto error;
1313 }
1314
1315 mutex_lock(&audit_filter_mutex);
1316 if (watch) { 889 if (watch) {
1317 /* audit_filter_mutex is dropped and re-taken during this call */ 890 /* audit_filter_mutex is dropped and re-taken during this call */
1318 err = audit_add_watch(&entry->rule, ndp, ndw); 891 err = audit_add_watch(&entry->rule);
1319 if (err) { 892 if (err) {
1320 mutex_unlock(&audit_filter_mutex); 893 mutex_unlock(&audit_filter_mutex);
1321 goto error; 894 goto error;
1322 } 895 }
1323 h = audit_hash_ino((u32)watch->ino); 896 /* entry->rule.watch may have changed during audit_add_watch() */
897 watch = entry->rule.watch;
898 h = audit_hash_ino((u32)audit_watch_inode(watch));
1324 list = &audit_inode_hash[h]; 899 list = &audit_inode_hash[h];
1325 } 900 }
1326 if (tree) { 901 if (tree) {
@@ -1358,11 +933,9 @@ static inline int audit_add_rule(struct audit_entry *entry)
1358#endif 933#endif
1359 mutex_unlock(&audit_filter_mutex); 934 mutex_unlock(&audit_filter_mutex);
1360 935
1361 audit_put_nd(ndp, ndw); /* NULL args OK */
1362 return 0; 936 return 0;
1363 937
1364error: 938error:
1365 audit_put_nd(ndp, ndw); /* NULL args OK */
1366 if (watch) 939 if (watch)
1367 audit_put_watch(watch); /* tmp watch, matches initial get */ 940 audit_put_watch(watch); /* tmp watch, matches initial get */
1368 return err; 941 return err;
@@ -1372,7 +945,7 @@ error:
1372static inline int audit_del_rule(struct audit_entry *entry) 945static inline int audit_del_rule(struct audit_entry *entry)
1373{ 946{
1374 struct audit_entry *e; 947 struct audit_entry *e;
1375 struct audit_watch *watch, *tmp_watch = entry->rule.watch; 948 struct audit_watch *watch = entry->rule.watch;
1376 struct audit_tree *tree = entry->rule.tree; 949 struct audit_tree *tree = entry->rule.tree;
1377 struct list_head *list; 950 struct list_head *list;
1378 LIST_HEAD(inotify_list); 951 LIST_HEAD(inotify_list);
@@ -1394,29 +967,8 @@ static inline int audit_del_rule(struct audit_entry *entry)
1394 goto out; 967 goto out;
1395 } 968 }
1396 969
1397 watch = e->rule.watch; 970 if (e->rule.watch)
1398 if (watch) { 971 audit_remove_watch_rule(&e->rule, &inotify_list);
1399 struct audit_parent *parent = watch->parent;
1400
1401 list_del(&e->rule.rlist);
1402
1403 if (list_empty(&watch->rules)) {
1404 audit_remove_watch(watch);
1405
1406 if (list_empty(&parent->watches)) {
1407 /* Put parent on the inotify un-registration
1408 * list. Grab a reference before releasing
1409 * audit_filter_mutex, to be released in
1410 * audit_inotify_unregister().
1411 * If filesystem is going away, just leave
1412 * the sucker alone, eviction will take
1413 * care of it.
1414 */
1415 if (pin_inotify_watch(&parent->wdata))
1416 list_add(&parent->ilist, &inotify_list);
1417 }
1418 }
1419 }
1420 972
1421 if (e->rule.tree) 973 if (e->rule.tree)
1422 audit_remove_tree_rule(&e->rule); 974 audit_remove_tree_rule(&e->rule);
@@ -1438,8 +990,8 @@ static inline int audit_del_rule(struct audit_entry *entry)
1438 audit_inotify_unregister(&inotify_list); 990 audit_inotify_unregister(&inotify_list);
1439 991
1440out: 992out:
1441 if (tmp_watch) 993 if (watch)
1442 audit_put_watch(tmp_watch); /* match initial get */ 994 audit_put_watch(watch); /* match initial get */
1443 if (tree) 995 if (tree)
1444 audit_put_tree(tree); /* that's the temporary one */ 996 audit_put_tree(tree); /* that's the temporary one */
1445 997
@@ -1527,11 +1079,9 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid,
1527 security_release_secctx(ctx, len); 1079 security_release_secctx(ctx, len);
1528 } 1080 }
1529 } 1081 }
1530 audit_log_format(ab, " op=%s rule key=", action); 1082 audit_log_format(ab, " op=");
1531 if (rule->filterkey) 1083 audit_log_string(ab, action);
1532 audit_log_untrustedstring(ab, rule->filterkey); 1084 audit_log_key(ab, rule->filterkey);
1533 else
1534 audit_log_format(ab, "(null)");
1535 audit_log_format(ab, " list=%d res=%d", rule->listnr, res); 1085 audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
1536 audit_log_end(ab); 1086 audit_log_end(ab);
1537} 1087}
@@ -1595,7 +1145,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
1595 return PTR_ERR(entry); 1145 return PTR_ERR(entry);
1596 1146
1597 err = audit_add_rule(entry); 1147 err = audit_add_rule(entry);
1598 audit_log_rule_change(loginuid, sessionid, sid, "add", 1148 audit_log_rule_change(loginuid, sessionid, sid, "add rule",
1599 &entry->rule, !err); 1149 &entry->rule, !err);
1600 1150
1601 if (err) 1151 if (err)
@@ -1611,7 +1161,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
1611 return PTR_ERR(entry); 1161 return PTR_ERR(entry);
1612 1162
1613 err = audit_del_rule(entry); 1163 err = audit_del_rule(entry);
1614 audit_log_rule_change(loginuid, sessionid, sid, "remove", 1164 audit_log_rule_change(loginuid, sessionid, sid, "remove rule",
1615 &entry->rule, !err); 1165 &entry->rule, !err);
1616 1166
1617 audit_free_rule(entry); 1167 audit_free_rule(entry);
@@ -1793,7 +1343,7 @@ static int update_lsm_rule(struct audit_krule *r)
1793 list_del(&r->list); 1343 list_del(&r->list);
1794 } else { 1344 } else {
1795 if (watch) { 1345 if (watch) {
1796 list_add(&nentry->rule.rlist, &watch->rules); 1346 list_add(&nentry->rule.rlist, audit_watch_rules(watch));
1797 list_del(&r->rlist); 1347 list_del(&r->rlist);
1798 } else if (tree) 1348 } else if (tree)
1799 list_replace_init(&r->rlist, &nentry->rule.rlist); 1349 list_replace_init(&r->rlist, &nentry->rule.rlist);
@@ -1829,27 +1379,3 @@ int audit_update_lsm_rules(void)
1829 1379
1830 return err; 1380 return err;
1831} 1381}
1832
1833/* Update watch data in audit rules based on inotify events. */
1834void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
1835 u32 cookie, const char *dname, struct inode *inode)
1836{
1837 struct audit_parent *parent;
1838
1839 parent = container_of(i_watch, struct audit_parent, wdata);
1840
1841 if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
1842 audit_update_watch(parent, dname, inode->i_sb->s_dev,
1843 inode->i_ino, 0);
1844 else if (mask & (IN_DELETE|IN_MOVED_FROM))
1845 audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
1846 /* inotify automatically removes the watch and sends IN_IGNORED */
1847 else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
1848 audit_remove_parent_watches(parent);
1849 /* inotify does not remove the watch, so remove it manually */
1850 else if(mask & IN_MOVE_SELF) {
1851 audit_remove_parent_watches(parent);
1852 inotify_remove_watch_locked(audit_ih, i_watch);
1853 } else if (mask & IN_IGNORED)
1854 put_inotify_watch(i_watch);
1855}
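
[Annotation] The auditfilter.c hunks above replace direct field access (watch->ino, watch->dev, watch->rules) with audit_watch_inode(), audit_watch_dev() and audit_watch_rules(), hiding the watch internals behind the new audit_watch code. The accessors are presumably trivial getters along these lines — a sketch, not the actual implementation:

        /* Sketch only: struct audit_watch becomes private, so the
         * rule-management code reaches its fields through getters. */
        unsigned long audit_watch_inode(struct audit_watch *watch)
        {
                return watch->ino;
        }

        dev_t audit_watch_dev(struct audit_watch *watch)
        {
                return watch->dev;
        }

        struct list_head *audit_watch_rules(struct audit_watch *watch)
        {
                return &watch->rules;
        }
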
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7d6ac7c1f414..68d3c6a0ecd6 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -199,6 +199,7 @@ struct audit_context {
199 199
200 struct audit_tree_refs *trees, *first_trees; 200 struct audit_tree_refs *trees, *first_trees;
201 int tree_count; 201 int tree_count;
202 struct list_head killed_trees;
202 203
203 int type; 204 int type;
204 union { 205 union {
@@ -548,9 +549,9 @@ static int audit_filter_rules(struct task_struct *tsk,
548 } 549 }
549 break; 550 break;
550 case AUDIT_WATCH: 551 case AUDIT_WATCH:
551 if (name && rule->watch->ino != (unsigned long)-1) 552 if (name && audit_watch_inode(rule->watch) != (unsigned long)-1)
552 result = (name->dev == rule->watch->dev && 553 result = (name->dev == audit_watch_dev(rule->watch) &&
553 name->ino == rule->watch->ino); 554 name->ino == audit_watch_inode(rule->watch));
554 break; 555 break;
555 case AUDIT_DIR: 556 case AUDIT_DIR:
556 if (ctx) 557 if (ctx)
@@ -853,6 +854,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
853 if (!(context = kmalloc(sizeof(*context), GFP_KERNEL))) 854 if (!(context = kmalloc(sizeof(*context), GFP_KERNEL)))
854 return NULL; 855 return NULL;
855 audit_zero_context(context, state); 856 audit_zero_context(context, state);
857 INIT_LIST_HEAD(&context->killed_trees);
856 return context; 858 return context;
857} 859}
858 860
@@ -1024,8 +1026,8 @@ static int audit_log_single_execve_arg(struct audit_context *context,
1024{ 1026{
1025 char arg_num_len_buf[12]; 1027 char arg_num_len_buf[12];
1026 const char __user *tmp_p = p; 1028 const char __user *tmp_p = p;
1027 /* how many digits are in arg_num? 3 is the length of " a=" */ 1029 /* how many digits are in arg_num? 5 is the length of ' a=""' */
1028 size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 3; 1030 size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 5;
1029 size_t len, len_left, to_send; 1031 size_t len, len_left, to_send;
1030 size_t max_execve_audit_len = MAX_EXECVE_AUDIT_LEN; 1032 size_t max_execve_audit_len = MAX_EXECVE_AUDIT_LEN;
1031 unsigned int i, has_cntl = 0, too_long = 0; 1033 unsigned int i, has_cntl = 0, too_long = 0;
@@ -1137,7 +1139,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
1137 if (has_cntl) 1139 if (has_cntl)
1138 audit_log_n_hex(*ab, buf, to_send); 1140 audit_log_n_hex(*ab, buf, to_send);
1139 else 1141 else
1140 audit_log_format(*ab, "\"%s\"", buf); 1142 audit_log_string(*ab, buf);
1141 1143
1142 p += to_send; 1144 p += to_send;
1143 len_left -= to_send; 1145 len_left -= to_send;
@@ -1372,11 +1374,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
1372 1374
1373 1375
1374 audit_log_task_info(ab, tsk); 1376 audit_log_task_info(ab, tsk);
1375 if (context->filterkey) { 1377 audit_log_key(ab, context->filterkey);
1376 audit_log_format(ab, " key=");
1377 audit_log_untrustedstring(ab, context->filterkey);
1378 } else
1379 audit_log_format(ab, " key=(null)");
1380 audit_log_end(ab); 1378 audit_log_end(ab);
1381 1379
1382 for (aux = context->aux; aux; aux = aux->next) { 1380 for (aux = context->aux; aux; aux = aux->next) {
@@ -1549,6 +1547,8 @@ void audit_free(struct task_struct *tsk)
1549 /* that can happen only if we are called from do_exit() */ 1547 /* that can happen only if we are called from do_exit() */
1550 if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT) 1548 if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT)
1551 audit_log_exit(context, tsk); 1549 audit_log_exit(context, tsk);
1550 if (!list_empty(&context->killed_trees))
1551 audit_kill_trees(&context->killed_trees);
1552 1552
1553 audit_free_context(context); 1553 audit_free_context(context);
1554} 1554}
@@ -1692,6 +1692,9 @@ void audit_syscall_exit(int valid, long return_code)
1692 context->in_syscall = 0; 1692 context->in_syscall = 0;
1693 context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; 1693 context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
1694 1694
1695 if (!list_empty(&context->killed_trees))
1696 audit_kill_trees(&context->killed_trees);
1697
1695 if (context->previous) { 1698 if (context->previous) {
1696 struct audit_context *new_context = context->previous; 1699 struct audit_context *new_context = context->previous;
1697 context->previous = NULL; 1700 context->previous = NULL;
@@ -2525,3 +2528,11 @@ void audit_core_dumps(long signr)
2525 audit_log_format(ab, " sig=%ld", signr); 2528 audit_log_format(ab, " sig=%ld", signr);
2526 audit_log_end(ab); 2529 audit_log_end(ab);
2527} 2530}
2531
2532struct list_head *audit_killed_trees(void)
2533{
2534 struct audit_context *ctx = current->audit_context;
2535 if (likely(!ctx || !ctx->in_syscall))
2536 return NULL;
2537 return &ctx->killed_trees;
2538}
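
[Annotation] The auditsc.c hunks add a per-context killed_trees list: initialized in audit_alloc_context(), drained by audit_kill_trees() at syscall exit and in audit_free(), and exposed to the tree code through audit_killed_trees(), which hands the list out only while a syscall is in flight. A minimal sketch of the drain step, assuming (not shown in this diff) that audit_kill_trees() just walks the queue and releases each tree:

        /* Sketch (assumption): consume trees queued for destruction at a
         * safe point, in process context, with no locks held. The .list
         * member and the final-put helper are illustrative names. */
        static void drain_killed_trees(struct list_head *list)
        {
                while (!list_empty(list)) {
                        struct audit_tree *tree =
                                list_first_entry(list, struct audit_tree, list);
                        list_del_init(&tree->list);
                        audit_put_tree(tree);   /* hypothetical final release */
                }
        }
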
diff --git a/kernel/futex.c b/kernel/futex.c
index 80b5ce716596..794c862125fe 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -284,6 +284,25 @@ void put_futex_key(int fshared, union futex_key *key)
284 drop_futex_key_refs(key); 284 drop_futex_key_refs(key);
285} 285}
286 286
287/*
288 * fault_in_user_writeable - fault in user address and verify RW access
289 * @uaddr: pointer to faulting user space address
290 *
291 * Slow path to fixup the fault we just took in the atomic write
292 * access to @uaddr.
293 *
294 * We have no generic implementation of a non destructive write to the
295 * user address. We know that we faulted in the atomic pagefault
296 * disabled section so we can as well avoid the #PF overhead by
297 * calling get_user_pages() right away.
298 */
299static int fault_in_user_writeable(u32 __user *uaddr)
300{
301 int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
302 1, 1, 0, NULL, NULL);
303 return ret < 0 ? ret : 0;
304}
305
287/** 306/**
288 * futex_top_waiter() - Return the highest priority waiter on a futex 307 * futex_top_waiter() - Return the highest priority waiter on a futex
289 * @hb: the hash bucket the futex_q's reside in 308 * @hb: the hash bucket the futex_q's reside in
@@ -896,7 +915,6 @@ retry:
896retry_private: 915retry_private:
897 op_ret = futex_atomic_op_inuser(op, uaddr2); 916 op_ret = futex_atomic_op_inuser(op, uaddr2);
898 if (unlikely(op_ret < 0)) { 917 if (unlikely(op_ret < 0)) {
899 u32 dummy;
900 918
901 double_unlock_hb(hb1, hb2); 919 double_unlock_hb(hb1, hb2);
902 920
@@ -914,7 +932,7 @@ retry_private:
914 goto out_put_keys; 932 goto out_put_keys;
915 } 933 }
916 934
917 ret = get_user(dummy, uaddr2); 935 ret = fault_in_user_writeable(uaddr2);
918 if (ret) 936 if (ret)
919 goto out_put_keys; 937 goto out_put_keys;
920 938
@@ -1204,7 +1222,7 @@ retry_private:
1204 double_unlock_hb(hb1, hb2); 1222 double_unlock_hb(hb1, hb2);
1205 put_futex_key(fshared, &key2); 1223 put_futex_key(fshared, &key2);
1206 put_futex_key(fshared, &key1); 1224 put_futex_key(fshared, &key1);
1207 ret = get_user(curval2, uaddr2); 1225 ret = fault_in_user_writeable(uaddr2);
1208 if (!ret) 1226 if (!ret)
1209 goto retry; 1227 goto retry;
1210 goto out; 1228 goto out;
@@ -1482,7 +1500,7 @@ retry:
1482handle_fault: 1500handle_fault:
1483 spin_unlock(q->lock_ptr); 1501 spin_unlock(q->lock_ptr);
1484 1502
1485 ret = get_user(uval, uaddr); 1503 ret = fault_in_user_writeable(uaddr);
1486 1504
1487 spin_lock(q->lock_ptr); 1505 spin_lock(q->lock_ptr);
1488 1506
@@ -1807,7 +1825,6 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1807{ 1825{
1808 struct hrtimer_sleeper timeout, *to = NULL; 1826 struct hrtimer_sleeper timeout, *to = NULL;
1809 struct futex_hash_bucket *hb; 1827 struct futex_hash_bucket *hb;
1810 u32 uval;
1811 struct futex_q q; 1828 struct futex_q q;
1812 int res, ret; 1829 int res, ret;
1813 1830
@@ -1909,16 +1926,9 @@ out:
1909 return ret != -EINTR ? ret : -ERESTARTNOINTR; 1926 return ret != -EINTR ? ret : -ERESTARTNOINTR;
1910 1927
1911uaddr_faulted: 1928uaddr_faulted:
1912 /*
1913 * We have to r/w *(int __user *)uaddr, and we have to modify it
1914 * atomically. Therefore, if we continue to fault after get_user()
1915 * below, we need to handle the fault ourselves, while still holding
1916 * the mmap_sem. This can occur if the uaddr is under contention as
1917 * we have to drop the mmap_sem in order to call get_user().
1918 */
1919 queue_unlock(&q, hb); 1929 queue_unlock(&q, hb);
1920 1930
1921 ret = get_user(uval, uaddr); 1931 ret = fault_in_user_writeable(uaddr);
1922 if (ret) 1932 if (ret)
1923 goto out_put_key; 1933 goto out_put_key;
1924 1934
@@ -2013,17 +2023,10 @@ out:
2013 return ret; 2023 return ret;
2014 2024
2015pi_faulted: 2025pi_faulted:
2016 /*
2017 * We have to r/w *(int __user *)uaddr, and we have to modify it
2018 * atomically. Therefore, if we continue to fault after get_user()
2019 * below, we need to handle the fault ourselves, while still holding
2020 * the mmap_sem. This can occur if the uaddr is under contention as
2021 * we have to drop the mmap_sem in order to call get_user().
2022 */
2023 spin_unlock(&hb->lock); 2026 spin_unlock(&hb->lock);
2024 put_futex_key(fshared, &key); 2027 put_futex_key(fshared, &key);
2025 2028
2026 ret = get_user(uval, uaddr); 2029 ret = fault_in_user_writeable(uaddr);
2027 if (!ret) 2030 if (!ret)
2028 goto retry; 2031 goto retry;
2029 2032
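
[Annotation] Every get_user()-based fault fixup in futex.c above becomes fault_in_user_writeable(). The rationale is in the new kerneldoc: the fault was taken during an atomic, pagefaults-disabled write, so rather than a dummy read (which may not establish write access), the slow path calls get_user_pages() with write=1 directly. The surrounding retry shape, reduced to a sketch — drop_locks()/retake_locks() are stand-ins for the queue_unlock()/queue_lock() pairs at the real call sites:

        for (;;) {
                ret = futex_atomic_op_inuser(op, uaddr);  /* pagefaults disabled */
                if (ret != -EFAULT)
                        break;
                drop_locks();
                ret = fault_in_user_writeable(uaddr);     /* GUP, write=1, force=0 */
                if (ret)
                        return ret;
                retake_locks();
        }
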
diff --git a/mm/memory.c b/mm/memory.c
index f46ac18ba231..65216194eb8d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1207,8 +1207,8 @@ static inline int use_zero_page(struct vm_area_struct *vma)
1207 1207
1208 1208
1209int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1209int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1210 unsigned long start, int len, int flags, 1210 unsigned long start, int nr_pages, int flags,
1211 struct page **pages, struct vm_area_struct **vmas) 1211 struct page **pages, struct vm_area_struct **vmas)
1212{ 1212{
1213 int i; 1213 int i;
1214 unsigned int vm_flags = 0; 1214 unsigned int vm_flags = 0;
@@ -1217,7 +1217,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1217 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); 1217 int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS);
1218 int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL); 1218 int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL);
1219 1219
1220 if (len <= 0) 1220 if (nr_pages <= 0)
1221 return 0; 1221 return 0;
1222 /* 1222 /*
1223 * Require read or write permissions. 1223 * Require read or write permissions.
@@ -1269,7 +1269,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1269 vmas[i] = gate_vma; 1269 vmas[i] = gate_vma;
1270 i++; 1270 i++;
1271 start += PAGE_SIZE; 1271 start += PAGE_SIZE;
1272 len--; 1272 nr_pages--;
1273 continue; 1273 continue;
1274 } 1274 }
1275 1275
@@ -1280,7 +1280,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1280 1280
1281 if (is_vm_hugetlb_page(vma)) { 1281 if (is_vm_hugetlb_page(vma)) {
1282 i = follow_hugetlb_page(mm, vma, pages, vmas, 1282 i = follow_hugetlb_page(mm, vma, pages, vmas,
1283 &start, &len, i, write); 1283 &start, &nr_pages, i, write);
1284 continue; 1284 continue;
1285 } 1285 }
1286 1286
@@ -1357,9 +1357,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1357 vmas[i] = vma; 1357 vmas[i] = vma;
1358 i++; 1358 i++;
1359 start += PAGE_SIZE; 1359 start += PAGE_SIZE;
1360 len--; 1360 nr_pages--;
1361 } while (len && start < vma->vm_end); 1361 } while (nr_pages && start < vma->vm_end);
1362 } while (len); 1362 } while (nr_pages);
1363 return i; 1363 return i;
1364} 1364}
1365 1365
@@ -1368,7 +1368,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1368 * @tsk: task_struct of target task 1368 * @tsk: task_struct of target task
1369 * @mm: mm_struct of target mm 1369 * @mm: mm_struct of target mm
1370 * @start: starting user address 1370 * @start: starting user address
1371 * @len: number of pages from start to pin 1371 * @nr_pages: number of pages from start to pin
1372 * @write: whether pages will be written to by the caller 1372 * @write: whether pages will be written to by the caller
1373 * @force: whether to force write access even if user mapping is 1373 * @force: whether to force write access even if user mapping is
1374 * readonly. This will result in the page being COWed even 1374 * readonly. This will result in the page being COWed even
@@ -1380,7 +1380,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1380 * Or NULL if the caller does not require them. 1380 * Or NULL if the caller does not require them.
1381 * 1381 *
1382 * Returns number of pages pinned. This may be fewer than the number 1382 * Returns number of pages pinned. This may be fewer than the number
1383 * requested. If len is 0 or negative, returns 0. If no pages 1383 * requested. If nr_pages is 0 or negative, returns 0. If no pages
1384 * were pinned, returns -errno. Each page returned must be released 1384 * were pinned, returns -errno. Each page returned must be released
1385 * with a put_page() call when it is finished with. vmas will only 1385 * with a put_page() call when it is finished with. vmas will only
1386 * remain valid while mmap_sem is held. 1386 * remain valid while mmap_sem is held.
@@ -1414,7 +1414,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1414 * See also get_user_pages_fast, for performance critical applications. 1414 * See also get_user_pages_fast, for performance critical applications.
1415 */ 1415 */
1416int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1416int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1417 unsigned long start, int len, int write, int force, 1417 unsigned long start, int nr_pages, int write, int force,
1418 struct page **pages, struct vm_area_struct **vmas) 1418 struct page **pages, struct vm_area_struct **vmas)
1419{ 1419{
1420 int flags = 0; 1420 int flags = 0;
@@ -1424,9 +1424,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1424 if (force) 1424 if (force)
1425 flags |= GUP_FLAGS_FORCE; 1425 flags |= GUP_FLAGS_FORCE;
1426 1426
1427 return __get_user_pages(tsk, mm, 1427 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
1428 start, len, flags,
1429 pages, vmas);
1430} 1428}
1431 1429
1432EXPORT_SYMBOL(get_user_pages); 1430EXPORT_SYMBOL(get_user_pages);
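
[Annotation] The len -> nr_pages rename is purely cosmetic, but the kerneldoc above doubles as the usage contract. An illustrative caller following that contract — mmap_sem held across the call, a possibly short pin handled, each page released with put_page():

        down_read(&mm->mmap_sem);
        got = get_user_pages(current, mm, start, nr_pages,
                             1 /* write */, 0 /* force */, pages, NULL);
        up_read(&mm->mmap_sem);

        if (got < 0)
                return got;             /* no pages pinned */

        for (i = 0; i < got; i++) {
                set_page_dirty_lock(pages[i]);  /* we wrote to them */
                put_page(pages[i]);             /* drop the pin */
        }
        /* got may be < nr_pages: handle the short pin if it matters */
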
diff --git a/mm/nommu.c b/mm/nommu.c
index 2fd2ad5da98e..bf0cc762a7d2 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -173,8 +173,8 @@ unsigned int kobjsize(const void *objp)
173} 173}
174 174
175int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 175int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
176 unsigned long start, int len, int flags, 176 unsigned long start, int nr_pages, int flags,
177 struct page **pages, struct vm_area_struct **vmas) 177 struct page **pages, struct vm_area_struct **vmas)
178{ 178{
179 struct vm_area_struct *vma; 179 struct vm_area_struct *vma;
180 unsigned long vm_flags; 180 unsigned long vm_flags;
@@ -189,7 +189,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
189 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); 189 vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
190 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); 190 vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
191 191
192 for (i = 0; i < len; i++) { 192 for (i = 0; i < nr_pages; i++) {
193 vma = find_vma(mm, start); 193 vma = find_vma(mm, start);
194 if (!vma) 194 if (!vma)
195 goto finish_or_fault; 195 goto finish_or_fault;
@@ -224,7 +224,7 @@ finish_or_fault:
224 * - don't permit access to VMAs that don't support it, such as I/O mappings 224 * - don't permit access to VMAs that don't support it, such as I/O mappings
225 */ 225 */
226int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 226int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
227 unsigned long start, int len, int write, int force, 227 unsigned long start, int nr_pages, int write, int force,
228 struct page **pages, struct vm_area_struct **vmas) 228 struct page **pages, struct vm_area_struct **vmas)
229{ 229{
230 int flags = 0; 230 int flags = 0;
@@ -234,9 +234,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
234 if (force) 234 if (force)
235 flags |= GUP_FLAGS_FORCE; 235 flags |= GUP_FLAGS_FORCE;
236 236
237 return __get_user_pages(tsk, mm, 237 return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
238 start, len, flags,
239 pages, vmas);
240} 238}
241EXPORT_SYMBOL(get_user_pages); 239EXPORT_SYMBOL(get_user_pages);
242 240
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index aecc9cdfdfce..5d714f8fb303 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1153,10 +1153,10 @@ again:
1153 * properly detect and handle allocation failures. 1153 * properly detect and handle allocation failures.
1154 * 1154 *
1155 * We most definitely don't want callers attempting to 1155 * We most definitely don't want callers attempting to
1156 * allocate greater than single-page units with 1156 * allocate greater than order-1 page units with
1157 * __GFP_NOFAIL. 1157 * __GFP_NOFAIL.
1158 */ 1158 */
1159 WARN_ON_ONCE(order > 0); 1159 WARN_ON_ONCE(order > 1);
1160 } 1160 }
1161 spin_lock_irqsave(&zone->lock, flags); 1161 spin_lock_irqsave(&zone->lock, flags);
1162 page = __rmqueue(zone, order, migratetype); 1162 page = __rmqueue(zone, order, migratetype);
diff --git a/mm/shmem.c b/mm/shmem.c
index e89d7ec18eda..d713239ce2ce 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1558,6 +1558,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode,
1558 spin_lock_init(&info->lock); 1558 spin_lock_init(&info->lock);
1559 info->flags = flags & VM_NORESERVE; 1559 info->flags = flags & VM_NORESERVE;
1560 INIT_LIST_HEAD(&info->swaplist); 1560 INIT_LIST_HEAD(&info->swaplist);
1561 cache_no_acl(inode);
1561 1562
1562 switch (mode & S_IFMT) { 1563 switch (mode & S_IFMT) {
1563 default: 1564 default:
@@ -2388,7 +2389,6 @@ static void shmem_destroy_inode(struct inode *inode)
2388 /* only struct inode is valid if it's an inline symlink */ 2389 /* only struct inode is valid if it's an inline symlink */
2389 mpol_free_shared_policy(&SHMEM_I(inode)->policy); 2390 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
2390 } 2391 }
2391 shmem_acl_destroy_inode(inode);
2392 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); 2392 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
2393} 2393}
2394 2394
@@ -2397,10 +2397,6 @@ static void init_once(void *foo)
2397 struct shmem_inode_info *p = (struct shmem_inode_info *) foo; 2397 struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
2398 2398
2399 inode_init_once(&p->vfs_inode); 2399 inode_init_once(&p->vfs_inode);
2400#ifdef CONFIG_TMPFS_POSIX_ACL
2401 p->i_acl = NULL;
2402 p->i_default_acl = NULL;
2403#endif
2404} 2400}
2405 2401
2406static int init_inodecache(void) 2402static int init_inodecache(void)
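
[Annotation] The shmem hunks drop the tmpfs-private i_acl/i_default_acl fields in favour of the generic copies in struct inode; cache_no_acl() marks a fresh inode as having no cached ACLs so nothing needs tearing down at destroy time. Its definition, as I recall it from include/linux/fs.h of this period (verify against the tree):

        static inline void cache_no_acl(struct inode *inode)
        {
                inode->i_acl = NULL;
                inode->i_default_acl = NULL;
        }
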
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
index 8e5aadd7dcd6..606a8e757a42 100644
--- a/mm/shmem_acl.c
+++ b/mm/shmem_acl.c
@@ -22,11 +22,11 @@ shmem_get_acl(struct inode *inode, int type)
22 spin_lock(&inode->i_lock); 22 spin_lock(&inode->i_lock);
23 switch(type) { 23 switch(type) {
24 case ACL_TYPE_ACCESS: 24 case ACL_TYPE_ACCESS:
25 acl = posix_acl_dup(SHMEM_I(inode)->i_acl); 25 acl = posix_acl_dup(inode->i_acl);
26 break; 26 break;
27 27
28 case ACL_TYPE_DEFAULT: 28 case ACL_TYPE_DEFAULT:
29 acl = posix_acl_dup(SHMEM_I(inode)->i_default_acl); 29 acl = posix_acl_dup(inode->i_default_acl);
30 break; 30 break;
31 } 31 }
32 spin_unlock(&inode->i_lock); 32 spin_unlock(&inode->i_lock);
@@ -45,13 +45,13 @@ shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl)
45 spin_lock(&inode->i_lock); 45 spin_lock(&inode->i_lock);
46 switch(type) { 46 switch(type) {
47 case ACL_TYPE_ACCESS: 47 case ACL_TYPE_ACCESS:
48 free = SHMEM_I(inode)->i_acl; 48 free = inode->i_acl;
49 SHMEM_I(inode)->i_acl = posix_acl_dup(acl); 49 inode->i_acl = posix_acl_dup(acl);
50 break; 50 break;
51 51
52 case ACL_TYPE_DEFAULT: 52 case ACL_TYPE_DEFAULT:
53 free = SHMEM_I(inode)->i_default_acl; 53 free = inode->i_default_acl;
54 SHMEM_I(inode)->i_default_acl = posix_acl_dup(acl); 54 inode->i_default_acl = posix_acl_dup(acl);
55 break; 55 break;
56 } 56 }
57 spin_unlock(&inode->i_lock); 57 spin_unlock(&inode->i_lock);
@@ -155,23 +155,6 @@ shmem_acl_init(struct inode *inode, struct inode *dir)
155} 155}
156 156
157/** 157/**
158 * shmem_acl_destroy_inode - destroy acls hanging off the in-memory inode
159 *
160 * This is done before destroying the actual inode.
161 */
162
163void
164shmem_acl_destroy_inode(struct inode *inode)
165{
166 if (SHMEM_I(inode)->i_acl)
167 posix_acl_release(SHMEM_I(inode)->i_acl);
168 SHMEM_I(inode)->i_acl = NULL;
169 if (SHMEM_I(inode)->i_default_acl)
170 posix_acl_release(SHMEM_I(inode)->i_default_acl);
171 SHMEM_I(inode)->i_default_acl = NULL;
172}
173
174/**
175 * shmem_check_acl - check_acl() callback for generic_permission() 158 * shmem_check_acl - check_acl() callback for generic_permission()
176 */ 159 */
177static int 160static int
diff --git a/mm/slub.c b/mm/slub.c
index ce62b770e2fc..819f056b39c6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1085,11 +1085,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1085{ 1085{
1086 struct page *page; 1086 struct page *page;
1087 struct kmem_cache_order_objects oo = s->oo; 1087 struct kmem_cache_order_objects oo = s->oo;
1088 gfp_t alloc_gfp;
1088 1089
1089 flags |= s->allocflags; 1090 flags |= s->allocflags;
1090 1091
1091 page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node, 1092 /*
1092 oo); 1093 * Let the initial higher-order allocation fail under memory pressure
1094 * so we fall-back to the minimum order allocation.
1095 */
1096 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1097
1098 page = alloc_slab_page(alloc_gfp, node, oo);
1093 if (unlikely(!page)) { 1099 if (unlikely(!page)) {
1094 oo = s->min; 1100 oo = s->min;
1095 /* 1101 /*
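
[Annotation] The slub.c hunk implements a two-step allocation policy: the first, higher-order attempt strips __GFP_NOFAIL and adds __GFP_NOWARN | __GFP_NORETRY so it can fail quietly under memory pressure, and only the fallback to s->min (visible at the end of the hunk) retries with the caller's original flags. The pattern in isolation:

        /* Try the preferred order opportunistically, then fall back. */
        alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
        page = alloc_slab_page(alloc_gfp, node, oo);
        if (unlikely(!page)) {
                oo = s->min;                    /* minimum usable order */
                page = alloc_slab_page(flags, node, oo);
        }
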
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 5f1d2107a1dd..de56d3983de0 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -437,8 +437,7 @@ free:
437int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, 437int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev,
438 struct packet_type *ptype, struct net_device *orig_dev) 438 struct packet_type *ptype, struct net_device *orig_dev)
439{ 439{
440 skb->sk = NULL; /* Initially we don't know who it's for */ 440 skb_orphan(skb);
441 skb->destructor = NULL; /* Who initializes this, dammit?! */
442 441
443 if (!net_eq(dev_net(dev), &init_net)) { 442 if (!net_eq(dev_net(dev), &init_net)) {
444 kfree_skb(skb); 443 kfree_skb(skb);
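
[Annotation] The open-coded clearing of skb->sk and skb->destructor (which skipped the destructor's socket-memory accounting) becomes skb_orphan(). For reference, the helper in include/linux/skbuff.h of this era runs the destructor before detaching the socket:

        static inline void skb_orphan(struct sk_buff *skb)
        {
                if (skb->destructor)
                        skb->destructor(skb);
                skb->destructor = NULL;
                skb->sk = NULL;
        }
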
diff --git a/net/core/dev.c b/net/core/dev.c
index baf2dc13a34a..60b572812278 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2310,8 +2310,6 @@ ncls:
2310 if (!skb) 2310 if (!skb)
2311 goto out; 2311 goto out;
2312 2312
2313 skb_orphan(skb);
2314
2315 type = skb->protocol; 2313 type = skb->protocol;
2316 list_for_each_entry_rcu(ptype, 2314 list_for_each_entry_rcu(ptype,
2317 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 2315 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 05ea7440d9e5..3e70faab2989 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -85,7 +85,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
85} 85}
86 86
87static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 87static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
88 int type, int code, int offset, __be32 info) 88 u8 type, u8 code, int offset, __be32 info)
89{ 89{
90 struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; 90 struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
91 const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); 91 const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 65b3a8b11a6c..278f46f5011b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1093,8 +1093,27 @@ restart:
1093 * If we drop it here, the callers have no way to resolve routes 1093 * If we drop it here, the callers have no way to resolve routes
1094 * when we're not caching. Instead, just point *rp at rt, so 1094 * when we're not caching. Instead, just point *rp at rt, so
1095 * the caller gets a single use out of the route 1095 * the caller gets a single use out of the route
1096 * Note that we do rt_free on this new route entry, so that
1097 * once its refcount hits zero, we are still able to reap it
1098 * (Thanks Alexey)
1099 * Note also the rt_free uses call_rcu. We don't actually
1100 * need rcu protection here, this is just our path to get
1101 * on the route gc list.
1096 */ 1102 */
1097 goto report_and_exit; 1103
1104 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1105 int err = arp_bind_neighbour(&rt->u.dst);
1106 if (err) {
1107 if (net_ratelimit())
1108 printk(KERN_WARNING
1109 "Neighbour table failure & not caching routes.\n");
1110 rt_drop(rt);
1111 return err;
1112 }
1113 }
1114
1115 rt_free(rt);
1116 goto skip_hashing;
1098 } 1117 }
1099 1118
1100 rthp = &rt_hash_table[hash].chain; 1119 rthp = &rt_hash_table[hash].chain;
@@ -1211,7 +1230,8 @@ restart:
1211#if RT_CACHE_DEBUG >= 2 1230#if RT_CACHE_DEBUG >= 2
1212 if (rt->u.dst.rt_next) { 1231 if (rt->u.dst.rt_next) {
1213 struct rtable *trt; 1232 struct rtable *trt;
1214 printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); 1233 printk(KERN_DEBUG "rt_cache @%02x: %pI4",
1234 hash, &rt->rt_dst);
1215 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) 1235 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
1216 printk(" . %pI4", &trt->rt_dst); 1236 printk(" . %pI4", &trt->rt_dst);
1217 printk("\n"); 1237 printk("\n");
@@ -1226,7 +1246,7 @@ restart:
1226 1246
1227 spin_unlock_bh(rt_hash_lock_addr(hash)); 1247 spin_unlock_bh(rt_hash_lock_addr(hash));
1228 1248
1229report_and_exit: 1249skip_hashing:
1230 if (rp) 1250 if (rp)
1231 *rp = rt; 1251 *rp = rt;
1232 else 1252 else
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 52449f7a1b71..86f42a288c4b 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -405,7 +405,7 @@ out:
405} 405}
406 406
407static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 407static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
408 int type, int code, int offset, __be32 info) 408 u8 type, u8 code, int offset, __be32 info)
409{ 409{
410 struct net *net = dev_net(skb->dev); 410 struct net *net = dev_net(skb->dev);
411 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; 411 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index c2f250150db1..678bb95b1525 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -354,7 +354,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
354} 354}
355 355
356static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 356static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
357 int type, int code, int offset, __be32 info) 357 u8 type, u8 code, int offset, __be32 info)
358{ 358{
359 struct net *net = dev_net(skb->dev); 359 struct net *net = dev_net(skb->dev);
360 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; 360 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 36dff8807183..eab62a7a8f06 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -117,7 +117,7 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
117/* 117/*
118 * Slightly more convenient version of icmpv6_send. 118 * Slightly more convenient version of icmpv6_send.
119 */ 119 */
120void icmpv6_param_prob(struct sk_buff *skb, int code, int pos) 120void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
121{ 121{
122 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); 122 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
123 kfree_skb(skb); 123 kfree_skb(skb);
@@ -161,7 +161,7 @@ static int is_ineligible(struct sk_buff *skb)
161/* 161/*
162 * Check the ICMP output rate limit 162 * Check the ICMP output rate limit
163 */ 163 */
164static inline int icmpv6_xrlim_allow(struct sock *sk, int type, 164static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
165 struct flowi *fl) 165 struct flowi *fl)
166{ 166{
167 struct dst_entry *dst; 167 struct dst_entry *dst;
@@ -305,7 +305,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {}
305/* 305/*
306 * Send an ICMP message in response to a packet in error 306 * Send an ICMP message in response to a packet in error
307 */ 307 */
308void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, 308void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
309 struct net_device *dev) 309 struct net_device *dev)
310{ 310{
311 struct net *net = dev_net(skb->dev); 311 struct net *net = dev_net(skb->dev);
@@ -590,7 +590,7 @@ out:
590 icmpv6_xmit_unlock(sk); 590 icmpv6_xmit_unlock(sk);
591} 591}
592 592
593static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) 593static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
594{ 594{
595 struct inet6_protocol *ipprot; 595 struct inet6_protocol *ipprot;
596 int inner_offset; 596 int inner_offset;
@@ -643,7 +643,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
643 struct in6_addr *saddr, *daddr; 643 struct in6_addr *saddr, *daddr;
644 struct ipv6hdr *orig_hdr; 644 struct ipv6hdr *orig_hdr;
645 struct icmp6hdr *hdr; 645 struct icmp6hdr *hdr;
646 int type; 646 u8 type;
647 647
648 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 648 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
649 struct sec_path *sp = skb_sec_path(skb); 649 struct sec_path *sp = skb_sec_path(skb);
@@ -914,7 +914,7 @@ static const struct icmp6_err {
914 }, 914 },
915}; 915};
916 916
917int icmpv6_err_convert(int type, int code, int *err) 917int icmpv6_err_convert(u8 type, u8 code, int *err)
918{ 918{
919 int fatal = 0; 919 int fatal = 0;
920 920
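
[Annotation] The int -> u8 conversion for ICMPv6 type/code here, and repeated across dccp, ah6, esp6 and the files below, simply matches the wire format: both fields are single octets in the header (from include/linux/icmpv6.h), so u8 avoids sign-extension and truncation surprises when they are passed around:

        struct icmp6hdr {
                __u8            icmp6_type;
                __u8            icmp6_code;
                __sum16         icmp6_cksum;
                /* ... message body ... */
        };
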
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 404d16a97d5c..51f410e7775a 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -394,13 +394,13 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
394 394
395static int 395static int
396ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, 396ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
397 int *type, int *code, int *msg, __u32 *info, int offset) 397 u8 *type, u8 *code, int *msg, __u32 *info, int offset)
398{ 398{
399 struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; 399 struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
400 struct ip6_tnl *t; 400 struct ip6_tnl *t;
401 int rel_msg = 0; 401 int rel_msg = 0;
402 int rel_type = ICMPV6_DEST_UNREACH; 402 u8 rel_type = ICMPV6_DEST_UNREACH;
403 int rel_code = ICMPV6_ADDR_UNREACH; 403 u8 rel_code = ICMPV6_ADDR_UNREACH;
404 __u32 rel_info = 0; 404 __u32 rel_info = 0;
405 __u16 len; 405 __u16 len;
406 int err = -ENOENT; 406 int err = -ENOENT;
@@ -488,11 +488,11 @@ out:
488 488
489static int 489static int
490ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 490ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
491 int type, int code, int offset, __be32 info) 491 u8 type, u8 code, int offset, __be32 info)
492{ 492{
493 int rel_msg = 0; 493 int rel_msg = 0;
494 int rel_type = type; 494 u8 rel_type = type;
495 int rel_code = code; 495 u8 rel_code = code;
496 __u32 rel_info = ntohl(info); 496 __u32 rel_info = ntohl(info);
497 int err; 497 int err;
498 struct sk_buff *skb2; 498 struct sk_buff *skb2;
@@ -586,11 +586,11 @@ out:
586 586
587static int 587static int
588ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 588ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
589 int type, int code, int offset, __be32 info) 589 u8 type, u8 code, int offset, __be32 info)
590{ 590{
591 int rel_msg = 0; 591 int rel_msg = 0;
592 int rel_type = type; 592 u8 rel_type = type;
593 int rel_code = code; 593 u8 rel_code = code;
594 __u32 rel_info = ntohl(info); 594 __u32 rel_info = ntohl(info);
595 int err; 595 int err;
596 596
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 3a0b3be7ece5..79c172f1ff01 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -51,7 +51,7 @@
51#include <linux/mutex.h> 51#include <linux/mutex.h>
52 52
53static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 53static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
54 int type, int code, int offset, __be32 info) 54 u8 type, u8 code, int offset, __be32 info)
55{ 55{
56 __be32 spi; 56 __be32 spi;
57 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; 57 struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index f995e19c87a9..f797e8c6f3b3 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -54,7 +54,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen)
54 return data + padlen; 54 return data + padlen;
55} 55}
56 56
57static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos) 57static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos)
58{ 58{
59 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); 59 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
60} 60}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8b0b6f948063..d6c3c1c34b2d 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -310,7 +310,7 @@ out:
310 310
311static void rawv6_err(struct sock *sk, struct sk_buff *skb, 311static void rawv6_err(struct sock *sk, struct sk_buff *skb,
312 struct inet6_skb_parm *opt, 312 struct inet6_skb_parm *opt,
313 int type, int code, int offset, __be32 info) 313 u8 type, u8 code, int offset, __be32 info)
314{ 314{
315 struct inet_sock *inet = inet_sk(sk); 315 struct inet_sock *inet = inet_sk(sk);
316 struct ipv6_pinfo *np = inet6_sk(sk); 316 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -343,7 +343,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
343} 343}
344 344
345void raw6_icmp_error(struct sk_buff *skb, int nexthdr, 345void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
346 int type, int code, int inner_offset, __be32 info) 346 u8 type, u8 code, int inner_offset, __be32 info)
347{ 347{
348 struct sock *sk; 348 struct sock *sk;
349 int hash; 349 int hash;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 658293ea05ba..1473ee0a1f51 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1865,7 +1865,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1865 * Drop the packet on the floor 1865 * Drop the packet on the floor
1866 */ 1866 */
1867 1867
1868static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) 1868static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1869{ 1869{
1870 int type; 1870 int type;
1871 struct dst_entry *dst = skb_dst(skb); 1871 struct dst_entry *dst = skb_dst(skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 53b6a4192b16..58810c65b635 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -317,7 +317,7 @@ failure:
317} 317}
318 318
319static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 319static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
320 int type, int code, int offset, __be32 info) 320 u8 type, u8 code, int offset, __be32 info)
321{ 321{
322 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; 322 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
323 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); 323 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 669f280989c3..633ad789effc 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -124,7 +124,7 @@ drop:
124} 124}
125 125
126static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 126static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
127 int type, int code, int offset, __be32 info) 127 u8 type, u8 code, int offset, __be32 info)
128{ 128{
129 struct xfrm6_tunnel *handler; 129 struct xfrm6_tunnel *handler;
130 130
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 023beda6b224..33b59bd92c4d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -312,7 +312,7 @@ csum_copy_err:
312} 312}
313 313
314void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 314void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
315 int type, int code, int offset, __be32 info, 315 u8 type, u8 code, int offset, __be32 info,
316 struct udp_table *udptable) 316 struct udp_table *udptable)
317{ 317{
318 struct ipv6_pinfo *np; 318 struct ipv6_pinfo *np;
@@ -346,8 +346,8 @@ out:
346} 346}
347 347
348static __inline__ void udpv6_err(struct sk_buff *skb, 348static __inline__ void udpv6_err(struct sk_buff *skb,
349 struct inet6_skb_parm *opt, int type, 349 struct inet6_skb_parm *opt, u8 type,
350 int code, int offset, __be32 info ) 350 u8 code, int offset, __be32 info )
351{ 351{
352 __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); 352 __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
353} 353}
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 23779208c334..6bb303471e20 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -9,7 +9,7 @@
9 9
10extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); 10extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int );
11extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, 11extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
12 int , int , int , __be32 , struct udp_table *); 12 u8 , u8 , int , __be32 , struct udp_table *);
13 13
14extern int udp_v6_get_port(struct sock *sk, unsigned short snum); 14extern int udp_v6_get_port(struct sock *sk, unsigned short snum);
15 15
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index ba162a824585..4818c48688f2 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -20,7 +20,7 @@ static int udplitev6_rcv(struct sk_buff *skb)
20 20
21static void udplitev6_err(struct sk_buff *skb, 21static void udplitev6_err(struct sk_buff *skb,
22 struct inet6_skb_parm *opt, 22 struct inet6_skb_parm *opt,
23 int type, int code, int offset, __be32 info) 23 u8 type, u8 code, int offset, __be32 info)
24{ 24{
25 __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); 25 __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table);
26} 26}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 80193db224d9..81a95c00e503 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -262,7 +262,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
262} 262}
263 263
264static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 264static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
265 int type, int code, int offset, __be32 info) 265 u8 type, u8 code, int offset, __be32 info)
266{ 266{
267 /* xfrm6_tunnel native err handling */ 267 /* xfrm6_tunnel native err handling */
268 switch (type) { 268 switch (type) {
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 5922febe25c4..cb762c8723ea 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -913,9 +913,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
913 /* Clean up the original one to keep it in listen state */ 913 /* Clean up the original one to keep it in listen state */
914 irttp_listen(self->tsap); 914 irttp_listen(self->tsap);
915 915
916 /* Wow ! What is that ? Jean II */
917 skb->sk = NULL;
918 skb->destructor = NULL;
919 kfree_skb(skb); 916 kfree_skb(skb);
920 sk->sk_ack_backlog--; 917 sk->sk_ack_backlog--;
921 918
diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c
index 67c99d20857f..7ba96618660e 100644
--- a/net/irda/ircomm/ircomm_lmp.c
+++ b/net/irda/ircomm/ircomm_lmp.c
@@ -196,6 +196,7 @@ static int ircomm_lmp_data_request(struct ircomm_cb *self,
196 /* Don't forget to refcount it - see ircomm_tty_do_softint() */ 196 /* Don't forget to refcount it - see ircomm_tty_do_softint() */
197 skb_get(skb); 197 skb_get(skb);
198 198
199 skb_orphan(skb);
199 skb->destructor = ircomm_lmp_flow_control; 200 skb->destructor = ircomm_lmp_flow_control;
200 201
201 if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) { 202 if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5f72b94b4918..7508f11c5b39 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -335,7 +335,8 @@ begin:
335 h = __nf_conntrack_find(net, tuple); 335 h = __nf_conntrack_find(net, tuple);
336 if (h) { 336 if (h) {
337 ct = nf_ct_tuplehash_to_ctrack(h); 337 ct = nf_ct_tuplehash_to_ctrack(h);
338 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) 338 if (unlikely(nf_ct_is_dying(ct) ||
339 !atomic_inc_not_zero(&ct->ct_general.use)))
339 h = NULL; 340 h = NULL;
340 else { 341 else {
341 if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) { 342 if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) {
@@ -425,7 +426,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
425 /* Remove from unconfirmed list */ 426 /* Remove from unconfirmed list */
426 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); 427 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
427 428
428 __nf_conntrack_hash_insert(ct, hash, repl_hash);
429 /* Timer relative to confirmation time, not original 429 /* Timer relative to confirmation time, not original
430 setting time, otherwise we'd get timer wrap in 430 setting time, otherwise we'd get timer wrap in
431 weird delay cases. */ 431 weird delay cases. */
@@ -433,8 +433,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
433 add_timer(&ct->timeout); 433 add_timer(&ct->timeout);
434 atomic_inc(&ct->ct_general.use); 434 atomic_inc(&ct->ct_general.use);
435 set_bit(IPS_CONFIRMED_BIT, &ct->status); 435 set_bit(IPS_CONFIRMED_BIT, &ct->status);
436
437 /* Since the lookup is lockless, hash insertion must be done after
438 * starting the timer and setting the CONFIRMED bit. The RCU barriers
439 * guarantee that no other CPU can find the conntrack before the above
440 * stores are visible.
441 */
442 __nf_conntrack_hash_insert(ct, hash, repl_hash);
436 NF_CT_STAT_INC(net, insert); 443 NF_CT_STAT_INC(net, insert);
437 spin_unlock_bh(&nf_conntrack_lock); 444 spin_unlock_bh(&nf_conntrack_lock);
445
438 help = nfct_help(ct); 446 help = nfct_help(ct);
439 if (help && help->helper) 447 if (help && help->helper)
440 nf_conntrack_event_cache(IPCT_HELPER, ct); 448 nf_conntrack_event_cache(IPCT_HELPER, ct);
@@ -503,7 +511,8 @@ static noinline int early_drop(struct net *net, unsigned int hash)
503 cnt++; 511 cnt++;
504 } 512 }
505 513
506 if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) 514 if (ct && unlikely(nf_ct_is_dying(ct) ||
515 !atomic_inc_not_zero(&ct->ct_general.use)))
507 ct = NULL; 516 ct = NULL;
508 if (ct || cnt >= NF_CT_EVICTION_RANGE) 517 if (ct || cnt >= NF_CT_EVICTION_RANGE)
509 break; 518 break;
@@ -1267,13 +1276,19 @@ err_cache:
1267 return ret; 1276 return ret;
1268} 1277}
1269 1278
1279/*
1280 * We need to use special "null" values, not used in hash table
1281 */
1282#define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
1283#define DYING_NULLS_VAL ((1<<30)+1)
1284
1270static int nf_conntrack_init_net(struct net *net) 1285static int nf_conntrack_init_net(struct net *net)
1271{ 1286{
1272 int ret; 1287 int ret;
1273 1288
1274 atomic_set(&net->ct.count, 0); 1289 atomic_set(&net->ct.count, 0);
1275 INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0); 1290 INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
1276 INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0); 1291 INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
1277 net->ct.stat = alloc_percpu(struct ip_conntrack_stat); 1292 net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
1278 if (!net->ct.stat) { 1293 if (!net->ct.stat) {
1279 ret = -ENOMEM; 1294 ret = -ENOMEM;
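
[Annotation] Three related lockless-lookup fixes land in nf_conntrack_core.c: skip entries already flagged dying before taking a reference, insert into the hash only after the timer and IPS_CONFIRMED are visible to other CPUs, and give the unconfirmed/dying lists nulls values distinct from any hash bucket. The last one matters because an SLAB_DESTROY_BY_RCU object can be freed and reused on another chain mid-traversal; the terminating nulls marker tells the reader which list it actually finished on. A sketch of that detection, modelled on __nf_conntrack_find():

        begin:
        hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
                if (nf_ct_tuple_equal(tuple, &h->tuple))
                        return h;
        }
        /* If the terminating nulls value is not ours, the entry we were
         * on was recycled onto a different chain: restart the walk. */
        if (get_nulls_value(n) != bucket)
                goto begin;
        return NULL;
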
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 2fefe147750a..4e620305f28c 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -47,7 +47,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
47 mutex_lock(&nf_log_mutex); 47 mutex_lock(&nf_log_mutex);
48 48
49 if (pf == NFPROTO_UNSPEC) { 49 if (pf == NFPROTO_UNSPEC) {
50 int i;
51 for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) 50 for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
52 list_add_tail(&(logger->list[i]), &(nf_loggers_l[i])); 51 list_add_tail(&(logger->list[i]), &(nf_loggers_l[i]));
53 } else { 52 } else {
@@ -216,7 +215,7 @@ static const struct file_operations nflog_file_ops = {
216#endif /* PROC_FS */ 215#endif /* PROC_FS */
217 216
218#ifdef CONFIG_SYSCTL 217#ifdef CONFIG_SYSCTL
219struct ctl_path nf_log_sysctl_path[] = { 218static struct ctl_path nf_log_sysctl_path[] = {
220 { .procname = "net", .ctl_name = CTL_NET, }, 219 { .procname = "net", .ctl_name = CTL_NET, },
221 { .procname = "netfilter", .ctl_name = NET_NETFILTER, }, 220 { .procname = "netfilter", .ctl_name = NET_NETFILTER, },
222 { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, }, 221 { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, },
@@ -228,19 +227,26 @@ static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
228static struct ctl_table_header *nf_log_dir_header; 227static struct ctl_table_header *nf_log_dir_header;
229 228
230static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, 229static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
231 void *buffer, size_t *lenp, loff_t *ppos) 230 void __user *buffer, size_t *lenp, loff_t *ppos)
232{ 231{
233 const struct nf_logger *logger; 232 const struct nf_logger *logger;
233 char buf[NFLOGGER_NAME_LEN];
234 size_t size = *lenp;
234 int r = 0; 235 int r = 0;
235 int tindex = (unsigned long)table->extra1; 236 int tindex = (unsigned long)table->extra1;
236 237
237 if (write) { 238 if (write) {
238 if (!strcmp(buffer, "NONE")) { 239 if (size > sizeof(buf))
240 size = sizeof(buf);
241 if (copy_from_user(buf, buffer, size))
242 return -EFAULT;
243
244 if (!strcmp(buf, "NONE")) {
239 nf_log_unbind_pf(tindex); 245 nf_log_unbind_pf(tindex);
240 return 0; 246 return 0;
241 } 247 }
242 mutex_lock(&nf_log_mutex); 248 mutex_lock(&nf_log_mutex);
243 logger = __find_logger(tindex, buffer); 249 logger = __find_logger(tindex, buf);
244 if (logger == NULL) { 250 if (logger == NULL) {
245 mutex_unlock(&nf_log_mutex); 251 mutex_unlock(&nf_log_mutex);
246 return -ENOENT; 252 return -ENOENT;
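
[Annotation] The nf_log_proc_dostring fix exists because the buffer argument of a sysctl handler is a userspace pointer; the old code strcmp()'d it directly. The general shape for this kind of handler — a hedged sketch, with the explicit NUL termination the safe pattern needs before parsing:

        char buf[NFLOGGER_NAME_LEN];
        size_t size = min(*lenp, sizeof(buf) - 1);

        if (copy_from_user(buf, buffer, size))
                return -EFAULT;
        buf[size] = '\0';       /* user data is not guaranteed terminated */
        /* ...parse buf in kernel space... */
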
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 498b45101df7..f28f6a5fc02d 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -40,12 +40,12 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
40static u32 hash_v4(const struct sk_buff *skb) 40static u32 hash_v4(const struct sk_buff *skb)
41{ 41{
42 const struct iphdr *iph = ip_hdr(skb); 42 const struct iphdr *iph = ip_hdr(skb);
43 u32 ipaddr; 43 __be32 ipaddr;
44 44
45 /* packets in either direction go into same queue */ 45 /* packets in either direction go into same queue */
46 ipaddr = iph->saddr ^ iph->daddr; 46 ipaddr = iph->saddr ^ iph->daddr;
47 47
48 return jhash_2words(ipaddr, iph->protocol, jhash_initval); 48 return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval);
49} 49}
50 50
51static unsigned int 51static unsigned int
@@ -63,14 +63,14 @@ nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par)
63static u32 hash_v6(const struct sk_buff *skb) 63static u32 hash_v6(const struct sk_buff *skb)
64{ 64{
65 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 65 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
66 u32 addr[4]; 66 __be32 addr[4];
67 67
68 addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0]; 68 addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0];
69 addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1]; 69 addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1];
70 addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2]; 70 addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2];
71 addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3]; 71 addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3];
72 72
73 return jhash2(addr, ARRAY_SIZE(addr), jhash_initval); 73 return jhash2((__force u32 *)addr, ARRAY_SIZE(addr), jhash_initval);
74} 74}
75 75
76static unsigned int 76static unsigned int
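
[Annotation] The xt_NFQUEUE and xt_cluster hunks are sparse-annotation fixes, not behaviour changes. Addresses read from packet headers are __be32; XOR of two same-endian words is endian-neutral, so hashing the result is correct, and the __force cast records that the "missing" ntohl() is deliberate rather than an oversight:

        __be32 ipaddr = iph->saddr ^ iph->daddr;        /* still big-endian */
        u32 hash = jhash_2words((__force u32)ipaddr, iph->protocol,
                                jhash_initval);
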
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 69a639f35403..225ee3ecd69d 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -15,14 +15,14 @@
15#include <net/netfilter/nf_conntrack.h> 15#include <net/netfilter/nf_conntrack.h>
16#include <linux/netfilter/xt_cluster.h> 16#include <linux/netfilter/xt_cluster.h>
17 17
18static inline u_int32_t nf_ct_orig_ipv4_src(const struct nf_conn *ct) 18static inline u32 nf_ct_orig_ipv4_src(const struct nf_conn *ct)
19{ 19{
20 return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; 20 return (__force u32)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
21} 21}
22 22
23static inline const void *nf_ct_orig_ipv6_src(const struct nf_conn *ct) 23static inline const u32 *nf_ct_orig_ipv6_src(const struct nf_conn *ct)
24{ 24{
25 return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; 25 return (__force u32 *)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6;
26} 26}
27 27
28static inline u_int32_t 28static inline u_int32_t
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 01dd07b764ec..98fc190e8f0e 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -54,6 +54,7 @@ static bool quota_mt_check(const struct xt_mtchk_param *par)
54 if (q->master == NULL) 54 if (q->master == NULL)
55 return -ENOMEM; 55 return -ENOMEM;
56 56
57 q->master->quota = q->quota;
57 return true; 58 return true;
58} 59}
59 60
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 220a1d588ee0..4fc6a917f6de 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -66,7 +66,7 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
66 if (info->flags & XT_RATEEST_MATCH_BPS) 66 if (info->flags & XT_RATEEST_MATCH_BPS)
67 ret &= bps1 == bps2; 67 ret &= bps1 == bps2;
68 if (info->flags & XT_RATEEST_MATCH_PPS) 68 if (info->flags & XT_RATEEST_MATCH_PPS)
69 ret &= pps2 == pps2; 69 ret &= pps1 == pps2;
70 break; 70 break;
71 } 71 }
72 72
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a63de3f7f185..6a4b19094143 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -133,7 +133,7 @@ static struct notifier_block sctp_inet6addr_notifier = {
133 133
134/* ICMP error handler. */ 134/* ICMP error handler. */
135SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 135SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
136 int type, int code, int offset, __be32 info) 136 u8 type, u8 code, int offset, __be32 info)
137{ 137{
138 struct inet6_dev *idev; 138 struct inet6_dev *idev;
139 struct sock *sk; 139 struct sock *sk;