aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/ext4.txt14
-rw-r--r--Documentation/filesystems/fiemap.txt228
-rw-r--r--Documentation/filesystems/proc.txt73
-rw-r--r--Documentation/kernel-parameters.txt6
-rw-r--r--MAINTAINERS5
-rw-r--r--arch/um/sys-x86_64/syscall_table.c4
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/Kconfig.cpu54
-rw-r--r--arch/x86/boot/cpu.c17
-rw-r--r--arch/x86/boot/mkcpustr.c40
-rw-r--r--arch/x86/ia32/ia32_signal.c51
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c8
-rw-r--r--arch/x86/kernel/apic_32.c437
-rw-r--r--arch/x86/kernel/apic_64.c626
-rw-r--r--arch/x86/kernel/cpu/Makefile34
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c88
-rw-r--r--arch/x86/kernel/cpu/amd.c548
-rw-r--r--arch/x86/kernel/cpu/amd_64.c224
-rw-r--r--arch/x86/kernel/cpu/centaur.c4
-rw-r--r--arch/x86/kernel/cpu/centaur_64.c6
-rw-r--r--arch/x86/kernel/cpu/cmpxchg.c72
-rw-r--r--arch/x86/kernel/cpu/common.c973
-rw-r--r--arch/x86/kernel/cpu/common_64.c763
-rw-r--r--arch/x86/kernel/cpu/cpu.h19
-rw-r--r--arch/x86/kernel/cpu/cyrix.c23
-rw-r--r--arch/x86/kernel/cpu/feature_names.c84
-rw-r--r--arch/x86/kernel/cpu/intel.c364
-rw-r--r--arch/x86/kernel/cpu/intel_64.c95
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c169
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c2
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl32
-rw-r--r--arch/x86/kernel/cpu/powerflags.c20
-rw-r--r--arch/x86/kernel/cpu/transmeta.c32
-rw-r--r--arch/x86/kernel/cpu/umc.c3
-rw-r--r--arch/x86/kernel/e820.c28
-rw-r--r--arch/x86/kernel/es7000_32.c (renamed from arch/x86/mach-es7000/es7000plat.c)87
-rw-r--r--arch/x86/kernel/genapic_64.c88
-rw-r--r--arch/x86/kernel/genapic_flat_64.c62
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c159
-rw-r--r--arch/x86/kernel/genx2apic_phys.c154
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c70
-rw-r--r--arch/x86/kernel/i387.c154
-rw-r--r--arch/x86/kernel/i8259.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c47
-rw-r--r--arch/x86/kernel/io_apic_64.c639
-rw-r--r--arch/x86/kernel/irqinit_32.c49
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/numaq_32.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/process.c1
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/sigframe.h14
-rw-r--r--arch/x86/kernel/signal_32.c45
-rw-r--r--arch/x86/kernel/signal_64.c95
-rw-r--r--arch/x86/kernel/smpboot.c38
-rw-r--r--arch/x86/kernel/summit_32.c2
-rw-r--r--arch/x86/kernel/traps_32.c1
-rw-r--r--arch/x86/kernel/traps_64.c6
-rw-r--r--arch/x86/kernel/vmi_32.c4
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S9
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S9
-rw-r--r--arch/x86/kernel/xsave.c316
-rw-r--r--arch/x86/kvm/vmx.h15
-rw-r--r--arch/x86/lguest/boot.c38
-rw-r--r--arch/x86/lib/Makefile3
-rw-r--r--arch/x86/lib/usercopy_32.c7
-rw-r--r--arch/x86/mach-default/setup.c15
-rw-r--r--arch/x86/mach-es7000/Makefile5
-rw-r--r--arch/x86/mach-es7000/es7000.h114
-rw-r--r--arch/x86/mach-generic/Makefile1
-rw-r--r--arch/x86/mach-generic/bigsmp.c9
-rw-r--r--arch/x86/mach-generic/es7000.c13
-rw-r--r--arch/x86/mach-generic/numaq.c12
-rw-r--r--arch/x86/mach-generic/summit.c11
-rw-r--r--arch/x86/mm/init_64.c56
-rw-r--r--arch/x86/pci/acpi.c5
-rw-r--r--arch/x86/pci/i386.c3
-rw-r--r--arch/x86/pci/mmconfig-shared.c12
-rw-r--r--arch/x86/power/cpu_32.c7
-rw-r--r--arch/x86/power/cpu_64.c7
-rw-r--r--arch/x86/xen/enlighten.c45
-rw-r--r--drivers/acpi/glue.c3
-rw-r--r--drivers/ata/libata-scsi.c2
-rw-r--r--drivers/block/hd.c9
-rw-r--r--drivers/ide/Kconfig23
-rw-r--r--drivers/ide/Makefile7
-rw-r--r--drivers/ide/arm/icside.c5
-rw-r--r--drivers/ide/arm/palm_bk3710.c8
-rw-r--r--drivers/ide/ide-acpi.c6
-rw-r--r--drivers/ide/ide-atapi.c236
-rw-r--r--drivers/ide/ide-cd.c72
-rw-r--r--drivers/ide/ide-disk.c340
-rw-r--r--drivers/ide/ide-dma.c52
-rw-r--r--drivers/ide/ide-floppy.c668
-rw-r--r--drivers/ide/ide-floppy.h63
-rw-r--r--drivers/ide/ide-floppy_ioctl.c243
-rw-r--r--drivers/ide/ide-generic.c55
-rw-r--r--drivers/ide/ide-io.c117
-rw-r--r--drivers/ide/ide-ioctls.c290
-rw-r--r--drivers/ide/ide-iops.c225
-rw-r--r--drivers/ide/ide-lib.c73
-rw-r--r--drivers/ide/ide-probe.c252
-rw-r--r--drivers/ide/ide-proc.c306
-rw-r--r--drivers/ide/ide-tape.c478
-rw-r--r--drivers/ide/ide-taskfile.c156
-rw-r--r--drivers/ide/ide-timings.c22
-rw-r--r--drivers/ide/ide.c238
-rw-r--r--drivers/ide/legacy/ali14xx.c1
-rw-r--r--drivers/ide/legacy/buddha.c1
-rw-r--r--drivers/ide/legacy/dtc2278.c1
-rw-r--r--drivers/ide/legacy/falconide.c1
-rw-r--r--drivers/ide/legacy/gayle.c1
-rw-r--r--drivers/ide/legacy/ht6560b.c1
-rw-r--r--drivers/ide/legacy/ide-cs.c1
-rw-r--r--drivers/ide/legacy/macide.c1
-rw-r--r--drivers/ide/legacy/q40ide.c2
-rw-r--r--drivers/ide/legacy/qd65xx.c23
-rw-r--r--drivers/ide/legacy/umc8672.c1
-rw-r--r--drivers/ide/pci/aec62xx.c5
-rw-r--r--drivers/ide/pci/alim15x3.c9
-rw-r--r--drivers/ide/pci/amd74xx.c8
-rw-r--r--drivers/ide/pci/atiixp.c3
-rw-r--r--drivers/ide/pci/cmd640.c43
-rw-r--r--drivers/ide/pci/cmd64x.c5
-rw-r--r--drivers/ide/pci/cs5520.c3
-rw-r--r--drivers/ide/pci/cs5530.c19
-rw-r--r--drivers/ide/pci/cs5535.c14
-rw-r--r--drivers/ide/pci/cy82c693.c4
-rw-r--r--drivers/ide/pci/delkin_cb.c1
-rw-r--r--drivers/ide/pci/generic.c3
-rw-r--r--drivers/ide/pci/hpt34x.c5
-rw-r--r--drivers/ide/pci/hpt366.c77
-rw-r--r--drivers/ide/pci/it8213.c3
-rw-r--r--drivers/ide/pci/it821x.c58
-rw-r--r--drivers/ide/pci/jmicron.c3
-rw-r--r--drivers/ide/pci/ns87415.c9
-rw-r--r--drivers/ide/pci/opti621.c7
-rw-r--r--drivers/ide/pci/pdc202xx_new.c15
-rw-r--r--drivers/ide/pci/pdc202xx_old.c11
-rw-r--r--drivers/ide/pci/piix.c5
-rw-r--r--drivers/ide/pci/rz1000.c1
-rw-r--r--drivers/ide/pci/sc1200.c15
-rw-r--r--drivers/ide/pci/scc_pata.c5
-rw-r--r--drivers/ide/pci/serverworks.c9
-rw-r--r--drivers/ide/pci/sgiioc4.c1
-rw-r--r--drivers/ide/pci/siimage.c17
-rw-r--r--drivers/ide/pci/sis5513.c5
-rw-r--r--drivers/ide/pci/sl82c105.c7
-rw-r--r--drivers/ide/pci/slc90e66.c3
-rw-r--r--drivers/ide/pci/triflex.c3
-rw-r--r--drivers/ide/pci/trm290.c1
-rw-r--r--drivers/ide/pci/via82cxxx.c10
-rw-r--r--drivers/ide/ppc/pmac.c6
-rw-r--r--drivers/ide/setup-pci.c33
-rw-r--r--drivers/pci/Makefile2
-rw-r--r--drivers/pci/dma_remapping.h157
-rw-r--r--drivers/pci/dmar.c397
-rw-r--r--drivers/pci/intel-iommu.c185
-rw-r--r--drivers/pci/intel-iommu.h233
-rw-r--r--drivers/pci/intr_remapping.c471
-rw-r--r--drivers/pci/intr_remapping.h8
-rw-r--r--drivers/scsi/ide-scsi.c120
-rw-r--r--fs/Kconfig88
-rw-r--r--fs/Makefile2
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/file.c1
-rw-r--r--fs/ext2/inode.c8
-rw-r--r--fs/ext3/file.c1
-rw-r--r--fs/ext3/inode.c8
-rw-r--r--fs/ext4/Makefile10
-rw-r--r--fs/ext4/acl.h12
-rw-r--r--fs/ext4/balloc.c1457
-rw-r--r--fs/ext4/bitmap.c6
-rw-r--r--fs/ext4/dir.c64
-rw-r--r--fs/ext4/ext4.h131
-rw-r--r--fs/ext4/ext4_extents.h15
-rw-r--r--fs/ext4/ext4_i.h39
-rw-r--r--fs/ext4/ext4_sb.h25
-rw-r--r--fs/ext4/extents.c281
-rw-r--r--fs/ext4/file.c10
-rw-r--r--fs/ext4/fsync.c7
-rw-r--r--fs/ext4/hash.c8
-rw-r--r--fs/ext4/ialloc.c71
-rw-r--r--fs/ext4/inode.c620
-rw-r--r--fs/ext4/ioctl.c84
-rw-r--r--fs/ext4/mballoc.c220
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/migrate.c10
-rw-r--r--fs/ext4/namei.c402
-rw-r--r--fs/ext4/resize.c33
-rw-r--r--fs/ext4/super.c274
-rw-r--r--fs/ext4/symlink.c8
-rw-r--r--fs/ext4/xattr.c14
-rw-r--r--fs/ext4/xattr.h12
-rw-r--r--fs/ioctl.c273
-rw-r--r--fs/jbd2/checkpoint.c22
-rw-r--r--fs/jbd2/commit.c22
-rw-r--r--fs/jbd2/journal.c75
-rw-r--r--fs/ocfs2/alloc.c9
-rw-r--r--fs/ocfs2/alloc.h9
-rw-r--r--fs/ocfs2/extent_map.c346
-rw-r--r--fs/ocfs2/extent_map.h3
-rw-r--r--fs/ocfs2/file.c1
-rw-r--r--include/asm-generic/vmlinux.lds.h1
-rw-r--r--include/asm-x86/apic.h65
-rw-r--r--include/asm-x86/apicdef.h3
-rw-r--r--include/asm-x86/arch_hooks.h2
-rw-r--r--include/asm-x86/bigsmp/apic.h (renamed from include/asm-x86/mach-bigsmp/mach_apic.h)10
-rw-r--r--include/asm-x86/bigsmp/apicdef.h13
-rw-r--r--include/asm-x86/bigsmp/ipi.h (renamed from include/asm-x86/mach-bigsmp/mach_ipi.h)6
-rw-r--r--include/asm-x86/bugs.h5
-rw-r--r--include/asm-x86/cpufeature.h118
-rw-r--r--include/asm-x86/e820.h2
-rw-r--r--include/asm-x86/es7000/apic.h (renamed from include/asm-x86/mach-es7000/mach_apic.h)32
-rw-r--r--include/asm-x86/es7000/apicdef.h13
-rw-r--r--include/asm-x86/es7000/ipi.h (renamed from include/asm-x86/mach-es7000/mach_ipi.h)6
-rw-r--r--include/asm-x86/es7000/mpparse.h (renamed from include/asm-x86/mach-es7000/mach_mpparse.h)6
-rw-r--r--include/asm-x86/es7000/wakecpu.h (renamed from include/asm-x86/mach-es7000/mach_wakecpu.h)8
-rw-r--r--include/asm-x86/genapic_64.h8
-rw-r--r--include/asm-x86/hw_irq.h3
-rw-r--r--include/asm-x86/i387.h84
-rw-r--r--include/asm-x86/i8259.h3
-rw-r--r--include/asm-x86/io_apic.h20
-rw-r--r--include/asm-x86/ipi.h16
-rw-r--r--include/asm-x86/irq_remapping.h8
-rw-r--r--include/asm-x86/mach-bigsmp/mach_apicdef.h13
-rw-r--r--include/asm-x86/mach-default/mach_apic.h4
-rw-r--r--include/asm-x86/mach-default/mach_apicdef.h6
-rw-r--r--include/asm-x86/mach-es7000/mach_apicdef.h13
-rw-r--r--include/asm-x86/mach-numaq/mach_mpparse.h7
-rw-r--r--include/asm-x86/mach-summit/mach_apicdef.h13
-rw-r--r--include/asm-x86/mpspec.h3
-rw-r--r--include/asm-x86/msidef.h4
-rw-r--r--include/asm-x86/msr-index.h16
-rw-r--r--include/asm-x86/numaq/apic.h (renamed from include/asm-x86/mach-numaq/mach_apic.h)6
-rw-r--r--include/asm-x86/numaq/apicdef.h (renamed from include/asm-x86/mach-numaq/mach_apicdef.h)6
-rw-r--r--include/asm-x86/numaq/ipi.h (renamed from include/asm-x86/mach-numaq/mach_ipi.h)6
-rw-r--r--include/asm-x86/numaq/mpparse.h7
-rw-r--r--include/asm-x86/numaq/wakecpu.h (renamed from include/asm-x86/mach-numaq/mach_wakecpu.h)6
-rw-r--r--include/asm-x86/paravirt.h19
-rw-r--r--include/asm-x86/processor-cyrix.h8
-rw-r--r--include/asm-x86/processor-flags.h1
-rw-r--r--include/asm-x86/processor.h27
-rw-r--r--include/asm-x86/setup.h1
-rw-r--r--include/asm-x86/sigcontext.h87
-rw-r--r--include/asm-x86/sigcontext32.h6
-rw-r--r--include/asm-x86/smp.h17
-rw-r--r--include/asm-x86/summit/apic.h (renamed from include/asm-x86/mach-summit/mach_apic.h)24
-rw-r--r--include/asm-x86/summit/apicdef.h13
-rw-r--r--include/asm-x86/summit/ipi.h (renamed from include/asm-x86/mach-summit/mach_ipi.h)6
-rw-r--r--include/asm-x86/summit/irq_vectors_limits.h (renamed from include/asm-x86/mach-summit/irq_vectors_limits.h)6
-rw-r--r--include/asm-x86/summit/mpparse.h (renamed from include/asm-x86/mach-summit/mach_mpparse.h)13
-rw-r--r--include/asm-x86/thread_info.h1
-rw-r--r--include/asm-x86/ucontext.h6
-rw-r--r--include/asm-x86/xcr.h49
-rw-r--r--include/asm-x86/xsave.h118
-rw-r--r--include/linux/ata.h112
-rw-r--r--include/linux/dmar.h127
-rw-r--r--include/linux/ext3_fs.h2
-rw-r--r--include/linux/fiemap.h64
-rw-r--r--include/linux/fs.h21
-rw-r--r--include/linux/ide.h316
-rw-r--r--include/linux/ioport.h3
-rw-r--r--include/linux/irq.h1
-rw-r--r--include/linux/jbd2.h3
-rw-r--r--include/linux/percpu.h7
-rw-r--r--include/linux/percpu_counter.h12
-rw-r--r--kernel/irq/manage.c9
-rw-r--r--kernel/resource.c68
-rw-r--r--lib/percpu_counter.c8
271 files changed, 11598 insertions, 8388 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 0d5394920a31..74484e696405 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -32,9 +32,9 @@ Mailing list: linux-ext4@vger.kernel.org
32 you will need to merge your changes with the version from e2fsprogs 32 you will need to merge your changes with the version from e2fsprogs
33 1.41.x. 33 1.41.x.
34 34
35 - Create a new filesystem using the ext4dev filesystem type: 35 - Create a new filesystem using the ext4 filesystem type:
36 36
37 # mke2fs -t ext4dev /dev/hda1 37 # mke2fs -t ext4 /dev/hda1
38 38
39 Or configure an existing ext3 filesystem to support extents and set 39 Or configure an existing ext3 filesystem to support extents and set
40 the test_fs flag to indicate that it's ok for an in-development 40 the test_fs flag to indicate that it's ok for an in-development
@@ -47,13 +47,13 @@ Mailing list: linux-ext4@vger.kernel.org
47 47
48 # tune2fs -I 256 /dev/hda1 48 # tune2fs -I 256 /dev/hda1
49 49
50 (Note: we currently do not have tools to convert an ext4dev 50 (Note: we currently do not have tools to convert an ext4
51 filesystem back to ext3; so please do not do try this on production 51 filesystem back to ext3; so please do not do try this on production
52 filesystems.) 52 filesystems.)
53 53
54 - Mounting: 54 - Mounting:
55 55
56 # mount -t ext4dev /dev/hda1 /wherever 56 # mount -t ext4 /dev/hda1 /wherever
57 57
58 - When comparing performance with other filesystems, remember that 58 - When comparing performance with other filesystems, remember that
59 ext3/4 by default offers higher data integrity guarantees than most. 59 ext3/4 by default offers higher data integrity guarantees than most.
@@ -177,6 +177,11 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in
177 your disks are battery-backed in one way or another, 177 your disks are battery-backed in one way or another,
178 disabling barriers may safely improve performance. 178 disabling barriers may safely improve performance.
179 179
180inode_readahead=n This tuning parameter controls the maximum
181 number of inode table blocks that ext4's inode
182 table readahead algorithm will pre-read into
183 the buffer cache. The default value is 32 blocks.
184
180orlov (*) This enables the new Orlov block allocator. It is 185orlov (*) This enables the new Orlov block allocator. It is
181 enabled by default. 186 enabled by default.
182 187
@@ -252,6 +257,7 @@ stripe=n Number of filesystem blocks that mballoc will try
252delalloc (*) Deferring block allocation until write-out time. 257delalloc (*) Deferring block allocation until write-out time.
253nodelalloc Disable delayed allocation. Blocks are allocation 258nodelalloc Disable delayed allocation. Blocks are allocation
254 when data is copied from user to page cache. 259 when data is copied from user to page cache.
260
255Data Mode 261Data Mode
256========= 262=========
257There are 3 different data modes: 263There are 3 different data modes:
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt
new file mode 100644
index 000000000000..1e3defcfe50b
--- /dev/null
+++ b/Documentation/filesystems/fiemap.txt
@@ -0,0 +1,228 @@
1============
2Fiemap Ioctl
3============
4
5The fiemap ioctl is an efficient method for userspace to get file
6extent mappings. Instead of block-by-block mapping (such as bmap), fiemap
7returns a list of extents.
8
9
10Request Basics
11--------------
12
13A fiemap request is encoded within struct fiemap:
14
15struct fiemap {
16 __u64 fm_start; /* logical offset (inclusive) at
17 * which to start mapping (in) */
18 __u64 fm_length; /* logical length of mapping which
19 * userspace cares about (in) */
20 __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
21 __u32 fm_mapped_extents; /* number of extents that were
22 * mapped (out) */
23 __u32 fm_extent_count; /* size of fm_extents array (in) */
24 __u32 fm_reserved;
25 struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
26};
27
28
29fm_start and fm_length specify the logical range within the file
30which the process would like mappings for. Extents returned mirror
31those on disk - that is, the logical offset of the 1st returned extent
32may start before fm_start, and the range covered by the last returned
33extent may end after fm_length. All offsets and lengths are in bytes.
34
35Certain flags to modify the way in which mappings are looked up can be
36set in fm_flags. If the kernel doesn't understand some particular
37flags, it will return EBADR and the contents of fm_flags will contain
38the set of flags which caused the error. If the kernel is compatible
39with all flags passed, the contents of fm_flags will be unmodified.
40It is up to userspace to determine whether rejection of a particular
41flag is fatal to its operation. This scheme is intended to allow the
42fiemap interface to grow in the future but without losing
43compatibility with old software.
44
45fm_extent_count specifies the number of elements in the fm_extents[] array
46that can be used to return extents. If fm_extent_count is zero, then the
47fm_extents[] array is ignored (no extents will be returned), and the
48fm_mapped_extents count will hold the number of extents needed in
49fm_extents[] to hold the file's current mapping. Note that there is
50nothing to prevent the file from changing between calls to FIEMAP.
51
52The following flags can be set in fm_flags:
53
54* FIEMAP_FLAG_SYNC
55If this flag is set, the kernel will sync the file before mapping extents.
56
57* FIEMAP_FLAG_XATTR
58If this flag is set, the extents returned will describe the inode's
59extended attribute lookup tree, instead of its data tree.
60
61
62Extent Mapping
63--------------
64
65Extent information is returned within the embedded fm_extents array
66which userspace must allocate along with the fiemap structure. The
67number of elements in the fiemap_extents[] array should be passed via
68fm_extent_count. The number of extents mapped by kernel will be
69returned via fm_mapped_extents. If the number of fiemap_extents
70allocated is less than would be required to map the requested range,
71the maximum number of extents that can be mapped in the fm_extents[]
72array will be returned and fm_mapped_extents will be equal to
73fm_extent_count. In that case, the last extent in the array will not
74complete the requested range and will not have the FIEMAP_EXTENT_LAST
75flag set (see the next section on extent flags).
76
77Each extent is described by a single fiemap_extent structure as
78returned in fm_extents.
79
80struct fiemap_extent {
81 __u64 fe_logical; /* logical offset in bytes for the start of
82 * the extent */
83 __u64 fe_physical; /* physical offset in bytes for the start
84 * of the extent */
85 __u64 fe_length; /* length in bytes for the extent */
86 __u64 fe_reserved64[2];
87 __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
88 __u32 fe_reserved[3];
89};
90
91All offsets and lengths are in bytes and mirror those on disk. It is valid
92for an extent's logical offset to start before the request or its logical
93length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is
94returned, fe_logical, fe_physical, and fe_length will be aligned to the
95block size of the file system. With the exception of extents flagged as
96FIEMAP_EXTENT_MERGED, adjacent extents will not be merged.
97
98The fe_flags field contains flags which describe the extent returned.
99A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in
100the file so that the process making fiemap calls can determine when no
101more extents are available, without having to call the ioctl again.
102
103Some flags are intentionally vague and will always be set in the
104presence of other more specific flags. This way a program looking for
105a general property does not have to know all existing and future flags
106which imply that property.
107
108For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL
109are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking
110for inline or tail-packed data can key on the specific flag. Software
111which simply cares not to try operating on non-aligned extents
112however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to
113worry about all present and future flags which might imply unaligned
114data. Note that the opposite is not true - it would be valid for
115FIEMAP_EXTENT_NOT_ALIGNED to appear alone.
116
117* FIEMAP_EXTENT_LAST
118This is the last extent in the file. A mapping attempt past this
119extent will return nothing.
120
121* FIEMAP_EXTENT_UNKNOWN
122The location of this extent is currently unknown. This may indicate
123the data is stored on an inaccessible volume or that no storage has
124been allocated for the file yet.
125
126* FIEMAP_EXTENT_DELALLOC
127 - This will also set FIEMAP_EXTENT_UNKNOWN.
128Delayed allocation - while there is data for this extent, its
129physical location has not been allocated yet.
130
131* FIEMAP_EXTENT_ENCODED
132This extent does not consist of plain filesystem blocks but is
133encoded (e.g. encrypted or compressed). Reading the data in this
134extent via I/O to the block device will have undefined results.
135
136Note that it is *always* undefined to try to update the data
137in-place by writing to the indicated location without the
138assistance of the filesystem, or to access the data using the
139information returned by the FIEMAP interface while the filesystem
140is mounted. In other words, user applications may only read the
141extent data via I/O to the block device while the filesystem is
142unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is
143clear; user applications must not try reading or writing to the
144filesystem via the block device under any other circumstances.
145
146* FIEMAP_EXTENT_DATA_ENCRYPTED
147 - This will also set FIEMAP_EXTENT_ENCODED
148The data in this extent has been encrypted by the file system.
149
150* FIEMAP_EXTENT_NOT_ALIGNED
151Extent offsets and length are not guaranteed to be block aligned.
152
153* FIEMAP_EXTENT_DATA_INLINE
154 This will also set FIEMAP_EXTENT_NOT_ALIGNED
155Data is located within a meta data block.
156
157* FIEMAP_EXTENT_DATA_TAIL
158 This will also set FIEMAP_EXTENT_NOT_ALIGNED
159Data is packed into a block with data from other files.
160
161* FIEMAP_EXTENT_UNWRITTEN
162Unwritten extent - the extent is allocated but its data has not been
163initialized. This indicates the extent's data will be all zero if read
164through the filesystem but the contents are undefined if read directly from
165the device.
166
167* FIEMAP_EXTENT_MERGED
168This will be set when a file does not support extents, i.e., it uses a block
169based addressing scheme. Since returning an extent for each block back to
170userspace would be highly inefficient, the kernel will try to merge most
171adjacent blocks into 'extents'.
172
173
174VFS -> File System Implementation
175---------------------------------
176
177File systems wishing to support fiemap must implement a ->fiemap callback on
178their inode_operations structure. The fs ->fiemap call is responsible for
179defining its set of supported fiemap flags, and calling a helper function on
180each discovered extent:
181
182struct inode_operations {
183 ...
184
185 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
186 u64 len);
187
188->fiemap is passed struct fiemap_extent_info which describes the
189fiemap request:
190
191struct fiemap_extent_info {
192 unsigned int fi_flags; /* Flags as passed from user */
193 unsigned int fi_extents_mapped; /* Number of mapped extents */
194 unsigned int fi_extents_max; /* Size of fiemap_extent array */
195 struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */
196};
197
198It is intended that the file system should not need to access any of this
199structure directly.
200
201
202Flag checking should be done at the beginning of the ->fiemap callback via the
203fiemap_check_flags() helper:
204
205int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
206
207The struct fieinfo should be passed in as received from ioctl_fiemap(). The
208set of fiemap flags which the fs understands should be passed via fs_flags. If
209fiemap_check_flags finds invalid user flags, it will place the bad values in
210fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR from
211fiemap_check_flags(), it should immediately exit, returning that error back to
212ioctl_fiemap().
213
214
215For each extent in the request range, the file system should call
216the helper function, fiemap_fill_next_extent():
217
218int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
219 u64 phys, u64 len, u32 flags, u32 dev);
220
221fiemap_fill_next_extent() will use the passed values to populate the
222next free extent in the fm_extents array. 'General' extent flags will
223automatically be set from specific flags on behalf of the calling file
224system so that the userspace API is not broken.
225
226fiemap_fill_next_extent() returns 0 on success, and 1 when the
227user-supplied fm_extents array is full. If an error is encountered
228while copying the extent to user memory, -EFAULT will be returned.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index f566ad9bcb7b..d831d24d2a6c 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -923,45 +923,44 @@ CPUs.
923The "procs_blocked" line gives the number of processes currently blocked, 923The "procs_blocked" line gives the number of processes currently blocked,
924waiting for I/O to complete. 924waiting for I/O to complete.
925 925
926
9261.9 Ext4 file system parameters 9271.9 Ext4 file system parameters
927------------------------------ 928------------------------------
928Ext4 file system have one directory per partition under /proc/fs/ext4/ 929
929# ls /proc/fs/ext4/hdc/ 930Information about mounted ext4 file systems can be found in
930group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req 931/proc/fs/ext4. Each mounted filesystem will have a directory in
931stats stream_req 932/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or
932 933/proc/fs/ext4/dm-0). The files in each per-device directory are shown
933mb_groups: 934in Table 1-10, below.
934This file gives the details of multiblock allocator buddy cache of free blocks 935
935 936Table 1-10: Files in /proc/fs/ext4/<devname>
936mb_history: 937..............................................................................
937Multiblock allocation history. 938 File Content
938 939 mb_groups details of multiblock allocator buddy cache of free blocks
939stats: 940 mb_history multiblock allocation history
940This file indicate whether the multiblock allocator should start collecting 941 stats controls whether the multiblock allocator should start
941statistics. The statistics are shown during unmount 942 collecting statistics, which are shown during the unmount
942 943 group_prealloc the multiblock allocator will round up allocation
943group_prealloc: 944 requests to a multiple of this tuning parameter if the
944The multiblock allocator normalize the block allocation request to 945 stripe size is not set in the ext4 superblock
945group_prealloc filesystem blocks if we don't have strip value set. 946 max_to_scan The maximum number of extents the multiblock allocator
946The stripe value can be specified at mount time or during mke2fs. 947 will search to find the best extent
947 948 min_to_scan The minimum number of extents the multiblock allocator
948max_to_scan: 949 will search to find the best extent
949How long multiblock allocator can look for a best extent (in found extents) 950 order2_req Tuning parameter which controls the minimum size for
950 951 requests (as a power of 2) where the buddy cache is
951min_to_scan: 952 used
952How long multiblock allocator must look for a best extent 953 stream_req Files which have fewer blocks than this tunable
953 954 parameter will have their blocks allocated out of a
954order2_req: 955 block group specific preallocation pool, so that small
955Multiblock allocator use 2^N search using buddies only for requests greater 956 files are packed closely together. Each large file
956than or equal to order2_req. The request size is specfied in file system 957 will have its blocks allocated out of its own unique
957blocks. A value of 2 indicate only if the requests are greater than or equal 958 preallocation pool.
958to 4 blocks. 959inode_readahead Tuning parameter which controls the maximum number of
959 960 inode table blocks that ext4's inode table readahead
960stream_req: 961 algorithm will pre-read into the buffer cache
961Files smaller than stream_req are served by the stream allocator, whose 962..............................................................................
962purpose is to pack requests as close each to other as possible to 963
963produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16
964filesystem block size will use group based preallocation.
965 964
966------------------------------------------------------------------------------ 965------------------------------------------------------------------------------
967Summary 966Summary
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2ca9c8f8c8d8..25efbaf1f59b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1428,6 +1428,12 @@ and is between 256 and 4096 characters. It is defined in the file
1428 1428
1429 nolapic_timer [X86-32,APIC] Do not use the local APIC timer. 1429 nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
1430 1430
1431 nox2apic [X86-64,APIC] Do not enable x2APIC mode.
1432
1433 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
1434 default x2apic cluster mode on platforms
1435 supporting x2apic.
1436
1431 noltlbs [PPC] Do not use large page/tlb entries for kernel 1437 noltlbs [PPC] Do not use large page/tlb entries for kernel
1432 lowmem mapping on PPC40x. 1438 lowmem mapping on PPC40x.
1433 1439
diff --git a/MAINTAINERS b/MAINTAINERS
index 68781ed2b734..587f418ed00d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1659,9 +1659,10 @@ L: linux-ext4@vger.kernel.org
1659S: Maintained 1659S: Maintained
1660 1660
1661EXT4 FILE SYSTEM 1661EXT4 FILE SYSTEM
1662P: Stephen Tweedie, Andrew Morton 1662P: Theodore Ts'o
1663M: sct@redhat.com, akpm@linux-foundation.org, adilger@sun.com 1663M: tytso@mit.edu, adilger@sun.com
1664L: linux-ext4@vger.kernel.org 1664L: linux-ext4@vger.kernel.org
1665W: http://ext4.wiki.kernel.org
1665S: Maintained 1666S: Maintained
1666 1667
1667F71805F HARDWARE MONITORING DRIVER 1668F71805F HARDWARE MONITORING DRIVER
diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c
index c128eb897008..32f5fbe2d0d2 100644
--- a/arch/um/sys-x86_64/syscall_table.c
+++ b/arch/um/sys-x86_64/syscall_table.c
@@ -41,12 +41,12 @@
41#define stub_rt_sigreturn sys_rt_sigreturn 41#define stub_rt_sigreturn sys_rt_sigreturn
42 42
43#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; 43#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
44#undef _ASM_X86_64_UNISTD_H_ 44#undef ASM_X86__UNISTD_64_H
45#include <asm-x86/unistd_64.h> 45#include <asm-x86/unistd_64.h>
46 46
47#undef __SYSCALL 47#undef __SYSCALL
48#define __SYSCALL(nr, sym) [ nr ] = sym, 48#define __SYSCALL(nr, sym) [ nr ] = sym,
49#undef _ASM_X86_64_UNISTD_H_ 49#undef ASM_X86__UNISTD_64_H
50 50
51typedef void (*sys_call_ptr_t)(void); 51typedef void (*sys_call_ptr_t)(void);
52 52
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0d7cdbbfc1ee..44d4f2130d01 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1689,6 +1689,14 @@ config DMAR_FLOPPY_WA
1689 workaround will setup a 1:1 mapping for the first 1689 workaround will setup a 1:1 mapping for the first
1690 16M to make floppy (an ISA device) work. 1690 16M to make floppy (an ISA device) work.
1691 1691
1692config INTR_REMAP
1693 bool "Support for Interrupt Remapping (EXPERIMENTAL)"
1694 depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
1695 help
1696 Supports Interrupt remapping for IO-APIC and MSI devices.
1697 To use x2apic mode in the CPU's which support x2APIC enhancements or
1698 to support platforms with CPU's having > 8 bit APIC ID, say Y.
1699
1692source "drivers/pci/pcie/Kconfig" 1700source "drivers/pci/pcie/Kconfig"
1693 1701
1694source "drivers/pci/Kconfig" 1702source "drivers/pci/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 60a85768cfcb..f8843c3ae77d 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -419,6 +419,60 @@ config X86_DEBUGCTLMSR
419 def_bool y 419 def_bool y
420 depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) 420 depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
421 421
422menuconfig PROCESSOR_SELECT
423 default y
424 bool "Supported processor vendors" if EMBEDDED
425 help
426 This lets you choose what x86 vendor support code your kernel
427 will include.
428
429config CPU_SUP_INTEL
430 default y
431 bool "Support Intel processors" if PROCESSOR_SELECT
432 help
433 This enables extended support for Intel processors
434
435config CPU_SUP_CYRIX_32
436 default y
437 bool "Support Cyrix processors" if PROCESSOR_SELECT
438 depends on !64BIT
439 help
440 This enables extended support for Cyrix processors
441
442config CPU_SUP_AMD
443 default y
444 bool "Support AMD processors" if PROCESSOR_SELECT
445 help
446 This enables extended support for AMD processors
447
448config CPU_SUP_CENTAUR_32
449 default y
450 bool "Support Centaur processors" if PROCESSOR_SELECT
451 depends on !64BIT
452 help
453 This enables extended support for Centaur processors
454
455config CPU_SUP_CENTAUR_64
456 default y
457 bool "Support Centaur processors" if PROCESSOR_SELECT
458 depends on 64BIT
459 help
460 This enables extended support for Centaur processors
461
462config CPU_SUP_TRANSMETA_32
463 default y
464 bool "Support Transmeta processors" if PROCESSOR_SELECT
465 depends on !64BIT
466 help
467 This enables extended support for Transmeta processors
468
469config CPU_SUP_UMC_32
470 default y
471 bool "Support UMC processors" if PROCESSOR_SELECT
472 depends on !64BIT
473 help
474 This enables extended support for UMC processors
475
422config X86_DS 476config X86_DS
423 bool "Debug Store support" 477 bool "Debug Store support"
424 default y 478 default y
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 75298fe2edca..6ec6bb6e9957 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -59,17 +59,18 @@ int validate_cpu(void)
59 u32 e = err_flags[i]; 59 u32 e = err_flags[i];
60 60
61 for (j = 0; j < 32; j++) { 61 for (j = 0; j < 32; j++) {
62 int n = (i << 5)+j; 62 if (msg_strs[0] < i ||
63 if (*msg_strs < n) { 63 (msg_strs[0] == i && msg_strs[1] < j)) {
64 /* Skip to the next string */ 64 /* Skip to the next string */
65 do { 65 msg_strs += 2;
66 msg_strs++; 66 while (*msg_strs++)
67 } while (*msg_strs); 67 ;
68 msg_strs++;
69 } 68 }
70 if (e & 1) { 69 if (e & 1) {
71 if (*msg_strs == n && msg_strs[1]) 70 if (msg_strs[0] == i &&
72 printf("%s ", msg_strs+1); 71 msg_strs[1] == j &&
72 msg_strs[2])
73 printf("%s ", msg_strs+2);
73 else 74 else
74 printf("%d:%d ", i, j); 75 printf("%d:%d ", i, j);
75 } 76 }
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index bbe76953bae9..8ef60f20b371 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -15,33 +15,33 @@
15 15
16#include <stdio.h> 16#include <stdio.h>
17 17
18#include "../kernel/cpu/feature_names.c" 18#include "../kernel/cpu/capflags.c"
19
20#if NCAPFLAGS > 8
21# error "Need to adjust the boot code handling of CPUID strings"
22#endif
23 19
24int main(void) 20int main(void)
25{ 21{
26 int i; 22 int i, j;
27 const char *str; 23 const char *str;
28 24
29 printf("static const char x86_cap_strs[] = \n"); 25 printf("static const char x86_cap_strs[] = \n");
30 26
31 for (i = 0; i < NCAPINTS*32; i++) { 27 for (i = 0; i < NCAPINTS; i++) {
32 str = x86_cap_flags[i]; 28 for (j = 0; j < 32; j++) {
33 29 str = x86_cap_flags[i*32+j];
34 if (i == NCAPINTS*32-1) { 30
35 /* The last entry must be unconditional; this 31 if (i == NCAPINTS-1 && j == 31) {
36 also consumes the compiler-added null character */ 32 /* The last entry must be unconditional; this
37 if (!str) 33 also consumes the compiler-added null
38 str = ""; 34 character */
39 printf("\t\"\\x%02x\"\"%s\"\n", i, str); 35 if (!str)
40 } else if (str) { 36 str = "";
41 printf("#if REQUIRED_MASK%d & (1 << %d)\n" 37 printf("\t\"\\x%02x\\x%02x\"\"%s\"\n",
42 "\t\"\\x%02x\"\"%s\\0\"\n" 38 i, j, str);
43 "#endif\n", 39 } else if (str) {
44 i >> 5, i & 31, i, str); 40 printf("#if REQUIRED_MASK%d & (1 << %d)\n"
41 "\t\"\\x%02x\\x%02x\"\"%s\\0\"\n"
42 "#endif\n",
43 i, j, i, j, str);
44 }
45 } 45 }
46 } 46 }
47 printf("\t;\n"); 47 printf("\t;\n");
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index f1a2ac777faf..8d64c1bc8474 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -179,9 +179,10 @@ struct sigframe
179 u32 pretcode; 179 u32 pretcode;
180 int sig; 180 int sig;
181 struct sigcontext_ia32 sc; 181 struct sigcontext_ia32 sc;
182 struct _fpstate_ia32 fpstate; 182 struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */
183 unsigned int extramask[_COMPAT_NSIG_WORDS-1]; 183 unsigned int extramask[_COMPAT_NSIG_WORDS-1];
184 char retcode[8]; 184 char retcode[8];
185 /* fp state follows here */
185}; 186};
186 187
187struct rt_sigframe 188struct rt_sigframe
@@ -192,8 +193,8 @@ struct rt_sigframe
192 u32 puc; 193 u32 puc;
193 compat_siginfo_t info; 194 compat_siginfo_t info;
194 struct ucontext_ia32 uc; 195 struct ucontext_ia32 uc;
195 struct _fpstate_ia32 fpstate;
196 char retcode[8]; 196 char retcode[8];
197 /* fp state follows here */
197}; 198};
198 199
199#define COPY(x) { \ 200#define COPY(x) { \
@@ -215,7 +216,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
215 unsigned int *peax) 216 unsigned int *peax)
216{ 217{
217 unsigned int tmpflags, gs, oldgs, err = 0; 218 unsigned int tmpflags, gs, oldgs, err = 0;
218 struct _fpstate_ia32 __user *buf; 219 void __user *buf;
219 u32 tmp; 220 u32 tmp;
220 221
221 /* Always make any pending restarted system calls return -EINTR */ 222 /* Always make any pending restarted system calls return -EINTR */
@@ -259,26 +260,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
259 260
260 err |= __get_user(tmp, &sc->fpstate); 261 err |= __get_user(tmp, &sc->fpstate);
261 buf = compat_ptr(tmp); 262 buf = compat_ptr(tmp);
262 if (buf) { 263 err |= restore_i387_xstate_ia32(buf);
263 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
264 goto badframe;
265 err |= restore_i387_ia32(buf);
266 } else {
267 struct task_struct *me = current;
268
269 if (used_math()) {
270 clear_fpu(me);
271 clear_used_math();
272 }
273 }
274 264
275 err |= __get_user(tmp, &sc->ax); 265 err |= __get_user(tmp, &sc->ax);
276 *peax = tmp; 266 *peax = tmp;
277 267
278 return err; 268 return err;
279
280badframe:
281 return 1;
282} 269}
283 270
284asmlinkage long sys32_sigreturn(struct pt_regs *regs) 271asmlinkage long sys32_sigreturn(struct pt_regs *regs)
@@ -350,7 +337,7 @@ badframe:
350 */ 337 */
351 338
352static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, 339static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
353 struct _fpstate_ia32 __user *fpstate, 340 void __user *fpstate,
354 struct pt_regs *regs, unsigned int mask) 341 struct pt_regs *regs, unsigned int mask)
355{ 342{
356 int tmp, err = 0; 343 int tmp, err = 0;
@@ -380,7 +367,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
380 err |= __put_user((u32)regs->flags, &sc->flags); 367 err |= __put_user((u32)regs->flags, &sc->flags);
381 err |= __put_user((u32)regs->sp, &sc->sp_at_signal); 368 err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
382 369
383 tmp = save_i387_ia32(fpstate); 370 tmp = save_i387_xstate_ia32(fpstate);
384 if (tmp < 0) 371 if (tmp < 0)
385 err = -EFAULT; 372 err = -EFAULT;
386 else { 373 else {
@@ -401,7 +388,8 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
401 * Determine which stack to use.. 388 * Determine which stack to use..
402 */ 389 */
403static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, 390static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
404 size_t frame_size) 391 size_t frame_size,
392 void **fpstate)
405{ 393{
406 unsigned long sp; 394 unsigned long sp;
407 395
@@ -420,6 +408,11 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
420 ka->sa.sa_restorer) 408 ka->sa.sa_restorer)
421 sp = (unsigned long) ka->sa.sa_restorer; 409 sp = (unsigned long) ka->sa.sa_restorer;
422 410
411 if (used_math()) {
412 sp = sp - sig_xstate_ia32_size;
413 *fpstate = (struct _fpstate_ia32 *) sp;
414 }
415
423 sp -= frame_size; 416 sp -= frame_size;
424 /* Align the stack pointer according to the i386 ABI, 417 /* Align the stack pointer according to the i386 ABI,
425 * i.e. so that on function entry ((sp + 4) & 15) == 0. */ 418 * i.e. so that on function entry ((sp + 4) & 15) == 0. */
@@ -433,6 +426,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
433 struct sigframe __user *frame; 426 struct sigframe __user *frame;
434 void __user *restorer; 427 void __user *restorer;
435 int err = 0; 428 int err = 0;
429 void __user *fpstate = NULL;
436 430
437 /* copy_to_user optimizes that into a single 8 byte store */ 431 /* copy_to_user optimizes that into a single 8 byte store */
438 static const struct { 432 static const struct {
@@ -447,7 +441,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
447 0, 441 0,
448 }; 442 };
449 443
450 frame = get_sigframe(ka, regs, sizeof(*frame)); 444 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
451 445
452 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 446 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
453 goto give_sigsegv; 447 goto give_sigsegv;
@@ -456,8 +450,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
456 if (err) 450 if (err)
457 goto give_sigsegv; 451 goto give_sigsegv;
458 452
459 err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs, 453 err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
460 set->sig[0]);
461 if (err) 454 if (err)
462 goto give_sigsegv; 455 goto give_sigsegv;
463 456
@@ -521,6 +514,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
521 struct rt_sigframe __user *frame; 514 struct rt_sigframe __user *frame;
522 void __user *restorer; 515 void __user *restorer;
523 int err = 0; 516 int err = 0;
517 void __user *fpstate = NULL;
524 518
525 /* __copy_to_user optimizes that into a single 8 byte store */ 519 /* __copy_to_user optimizes that into a single 8 byte store */
526 static const struct { 520 static const struct {
@@ -536,7 +530,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
536 0, 530 0,
537 }; 531 };
538 532
539 frame = get_sigframe(ka, regs, sizeof(*frame)); 533 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
540 534
541 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 535 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
542 goto give_sigsegv; 536 goto give_sigsegv;
@@ -549,13 +543,16 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
549 goto give_sigsegv; 543 goto give_sigsegv;
550 544
551 /* Create the ucontext. */ 545 /* Create the ucontext. */
552 err |= __put_user(0, &frame->uc.uc_flags); 546 if (cpu_has_xsave)
547 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
548 else
549 err |= __put_user(0, &frame->uc.uc_flags);
553 err |= __put_user(0, &frame->uc.uc_link); 550 err |= __put_user(0, &frame->uc.uc_link);
554 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 551 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
555 err |= __put_user(sas_ss_flags(regs->sp), 552 err |= __put_user(sas_ss_flags(regs->sp),
556 &frame->uc.uc_stack.ss_flags); 553 &frame->uc.uc_stack.ss_flags);
557 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 554 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
558 err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, 555 err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
559 regs, set->sig[0]); 556 regs, set->sig[0]);
560 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 557 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
561 if (err) 558 if (err)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..c9be69fedb70 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o
38 38
39obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 39obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
40obj-y += process.o 40obj-y += process.o
41obj-y += i387.o 41obj-y += i387.o xsave.o
42obj-y += ptrace.o 42obj-y += ptrace.o
43obj-y += ds.o 43obj-y += ds.o
44obj-$(CONFIG_X86_32) += tls.o 44obj-$(CONFIG_X86_32) += tls.o
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
71obj-$(CONFIG_X86_NUMAQ) += numaq_32.o 71obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
72obj-$(CONFIG_X86_ES7000) += es7000_32.o
72obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o 73obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
73obj-y += vsmp_64.o 74obj-y += vsmp_64.o
74obj-$(CONFIG_KPROBES) += kprobes.o 75obj-$(CONFIG_KPROBES) += kprobes.o
@@ -104,6 +105,8 @@ obj-$(CONFIG_OLPC) += olpc.o
104ifeq ($(CONFIG_X86_64),y) 105ifeq ($(CONFIG_X86_64),y)
105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 106 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
106 obj-y += bios_uv.o 107 obj-y += bios_uv.o
108 obj-y += genx2apic_cluster.o
109 obj-y += genx2apic_phys.o
107 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 110 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
108 obj-$(CONFIG_AUDIT) += audit_64.o 111 obj-$(CONFIG_AUDIT) += audit_64.o
109 112
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7d40ef7b36e3..c2ac1b4515a0 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -252,10 +252,8 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
252 return; 252 return;
253 } 253 }
254 254
255#ifdef CONFIG_X86_32
256 if (boot_cpu_physical_apicid != -1U) 255 if (boot_cpu_physical_apicid != -1U)
257 ver = apic_version[boot_cpu_physical_apicid]; 256 ver = apic_version[boot_cpu_physical_apicid];
258#endif
259 257
260 generic_processor_info(id, ver); 258 generic_processor_info(id, ver);
261} 259}
@@ -774,11 +772,9 @@ static void __init acpi_register_lapic_address(unsigned long address)
774 772
775 set_fixmap_nocache(FIX_APIC_BASE, address); 773 set_fixmap_nocache(FIX_APIC_BASE, address);
776 if (boot_cpu_physical_apicid == -1U) { 774 if (boot_cpu_physical_apicid == -1U) {
777 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 775 boot_cpu_physical_apicid = read_apic_id();
778#ifdef CONFIG_X86_32
779 apic_version[boot_cpu_physical_apicid] = 776 apic_version[boot_cpu_physical_apicid] =
780 GET_APIC_VERSION(apic_read(APIC_LVR)); 777 GET_APIC_VERSION(apic_read(APIC_LVR));
781#endif
782 } 778 }
783} 779}
784 780
@@ -1350,7 +1346,9 @@ static void __init acpi_process_madt(void)
1350 acpi_ioapic = 1; 1346 acpi_ioapic = 1;
1351 1347
1352 smp_found_config = 1; 1348 smp_found_config = 1;
1349#ifdef CONFIG_X86_32
1353 setup_apic_routing(); 1350 setup_apic_routing();
1351#endif
1354 } 1352 }
1355 } 1353 }
1356 if (error == -EINVAL) { 1354 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f88bd0d982b0..a91c57cb666a 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -60,10 +60,8 @@ unsigned long mp_lapic_addr;
60static int force_enable_local_apic; 60static int force_enable_local_apic;
61int disable_apic; 61int disable_apic;
62 62
63/* Local APIC timer verification ok */
64static int local_apic_timer_verify_ok;
65/* Disable local APIC timer from the kernel commandline or via dmi quirk */ 63/* Disable local APIC timer from the kernel commandline or via dmi quirk */
66static int local_apic_timer_disabled; 64static int disable_apic_timer __cpuinitdata;
67/* Local APIC timer works in C2 */ 65/* Local APIC timer works in C2 */
68int local_apic_timer_c2_ok; 66int local_apic_timer_c2_ok;
69EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); 67EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -130,7 +128,11 @@ static inline int lapic_get_version(void)
130 */ 128 */
131static inline int lapic_is_integrated(void) 129static inline int lapic_is_integrated(void)
132{ 130{
131#ifdef CONFIG_X86_64
132 return 1;
133#else
133 return APIC_INTEGRATED(lapic_get_version()); 134 return APIC_INTEGRATED(lapic_get_version());
135#endif
134} 136}
135 137
136/* 138/*
@@ -145,13 +147,18 @@ static int modern_apic(void)
145 return lapic_get_version() >= 0x14; 147 return lapic_get_version() >= 0x14;
146} 148}
147 149
148void apic_wait_icr_idle(void) 150/*
151 * Paravirt kernels also might be using these below ops. So we still
152 * use generic apic_read()/apic_write(), which might be pointing to different
153 * ops in PARAVIRT case.
154 */
155void xapic_wait_icr_idle(void)
149{ 156{
150 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 157 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
151 cpu_relax(); 158 cpu_relax();
152} 159}
153 160
154u32 safe_apic_wait_icr_idle(void) 161u32 safe_xapic_wait_icr_idle(void)
155{ 162{
156 u32 send_status; 163 u32 send_status;
157 int timeout; 164 int timeout;
@@ -167,16 +174,48 @@ u32 safe_apic_wait_icr_idle(void)
167 return send_status; 174 return send_status;
168} 175}
169 176
177void xapic_icr_write(u32 low, u32 id)
178{
179 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
180 apic_write(APIC_ICR, low);
181}
182
183u64 xapic_icr_read(void)
184{
185 u32 icr1, icr2;
186
187 icr2 = apic_read(APIC_ICR2);
188 icr1 = apic_read(APIC_ICR);
189
190 return icr1 | ((u64)icr2 << 32);
191}
192
193static struct apic_ops xapic_ops = {
194 .read = native_apic_mem_read,
195 .write = native_apic_mem_write,
196 .icr_read = xapic_icr_read,
197 .icr_write = xapic_icr_write,
198 .wait_icr_idle = xapic_wait_icr_idle,
199 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
200};
201
202struct apic_ops __read_mostly *apic_ops = &xapic_ops;
203EXPORT_SYMBOL_GPL(apic_ops);
204
170/** 205/**
171 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 206 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
172 */ 207 */
173void __cpuinit enable_NMI_through_LVT0(void) 208void __cpuinit enable_NMI_through_LVT0(void)
174{ 209{
175 unsigned int v = APIC_DM_NMI; 210 unsigned int v;
176 211
177 /* Level triggered for 82489DX */ 212 /* unmask and set to NMI */
213 v = APIC_DM_NMI;
214
215 /* Level triggered for 82489DX (32bit mode) */
178 if (!lapic_is_integrated()) 216 if (!lapic_is_integrated())
179 v |= APIC_LVT_LEVEL_TRIGGER; 217 v |= APIC_LVT_LEVEL_TRIGGER;
218
180 apic_write(APIC_LVT0, v); 219 apic_write(APIC_LVT0, v);
181} 220}
182 221
@@ -193,9 +232,13 @@ int get_physical_broadcast(void)
193 */ 232 */
194int lapic_get_maxlvt(void) 233int lapic_get_maxlvt(void)
195{ 234{
196 unsigned int v = apic_read(APIC_LVR); 235 unsigned int v;
197 236
198 /* 82489DXs do not report # of LVT entries. */ 237 v = apic_read(APIC_LVR);
238 /*
239 * - we always have APIC integrated on 64bit mode
240 * - 82489DXs do not report # of LVT entries
241 */
199 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; 242 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
200} 243}
201 244
@@ -203,8 +246,12 @@ int lapic_get_maxlvt(void)
203 * Local APIC timer 246 * Local APIC timer
204 */ 247 */
205 248
206/* Clock divisor is set to 16 */ 249/* Clock divisor */
250#ifdef CONFG_X86_64
251#define APIC_DIVISOR 1
252#else
207#define APIC_DIVISOR 16 253#define APIC_DIVISOR 16
254#endif
208 255
209/* 256/*
210 * This function sets up the local APIC timer, with a timeout of 257 * This function sets up the local APIC timer, with a timeout of
@@ -212,6 +259,9 @@ int lapic_get_maxlvt(void)
212 * this function twice on the boot CPU, once with a bogus timeout 259 * this function twice on the boot CPU, once with a bogus timeout
213 * value, second time for real. The other (noncalibrating) CPUs 260 * value, second time for real. The other (noncalibrating) CPUs
214 * call this function only once, with the real, calibrated value. 261 * call this function only once, with the real, calibrated value.
262 *
263 * We do reads before writes even if unnecessary, to get around the
264 * P5 APIC double write bug.
215 */ 265 */
216static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) 266static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
217{ 267{
@@ -233,14 +283,44 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
233 */ 283 */
234 tmp_value = apic_read(APIC_TDCR); 284 tmp_value = apic_read(APIC_TDCR);
235 apic_write(APIC_TDCR, 285 apic_write(APIC_TDCR,
236 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 286 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
237 APIC_TDR_DIV_16); 287 APIC_TDR_DIV_16);
238 288
239 if (!oneshot) 289 if (!oneshot)
240 apic_write(APIC_TMICT, clocks / APIC_DIVISOR); 290 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
241} 291}
242 292
243/* 293/*
294 * Setup extended LVT, AMD specific (K8, family 10h)
295 *
296 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
297 * MCE interrupts are supported. Thus MCE offset must be set to 0.
298 */
299
300#define APIC_EILVT_LVTOFF_MCE 0
301#define APIC_EILVT_LVTOFF_IBS 1
302
303static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
304{
305 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
306 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
307
308 apic_write(reg, v);
309}
310
311u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
312{
313 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
314 return APIC_EILVT_LVTOFF_MCE;
315}
316
317u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
318{
319 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
320 return APIC_EILVT_LVTOFF_IBS;
321}
322
323/*
244 * Program the next event, relative to now 324 * Program the next event, relative to now
245 */ 325 */
246static int lapic_next_event(unsigned long delta, 326static int lapic_next_event(unsigned long delta,
@@ -259,8 +339,8 @@ static void lapic_timer_setup(enum clock_event_mode mode,
259 unsigned long flags; 339 unsigned long flags;
260 unsigned int v; 340 unsigned int v;
261 341
262 /* Lapic used for broadcast ? */ 342 /* Lapic used as dummy for broadcast ? */
263 if (!local_apic_timer_verify_ok) 343 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
264 return; 344 return;
265 345
266 local_irq_save(flags); 346 local_irq_save(flags);
@@ -473,7 +553,7 @@ static int __init calibrate_APIC_clock(void)
473 return -1; 553 return -1;
474 } 554 }
475 555
476 local_apic_timer_verify_ok = 1; 556 levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
477 557
478 /* We trust the pm timer based calibration */ 558 /* We trust the pm timer based calibration */
479 if (!pm_referenced) { 559 if (!pm_referenced) {
@@ -507,11 +587,11 @@ static int __init calibrate_APIC_clock(void)
507 if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) 587 if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
508 apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); 588 apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
509 else 589 else
510 local_apic_timer_verify_ok = 0; 590 levt->features |= CLOCK_EVT_FEAT_DUMMY;
511 } else 591 } else
512 local_irq_enable(); 592 local_irq_enable();
513 593
514 if (!local_apic_timer_verify_ok) { 594 if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
515 printk(KERN_WARNING 595 printk(KERN_WARNING
516 "APIC timer disabled due to verification failure.\n"); 596 "APIC timer disabled due to verification failure.\n");
517 return -1; 597 return -1;
@@ -533,7 +613,8 @@ void __init setup_boot_APIC_clock(void)
533 * timer as a dummy clock event source on SMP systems, so the 613 * timer as a dummy clock event source on SMP systems, so the
534 * broadcast mechanism is used. On UP systems simply ignore it. 614 * broadcast mechanism is used. On UP systems simply ignore it.
535 */ 615 */
536 if (local_apic_timer_disabled) { 616 if (disable_apic_timer) {
617 printk(KERN_INFO "Disabling APIC timer\n");
537 /* No broadcast on UP ! */ 618 /* No broadcast on UP ! */
538 if (num_possible_cpus() > 1) { 619 if (num_possible_cpus() > 1) {
539 lapic_clockevent.mult = 1; 620 lapic_clockevent.mult = 1;
@@ -602,7 +683,11 @@ static void local_apic_timer_interrupt(void)
602 /* 683 /*
603 * the NMI deadlock-detector uses this. 684 * the NMI deadlock-detector uses this.
604 */ 685 */
686#ifdef CONFIG_X86_64
687 add_pda(apic_timer_irqs, 1);
688#else
605 per_cpu(irq_stat, cpu).apic_timer_irqs++; 689 per_cpu(irq_stat, cpu).apic_timer_irqs++;
690#endif
606 691
607 evt->event_handler(evt); 692 evt->event_handler(evt);
608} 693}
@@ -642,35 +727,6 @@ int setup_profiling_timer(unsigned int multiplier)
642} 727}
643 728
644/* 729/*
645 * Setup extended LVT, AMD specific (K8, family 10h)
646 *
647 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
648 * MCE interrupts are supported. Thus MCE offset must be set to 0.
649 */
650
651#define APIC_EILVT_LVTOFF_MCE 0
652#define APIC_EILVT_LVTOFF_IBS 1
653
654static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
655{
656 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
657 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
658 apic_write(reg, v);
659}
660
661u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
662{
663 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
664 return APIC_EILVT_LVTOFF_MCE;
665}
666
667u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
668{
669 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
670 return APIC_EILVT_LVTOFF_IBS;
671}
672
673/*
674 * Local APIC start and shutdown 730 * Local APIC start and shutdown
675 */ 731 */
676 732
@@ -715,7 +771,7 @@ void clear_local_APIC(void)
715 } 771 }
716 772
717 /* lets not touch this if we didn't frob it */ 773 /* lets not touch this if we didn't frob it */
718#ifdef CONFIG_X86_MCE_P4THERMAL 774#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
719 if (maxlvt >= 5) { 775 if (maxlvt >= 5) {
720 v = apic_read(APIC_LVTTHMR); 776 v = apic_read(APIC_LVTTHMR);
721 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); 777 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
@@ -732,10 +788,6 @@ void clear_local_APIC(void)
732 if (maxlvt >= 4) 788 if (maxlvt >= 4)
733 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 789 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
734 790
735#ifdef CONFIG_X86_MCE_P4THERMAL
736 if (maxlvt >= 5)
737 apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
738#endif
739 /* Integrated APIC (!82489DX) ? */ 791 /* Integrated APIC (!82489DX) ? */
740 if (lapic_is_integrated()) { 792 if (lapic_is_integrated()) {
741 if (maxlvt > 3) 793 if (maxlvt > 3)
@@ -750,7 +802,7 @@ void clear_local_APIC(void)
750 */ 802 */
751void disable_local_APIC(void) 803void disable_local_APIC(void)
752{ 804{
753 unsigned long value; 805 unsigned int value;
754 806
755 clear_local_APIC(); 807 clear_local_APIC();
756 808
@@ -762,6 +814,7 @@ void disable_local_APIC(void)
762 value &= ~APIC_SPIV_APIC_ENABLED; 814 value &= ~APIC_SPIV_APIC_ENABLED;
763 apic_write(APIC_SPIV, value); 815 apic_write(APIC_SPIV, value);
764 816
817#ifdef CONFIG_X86_32
765 /* 818 /*
766 * When LAPIC was disabled by the BIOS and enabled by the kernel, 819 * When LAPIC was disabled by the BIOS and enabled by the kernel,
767 * restore the disabled state. 820 * restore the disabled state.
@@ -773,6 +826,7 @@ void disable_local_APIC(void)
773 l &= ~MSR_IA32_APICBASE_ENABLE; 826 l &= ~MSR_IA32_APICBASE_ENABLE;
774 wrmsr(MSR_IA32_APICBASE, l, h); 827 wrmsr(MSR_IA32_APICBASE, l, h);
775 } 828 }
829#endif
776} 830}
777 831
778/* 832/*
@@ -789,11 +843,15 @@ void lapic_shutdown(void)
789 return; 843 return;
790 844
791 local_irq_save(flags); 845 local_irq_save(flags);
792 clear_local_APIC();
793 846
794 if (enabled_via_apicbase) 847#ifdef CONFIG_X86_32
848 if (!enabled_via_apicbase)
849 clear_local_APIC();
850 else
851#endif
795 disable_local_APIC(); 852 disable_local_APIC();
796 853
854
797 local_irq_restore(flags); 855 local_irq_restore(flags);
798} 856}
799 857
@@ -838,6 +896,12 @@ int __init verify_local_APIC(void)
838 */ 896 */
839 reg0 = apic_read(APIC_ID); 897 reg0 = apic_read(APIC_ID);
840 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 898 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
899 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
900 reg1 = apic_read(APIC_ID);
901 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
902 apic_write(APIC_ID, reg0);
903 if (reg1 != (reg0 ^ APIC_ID_MASK))
904 return 0;
841 905
842 /* 906 /*
843 * The next two are just to see if we have sane values. 907 * The next two are just to see if we have sane values.
@@ -863,14 +927,15 @@ void __init sync_Arb_IDs(void)
863 */ 927 */
864 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 928 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
865 return; 929 return;
930
866 /* 931 /*
867 * Wait for idle. 932 * Wait for idle.
868 */ 933 */
869 apic_wait_icr_idle(); 934 apic_wait_icr_idle();
870 935
871 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); 936 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
872 apic_write(APIC_ICR, 937 apic_write(APIC_ICR, APIC_DEST_ALLINC |
873 APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); 938 APIC_INT_LEVELTRIG | APIC_DM_INIT);
874} 939}
875 940
876/* 941/*
@@ -878,7 +943,7 @@ void __init sync_Arb_IDs(void)
878 */ 943 */
879void __init init_bsp_APIC(void) 944void __init init_bsp_APIC(void)
880{ 945{
881 unsigned long value; 946 unsigned int value;
882 947
883 /* 948 /*
884 * Don't do the setup now if we have a SMP BIOS as the 949 * Don't do the setup now if we have a SMP BIOS as the
@@ -899,11 +964,13 @@ void __init init_bsp_APIC(void)
899 value &= ~APIC_VECTOR_MASK; 964 value &= ~APIC_VECTOR_MASK;
900 value |= APIC_SPIV_APIC_ENABLED; 965 value |= APIC_SPIV_APIC_ENABLED;
901 966
967#ifdef CONFIG_X86_32
902 /* This bit is reserved on P4/Xeon and should be cleared */ 968 /* This bit is reserved on P4/Xeon and should be cleared */
903 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && 969 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
904 (boot_cpu_data.x86 == 15)) 970 (boot_cpu_data.x86 == 15))
905 value &= ~APIC_SPIV_FOCUS_DISABLED; 971 value &= ~APIC_SPIV_FOCUS_DISABLED;
906 else 972 else
973#endif
907 value |= APIC_SPIV_FOCUS_DISABLED; 974 value |= APIC_SPIV_FOCUS_DISABLED;
908 value |= SPURIOUS_APIC_VECTOR; 975 value |= SPURIOUS_APIC_VECTOR;
909 apic_write(APIC_SPIV, value); 976 apic_write(APIC_SPIV, value);
@@ -922,6 +989,16 @@ static void __cpuinit lapic_setup_esr(void)
922{ 989{
923 unsigned long oldvalue, value, maxlvt; 990 unsigned long oldvalue, value, maxlvt;
924 if (lapic_is_integrated() && !esr_disable) { 991 if (lapic_is_integrated() && !esr_disable) {
992 if (esr_disable) {
993 /*
994 * Something untraceable is creating bad interrupts on
995 * secondary quads ... for the moment, just leave the
996 * ESR disabled - we can't do anything useful with the
997 * errors anyway - mbligh
998 */
999 printk(KERN_INFO "Leaving ESR disabled.\n");
1000 return;
1001 }
925 /* !82489DX */ 1002 /* !82489DX */
926 maxlvt = lapic_get_maxlvt(); 1003 maxlvt = lapic_get_maxlvt();
927 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 1004 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
@@ -942,16 +1019,7 @@ static void __cpuinit lapic_setup_esr(void)
942 "vector: 0x%08lx after: 0x%08lx\n", 1019 "vector: 0x%08lx after: 0x%08lx\n",
943 oldvalue, value); 1020 oldvalue, value);
944 } else { 1021 } else {
945 if (esr_disable) 1022 printk(KERN_INFO "No ESR for 82489DX.\n");
946 /*
947 * Something untraceable is creating bad interrupts on
948 * secondary quads ... for the moment, just leave the
949 * ESR disabled - we can't do anything useful with the
950 * errors anyway - mbligh
951 */
952 printk(KERN_INFO "Leaving ESR disabled.\n");
953 else
954 printk(KERN_INFO "No ESR for 82489DX.\n");
955 } 1023 }
956} 1024}
957 1025
@@ -1089,13 +1157,17 @@ void __cpuinit setup_local_APIC(void)
1089 1157
1090void __cpuinit end_local_APIC_setup(void) 1158void __cpuinit end_local_APIC_setup(void)
1091{ 1159{
1092 unsigned long value;
1093
1094 lapic_setup_esr(); 1160 lapic_setup_esr();
1095 /* Disable the local apic timer */ 1161
1096 value = apic_read(APIC_LVTT); 1162#ifdef CONFIG_X86_32
1097 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); 1163 {
1098 apic_write(APIC_LVTT, value); 1164 unsigned int value;
1165 /* Disable the local apic timer */
1166 value = apic_read(APIC_LVTT);
1167 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1168 apic_write(APIC_LVTT, value);
1169 }
1170#endif
1099 1171
1100 setup_apic_nmi_watchdog(NULL); 1172 setup_apic_nmi_watchdog(NULL);
1101 apic_pm_activate(); 1173 apic_pm_activate();
@@ -1205,7 +1277,7 @@ void __init init_apic_mappings(void)
1205 * default configuration (or the MP table is broken). 1277 * default configuration (or the MP table is broken).
1206 */ 1278 */
1207 if (boot_cpu_physical_apicid == -1U) 1279 if (boot_cpu_physical_apicid == -1U)
1208 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1280 boot_cpu_physical_apicid = read_apic_id();
1209 1281
1210} 1282}
1211 1283
@@ -1242,7 +1314,7 @@ int __init APIC_init_uniprocessor(void)
1242 * might be zero if read from MP tables. Get it from LAPIC. 1314 * might be zero if read from MP tables. Get it from LAPIC.
1243 */ 1315 */
1244#ifdef CONFIG_CRASH_DUMP 1316#ifdef CONFIG_CRASH_DUMP
1245 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1317 boot_cpu_physical_apicid = read_apic_id();
1246#endif 1318#endif
1247 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1319 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1248 1320
@@ -1321,59 +1393,12 @@ void smp_error_interrupt(struct pt_regs *regs)
1321 irq_exit(); 1393 irq_exit();
1322} 1394}
1323 1395
1324#ifdef CONFIG_SMP
1325void __init smp_intr_init(void)
1326{
1327 /*
1328 * IRQ0 must be given a fixed assignment and initialized,
1329 * because it's used before the IO-APIC is set up.
1330 */
1331 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
1332
1333 /*
1334 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
1335 * IPI, driven by wakeup.
1336 */
1337 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
1338
1339 /* IPI for invalidation */
1340 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
1341
1342 /* IPI for generic function call */
1343 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1344
1345 /* IPI for single call function */
1346 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
1347 call_function_single_interrupt);
1348}
1349#endif
1350
1351/*
1352 * Initialize APIC interrupts
1353 */
1354void __init apic_intr_init(void)
1355{
1356#ifdef CONFIG_SMP
1357 smp_intr_init();
1358#endif
1359 /* self generated IPI for local APIC timer */
1360 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
1361
1362 /* IPI vectors for APIC spurious and error interrupts */
1363 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
1364 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
1365
1366 /* thermal monitor LVT interrupt */
1367#ifdef CONFIG_X86_MCE_P4THERMAL
1368 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
1369#endif
1370}
1371
1372/** 1396/**
1373 * connect_bsp_APIC - attach the APIC to the interrupt system 1397 * connect_bsp_APIC - attach the APIC to the interrupt system
1374 */ 1398 */
1375void __init connect_bsp_APIC(void) 1399void __init connect_bsp_APIC(void)
1376{ 1400{
1401#ifdef CONFIG_X86_32
1377 if (pic_mode) { 1402 if (pic_mode) {
1378 /* 1403 /*
1379 * Do not trust the local APIC being empty at bootup. 1404 * Do not trust the local APIC being empty at bootup.
@@ -1388,6 +1413,7 @@ void __init connect_bsp_APIC(void)
1388 outb(0x70, 0x22); 1413 outb(0x70, 0x22);
1389 outb(0x01, 0x23); 1414 outb(0x01, 0x23);
1390 } 1415 }
1416#endif
1391 enable_apic_mode(); 1417 enable_apic_mode();
1392} 1418}
1393 1419
@@ -1400,6 +1426,9 @@ void __init connect_bsp_APIC(void)
1400 */ 1426 */
1401void disconnect_bsp_APIC(int virt_wire_setup) 1427void disconnect_bsp_APIC(int virt_wire_setup)
1402{ 1428{
1429 unsigned int value;
1430
1431#ifdef CONFIG_X86_32
1403 if (pic_mode) { 1432 if (pic_mode) {
1404 /* 1433 /*
1405 * Put the board back into PIC mode (has an effect only on 1434 * Put the board back into PIC mode (has an effect only on
@@ -1411,54 +1440,53 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1411 "entering PIC mode.\n"); 1440 "entering PIC mode.\n");
1412 outb(0x70, 0x22); 1441 outb(0x70, 0x22);
1413 outb(0x00, 0x23); 1442 outb(0x00, 0x23);
1414 } else { 1443 return;
1415 /* Go back to Virtual Wire compatibility mode */ 1444 }
1416 unsigned long value; 1445#endif
1417 1446
1418 /* For the spurious interrupt use vector F, and enable it */ 1447 /* Go back to Virtual Wire compatibility mode */
1419 value = apic_read(APIC_SPIV);
1420 value &= ~APIC_VECTOR_MASK;
1421 value |= APIC_SPIV_APIC_ENABLED;
1422 value |= 0xf;
1423 apic_write(APIC_SPIV, value);
1424 1448
1425 if (!virt_wire_setup) { 1449 /* For the spurious interrupt use vector F, and enable it */
1426 /* 1450 value = apic_read(APIC_SPIV);
1427 * For LVT0 make it edge triggered, active high, 1451 value &= ~APIC_VECTOR_MASK;
1428 * external and enabled 1452 value |= APIC_SPIV_APIC_ENABLED;
1429 */ 1453 value |= 0xf;
1430 value = apic_read(APIC_LVT0); 1454 apic_write(APIC_SPIV, value);
1431 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1432 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1433 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1434 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1435 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1436 apic_write(APIC_LVT0, value);
1437 } else {
1438 /* Disable LVT0 */
1439 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1440 }
1441 1455
1456 if (!virt_wire_setup) {
1442 /* 1457 /*
1443 * For LVT1 make it edge triggered, active high, nmi and 1458 * For LVT0 make it edge triggered, active high,
1444 * enabled 1459 * external and enabled
1445 */ 1460 */
1446 value = apic_read(APIC_LVT1); 1461 value = apic_read(APIC_LVT0);
1447 value &= ~( 1462 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1448 APIC_MODE_MASK | APIC_SEND_PENDING |
1449 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 1463 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1450 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); 1464 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1451 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; 1465 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1452 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); 1466 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1453 apic_write(APIC_LVT1, value); 1467 apic_write(APIC_LVT0, value);
1468 } else {
1469 /* Disable LVT0 */
1470 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1454 } 1471 }
1472
1473 /*
1474 * For LVT1 make it edge triggered, active high,
1475 * nmi and enabled
1476 */
1477 value = apic_read(APIC_LVT1);
1478 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1479 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1480 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1481 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1482 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
1483 apic_write(APIC_LVT1, value);
1455} 1484}
1456 1485
1457void __cpuinit generic_processor_info(int apicid, int version) 1486void __cpuinit generic_processor_info(int apicid, int version)
1458{ 1487{
1459 int cpu; 1488 int cpu;
1460 cpumask_t tmp_map; 1489 cpumask_t tmp_map;
1461 physid_mask_t phys_cpu;
1462 1490
1463 /* 1491 /*
1464 * Validate version 1492 * Validate version
@@ -1471,9 +1499,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1471 } 1499 }
1472 apic_version[apicid] = version; 1500 apic_version[apicid] = version;
1473 1501
1474 phys_cpu = apicid_to_cpu_present(apicid);
1475 physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
1476
1477 if (num_processors >= NR_CPUS) { 1502 if (num_processors >= NR_CPUS) {
1478 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 1503 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1479 " Processor ignored.\n", NR_CPUS); 1504 " Processor ignored.\n", NR_CPUS);
@@ -1484,17 +1509,19 @@ void __cpuinit generic_processor_info(int apicid, int version)
1484 cpus_complement(tmp_map, cpu_present_map); 1509 cpus_complement(tmp_map, cpu_present_map);
1485 cpu = first_cpu(tmp_map); 1510 cpu = first_cpu(tmp_map);
1486 1511
1487 if (apicid == boot_cpu_physical_apicid) 1512 physid_set(apicid, phys_cpu_present_map);
1513 if (apicid == boot_cpu_physical_apicid) {
1488 /* 1514 /*
1489 * x86_bios_cpu_apicid is required to have processors listed 1515 * x86_bios_cpu_apicid is required to have processors listed
1490 * in same order as logical cpu numbers. Hence the first 1516 * in same order as logical cpu numbers. Hence the first
1491 * entry is BSP, and so on. 1517 * entry is BSP, and so on.
1492 */ 1518 */
1493 cpu = 0; 1519 cpu = 0;
1494 1520 }
1495 if (apicid > max_physical_apicid) 1521 if (apicid > max_physical_apicid)
1496 max_physical_apicid = apicid; 1522 max_physical_apicid = apicid;
1497 1523
1524#ifdef CONFIG_X86_32
1498 /* 1525 /*
1499 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y 1526 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1500 * but we need to work other dependencies like SMP_SUSPEND etc 1527 * but we need to work other dependencies like SMP_SUSPEND etc
@@ -1514,7 +1541,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
1514 def_to_bigsmp = 1; 1541 def_to_bigsmp = 1;
1515 } 1542 }
1516 } 1543 }
1517#ifdef CONFIG_SMP 1544#endif
1545
1546#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1518 /* are we being called early in kernel startup? */ 1547 /* are we being called early in kernel startup? */
1519 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1548 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1520 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 1549 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
@@ -1527,6 +1556,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
1527 per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1556 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1528 } 1557 }
1529#endif 1558#endif
1559
1530 cpu_set(cpu, cpu_possible_map); 1560 cpu_set(cpu, cpu_possible_map);
1531 cpu_set(cpu, cpu_present_map); 1561 cpu_set(cpu, cpu_present_map);
1532} 1562}
@@ -1537,6 +1567,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1537#ifdef CONFIG_PM 1567#ifdef CONFIG_PM
1538 1568
1539static struct { 1569static struct {
1570 /*
1571 * 'active' is true if the local APIC was enabled by us and
1572 * not the BIOS; this signifies that we are also responsible
1573 * for disabling it before entering apm/acpi suspend
1574 */
1540 int active; 1575 int active;
1541 /* r/w apic fields */ 1576 /* r/w apic fields */
1542 unsigned int apic_id; 1577 unsigned int apic_id;
@@ -1577,7 +1612,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1577 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 1612 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1578 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 1613 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1579 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 1614 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1580#ifdef CONFIG_X86_MCE_P4THERMAL 1615#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1581 if (maxlvt >= 5) 1616 if (maxlvt >= 5)
1582 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 1617 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1583#endif 1618#endif
@@ -1601,16 +1636,23 @@ static int lapic_resume(struct sys_device *dev)
1601 1636
1602 local_irq_save(flags); 1637 local_irq_save(flags);
1603 1638
1604 /* 1639#ifdef CONFIG_X86_64
1605 * Make sure the APICBASE points to the right address 1640 if (x2apic)
1606 * 1641 enable_x2apic();
1607 * FIXME! This will be wrong if we ever support suspend on 1642 else
1608 * SMP! We'll need to do this as part of the CPU restore! 1643#endif
1609 */ 1644 {
1610 rdmsr(MSR_IA32_APICBASE, l, h); 1645 /*
1611 l &= ~MSR_IA32_APICBASE_BASE; 1646 * Make sure the APICBASE points to the right address
1612 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1647 *
1613 wrmsr(MSR_IA32_APICBASE, l, h); 1648 * FIXME! This will be wrong if we ever support suspend on
1649 * SMP! We'll need to do this as part of the CPU restore!
1650 */
1651 rdmsr(MSR_IA32_APICBASE, l, h);
1652 l &= ~MSR_IA32_APICBASE_BASE;
1653 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1654 wrmsr(MSR_IA32_APICBASE, l, h);
1655 }
1614 1656
1615 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1657 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1616 apic_write(APIC_ID, apic_pm_state.apic_id); 1658 apic_write(APIC_ID, apic_pm_state.apic_id);
@@ -1620,7 +1662,7 @@ static int lapic_resume(struct sys_device *dev)
1620 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 1662 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1621 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 1663 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1622 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 1664 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1623#ifdef CONFIG_X86_MCE_P4THERMAL 1665#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1624 if (maxlvt >= 5) 1666 if (maxlvt >= 5)
1625 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 1667 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1626#endif 1668#endif
@@ -1634,7 +1676,9 @@ static int lapic_resume(struct sys_device *dev)
1634 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); 1676 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1635 apic_write(APIC_ESR, 0); 1677 apic_write(APIC_ESR, 0);
1636 apic_read(APIC_ESR); 1678 apic_read(APIC_ESR);
1679
1637 local_irq_restore(flags); 1680 local_irq_restore(flags);
1681
1638 return 0; 1682 return 0;
1639} 1683}
1640 1684
@@ -1690,20 +1734,20 @@ static int __init parse_lapic(char *arg)
1690} 1734}
1691early_param("lapic", parse_lapic); 1735early_param("lapic", parse_lapic);
1692 1736
1693static int __init parse_nolapic(char *arg) 1737static int __init setup_disableapic(char *arg)
1694{ 1738{
1695 disable_apic = 1; 1739 disable_apic = 1;
1696 setup_clear_cpu_cap(X86_FEATURE_APIC); 1740 setup_clear_cpu_cap(X86_FEATURE_APIC);
1697 return 0; 1741 return 0;
1698} 1742}
1699early_param("nolapic", parse_nolapic); 1743early_param("disableapic", setup_disableapic);
1700 1744
1701static int __init parse_disable_lapic_timer(char *arg) 1745/* same as disableapic, for compatibility */
1746static int __init setup_nolapic(char *arg)
1702{ 1747{
1703 local_apic_timer_disabled = 1; 1748 return setup_disableapic(arg);
1704 return 0;
1705} 1749}
1706early_param("nolapic_timer", parse_disable_lapic_timer); 1750early_param("nolapic", setup_nolapic);
1707 1751
1708static int __init parse_lapic_timer_c2_ok(char *arg) 1752static int __init parse_lapic_timer_c2_ok(char *arg)
1709{ 1753{
@@ -1712,15 +1756,40 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
1712} 1756}
1713early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 1757early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1714 1758
1759static int __init parse_disable_apic_timer(char *arg)
1760{
1761 disable_apic_timer = 1;
1762 return 0;
1763}
1764early_param("noapictimer", parse_disable_apic_timer);
1765
1766static int __init parse_nolapic_timer(char *arg)
1767{
1768 disable_apic_timer = 1;
1769 return 0;
1770}
1771early_param("nolapic_timer", parse_nolapic_timer);
1772
1715static int __init apic_set_verbosity(char *arg) 1773static int __init apic_set_verbosity(char *arg)
1716{ 1774{
1717 if (!arg) 1775 if (!arg) {
1776#ifdef CONFIG_X86_64
1777 skip_ioapic_setup = 0;
1778 ioapic_force = 1;
1779 return 0;
1780#endif
1718 return -EINVAL; 1781 return -EINVAL;
1782 }
1719 1783
1720 if (strcmp(arg, "debug") == 0) 1784 if (strcmp("debug", arg) == 0)
1721 apic_verbosity = APIC_DEBUG; 1785 apic_verbosity = APIC_DEBUG;
1722 else if (strcmp(arg, "verbose") == 0) 1786 else if (strcmp("verbose", arg) == 0)
1723 apic_verbosity = APIC_VERBOSE; 1787 apic_verbosity = APIC_VERBOSE;
1788 else {
1789 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1790 " use apic=verbose or apic=debug\n", arg);
1791 return -EINVAL;
1792 }
1724 1793
1725 return 0; 1794 return 0;
1726} 1795}
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 446c062e831c..53898b65a6ae 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
27#include <linux/clockchips.h> 27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h> 28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/dmar.h>
30 31
31#include <asm/atomic.h> 32#include <asm/atomic.h>
32#include <asm/smp.h> 33#include <asm/smp.h>
@@ -39,13 +40,20 @@
39#include <asm/proto.h> 40#include <asm/proto.h>
40#include <asm/timex.h> 41#include <asm/timex.h>
41#include <asm/apic.h> 42#include <asm/apic.h>
43#include <asm/i8259.h>
42 44
43#include <mach_ipi.h> 45#include <mach_ipi.h>
44#include <mach_apic.h> 46#include <mach_apic.h>
45 47
48/* Disable local APIC timer from the kernel commandline or via dmi quirk */
46static int disable_apic_timer __cpuinitdata; 49static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 50static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 51int disable_apic;
52int disable_x2apic;
53int x2apic;
54
55/* x2apic enabled before OS handover */
56int x2apic_preenabled;
49 57
50/* Local APIC timer works in C2 */ 58/* Local APIC timer works in C2 */
51int local_apic_timer_c2_ok; 59int local_apic_timer_c2_ok;
@@ -73,6 +81,9 @@ static void lapic_timer_setup(enum clock_event_mode mode,
73static void lapic_timer_broadcast(cpumask_t mask); 81static void lapic_timer_broadcast(cpumask_t mask);
74static void apic_pm_activate(void); 82static void apic_pm_activate(void);
75 83
84/*
85 * The local apic timer can be used for any function which is CPU local.
86 */
76static struct clock_event_device lapic_clockevent = { 87static struct clock_event_device lapic_clockevent = {
77 .name = "lapic", 88 .name = "lapic",
78 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT 89 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
@@ -99,11 +110,15 @@ static inline int lapic_get_version(void)
99} 110}
100 111
101/* 112/*
102 * Check, if the APIC is integrated or a seperate chip 113 * Check, if the APIC is integrated or a separate chip
103 */ 114 */
104static inline int lapic_is_integrated(void) 115static inline int lapic_is_integrated(void)
105{ 116{
117#ifdef CONFIG_X86_64
106 return 1; 118 return 1;
119#else
120 return APIC_INTEGRATED(lapic_get_version());
121#endif
107} 122}
108 123
109/* 124/*
@@ -118,13 +133,18 @@ static int modern_apic(void)
118 return lapic_get_version() >= 0x14; 133 return lapic_get_version() >= 0x14;
119} 134}
120 135
121void apic_wait_icr_idle(void) 136/*
137 * Paravirt kernels also might be using these below ops. So we still
138 * use generic apic_read()/apic_write(), which might be pointing to different
139 * ops in PARAVIRT case.
140 */
141void xapic_wait_icr_idle(void)
122{ 142{
123 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 143 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
124 cpu_relax(); 144 cpu_relax();
125} 145}
126 146
127u32 safe_apic_wait_icr_idle(void) 147u32 safe_xapic_wait_icr_idle(void)
128{ 148{
129 u32 send_status; 149 u32 send_status;
130 int timeout; 150 int timeout;
@@ -140,6 +160,68 @@ u32 safe_apic_wait_icr_idle(void)
140 return send_status; 160 return send_status;
141} 161}
142 162
163void xapic_icr_write(u32 low, u32 id)
164{
165 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
166 apic_write(APIC_ICR, low);
167}
168
169u64 xapic_icr_read(void)
170{
171 u32 icr1, icr2;
172
173 icr2 = apic_read(APIC_ICR2);
174 icr1 = apic_read(APIC_ICR);
175
176 return icr1 | ((u64)icr2 << 32);
177}
178
179static struct apic_ops xapic_ops = {
180 .read = native_apic_mem_read,
181 .write = native_apic_mem_write,
182 .icr_read = xapic_icr_read,
183 .icr_write = xapic_icr_write,
184 .wait_icr_idle = xapic_wait_icr_idle,
185 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
186};
187
188struct apic_ops __read_mostly *apic_ops = &xapic_ops;
189EXPORT_SYMBOL_GPL(apic_ops);
190
191static void x2apic_wait_icr_idle(void)
192{
193 /* no need to wait for icr idle in x2apic */
194 return;
195}
196
197static u32 safe_x2apic_wait_icr_idle(void)
198{
199 /* no need to wait for icr idle in x2apic */
200 return 0;
201}
202
203void x2apic_icr_write(u32 low, u32 id)
204{
205 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
206}
207
208u64 x2apic_icr_read(void)
209{
210 unsigned long val;
211
212 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
213 return val;
214}
215
216static struct apic_ops x2apic_ops = {
217 .read = native_apic_msr_read,
218 .write = native_apic_msr_write,
219 .icr_read = x2apic_icr_read,
220 .icr_write = x2apic_icr_write,
221 .wait_icr_idle = x2apic_wait_icr_idle,
222 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
223};
224
143/** 225/**
144 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 226 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
145 */ 227 */
@@ -149,6 +231,11 @@ void __cpuinit enable_NMI_through_LVT0(void)
149 231
150 /* unmask and set to NMI */ 232 /* unmask and set to NMI */
151 v = APIC_DM_NMI; 233 v = APIC_DM_NMI;
234
235 /* Level triggered for 82489DX (32bit mode) */
236 if (!lapic_is_integrated())
237 v |= APIC_LVT_LEVEL_TRIGGER;
238
152 apic_write(APIC_LVT0, v); 239 apic_write(APIC_LVT0, v);
153} 240}
154 241
@@ -157,14 +244,28 @@ void __cpuinit enable_NMI_through_LVT0(void)
157 */ 244 */
158int lapic_get_maxlvt(void) 245int lapic_get_maxlvt(void)
159{ 246{
160 unsigned int v, maxlvt; 247 unsigned int v;
161 248
162 v = apic_read(APIC_LVR); 249 v = apic_read(APIC_LVR);
163 maxlvt = GET_APIC_MAXLVT(v); 250 /*
164 return maxlvt; 251 * - we always have APIC integrated on 64bit mode
252 * - 82489DXs do not report # of LVT entries
253 */
254 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
165} 255}
166 256
167/* 257/*
258 * Local APIC timer
259 */
260
261/* Clock divisor */
262#ifdef CONFG_X86_64
263#define APIC_DIVISOR 1
264#else
265#define APIC_DIVISOR 16
266#endif
267
268/*
168 * This function sets up the local APIC timer, with a timeout of 269 * This function sets up the local APIC timer, with a timeout of
169 * 'clocks' APIC bus clock. During calibration we actually call 270 * 'clocks' APIC bus clock. During calibration we actually call
170 * this function twice on the boot CPU, once with a bogus timeout 271 * this function twice on the boot CPU, once with a bogus timeout
@@ -174,7 +275,6 @@ int lapic_get_maxlvt(void)
174 * We do reads before writes even if unnecessary, to get around the 275 * We do reads before writes even if unnecessary, to get around the
175 * P5 APIC double write bug. 276 * P5 APIC double write bug.
176 */ 277 */
177
178static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) 278static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
179{ 279{
180 unsigned int lvtt_value, tmp_value; 280 unsigned int lvtt_value, tmp_value;
@@ -182,6 +282,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
182 lvtt_value = LOCAL_TIMER_VECTOR; 282 lvtt_value = LOCAL_TIMER_VECTOR;
183 if (!oneshot) 283 if (!oneshot)
184 lvtt_value |= APIC_LVT_TIMER_PERIODIC; 284 lvtt_value |= APIC_LVT_TIMER_PERIODIC;
285 if (!lapic_is_integrated())
286 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
287
185 if (!irqen) 288 if (!irqen)
186 lvtt_value |= APIC_LVT_MASKED; 289 lvtt_value |= APIC_LVT_MASKED;
187 290
@@ -191,12 +294,12 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
191 * Divide PICLK by 16 294 * Divide PICLK by 16
192 */ 295 */
193 tmp_value = apic_read(APIC_TDCR); 296 tmp_value = apic_read(APIC_TDCR);
194 apic_write(APIC_TDCR, (tmp_value 297 apic_write(APIC_TDCR,
195 & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) 298 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
196 | APIC_TDR_DIV_16); 299 APIC_TDR_DIV_16);
197 300
198 if (!oneshot) 301 if (!oneshot)
199 apic_write(APIC_TMICT, clocks); 302 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
200} 303}
201 304
202/* 305/*
@@ -366,7 +469,7 @@ static int __init calibrate_APIC_clock(void)
366 lapic_clockevent.min_delta_ns = 469 lapic_clockevent.min_delta_ns =
367 clockevent_delta2ns(0xF, &lapic_clockevent); 470 clockevent_delta2ns(0xF, &lapic_clockevent);
368 471
369 calibration_result = result / HZ; 472 calibration_result = (result * APIC_DIVISOR) / HZ;
370 473
371 /* 474 /*
372 * Do a sanity check on the APIC calibration result 475 * Do a sanity check on the APIC calibration result
@@ -388,10 +491,10 @@ static int __init calibrate_APIC_clock(void)
388void __init setup_boot_APIC_clock(void) 491void __init setup_boot_APIC_clock(void)
389{ 492{
390 /* 493 /*
391 * The local apic timer can be disabled via the kernel commandline. 494 * The local apic timer can be disabled via the kernel
392 * Register the lapic timer as a dummy clock event source on SMP 495 * commandline or from the CPU detection code. Register the lapic
393 * systems, so the broadcast mechanism is used. On UP systems simply 496 * timer as a dummy clock event source on SMP systems, so the
394 * ignore it. 497 * broadcast mechanism is used. On UP systems simply ignore it.
395 */ 498 */
396 if (disable_apic_timer) { 499 if (disable_apic_timer) {
397 printk(KERN_INFO "Disabling APIC timer\n"); 500 printk(KERN_INFO "Disabling APIC timer\n");
@@ -403,7 +506,9 @@ void __init setup_boot_APIC_clock(void)
403 return; 506 return;
404 } 507 }
405 508
406 printk(KERN_INFO "Using local APIC timer interrupts.\n"); 509 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
510 "calibrating APIC timer ...\n");
511
407 if (calibrate_APIC_clock()) { 512 if (calibrate_APIC_clock()) {
408 /* No broadcast on UP ! */ 513 /* No broadcast on UP ! */
409 if (num_possible_cpus() > 1) 514 if (num_possible_cpus() > 1)
@@ -422,6 +527,7 @@ void __init setup_boot_APIC_clock(void)
422 printk(KERN_WARNING "APIC timer registered as dummy," 527 printk(KERN_WARNING "APIC timer registered as dummy,"
423 " due to nmi_watchdog=%d!\n", nmi_watchdog); 528 " due to nmi_watchdog=%d!\n", nmi_watchdog);
424 529
530 /* Setup the lapic or request the broadcast */
425 setup_APIC_timer(); 531 setup_APIC_timer();
426} 532}
427 533
@@ -460,7 +566,11 @@ static void local_apic_timer_interrupt(void)
460 /* 566 /*
461 * the NMI deadlock-detector uses this. 567 * the NMI deadlock-detector uses this.
462 */ 568 */
569#ifdef CONFIG_X86_64
463 add_pda(apic_timer_irqs, 1); 570 add_pda(apic_timer_irqs, 1);
571#else
572 per_cpu(irq_stat, cpu).apic_timer_irqs++;
573#endif
464 574
465 evt->event_handler(evt); 575 evt->event_handler(evt);
466} 576}
@@ -491,6 +601,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
491 irq_enter(); 601 irq_enter();
492 local_apic_timer_interrupt(); 602 local_apic_timer_interrupt();
493 irq_exit(); 603 irq_exit();
604
494 set_irq_regs(old_regs); 605 set_irq_regs(old_regs);
495} 606}
496 607
@@ -544,6 +655,13 @@ void clear_local_APIC(void)
544 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); 655 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
545 } 656 }
546 657
658 /* lets not touch this if we didn't frob it */
659#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
660 if (maxlvt >= 5) {
661 v = apic_read(APIC_LVTTHMR);
662 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
663 }
664#endif
547 /* 665 /*
548 * Clean APIC state for other OSs: 666 * Clean APIC state for other OSs:
549 */ 667 */
@@ -554,8 +672,14 @@ void clear_local_APIC(void)
554 apic_write(APIC_LVTERR, APIC_LVT_MASKED); 672 apic_write(APIC_LVTERR, APIC_LVT_MASKED);
555 if (maxlvt >= 4) 673 if (maxlvt >= 4)
556 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 674 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
557 apic_write(APIC_ESR, 0); 675
558 apic_read(APIC_ESR); 676 /* Integrated APIC (!82489DX) ? */
677 if (lapic_is_integrated()) {
678 if (maxlvt > 3)
679 /* Clear ESR due to Pentium errata 3AP and 11AP */
680 apic_write(APIC_ESR, 0);
681 apic_read(APIC_ESR);
682 }
559} 683}
560 684
561/** 685/**
@@ -574,8 +698,28 @@ void disable_local_APIC(void)
574 value = apic_read(APIC_SPIV); 698 value = apic_read(APIC_SPIV);
575 value &= ~APIC_SPIV_APIC_ENABLED; 699 value &= ~APIC_SPIV_APIC_ENABLED;
576 apic_write(APIC_SPIV, value); 700 apic_write(APIC_SPIV, value);
701
702#ifdef CONFIG_X86_32
703 /*
704 * When LAPIC was disabled by the BIOS and enabled by the kernel,
705 * restore the disabled state.
706 */
707 if (enabled_via_apicbase) {
708 unsigned int l, h;
709
710 rdmsr(MSR_IA32_APICBASE, l, h);
711 l &= ~MSR_IA32_APICBASE_ENABLE;
712 wrmsr(MSR_IA32_APICBASE, l, h);
713 }
714#endif
577} 715}
578 716
717/*
718 * If Linux enabled the LAPIC against the BIOS default disable it down before
719 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
720 * not power-off. Additionally clear all LVT entries before disable_local_APIC
721 * for the case where Linux didn't enable the LAPIC.
722 */
579void lapic_shutdown(void) 723void lapic_shutdown(void)
580{ 724{
581 unsigned long flags; 725 unsigned long flags;
@@ -585,7 +729,13 @@ void lapic_shutdown(void)
585 729
586 local_irq_save(flags); 730 local_irq_save(flags);
587 731
588 disable_local_APIC(); 732#ifdef CONFIG_X86_32
733 if (!enabled_via_apicbase)
734 clear_local_APIC();
735 else
736#endif
737 disable_local_APIC();
738
589 739
590 local_irq_restore(flags); 740 local_irq_restore(flags);
591} 741}
@@ -629,10 +779,10 @@ int __init verify_local_APIC(void)
629 /* 779 /*
630 * The ID register is read/write in a real APIC. 780 * The ID register is read/write in a real APIC.
631 */ 781 */
632 reg0 = read_apic_id(); 782 reg0 = apic_read(APIC_ID);
633 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 783 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
634 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 784 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
635 reg1 = read_apic_id(); 785 reg1 = apic_read(APIC_ID);
636 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 786 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
637 apic_write(APIC_ID, reg0); 787 apic_write(APIC_ID, reg0);
638 if (reg1 != (reg0 ^ APIC_ID_MASK)) 788 if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -656,8 +806,11 @@ int __init verify_local_APIC(void)
656 */ 806 */
657void __init sync_Arb_IDs(void) 807void __init sync_Arb_IDs(void)
658{ 808{
659 /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ 809 /*
660 if (modern_apic()) 810 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
811 * needed on AMD.
812 */
813 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
661 return; 814 return;
662 815
663 /* 816 /*
@@ -666,8 +819,8 @@ void __init sync_Arb_IDs(void)
666 apic_wait_icr_idle(); 819 apic_wait_icr_idle();
667 820
668 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); 821 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
669 apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG 822 apic_write(APIC_ICR, APIC_DEST_ALLINC |
670 | APIC_DM_INIT); 823 APIC_INT_LEVELTRIG | APIC_DM_INIT);
671} 824}
672 825
673/* 826/*
@@ -684,8 +837,6 @@ void __init init_bsp_APIC(void)
684 if (smp_found_config || !cpu_has_apic) 837 if (smp_found_config || !cpu_has_apic)
685 return; 838 return;
686 839
687 value = apic_read(APIC_LVR);
688
689 /* 840 /*
690 * Do not trust the local APIC being empty at bootup. 841 * Do not trust the local APIC being empty at bootup.
691 */ 842 */
@@ -697,7 +848,15 @@ void __init init_bsp_APIC(void)
697 value = apic_read(APIC_SPIV); 848 value = apic_read(APIC_SPIV);
698 value &= ~APIC_VECTOR_MASK; 849 value &= ~APIC_VECTOR_MASK;
699 value |= APIC_SPIV_APIC_ENABLED; 850 value |= APIC_SPIV_APIC_ENABLED;
700 value |= APIC_SPIV_FOCUS_DISABLED; 851
852#ifdef CONFIG_X86_32
853 /* This bit is reserved on P4/Xeon and should be cleared */
854 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
855 (boot_cpu_data.x86 == 15))
856 value &= ~APIC_SPIV_FOCUS_DISABLED;
857 else
858#endif
859 value |= APIC_SPIV_FOCUS_DISABLED;
701 value |= SPURIOUS_APIC_VECTOR; 860 value |= SPURIOUS_APIC_VECTOR;
702 apic_write(APIC_SPIV, value); 861 apic_write(APIC_SPIV, value);
703 862
@@ -706,9 +865,50 @@ void __init init_bsp_APIC(void)
706 */ 865 */
707 apic_write(APIC_LVT0, APIC_DM_EXTINT); 866 apic_write(APIC_LVT0, APIC_DM_EXTINT);
708 value = APIC_DM_NMI; 867 value = APIC_DM_NMI;
868 if (!lapic_is_integrated()) /* 82489DX */
869 value |= APIC_LVT_LEVEL_TRIGGER;
709 apic_write(APIC_LVT1, value); 870 apic_write(APIC_LVT1, value);
710} 871}
711 872
873static void __cpuinit lapic_setup_esr(void)
874{
875 unsigned long oldvalue, value, maxlvt;
876 if (lapic_is_integrated() && !esr_disable) {
877 if (esr_disable) {
878 /*
879 * Something untraceable is creating bad interrupts on
880 * secondary quads ... for the moment, just leave the
881 * ESR disabled - we can't do anything useful with the
882 * errors anyway - mbligh
883 */
884 printk(KERN_INFO "Leaving ESR disabled.\n");
885 return;
886 }
887 /* !82489DX */
888 maxlvt = lapic_get_maxlvt();
889 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
890 apic_write(APIC_ESR, 0);
891 oldvalue = apic_read(APIC_ESR);
892
893 /* enables sending errors */
894 value = ERROR_APIC_VECTOR;
895 apic_write(APIC_LVTERR, value);
896 /*
897 * spec says clear errors after enabling vector.
898 */
899 if (maxlvt > 3)
900 apic_write(APIC_ESR, 0);
901 value = apic_read(APIC_ESR);
902 if (value != oldvalue)
903 apic_printk(APIC_VERBOSE, "ESR value before enabling "
904 "vector: 0x%08lx after: 0x%08lx\n",
905 oldvalue, value);
906 } else {
907 printk(KERN_INFO "No ESR for 82489DX.\n");
908 }
909}
910
911
712/** 912/**
713 * setup_local_APIC - setup the local APIC 913 * setup_local_APIC - setup the local APIC
714 */ 914 */
@@ -814,25 +1014,143 @@ void __cpuinit setup_local_APIC(void)
814 preempt_enable(); 1014 preempt_enable();
815} 1015}
816 1016
817static void __cpuinit lapic_setup_esr(void)
818{
819 unsigned maxlvt = lapic_get_maxlvt();
820
821 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR);
822 /*
823 * spec says clear errors after enabling vector.
824 */
825 if (maxlvt > 3)
826 apic_write(APIC_ESR, 0);
827}
828
829void __cpuinit end_local_APIC_setup(void) 1017void __cpuinit end_local_APIC_setup(void)
830{ 1018{
831 lapic_setup_esr(); 1019 lapic_setup_esr();
1020
1021#ifdef CONFIG_X86_32
1022 {
1023 unsigned int value;
1024 /* Disable the local apic timer */
1025 value = apic_read(APIC_LVTT);
1026 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1027 apic_write(APIC_LVTT, value);
1028 }
1029#endif
1030
832 setup_apic_nmi_watchdog(NULL); 1031 setup_apic_nmi_watchdog(NULL);
833 apic_pm_activate(); 1032 apic_pm_activate();
834} 1033}
835 1034
1035void check_x2apic(void)
1036{
1037 int msr, msr2;
1038
1039 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1040
1041 if (msr & X2APIC_ENABLE) {
1042 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
1043 x2apic_preenabled = x2apic = 1;
1044 apic_ops = &x2apic_ops;
1045 }
1046}
1047
1048void enable_x2apic(void)
1049{
1050 int msr, msr2;
1051
1052 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1053 if (!(msr & X2APIC_ENABLE)) {
1054 printk("Enabling x2apic\n");
1055 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1056 }
1057}
1058
1059void enable_IR_x2apic(void)
1060{
1061#ifdef CONFIG_INTR_REMAP
1062 int ret;
1063 unsigned long flags;
1064
1065 if (!cpu_has_x2apic)
1066 return;
1067
1068 if (!x2apic_preenabled && disable_x2apic) {
1069 printk(KERN_INFO
1070 "Skipped enabling x2apic and Interrupt-remapping "
1071 "because of nox2apic\n");
1072 return;
1073 }
1074
1075 if (x2apic_preenabled && disable_x2apic)
1076 panic("Bios already enabled x2apic, can't enforce nox2apic");
1077
1078 if (!x2apic_preenabled && skip_ioapic_setup) {
1079 printk(KERN_INFO
1080 "Skipped enabling x2apic and Interrupt-remapping "
1081 "because of skipping io-apic setup\n");
1082 return;
1083 }
1084
1085 ret = dmar_table_init();
1086 if (ret) {
1087 printk(KERN_INFO
1088 "dmar_table_init() failed with %d:\n", ret);
1089
1090 if (x2apic_preenabled)
1091 panic("x2apic enabled by bios. But IR enabling failed");
1092 else
1093 printk(KERN_INFO
1094 "Not enabling x2apic,Intr-remapping\n");
1095 return;
1096 }
1097
1098 local_irq_save(flags);
1099 mask_8259A();
1100 save_mask_IO_APIC_setup();
1101
1102 ret = enable_intr_remapping(1);
1103
1104 if (ret && x2apic_preenabled) {
1105 local_irq_restore(flags);
1106 panic("x2apic enabled by bios. But IR enabling failed");
1107 }
1108
1109 if (ret)
1110 goto end;
1111
1112 if (!x2apic) {
1113 x2apic = 1;
1114 apic_ops = &x2apic_ops;
1115 enable_x2apic();
1116 }
1117end:
1118 if (ret)
1119 /*
1120 * IR enabling failed
1121 */
1122 restore_IO_APIC_setup();
1123 else
1124 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
1125
1126 unmask_8259A();
1127 local_irq_restore(flags);
1128
1129 if (!ret) {
1130 if (!x2apic_preenabled)
1131 printk(KERN_INFO
1132 "Enabled x2apic and interrupt-remapping\n");
1133 else
1134 printk(KERN_INFO
1135 "Enabled Interrupt-remapping\n");
1136 } else
1137 printk(KERN_ERR
1138 "Failed to enable Interrupt-remapping and x2apic\n");
1139#else
1140 if (!cpu_has_x2apic)
1141 return;
1142
1143 if (x2apic_preenabled)
1144 panic("x2apic enabled prior OS handover,"
1145 " enable CONFIG_INTR_REMAP");
1146
1147 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1148 " and x2apic\n");
1149#endif
1150
1151 return;
1152}
1153
836/* 1154/*
837 * Detect and enable local APICs on non-SMP boards. 1155 * Detect and enable local APICs on non-SMP boards.
838 * Original code written by Keir Fraser. 1156 * Original code written by Keir Fraser.
@@ -872,7 +1190,7 @@ void __init early_init_lapic_mapping(void)
872 * Fetch the APIC ID of the BSP in case we have a 1190 * Fetch the APIC ID of the BSP in case we have a
873 * default configuration (or the MP table is broken). 1191 * default configuration (or the MP table is broken).
874 */ 1192 */
875 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1193 boot_cpu_physical_apicid = read_apic_id();
876} 1194}
877 1195
878/** 1196/**
@@ -880,6 +1198,11 @@ void __init early_init_lapic_mapping(void)
880 */ 1198 */
881void __init init_apic_mappings(void) 1199void __init init_apic_mappings(void)
882{ 1200{
1201 if (x2apic) {
1202 boot_cpu_physical_apicid = read_apic_id();
1203 return;
1204 }
1205
883 /* 1206 /*
884 * If no local APIC can be found then set up a fake all 1207 * If no local APIC can be found then set up a fake all
885 * zeroes page to simulate the local APIC and another 1208 * zeroes page to simulate the local APIC and another
@@ -899,13 +1222,15 @@ void __init init_apic_mappings(void)
899 * Fetch the APIC ID of the BSP in case we have a 1222 * Fetch the APIC ID of the BSP in case we have a
900 * default configuration (or the MP table is broken). 1223 * default configuration (or the MP table is broken).
901 */ 1224 */
902 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1225 boot_cpu_physical_apicid = read_apic_id();
903} 1226}
904 1227
905/* 1228/*
906 * This initializes the IO-APIC and APIC hardware if this is 1229 * This initializes the IO-APIC and APIC hardware if this is
907 * a UP kernel. 1230 * a UP kernel.
908 */ 1231 */
1232int apic_version[MAX_APICS];
1233
909int __init APIC_init_uniprocessor(void) 1234int __init APIC_init_uniprocessor(void)
910{ 1235{
911 if (disable_apic) { 1236 if (disable_apic) {
@@ -918,6 +1243,9 @@ int __init APIC_init_uniprocessor(void)
918 return -1; 1243 return -1;
919 } 1244 }
920 1245
1246 enable_IR_x2apic();
1247 setup_apic_routing();
1248
921 verify_local_APIC(); 1249 verify_local_APIC();
922 1250
923 connect_bsp_APIC(); 1251 connect_bsp_APIC();
@@ -1004,17 +1332,57 @@ asmlinkage void smp_error_interrupt(void)
1004} 1332}
1005 1333
1006/** 1334/**
1007 * * connect_bsp_APIC - attach the APIC to the interrupt system 1335 * connect_bsp_APIC - attach the APIC to the interrupt system
1008 * */ 1336 */
1009void __init connect_bsp_APIC(void) 1337void __init connect_bsp_APIC(void)
1010{ 1338{
1339#ifdef CONFIG_X86_32
1340 if (pic_mode) {
1341 /*
1342 * Do not trust the local APIC being empty at bootup.
1343 */
1344 clear_local_APIC();
1345 /*
1346 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
1347 * local APIC to INT and NMI lines.
1348 */
1349 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1350 "enabling APIC mode.\n");
1351 outb(0x70, 0x22);
1352 outb(0x01, 0x23);
1353 }
1354#endif
1011 enable_apic_mode(); 1355 enable_apic_mode();
1012} 1356}
1013 1357
1358/**
1359 * disconnect_bsp_APIC - detach the APIC from the interrupt system
1360 * @virt_wire_setup: indicates, whether virtual wire mode is selected
1361 *
1362 * Virtual wire mode is necessary to deliver legacy interrupts even when the
1363 * APIC is disabled.
1364 */
1014void disconnect_bsp_APIC(int virt_wire_setup) 1365void disconnect_bsp_APIC(int virt_wire_setup)
1015{ 1366{
1367 unsigned int value;
1368
1369#ifdef CONFIG_X86_32
1370 if (pic_mode) {
1371 /*
1372 * Put the board back into PIC mode (has an effect only on
1373 * certain older boards). Note that APIC interrupts, including
1374 * IPIs, won't work beyond this point! The only exceptions are
1375 * INIT IPIs.
1376 */
1377 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1378 "entering PIC mode.\n");
1379 outb(0x70, 0x22);
1380 outb(0x00, 0x23);
1381 return;
1382 }
1383#endif
1384
1016 /* Go back to Virtual Wire compatibility mode */ 1385 /* Go back to Virtual Wire compatibility mode */
1017 unsigned long value;
1018 1386
1019 /* For the spurious interrupt use vector F, and enable it */ 1387 /* For the spurious interrupt use vector F, and enable it */
1020 value = apic_read(APIC_SPIV); 1388 value = apic_read(APIC_SPIV);
@@ -1040,7 +1408,10 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1040 apic_write(APIC_LVT0, APIC_LVT_MASKED); 1408 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1041 } 1409 }
1042 1410
1043 /* For LVT1 make it edge triggered, active high, nmi and enabled */ 1411 /*
1412 * For LVT1 make it edge triggered, active high,
1413 * nmi and enabled
1414 */
1044 value = apic_read(APIC_LVT1); 1415 value = apic_read(APIC_LVT1);
1045 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | 1416 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1046 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 1417 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
@@ -1055,9 +1426,20 @@ void __cpuinit generic_processor_info(int apicid, int version)
1055 int cpu; 1426 int cpu;
1056 cpumask_t tmp_map; 1427 cpumask_t tmp_map;
1057 1428
1429 /*
1430 * Validate version
1431 */
1432 if (version == 0x0) {
1433 printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
1434 "fixing up to 0x10. (tell your hw vendor)\n",
1435 version);
1436 version = 0x10;
1437 }
1438 apic_version[apicid] = version;
1439
1058 if (num_processors >= NR_CPUS) { 1440 if (num_processors >= NR_CPUS) {
1059 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 1441 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1060 " Processor ignored.\n", NR_CPUS); 1442 " Processor ignored.\n", NR_CPUS);
1061 return; 1443 return;
1062 } 1444 }
1063 1445
@@ -1077,6 +1459,29 @@ void __cpuinit generic_processor_info(int apicid, int version)
1077 if (apicid > max_physical_apicid) 1459 if (apicid > max_physical_apicid)
1078 max_physical_apicid = apicid; 1460 max_physical_apicid = apicid;
1079 1461
1462#ifdef CONFIG_X86_32
1463 /*
1464 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1465 * but we need to work other dependencies like SMP_SUSPEND etc
1466 * before this can be done without some confusion.
1467 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
1468 * - Ashok Raj <ashok.raj@intel.com>
1469 */
1470 if (max_physical_apicid >= 8) {
1471 switch (boot_cpu_data.x86_vendor) {
1472 case X86_VENDOR_INTEL:
1473 if (!APIC_XAPIC(version)) {
1474 def_to_bigsmp = 0;
1475 break;
1476 }
1477 /* If P4 and above fall through */
1478 case X86_VENDOR_AMD:
1479 def_to_bigsmp = 1;
1480 }
1481 }
1482#endif
1483
1484#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1080 /* are we being called early in kernel startup? */ 1485 /* are we being called early in kernel startup? */
1081 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1486 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1082 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 1487 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
@@ -1088,20 +1493,28 @@ void __cpuinit generic_processor_info(int apicid, int version)
1088 per_cpu(x86_cpu_to_apicid, cpu) = apicid; 1493 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1089 per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1494 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1090 } 1495 }
1496#endif
1091 1497
1092 cpu_set(cpu, cpu_possible_map); 1498 cpu_set(cpu, cpu_possible_map);
1093 cpu_set(cpu, cpu_present_map); 1499 cpu_set(cpu, cpu_present_map);
1094} 1500}
1095 1501
/*
 * hard_smp_processor_id - return the result of read_apic_id() as an int.
 * NOTE(review): presumably the local APIC ID of the executing CPU -
 * confirm against read_apic_id().
 */
int hard_smp_processor_id(void)
{
	int apicid = read_apic_id();

	return apicid;
}
1506
1096/* 1507/*
1097 * Power management 1508 * Power management
1098 */ 1509 */
1099#ifdef CONFIG_PM 1510#ifdef CONFIG_PM
1100 1511
1101static struct { 1512static struct {
1102 /* 'active' is true if the local APIC was enabled by us and 1513 /*
1103 not the BIOS; this signifies that we are also responsible 1514 * 'active' is true if the local APIC was enabled by us and
1104 for disabling it before entering apm/acpi suspend */ 1515 * not the BIOS; this signifies that we are also responsible
1516 * for disabling it before entering apm/acpi suspend
1517 */
1105 int active; 1518 int active;
1106 /* r/w apic fields */ 1519 /* r/w apic fields */
1107 unsigned int apic_id; 1520 unsigned int apic_id;
@@ -1129,7 +1542,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1129 1542
1130 maxlvt = lapic_get_maxlvt(); 1543 maxlvt = lapic_get_maxlvt();
1131 1544
1132 apic_pm_state.apic_id = read_apic_id(); 1545 apic_pm_state.apic_id = apic_read(APIC_ID);
1133 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 1546 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1134 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 1547 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1135 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 1548 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1142,10 +1555,11 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1142 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 1555 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1143 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 1556 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1144 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 1557 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1145#ifdef CONFIG_X86_MCE_INTEL 1558#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1146 if (maxlvt >= 5) 1559 if (maxlvt >= 5)
1147 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 1560 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1148#endif 1561#endif
1562
1149 local_irq_save(flags); 1563 local_irq_save(flags);
1150 disable_local_APIC(); 1564 disable_local_APIC();
1151 local_irq_restore(flags); 1565 local_irq_restore(flags);
@@ -1164,10 +1578,25 @@ static int lapic_resume(struct sys_device *dev)
1164 maxlvt = lapic_get_maxlvt(); 1578 maxlvt = lapic_get_maxlvt();
1165 1579
1166 local_irq_save(flags); 1580 local_irq_save(flags);
1167 rdmsr(MSR_IA32_APICBASE, l, h); 1581
1168 l &= ~MSR_IA32_APICBASE_BASE; 1582#ifdef CONFIG_X86_64
1169 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1583 if (x2apic)
1170 wrmsr(MSR_IA32_APICBASE, l, h); 1584 enable_x2apic();
1585 else
1586#endif
1587 {
1588 /*
1589 * Make sure the APICBASE points to the right address
1590 *
1591 * FIXME! This will be wrong if we ever support suspend on
1592 * SMP! We'll need to do this as part of the CPU restore!
1593 */
1594 rdmsr(MSR_IA32_APICBASE, l, h);
1595 l &= ~MSR_IA32_APICBASE_BASE;
1596 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1597 wrmsr(MSR_IA32_APICBASE, l, h);
1598 }
1599
1171 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1600 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1172 apic_write(APIC_ID, apic_pm_state.apic_id); 1601 apic_write(APIC_ID, apic_pm_state.apic_id);
1173 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 1602 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1176,7 +1605,7 @@ static int lapic_resume(struct sys_device *dev)
1176 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 1605 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1177 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 1606 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1178 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 1607 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1179#ifdef CONFIG_X86_MCE_INTEL 1608#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1180 if (maxlvt >= 5) 1609 if (maxlvt >= 5)
1181 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 1610 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1182#endif 1611#endif
@@ -1190,10 +1619,17 @@ static int lapic_resume(struct sys_device *dev)
1190 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); 1619 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1191 apic_write(APIC_ESR, 0); 1620 apic_write(APIC_ESR, 0);
1192 apic_read(APIC_ESR); 1621 apic_read(APIC_ESR);
1622
1193 local_irq_restore(flags); 1623 local_irq_restore(flags);
1624
1194 return 0; 1625 return 0;
1195} 1626}
1196 1627
1628/*
1629 * This device has no shutdown method - fully functioning local APICs
1630 * are needed on every CPU up until machine_halt/restart/poweroff.
1631 */
1632
1197static struct sysdev_class lapic_sysclass = { 1633static struct sysdev_class lapic_sysclass = {
1198 .name = "lapic", 1634 .name = "lapic",
1199 .resume = lapic_resume, 1635 .resume = lapic_resume,
@@ -1307,31 +1743,19 @@ __cpuinit int apic_is_clustered_box(void)
1307 return (clusters > 2); 1743 return (clusters > 2);
1308} 1744}
1309 1745
1310/* 1746static __init int setup_nox2apic(char *str)
1311 * APIC command line parameters
1312 */
1313static int __init apic_set_verbosity(char *str)
1314{ 1747{
1315 if (str == NULL) { 1748 disable_x2apic = 1;
1316 skip_ioapic_setup = 0; 1749 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
1317 ioapic_force = 1;
1318 return 0;
1319 }
1320 if (strcmp("debug", str) == 0)
1321 apic_verbosity = APIC_DEBUG;
1322 else if (strcmp("verbose", str) == 0)
1323 apic_verbosity = APIC_VERBOSE;
1324 else {
1325 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1326 " use apic=verbose or apic=debug\n", str);
1327 return -EINVAL;
1328 }
1329
1330 return 0; 1750 return 0;
1331} 1751}
1332early_param("apic", apic_set_verbosity); 1752early_param("nox2apic", setup_nox2apic);
1753
1333 1754
1334static __init int setup_disableapic(char *str) 1755/*
1756 * APIC command line parameters
1757 */
1758static int __init setup_disableapic(char *arg)
1335{ 1759{
1336 disable_apic = 1; 1760 disable_apic = 1;
1337 setup_clear_cpu_cap(X86_FEATURE_APIC); 1761 setup_clear_cpu_cap(X86_FEATURE_APIC);
@@ -1340,9 +1764,9 @@ static __init int setup_disableapic(char *str)
1340early_param("disableapic", setup_disableapic); 1764early_param("disableapic", setup_disableapic);
1341 1765
1342/* same as disableapic, for compatibility */ 1766/* same as disableapic, for compatibility */
1343static __init int setup_nolapic(char *str) 1767static int __init setup_nolapic(char *arg)
1344{ 1768{
1345 return setup_disableapic(str); 1769 return setup_disableapic(arg);
1346} 1770}
1347early_param("nolapic", setup_nolapic); 1771early_param("nolapic", setup_nolapic);
1348 1772
@@ -1353,14 +1777,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
1353} 1777}
1354early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 1778early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1355 1779
1356static __init int setup_noapictimer(char *str) 1780static int __init parse_disable_apic_timer(char *arg)
1357{ 1781{
1358 if (str[0] != ' ' && str[0] != 0)
1359 return 0;
1360 disable_apic_timer = 1; 1782 disable_apic_timer = 1;
1361 return 1; 1783 return 0;
1362} 1784}
1363__setup("noapictimer", setup_noapictimer); 1785early_param("noapictimer", parse_disable_apic_timer);
1786
1787static int __init parse_nolapic_timer(char *arg)
1788{
1789 disable_apic_timer = 1;
1790 return 0;
1791}
1792early_param("nolapic_timer", parse_nolapic_timer);
1364 1793
1365static __init int setup_apicpmtimer(char *s) 1794static __init int setup_apicpmtimer(char *s)
1366{ 1795{
@@ -1370,6 +1799,31 @@ static __init int setup_apicpmtimer(char *s)
1370} 1799}
1371__setup("apicpmtimer", setup_apicpmtimer); 1800__setup("apicpmtimer", setup_apicpmtimer);
1372 1801
1802static int __init apic_set_verbosity(char *arg)
1803{
1804 if (!arg) {
1805#ifdef CONFIG_X86_64
1806 skip_ioapic_setup = 0;
1807 ioapic_force = 1;
1808 return 0;
1809#endif
1810 return -EINVAL;
1811 }
1812
1813 if (strcmp("debug", arg) == 0)
1814 apic_verbosity = APIC_DEBUG;
1815 else if (strcmp("verbose", arg) == 0)
1816 apic_verbosity = APIC_VERBOSE;
1817 else {
1818 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1819 " use apic=verbose or apic=debug\n", arg);
1820 return -EINVAL;
1821 }
1822
1823 return 0;
1824}
1825early_param("apic", apic_set_verbosity);
1826
1373static int __init lapic_insert_resource(void) 1827static int __init lapic_insert_resource(void)
1374{ 1828{
1375 if (!apic_phys) 1829 if (!apic_phys)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ee76eaad3001..7f0b45a5d788 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,22 +3,30 @@
3# 3#
4 4
5obj-y := intel_cacheinfo.o addon_cpuid_features.o 5obj-y := intel_cacheinfo.o addon_cpuid_features.o
6obj-y += proc.o feature_names.o 6obj-y += proc.o capflags.o powerflags.o common.o
7 7
8obj-$(CONFIG_X86_32) += common.o bugs.o 8obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
9obj-$(CONFIG_X86_64) += common_64.o bugs_64.o 9obj-$(CONFIG_X86_64) += bugs_64.o
10obj-$(CONFIG_X86_32) += amd.o 10
11obj-$(CONFIG_X86_64) += amd_64.o 11obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
12obj-$(CONFIG_X86_32) += cyrix.o 12obj-$(CONFIG_CPU_SUP_AMD) += amd.o
13obj-$(CONFIG_X86_32) += centaur.o 13obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
14obj-$(CONFIG_X86_64) += centaur_64.o 14obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o
15obj-$(CONFIG_X86_32) += transmeta.o 15obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o
16obj-$(CONFIG_X86_32) += intel.o 16obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
17obj-$(CONFIG_X86_64) += intel_64.o 17obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
18obj-$(CONFIG_X86_32) += umc.o
19 18
20obj-$(CONFIG_X86_MCE) += mcheck/ 19obj-$(CONFIG_X86_MCE) += mcheck/
21obj-$(CONFIG_MTRR) += mtrr/ 20obj-$(CONFIG_MTRR) += mtrr/
22obj-$(CONFIG_CPU_FREQ) += cpufreq/ 21obj-$(CONFIG_CPU_FREQ) += cpufreq/
23 22
24obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o 23obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
24
25quiet_cmd_mkcapflags = MKCAP $@
26 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
27
28cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h
29
30targets += capflags.c
31$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE
32 $(call if_changed,mkcapflags)
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index a6ef672adbba..0d9c993aa93e 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -7,6 +7,8 @@
7#include <asm/pat.h> 7#include <asm/pat.h>
8#include <asm/processor.h> 8#include <asm/processor.h>
9 9
10#include <mach_apic.h>
11
10struct cpuid_bit { 12struct cpuid_bit {
11 u16 feature; 13 u16 feature;
12 u8 reg; 14 u8 reg;
@@ -48,6 +50,92 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
48 } 50 }
49} 51}
50 52
53/* leaf 0xb SMT level */
54#define SMT_LEVEL 0
55
56/* leaf 0xb sub-leaf types */
57#define INVALID_TYPE 0
58#define SMT_TYPE 1
59#define CORE_TYPE 2
60
61#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
62#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
63#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
64
65/*
66 * Check for extended topology enumeration cpuid leaf 0xb and if it
67 * exists, use it for populating initial_apicid and cpu topology
68 * detection.
69 */
70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
71{
72#ifdef CONFIG_SMP
73 unsigned int eax, ebx, ecx, edx, sub_index;
74 unsigned int ht_mask_width, core_plus_mask_width;
75 unsigned int core_select_mask, core_level_siblings;
76
77 if (c->cpuid_level < 0xb)
78 return;
79
80 cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
81
82 /*
83 * check if the cpuid leaf 0xb is actually implemented.
84 */
85 if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
86 return;
87
88 set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
89
90 /*
91 * initial apic id, which also represents 32-bit extended x2apic id.
92 */
93 c->initial_apicid = edx;
94
95 /*
96 * Populate HT related information from sub-leaf level 0.
97 */
98 core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
99 core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
100
101 sub_index = 1;
102 do {
103 cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
104
105 /*
106 * Check for the Core type in the implemented sub leaves.
107 */
108 if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
109 core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
110 core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
111 break;
112 }
113
114 sub_index++;
115 } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
116
117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
118
119#ifdef CONFIG_X86_32
120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
121 & core_select_mask;
122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
123#else
124 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
125 c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
126#endif
127 c->x86_max_cores = (core_level_siblings / smp_num_siblings);
128
129
130 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
131 c->phys_proc_id);
132 if (c->x86_max_cores > 1)
133 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
134 c->cpu_core_id);
135 return;
136#endif
137}
138
51#ifdef CONFIG_X86_PAT 139#ifdef CONFIG_X86_PAT
52void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) 140void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
53{ 141{
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 18514ed26104..32e73520adf7 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,13 +1,22 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/bitops.h> 2#include <linux/bitops.h>
3#include <linux/mm.h> 3#include <linux/mm.h>
4
4#include <asm/io.h> 5#include <asm/io.h>
5#include <asm/processor.h> 6#include <asm/processor.h>
6#include <asm/apic.h> 7#include <asm/apic.h>
7 8
9#ifdef CONFIG_X86_64
10# include <asm/numa_64.h>
11# include <asm/mmconfig.h>
12# include <asm/cacheflush.h>
13#endif
14
8#include <mach_apic.h> 15#include <mach_apic.h>
16
9#include "cpu.h" 17#include "cpu.h"
10 18
19#ifdef CONFIG_X86_32
11/* 20/*
12 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause 21 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
13 * misexecution of code under Linux. Owners of such processors should 22 * misexecution of code under Linux. Owners of such processors should
@@ -24,26 +33,273 @@
24extern void vide(void); 33extern void vide(void);
25__asm__(".align 4\nvide: ret"); 34__asm__(".align 4\nvide: ret");
26 35
27static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) 36static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
28{ 37{
29 if (cpuid_eax(0x80000000) >= 0x80000007) { 38/*
30 c->x86_power = cpuid_edx(0x80000007); 39 * General Systems BIOSen alias the cpu frequency registers
31 if (c->x86_power & (1<<8)) 40 * of the Elan at 0x000df000. Unfortuantly, one of the Linux
32 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 41 * drivers subsequently pokes it, and changes the CPU speed.
42 * Workaround : Remove the unneeded alias.
43 */
44#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
45#define CBAR_ENB (0x80000000)
46#define CBAR_KEY (0X000000CB)
47 if (c->x86_model == 9 || c->x86_model == 10) {
48 if (inl (CBAR) & CBAR_ENB)
49 outl (0 | CBAR_KEY, CBAR);
33 } 50 }
34
35 /* Set MTRR capability flag if appropriate */
36 if (c->x86_model == 13 || c->x86_model == 9 ||
37 (c->x86_model == 8 && c->x86_mask >= 8))
38 set_cpu_cap(c, X86_FEATURE_K6_MTRR);
39} 51}
40 52
41static void __cpuinit init_amd(struct cpuinfo_x86 *c) 53
54static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
42{ 55{
43 u32 l, h; 56 u32 l, h;
44 int mbytes = num_physpages >> (20-PAGE_SHIFT); 57 int mbytes = num_physpages >> (20-PAGE_SHIFT);
45 int r;
46 58
59 if (c->x86_model < 6) {
60 /* Based on AMD doc 20734R - June 2000 */
61 if (c->x86_model == 0) {
62 clear_cpu_cap(c, X86_FEATURE_APIC);
63 set_cpu_cap(c, X86_FEATURE_PGE);
64 }
65 return;
66 }
67
68 if (c->x86_model == 6 && c->x86_mask == 1) {
69 const int K6_BUG_LOOP = 1000000;
70 int n;
71 void (*f_vide)(void);
72 unsigned long d, d2;
73
74 printk(KERN_INFO "AMD K6 stepping B detected - ");
75
76 /*
77 * It looks like AMD fixed the 2.6.2 bug and improved indirect
78 * calls at the same time.
79 */
80
81 n = K6_BUG_LOOP;
82 f_vide = vide;
83 rdtscl(d);
84 while (n--)
85 f_vide();
86 rdtscl(d2);
87 d = d2-d;
88
89 if (d > 20*K6_BUG_LOOP)
90 printk("system stability may be impaired when more than 32 MB are used.\n");
91 else
92 printk("probably OK (after B9730xxxx).\n");
93 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
94 }
95
96 /* K6 with old style WHCR */
97 if (c->x86_model < 8 ||
98 (c->x86_model == 8 && c->x86_mask < 8)) {
99 /* We can only write allocate on the low 508Mb */
100 if (mbytes > 508)
101 mbytes = 508;
102
103 rdmsr(MSR_K6_WHCR, l, h);
104 if ((l&0x0000FFFF) == 0) {
105 unsigned long flags;
106 l = (1<<0)|((mbytes/4)<<1);
107 local_irq_save(flags);
108 wbinvd();
109 wrmsr(MSR_K6_WHCR, l, h);
110 local_irq_restore(flags);
111 printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
112 mbytes);
113 }
114 return;
115 }
116
117 if ((c->x86_model == 8 && c->x86_mask > 7) ||
118 c->x86_model == 9 || c->x86_model == 13) {
119 /* The more serious chips .. */
120
121 if (mbytes > 4092)
122 mbytes = 4092;
123
124 rdmsr(MSR_K6_WHCR, l, h);
125 if ((l&0xFFFF0000) == 0) {
126 unsigned long flags;
127 l = ((mbytes>>2)<<22)|(1<<16);
128 local_irq_save(flags);
129 wbinvd();
130 wrmsr(MSR_K6_WHCR, l, h);
131 local_irq_restore(flags);
132 printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
133 mbytes);
134 }
135
136 return;
137 }
138
139 if (c->x86_model == 10) {
140 /* AMD Geode LX is model 10 */
141 /* placeholder for any needed mods */
142 return;
143 }
144}
145
146static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
147{
148 u32 l, h;
149
150 /*
151 * Bit 15 of Athlon specific MSR 15, needs to be 0
152 * to enable SSE on Palomino/Morgan/Barton CPU's.
153 * If the BIOS didn't enable it already, enable it here.
154 */
155 if (c->x86_model >= 6 && c->x86_model <= 10) {
156 if (!cpu_has(c, X86_FEATURE_XMM)) {
157 printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
158 rdmsr(MSR_K7_HWCR, l, h);
159 l &= ~0x00008000;
160 wrmsr(MSR_K7_HWCR, l, h);
161 set_cpu_cap(c, X86_FEATURE_XMM);
162 }
163 }
164
165 /*
166 * It's been determined by AMD that Athlons since model 8 stepping 1
167 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
168 * As per AMD technical note 27212 0.2
169 */
170 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
171 rdmsr(MSR_K7_CLK_CTL, l, h);
172 if ((l & 0xfff00000) != 0x20000000) {
173 printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
174 ((l & 0x000fffff)|0x20000000));
175 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
176 }
177 }
178
179 set_cpu_cap(c, X86_FEATURE_K7);
180}
181#endif
182
183#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
184static int __cpuinit nearby_node(int apicid)
185{
186 int i, node;
187
188 for (i = apicid - 1; i >= 0; i--) {
189 node = apicid_to_node[i];
190 if (node != NUMA_NO_NODE && node_online(node))
191 return node;
192 }
193 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
194 node = apicid_to_node[i];
195 if (node != NUMA_NO_NODE && node_online(node))
196 return node;
197 }
198 return first_node(node_online_map); /* Shouldn't happen */
199}
200#endif
201
202/*
203 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
204 * Assumes number of cores is a power of two.
205 */
206static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
207{
208#ifdef CONFIG_X86_HT
209 unsigned bits;
210
211 bits = c->x86_coreid_bits;
212
213 /* Low order bits define the core id (index of core in socket) */
214 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
215 /* Convert the initial APIC ID into the socket ID */
216 c->phys_proc_id = c->initial_apicid >> bits;
217#endif
218}
219
220static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
221{
222#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
223 int cpu = smp_processor_id();
224 int node;
225 unsigned apicid = hard_smp_processor_id();
226
227 node = c->phys_proc_id;
228 if (apicid_to_node[apicid] != NUMA_NO_NODE)
229 node = apicid_to_node[apicid];
230 if (!node_online(node)) {
231 /* Two possibilities here:
232 - The CPU is missing memory and no node was created.
233 In that case try picking one from a nearby CPU
234 - The APIC IDs differ from the HyperTransport node IDs
235 which the K8 northbridge parsing fills in.
236 Assume they are all increased by a constant offset,
237 but in the same order as the HT nodeids.
238 If that doesn't result in a usable node fall back to the
239 path for the previous case. */
240
241 int ht_nodeid = c->initial_apicid;
242
243 if (ht_nodeid >= 0 &&
244 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
245 node = apicid_to_node[ht_nodeid];
246 /* Pick a nearby node */
247 if (!node_online(node))
248 node = nearby_node(apicid);
249 }
250 numa_set_node(cpu, node);
251
252 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
253#endif
254}
255
256static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
257{
258#ifdef CONFIG_X86_HT
259 unsigned bits, ecx;
260
261 /* Multi core CPU? */
262 if (c->extended_cpuid_level < 0x80000008)
263 return;
264
265 ecx = cpuid_ecx(0x80000008);
266
267 c->x86_max_cores = (ecx & 0xff) + 1;
268
269 /* CPU telling us the core id bits shift? */
270 bits = (ecx >> 12) & 0xF;
271
272 /* Otherwise recompute */
273 if (bits == 0) {
274 while ((1 << bits) < c->x86_max_cores)
275 bits++;
276 }
277
278 c->x86_coreid_bits = bits;
279#endif
280}
281
282static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
283{
284 early_init_amd_mc(c);
285
286 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
287 if (c->x86_power & (1<<8))
288 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
289
290#ifdef CONFIG_X86_64
291 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
292#else
293 /* Set MTRR capability flag if appropriate */
294 if (c->x86 == 5)
295 if (c->x86_model == 13 || c->x86_model == 9 ||
296 (c->x86_model == 8 && c->x86_mask >= 8))
297 set_cpu_cap(c, X86_FEATURE_K6_MTRR);
298#endif
299}
300
301static void __cpuinit init_amd(struct cpuinfo_x86 *c)
302{
47#ifdef CONFIG_SMP 303#ifdef CONFIG_SMP
48 unsigned long long value; 304 unsigned long long value;
49 305
@@ -54,7 +310,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
54 * Errata 63 for SH-B3 steppings 310 * Errata 63 for SH-B3 steppings
55 * Errata 122 for all steppings (F+ have it disabled by default) 311 * Errata 122 for all steppings (F+ have it disabled by default)
56 */ 312 */
57 if (c->x86 == 15) { 313 if (c->x86 == 0xf) {
58 rdmsrl(MSR_K7_HWCR, value); 314 rdmsrl(MSR_K7_HWCR, value);
59 value |= 1 << 6; 315 value |= 1 << 6;
60 wrmsrl(MSR_K7_HWCR, value); 316 wrmsrl(MSR_K7_HWCR, value);
@@ -64,209 +320,119 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
64 early_init_amd(c); 320 early_init_amd(c);
65 321
66 /* 322 /*
67 * FIXME: We should handle the K5 here. Set up the write
68 * range and also turn on MSR 83 bits 4 and 31 (write alloc,
69 * no bus pipeline)
70 */
71
72 /*
73 * Bit 31 in normal CPUID used for nonstandard 3DNow ID; 323 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
74 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway 324 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
75 */ 325 */
76 clear_cpu_cap(c, 0*32+31); 326 clear_cpu_cap(c, 0*32+31);
77 327
78 r = get_model_name(c); 328#ifdef CONFIG_X86_64
329 /* On C+ stepping K8 rep microcode works well for copy/memset */
330 if (c->x86 == 0xf) {
331 u32 level;
79 332
80 switch (c->x86) { 333 level = cpuid_eax(1);
81 case 4: 334 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
82 /* 335 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
83 * General Systems BIOSen alias the cpu frequency registers
84 * of the Elan at 0x000df000. Unfortuantly, one of the Linux
85 * drivers subsequently pokes it, and changes the CPU speed.
86 * Workaround : Remove the unneeded alias.
87 */
88#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
89#define CBAR_ENB (0x80000000)
90#define CBAR_KEY (0X000000CB)
91 if (c->x86_model == 9 || c->x86_model == 10) {
92 if (inl (CBAR) & CBAR_ENB)
93 outl (0 | CBAR_KEY, CBAR);
94 }
95 break;
96 case 5:
97 if (c->x86_model < 6) {
98 /* Based on AMD doc 20734R - June 2000 */
99 if (c->x86_model == 0) {
100 clear_cpu_cap(c, X86_FEATURE_APIC);
101 set_cpu_cap(c, X86_FEATURE_PGE);
102 }
103 break;
104 }
105
106 if (c->x86_model == 6 && c->x86_mask == 1) {
107 const int K6_BUG_LOOP = 1000000;
108 int n;
109 void (*f_vide)(void);
110 unsigned long d, d2;
111
112 printk(KERN_INFO "AMD K6 stepping B detected - ");
113
114 /*
115 * It looks like AMD fixed the 2.6.2 bug and improved indirect
116 * calls at the same time.
117 */
118
119 n = K6_BUG_LOOP;
120 f_vide = vide;
121 rdtscl(d);
122 while (n--)
123 f_vide();
124 rdtscl(d2);
125 d = d2-d;
126
127 if (d > 20*K6_BUG_LOOP)
128 printk("system stability may be impaired when more than 32 MB are used.\n");
129 else
130 printk("probably OK (after B9730xxxx).\n");
131 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
132 }
133
134 /* K6 with old style WHCR */
135 if (c->x86_model < 8 ||
136 (c->x86_model == 8 && c->x86_mask < 8)) {
137 /* We can only write allocate on the low 508Mb */
138 if (mbytes > 508)
139 mbytes = 508;
140
141 rdmsr(MSR_K6_WHCR, l, h);
142 if ((l&0x0000FFFF) == 0) {
143 unsigned long flags;
144 l = (1<<0)|((mbytes/4)<<1);
145 local_irq_save(flags);
146 wbinvd();
147 wrmsr(MSR_K6_WHCR, l, h);
148 local_irq_restore(flags);
149 printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
150 mbytes);
151 }
152 break;
153 }
154
155 if ((c->x86_model == 8 && c->x86_mask > 7) ||
156 c->x86_model == 9 || c->x86_model == 13) {
157 /* The more serious chips .. */
158
159 if (mbytes > 4092)
160 mbytes = 4092;
161
162 rdmsr(MSR_K6_WHCR, l, h);
163 if ((l&0xFFFF0000) == 0) {
164 unsigned long flags;
165 l = ((mbytes>>2)<<22)|(1<<16);
166 local_irq_save(flags);
167 wbinvd();
168 wrmsr(MSR_K6_WHCR, l, h);
169 local_irq_restore(flags);
170 printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
171 mbytes);
172 }
173
174 break;
175 }
176
177 if (c->x86_model == 10) {
178 /* AMD Geode LX is model 10 */
179 /* placeholder for any needed mods */
180 break;
181 }
182 break;
183 case 6: /* An Athlon/Duron */
184
185 /*
186 * Bit 15 of Athlon specific MSR 15, needs to be 0
187 * to enable SSE on Palomino/Morgan/Barton CPU's.
188 * If the BIOS didn't enable it already, enable it here.
189 */
190 if (c->x86_model >= 6 && c->x86_model <= 10) {
191 if (!cpu_has(c, X86_FEATURE_XMM)) {
192 printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
193 rdmsr(MSR_K7_HWCR, l, h);
194 l &= ~0x00008000;
195 wrmsr(MSR_K7_HWCR, l, h);
196 set_cpu_cap(c, X86_FEATURE_XMM);
197 }
198 }
199
200 /*
201 * It's been determined by AMD that Athlons since model 8 stepping 1
202 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
203 * As per AMD technical note 27212 0.2
204 */
205 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
206 rdmsr(MSR_K7_CLK_CTL, l, h);
207 if ((l & 0xfff00000) != 0x20000000) {
208 printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
209 ((l & 0x000fffff)|0x20000000));
210 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
211 }
212 }
213 break;
214 } 336 }
337 if (c->x86 == 0x10 || c->x86 == 0x11)
338 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
339#else
340
341 /*
342 * FIXME: We should handle the K5 here. Set up the write
343 * range and also turn on MSR 83 bits 4 and 31 (write alloc,
344 * no bus pipeline)
345 */
215 346
216 switch (c->x86) { 347 switch (c->x86) {
217 case 15: 348 case 4:
218 /* Use K8 tuning for Fam10h and Fam11h */ 349 init_amd_k5(c);
219 case 0x10:
220 case 0x11:
221 set_cpu_cap(c, X86_FEATURE_K8);
222 break; 350 break;
223 case 6: 351 case 5:
224 set_cpu_cap(c, X86_FEATURE_K7); 352 init_amd_k6(c);
353 break;
354 case 6: /* An Athlon/Duron */
355 init_amd_k7(c);
225 break; 356 break;
226 } 357 }
358
359 /* K6s reports MCEs but don't actually have all the MSRs */
360 if (c->x86 < 6)
361 clear_cpu_cap(c, X86_FEATURE_MCE);
362#endif
363
364 /* Enable workaround for FXSAVE leak */
227 if (c->x86 >= 6) 365 if (c->x86 >= 6)
228 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); 366 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
229 367
230 display_cacheinfo(c); 368 if (!c->x86_model_id[0]) {
231 369 switch (c->x86) {
232 if (cpuid_eax(0x80000000) >= 0x80000008) 370 case 0xf:
233 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 371 /* Should distinguish Models here, but this is only
372 a fallback anyways. */
373 strcpy(c->x86_model_id, "Hammer");
374 break;
375 }
376 }
234 377
235#ifdef CONFIG_X86_HT 378 display_cacheinfo(c);
236 /*
237 * On a AMD multi core setup the lower bits of the APIC id
238 * distinguish the cores.
239 */
240 if (c->x86_max_cores > 1) {
241 int cpu = smp_processor_id();
242 unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
243 379
244 if (bits == 0) { 380 /* Multi core CPU? */
245 while ((1 << bits) < c->x86_max_cores) 381 if (c->extended_cpuid_level >= 0x80000008) {
246 bits++; 382 amd_detect_cmp(c);
247 } 383 srat_detect_node(c);
248 c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
249 c->phys_proc_id >>= bits;
250 printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
251 cpu, c->x86_max_cores, c->cpu_core_id);
252 } 384 }
385
386#ifdef CONFIG_X86_32
387 detect_ht(c);
253#endif 388#endif
254 389
255 if (cpuid_eax(0x80000000) >= 0x80000006) { 390 if (c->extended_cpuid_level >= 0x80000006) {
256 if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) 391 if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000))
257 num_cache_leaves = 4; 392 num_cache_leaves = 4;
258 else 393 else
259 num_cache_leaves = 3; 394 num_cache_leaves = 3;
260 } 395 }
261 396
262 /* K6s reports MCEs but don't actually have all the MSRs */ 397 if (c->x86 >= 0xf && c->x86 <= 0x11)
263 if (c->x86 < 6) 398 set_cpu_cap(c, X86_FEATURE_K8);
264 clear_cpu_cap(c, X86_FEATURE_MCE);
265 399
266 if (cpu_has_xmm2) 400 if (cpu_has_xmm2) {
401 /* MFENCE stops RDTSC speculation */
267 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); 402 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
403 }
404
405#ifdef CONFIG_X86_64
406 if (c->x86 == 0x10) {
407 /* do this for boot cpu */
408 if (c == &boot_cpu_data)
409 check_enable_amd_mmconf_dmi();
410
411 fam10h_check_enable_mmcfg();
412 }
413
414 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
415 unsigned long long tseg;
416
417 /*
418 * Split up direct mapping around the TSEG SMM area.
419 * Don't do it for gbpages because there seems very little
420 * benefit in doing so.
421 */
422 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
423 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
424 if ((tseg>>PMD_SHIFT) <
425 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
426 ((tseg>>PMD_SHIFT) <
427 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
428 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
429 set_memory_4k((unsigned long)__va(tseg), 1);
430 }
431 }
432#endif
268} 433}
269 434
435#ifdef CONFIG_X86_32
270static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) 436static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
271{ 437{
272 /* AMD errata T13 (order #21922) */ 438 /* AMD errata T13 (order #21922) */
@@ -279,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int
279 } 445 }
280 return size; 446 return size;
281} 447}
448#endif
282 449
283static struct cpu_dev amd_cpu_dev __cpuinitdata = { 450static struct cpu_dev amd_cpu_dev __cpuinitdata = {
284 .c_vendor = "AMD", 451 .c_vendor = "AMD",
285 .c_ident = { "AuthenticAMD" }, 452 .c_ident = { "AuthenticAMD" },
453#ifdef CONFIG_X86_32
286 .c_models = { 454 .c_models = {
287 { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = 455 { .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
288 { 456 {
@@ -295,9 +463,11 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = {
295 } 463 }
296 }, 464 },
297 }, 465 },
466 .c_size_cache = amd_size_cache,
467#endif
298 .c_early_init = early_init_amd, 468 .c_early_init = early_init_amd,
299 .c_init = init_amd, 469 .c_init = init_amd,
300 .c_size_cache = amd_size_cache, 470 .c_x86_vendor = X86_VENDOR_AMD,
301}; 471};
302 472
303cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); 473cpu_dev_register(amd_cpu_dev);
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
deleted file mode 100644
index d1692b2a41ff..000000000000
--- a/arch/x86/kernel/cpu/amd_64.c
+++ /dev/null
@@ -1,224 +0,0 @@
1#include <linux/init.h>
2#include <linux/mm.h>
3
4#include <asm/numa_64.h>
5#include <asm/mmconfig.h>
6#include <asm/cacheflush.h>
7
8#include <mach_apic.h>
9
10#include "cpu.h"
11
12int force_mwait __cpuinitdata;
13
14#ifdef CONFIG_NUMA
15static int __cpuinit nearby_node(int apicid)
16{
17 int i, node;
18
19 for (i = apicid - 1; i >= 0; i--) {
20 node = apicid_to_node[i];
21 if (node != NUMA_NO_NODE && node_online(node))
22 return node;
23 }
24 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
25 node = apicid_to_node[i];
26 if (node != NUMA_NO_NODE && node_online(node))
27 return node;
28 }
29 return first_node(node_online_map); /* Shouldn't happen */
30}
31#endif
32
33/*
34 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
35 * Assumes number of cores is a power of two.
36 */
37static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
38{
39#ifdef CONFIG_SMP
40 unsigned bits;
41#ifdef CONFIG_NUMA
42 int cpu = smp_processor_id();
43 int node = 0;
44 unsigned apicid = hard_smp_processor_id();
45#endif
46 bits = c->x86_coreid_bits;
47
48 /* Low order bits define the core id (index of core in socket) */
49 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
50 /* Convert the initial APIC ID into the socket ID */
51 c->phys_proc_id = c->initial_apicid >> bits;
52
53#ifdef CONFIG_NUMA
54 node = c->phys_proc_id;
55 if (apicid_to_node[apicid] != NUMA_NO_NODE)
56 node = apicid_to_node[apicid];
57 if (!node_online(node)) {
58 /* Two possibilities here:
59 - The CPU is missing memory and no node was created.
60 In that case try picking one from a nearby CPU
61 - The APIC IDs differ from the HyperTransport node IDs
62 which the K8 northbridge parsing fills in.
63 Assume they are all increased by a constant offset,
64 but in the same order as the HT nodeids.
65 If that doesn't result in a usable node fall back to the
66 path for the previous case. */
67
68 int ht_nodeid = c->initial_apicid;
69
70 if (ht_nodeid >= 0 &&
71 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
72 node = apicid_to_node[ht_nodeid];
73 /* Pick a nearby node */
74 if (!node_online(node))
75 node = nearby_node(apicid);
76 }
77 numa_set_node(cpu, node);
78
79 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
80#endif
81#endif
82}
83
84static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
85{
86#ifdef CONFIG_SMP
87 unsigned bits, ecx;
88
89 /* Multi core CPU? */
90 if (c->extended_cpuid_level < 0x80000008)
91 return;
92
93 ecx = cpuid_ecx(0x80000008);
94
95 c->x86_max_cores = (ecx & 0xff) + 1;
96
97 /* CPU telling us the core id bits shift? */
98 bits = (ecx >> 12) & 0xF;
99
100 /* Otherwise recompute */
101 if (bits == 0) {
102 while ((1 << bits) < c->x86_max_cores)
103 bits++;
104 }
105
106 c->x86_coreid_bits = bits;
107
108#endif
109}
110
111static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
112{
113 early_init_amd_mc(c);
114
115 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
116 if (c->x86_power & (1<<8))
117 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
118
119 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
120}
121
122static void __cpuinit init_amd(struct cpuinfo_x86 *c)
123{
124 unsigned level;
125
126#ifdef CONFIG_SMP
127 unsigned long value;
128
129 /*
130 * Disable TLB flush filter by setting HWCR.FFDIS on K8
131 * bit 6 of msr C001_0015
132 *
133 * Errata 63 for SH-B3 steppings
134 * Errata 122 for all steppings (F+ have it disabled by default)
135 */
136 if (c->x86 == 0xf) {
137 rdmsrl(MSR_K8_HWCR, value);
138 value |= 1 << 6;
139 wrmsrl(MSR_K8_HWCR, value);
140 }
141#endif
142
143 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
144 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
145 clear_cpu_cap(c, 0*32+31);
146
147 /* On C+ stepping K8 rep microcode works well for copy/memset */
148 if (c->x86 == 0xf) {
149 level = cpuid_eax(1);
150 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
151 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
152 }
153 if (c->x86 == 0x10 || c->x86 == 0x11)
154 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
155
156 /* Enable workaround for FXSAVE leak */
157 if (c->x86 >= 6)
158 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
159
160 level = get_model_name(c);
161 if (!level) {
162 switch (c->x86) {
163 case 0xf:
164 /* Should distinguish Models here, but this is only
165 a fallback anyways. */
166 strcpy(c->x86_model_id, "Hammer");
167 break;
168 }
169 }
170 display_cacheinfo(c);
171
172 /* Multi core CPU? */
173 if (c->extended_cpuid_level >= 0x80000008)
174 amd_detect_cmp(c);
175
176 if (c->extended_cpuid_level >= 0x80000006 &&
177 (cpuid_edx(0x80000006) & 0xf000))
178 num_cache_leaves = 4;
179 else
180 num_cache_leaves = 3;
181
182 if (c->x86 >= 0xf && c->x86 <= 0x11)
183 set_cpu_cap(c, X86_FEATURE_K8);
184
185 /* MFENCE stops RDTSC speculation */
186 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
187
188 if (c->x86 == 0x10) {
189 /* do this for boot cpu */
190 if (c == &boot_cpu_data)
191 check_enable_amd_mmconf_dmi();
192
193 fam10h_check_enable_mmcfg();
194 }
195
196 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
197 unsigned long long tseg;
198
199 /*
200 * Split up direct mapping around the TSEG SMM area.
201 * Don't do it for gbpages because there seems very little
202 * benefit in doing so.
203 */
204 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
205 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
206 if ((tseg>>PMD_SHIFT) <
207 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
208 ((tseg>>PMD_SHIFT) <
209 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
210 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
211 set_memory_4k((unsigned long)__va(tseg), 1);
212 }
213 }
214}
215
216static struct cpu_dev amd_cpu_dev __cpuinitdata = {
217 .c_vendor = "AMD",
218 .c_ident = { "AuthenticAMD" },
219 .c_early_init = early_init_amd,
220 .c_init = init_amd,
221};
222
223cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
224
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index a0534c04d38a..89bfdd9cacc6 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
289 if (c->x86_model >= 6 && c->x86_model < 9) 289 if (c->x86_model >= 6 && c->x86_model < 9)
290 set_cpu_cap(c, X86_FEATURE_3DNOW); 290 set_cpu_cap(c, X86_FEATURE_3DNOW);
291 291
292 get_model_name(c);
293 display_cacheinfo(c); 292 display_cacheinfo(c);
294} 293}
295 294
@@ -475,6 +474,7 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
475 .c_early_init = early_init_centaur, 474 .c_early_init = early_init_centaur,
476 .c_init = init_centaur, 475 .c_init = init_centaur,
477 .c_size_cache = centaur_size_cache, 476 .c_size_cache = centaur_size_cache,
477 .c_x86_vendor = X86_VENDOR_CENTAUR,
478}; 478};
479 479
480cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev); 480cpu_dev_register(centaur_cpu_dev);
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index 1d181c40e2e1..a1625f5a1e78 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -16,9 +16,10 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
16 16
17static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 17static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
18{ 18{
19 early_init_centaur(c);
20
19 if (c->x86 == 0x6 && c->x86_model >= 0xf) { 21 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
20 c->x86_cache_alignment = c->x86_clflush_size * 2; 22 c->x86_cache_alignment = c->x86_clflush_size * 2;
21 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
22 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 23 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
23 } 24 }
24 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); 25 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
@@ -29,7 +30,8 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
29 .c_ident = { "CentaurHauls" }, 30 .c_ident = { "CentaurHauls" },
30 .c_early_init = early_init_centaur, 31 .c_early_init = early_init_centaur,
31 .c_init = init_centaur, 32 .c_init = init_centaur,
33 .c_x86_vendor = X86_VENDOR_CENTAUR,
32}; 34};
33 35
34cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev); 36cpu_dev_register(centaur_cpu_dev);
35 37
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c
new file mode 100644
index 000000000000..2056ccf572cc
--- /dev/null
+++ b/arch/x86/kernel/cpu/cmpxchg.c
@@ -0,0 +1,72 @@
1/*
2 * cmpxchg*() fallbacks for CPU not supporting these instructions
3 */
4
5#include <linux/kernel.h>
6#include <linux/smp.h>
7#include <linux/module.h>
8
9#ifndef CONFIG_X86_CMPXCHG
10unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
11{
12 u8 prev;
13 unsigned long flags;
14
15 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
16 local_irq_save(flags);
17 prev = *(u8 *)ptr;
18 if (prev == old)
19 *(u8 *)ptr = new;
20 local_irq_restore(flags);
21 return prev;
22}
23EXPORT_SYMBOL(cmpxchg_386_u8);
24
25unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
26{
27 u16 prev;
28 unsigned long flags;
29
30 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
31 local_irq_save(flags);
32 prev = *(u16 *)ptr;
33 if (prev == old)
34 *(u16 *)ptr = new;
35 local_irq_restore(flags);
36 return prev;
37}
38EXPORT_SYMBOL(cmpxchg_386_u16);
39
40unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
41{
42 u32 prev;
43 unsigned long flags;
44
45 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
46 local_irq_save(flags);
47 prev = *(u32 *)ptr;
48 if (prev == old)
49 *(u32 *)ptr = new;
50 local_irq_restore(flags);
51 return prev;
52}
53EXPORT_SYMBOL(cmpxchg_386_u32);
54#endif
55
56#ifndef CONFIG_X86_CMPXCHG64
57unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
58{
59 u64 prev;
60 unsigned long flags;
61
62 /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
63 local_irq_save(flags);
64 prev = *(u64 *)ptr;
65 if (prev == old)
66 *(u64 *)ptr = new;
67 local_irq_restore(flags);
68 return prev;
69}
70EXPORT_SYMBOL(cmpxchg_486_u64);
71#endif
72
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4e456bd955bb..7581b62df184 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,28 +1,62 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
2#include <linux/string.h> 4#include <linux/string.h>
5#include <linux/bootmem.h>
6#include <linux/bitops.h>
7#include <linux/module.h>
8#include <linux/kgdb.h>
9#include <linux/topology.h>
3#include <linux/delay.h> 10#include <linux/delay.h>
4#include <linux/smp.h> 11#include <linux/smp.h>
5#include <linux/module.h>
6#include <linux/percpu.h> 12#include <linux/percpu.h>
7#include <linux/bootmem.h>
8#include <asm/processor.h>
9#include <asm/i387.h> 13#include <asm/i387.h>
10#include <asm/msr.h> 14#include <asm/msr.h>
11#include <asm/io.h> 15#include <asm/io.h>
16#include <asm/linkage.h>
12#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
13#include <asm/mtrr.h> 18#include <asm/mtrr.h>
14#include <asm/mce.h> 19#include <asm/mce.h>
15#include <asm/pat.h> 20#include <asm/pat.h>
16#include <asm/asm.h> 21#include <asm/asm.h>
22#include <asm/numa.h>
17#ifdef CONFIG_X86_LOCAL_APIC 23#ifdef CONFIG_X86_LOCAL_APIC
18#include <asm/mpspec.h> 24#include <asm/mpspec.h>
19#include <asm/apic.h> 25#include <asm/apic.h>
20#include <mach_apic.h> 26#include <mach_apic.h>
27#include <asm/genapic.h>
21#endif 28#endif
22 29
30#include <asm/pda.h>
31#include <asm/pgtable.h>
32#include <asm/processor.h>
33#include <asm/desc.h>
34#include <asm/atomic.h>
35#include <asm/proto.h>
36#include <asm/sections.h>
37#include <asm/setup.h>
38
23#include "cpu.h" 39#include "cpu.h"
24 40
41static struct cpu_dev *this_cpu __cpuinitdata;
42
43#ifdef CONFIG_X86_64
44/* We need valid kernel segments for data and code in long mode too
45 * IRET will check the segment types kkeil 2000/10/28
46 * Also sysret mandates a special GDT layout
47 */
48/* The TLS descriptors are currently at a different place compared to i386.
49 Hopefully nobody expects them at a fixed place (Wine?) */
25DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 50DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
51 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
52 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
53 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
54 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
55 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
56 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
57} };
58#else
59DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
26 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 60 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
27 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 61 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
28 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 62 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -56,17 +90,150 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
56 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 90 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
57 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 91 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
58} }; 92} };
93#endif
59EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 94EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
60 95
61__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; 96#ifdef CONFIG_X86_32
62
63static int cachesize_override __cpuinitdata = -1; 97static int cachesize_override __cpuinitdata = -1;
64static int disable_x86_serial_nr __cpuinitdata = 1; 98static int disable_x86_serial_nr __cpuinitdata = 1;
65 99
66struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; 100static int __init cachesize_setup(char *str)
101{
102 get_option(&str, &cachesize_override);
103 return 1;
104}
105__setup("cachesize=", cachesize_setup);
106
107static int __init x86_fxsr_setup(char *s)
108{
109 setup_clear_cpu_cap(X86_FEATURE_FXSR);
110 setup_clear_cpu_cap(X86_FEATURE_XMM);
111 return 1;
112}
113__setup("nofxsr", x86_fxsr_setup);
114
115static int __init x86_sep_setup(char *s)
116{
117 setup_clear_cpu_cap(X86_FEATURE_SEP);
118 return 1;
119}
120__setup("nosep", x86_sep_setup);
121
122/* Standard macro to see if a specific flag is changeable */
123static inline int flag_is_changeable_p(u32 flag)
124{
125 u32 f1, f2;
126
127 asm("pushfl\n\t"
128 "pushfl\n\t"
129 "popl %0\n\t"
130 "movl %0,%1\n\t"
131 "xorl %2,%0\n\t"
132 "pushl %0\n\t"
133 "popfl\n\t"
134 "pushfl\n\t"
135 "popl %0\n\t"
136 "popfl\n\t"
137 : "=&r" (f1), "=&r" (f2)
138 : "ir" (flag));
139
140 return ((f1^f2) & flag) != 0;
141}
142
143/* Probe for the CPUID instruction */
144static int __cpuinit have_cpuid_p(void)
145{
146 return flag_is_changeable_p(X86_EFLAGS_ID);
147}
148
149static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
150{
151 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
152 /* Disable processor serial number */
153 unsigned long lo, hi;
154 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
155 lo |= 0x200000;
156 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
157 printk(KERN_NOTICE "CPU serial number disabled.\n");
158 clear_cpu_cap(c, X86_FEATURE_PN);
159
160 /* Disabling the serial number may affect the cpuid level */
161 c->cpuid_level = cpuid_eax(0);
162 }
163}
164
165static int __init x86_serial_nr_setup(char *s)
166{
167 disable_x86_serial_nr = 0;
168 return 1;
169}
170__setup("serialnumber", x86_serial_nr_setup);
171#else
172static inline int flag_is_changeable_p(u32 flag)
173{
174 return 1;
175}
176/* Probe for the CPUID instruction */
177static inline int have_cpuid_p(void)
178{
179 return 1;
180}
181static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
182{
183}
184#endif
185
186/*
187 * Naming convention should be: <Name> [(<Codename>)]
188 * This table only is used unless init_<vendor>() below doesn't set it;
189 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
190 *
191 */
192
193/* Look up CPU names by table lookup. */
194static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
195{
196 struct cpu_model_info *info;
197
198 if (c->x86_model >= 16)
199 return NULL; /* Range check */
200
201 if (!this_cpu)
202 return NULL;
203
204 info = this_cpu->c_models;
205
206 while (info && info->family) {
207 if (info->family == c->x86)
208 return info->model_names[c->x86_model];
209 info++;
210 }
211 return NULL; /* Not found */
212}
213
214__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
215
216/* Current gdt points %fs at the "master" per-cpu area: after this,
217 * it's on the real one. */
218void switch_to_new_gdt(void)
219{
220 struct desc_ptr gdt_descr;
221
222 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
223 gdt_descr.size = GDT_SIZE - 1;
224 load_gdt(&gdt_descr);
225#ifdef CONFIG_X86_32
226 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
227#endif
228}
229
230static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
67 231
68static void __cpuinit default_init(struct cpuinfo_x86 *c) 232static void __cpuinit default_init(struct cpuinfo_x86 *c)
69{ 233{
234#ifdef CONFIG_X86_64
235 display_cacheinfo(c);
236#else
70 /* Not much we can do here... */ 237 /* Not much we can do here... */
71 /* Check if at least it has cpuid */ 238 /* Check if at least it has cpuid */
72 if (c->cpuid_level == -1) { 239 if (c->cpuid_level == -1) {
@@ -76,28 +243,22 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
76 else if (c->x86 == 3) 243 else if (c->x86 == 3)
77 strcpy(c->x86_model_id, "386"); 244 strcpy(c->x86_model_id, "386");
78 } 245 }
246#endif
79} 247}
80 248
81static struct cpu_dev __cpuinitdata default_cpu = { 249static struct cpu_dev __cpuinitdata default_cpu = {
82 .c_init = default_init, 250 .c_init = default_init,
83 .c_vendor = "Unknown", 251 .c_vendor = "Unknown",
252 .c_x86_vendor = X86_VENDOR_UNKNOWN,
84}; 253};
85static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
86 254
87static int __init cachesize_setup(char *str) 255static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
88{
89 get_option(&str, &cachesize_override);
90 return 1;
91}
92__setup("cachesize=", cachesize_setup);
93
94int __cpuinit get_model_name(struct cpuinfo_x86 *c)
95{ 256{
96 unsigned int *v; 257 unsigned int *v;
97 char *p, *q; 258 char *p, *q;
98 259
99 if (cpuid_eax(0x80000000) < 0x80000004) 260 if (c->extended_cpuid_level < 0x80000004)
100 return 0; 261 return;
101 262
102 v = (unsigned int *) c->x86_model_id; 263 v = (unsigned int *) c->x86_model_id;
103 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); 264 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
@@ -116,30 +277,34 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
116 while (q <= &c->x86_model_id[48]) 277 while (q <= &c->x86_model_id[48])
117 *q++ = '\0'; /* Zero-pad the rest */ 278 *q++ = '\0'; /* Zero-pad the rest */
118 } 279 }
119
120 return 1;
121} 280}
122 281
123
124void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) 282void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
125{ 283{
126 unsigned int n, dummy, ecx, edx, l2size; 284 unsigned int n, dummy, ebx, ecx, edx, l2size;
127 285
128 n = cpuid_eax(0x80000000); 286 n = c->extended_cpuid_level;
129 287
130 if (n >= 0x80000005) { 288 if (n >= 0x80000005) {
131 cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); 289 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
132 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", 290 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
133 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); 291 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
134 c->x86_cache_size = (ecx>>24)+(edx>>24); 292 c->x86_cache_size = (ecx>>24) + (edx>>24);
293#ifdef CONFIG_X86_64
294 /* On K8 L1 TLB is inclusive, so don't count it */
295 c->x86_tlbsize = 0;
296#endif
135 } 297 }
136 298
137 if (n < 0x80000006) /* Some chips just has a large L1. */ 299 if (n < 0x80000006) /* Some chips just has a large L1. */
138 return; 300 return;
139 301
140 ecx = cpuid_ecx(0x80000006); 302 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
141 l2size = ecx >> 16; 303 l2size = ecx >> 16;
142 304
305#ifdef CONFIG_X86_64
306 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
307#else
143 /* do processor-specific cache resizing */ 308 /* do processor-specific cache resizing */
144 if (this_cpu->c_size_cache) 309 if (this_cpu->c_size_cache)
145 l2size = this_cpu->c_size_cache(c, l2size); 310 l2size = this_cpu->c_size_cache(c, l2size);
@@ -150,116 +315,106 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
150 315
151 if (l2size == 0) 316 if (l2size == 0)
152 return; /* Again, no L2 cache is possible */ 317 return; /* Again, no L2 cache is possible */
318#endif
153 319
154 c->x86_cache_size = l2size; 320 c->x86_cache_size = l2size;
155 321
156 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", 322 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
157 l2size, ecx & 0xFF); 323 l2size, ecx & 0xFF);
158} 324}
159 325
160/* 326void __cpuinit detect_ht(struct cpuinfo_x86 *c)
161 * Naming convention should be: <Name> [(<Codename>)]
162 * This table only is used unless init_<vendor>() below doesn't set it;
163 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
164 *
165 */
166
167/* Look up CPU names by table lookup. */
168static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
169{ 327{
170 struct cpu_model_info *info; 328#ifdef CONFIG_X86_HT
329 u32 eax, ebx, ecx, edx;
330 int index_msb, core_bits;
171 331
172 if (c->x86_model >= 16) 332 if (!cpu_has(c, X86_FEATURE_HT))
173 return NULL; /* Range check */ 333 return;
174 334
175 if (!this_cpu) 335 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
176 return NULL; 336 goto out;
177 337
178 info = this_cpu->c_models; 338 if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
339 return;
179 340
180 while (info && info->family) { 341 cpuid(1, &eax, &ebx, &ecx, &edx);
181 if (info->family == c->x86) 342
182 return info->model_names[c->x86_model]; 343 smp_num_siblings = (ebx & 0xff0000) >> 16;
183 info++; 344
345 if (smp_num_siblings == 1) {
346 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
347 } else if (smp_num_siblings > 1) {
348
349 if (smp_num_siblings > NR_CPUS) {
350 printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
351 smp_num_siblings);
352 smp_num_siblings = 1;
353 return;
354 }
355
356 index_msb = get_count_order(smp_num_siblings);
357#ifdef CONFIG_X86_64
358 c->phys_proc_id = phys_pkg_id(index_msb);
359#else
360 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
361#endif
362
363 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
364
365 index_msb = get_count_order(smp_num_siblings);
366
367 core_bits = get_count_order(c->x86_max_cores);
368
369#ifdef CONFIG_X86_64
370 c->cpu_core_id = phys_pkg_id(index_msb) &
371 ((1 << core_bits) - 1);
372#else
373 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
374 ((1 << core_bits) - 1);
375#endif
184 } 376 }
185 return NULL; /* Not found */
186}
187 377
378out:
379 if ((c->x86_max_cores * smp_num_siblings) > 1) {
380 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
381 c->phys_proc_id);
382 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
383 c->cpu_core_id);
384 }
385#endif
386}
188 387
189static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) 388static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
190{ 389{
191 char *v = c->x86_vendor_id; 390 char *v = c->x86_vendor_id;
192 int i; 391 int i;
193 static int printed; 392 static int printed;
194 393
195 for (i = 0; i < X86_VENDOR_NUM; i++) { 394 for (i = 0; i < X86_VENDOR_NUM; i++) {
196 if (cpu_devs[i]) { 395 if (!cpu_devs[i])
197 if (!strcmp(v, cpu_devs[i]->c_ident[0]) || 396 break;
198 (cpu_devs[i]->c_ident[1] && 397
199 !strcmp(v, cpu_devs[i]->c_ident[1]))) { 398 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
200 c->x86_vendor = i; 399 (cpu_devs[i]->c_ident[1] &&
201 if (!early) 400 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
202 this_cpu = cpu_devs[i]; 401 this_cpu = cpu_devs[i];
203 return; 402 c->x86_vendor = this_cpu->c_x86_vendor;
204 } 403 return;
205 } 404 }
206 } 405 }
406
207 if (!printed) { 407 if (!printed) {
208 printed++; 408 printed++;
209 printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); 409 printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
210 printk(KERN_ERR "CPU: Your system may be unstable.\n"); 410 printk(KERN_ERR "CPU: Your system may be unstable.\n");
211 } 411 }
412
212 c->x86_vendor = X86_VENDOR_UNKNOWN; 413 c->x86_vendor = X86_VENDOR_UNKNOWN;
213 this_cpu = &default_cpu; 414 this_cpu = &default_cpu;
214} 415}
215 416
216 417void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
217static int __init x86_fxsr_setup(char *s)
218{
219 setup_clear_cpu_cap(X86_FEATURE_FXSR);
220 setup_clear_cpu_cap(X86_FEATURE_XMM);
221 return 1;
222}
223__setup("nofxsr", x86_fxsr_setup);
224
225
226static int __init x86_sep_setup(char *s)
227{
228 setup_clear_cpu_cap(X86_FEATURE_SEP);
229 return 1;
230}
231__setup("nosep", x86_sep_setup);
232
233
234/* Standard macro to see if a specific flag is changeable */
235static inline int flag_is_changeable_p(u32 flag)
236{
237 u32 f1, f2;
238
239 asm("pushfl\n\t"
240 "pushfl\n\t"
241 "popl %0\n\t"
242 "movl %0,%1\n\t"
243 "xorl %2,%0\n\t"
244 "pushl %0\n\t"
245 "popfl\n\t"
246 "pushfl\n\t"
247 "popl %0\n\t"
248 "popfl\n\t"
249 : "=&r" (f1), "=&r" (f2)
250 : "ir" (flag));
251
252 return ((f1^f2) & flag) != 0;
253}
254
255
256/* Probe for the CPUID instruction */
257static int __cpuinit have_cpuid_p(void)
258{
259 return flag_is_changeable_p(X86_EFLAGS_ID);
260}
261
262void __init cpu_detect(struct cpuinfo_x86 *c)
263{ 418{
264 /* Get vendor name */ 419 /* Get vendor name */
265 cpuid(0x00000000, (unsigned int *)&c->cpuid_level, 420 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
@@ -268,50 +423,87 @@ void __init cpu_detect(struct cpuinfo_x86 *c)
268 (unsigned int *)&c->x86_vendor_id[4]); 423 (unsigned int *)&c->x86_vendor_id[4]);
269 424
270 c->x86 = 4; 425 c->x86 = 4;
426 /* Intel-defined flags: level 0x00000001 */
271 if (c->cpuid_level >= 0x00000001) { 427 if (c->cpuid_level >= 0x00000001) {
272 u32 junk, tfms, cap0, misc; 428 u32 junk, tfms, cap0, misc;
273 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 429 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
274 c->x86 = (tfms >> 8) & 15; 430 c->x86 = (tfms >> 8) & 0xf;
275 c->x86_model = (tfms >> 4) & 15; 431 c->x86_model = (tfms >> 4) & 0xf;
432 c->x86_mask = tfms & 0xf;
276 if (c->x86 == 0xf) 433 if (c->x86 == 0xf)
277 c->x86 += (tfms >> 20) & 0xff; 434 c->x86 += (tfms >> 20) & 0xff;
278 if (c->x86 >= 0x6) 435 if (c->x86 >= 0x6)
279 c->x86_model += ((tfms >> 16) & 0xF) << 4; 436 c->x86_model += ((tfms >> 16) & 0xf) << 4;
280 c->x86_mask = tfms & 15;
281 if (cap0 & (1<<19)) { 437 if (cap0 & (1<<19)) {
282 c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
283 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 438 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
439 c->x86_cache_alignment = c->x86_clflush_size;
284 } 440 }
285 } 441 }
286} 442}
287static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) 443
444static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
288{ 445{
289 u32 tfms, xlvl; 446 u32 tfms, xlvl;
290 unsigned int ebx; 447 u32 ebx;
291 448
292 memset(&c->x86_capability, 0, sizeof c->x86_capability); 449 /* Intel-defined flags: level 0x00000001 */
293 if (have_cpuid_p()) { 450 if (c->cpuid_level >= 0x00000001) {
294 /* Intel-defined flags: level 0x00000001 */ 451 u32 capability, excap;
295 if (c->cpuid_level >= 0x00000001) { 452 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
296 u32 capability, excap; 453 c->x86_capability[0] = capability;
297 cpuid(0x00000001, &tfms, &ebx, &excap, &capability); 454 c->x86_capability[4] = excap;
298 c->x86_capability[0] = capability; 455 }
299 c->x86_capability[4] = excap;
300 }
301 456
302 /* AMD-defined flags: level 0x80000001 */ 457 /* AMD-defined flags: level 0x80000001 */
303 xlvl = cpuid_eax(0x80000000); 458 xlvl = cpuid_eax(0x80000000);
304 if ((xlvl & 0xffff0000) == 0x80000000) { 459 c->extended_cpuid_level = xlvl;
305 if (xlvl >= 0x80000001) { 460 if ((xlvl & 0xffff0000) == 0x80000000) {
306 c->x86_capability[1] = cpuid_edx(0x80000001); 461 if (xlvl >= 0x80000001) {
307 c->x86_capability[6] = cpuid_ecx(0x80000001); 462 c->x86_capability[1] = cpuid_edx(0x80000001);
308 } 463 c->x86_capability[6] = cpuid_ecx(0x80000001);
309 } 464 }
465 }
310 466
467#ifdef CONFIG_X86_64
468 if (c->extended_cpuid_level >= 0x80000008) {
469 u32 eax = cpuid_eax(0x80000008);
470
471 c->x86_virt_bits = (eax >> 8) & 0xff;
472 c->x86_phys_bits = eax & 0xff;
311 } 473 }
474#endif
475
476 if (c->extended_cpuid_level >= 0x80000007)
477 c->x86_power = cpuid_edx(0x80000007);
312 478
313} 479}
314 480
481static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
482{
483#ifdef CONFIG_X86_32
484 int i;
485
486 /*
487 * First of all, decide if this is a 486 or higher
488 * It's a 486 if we can modify the AC flag
489 */
490 if (flag_is_changeable_p(X86_EFLAGS_AC))
491 c->x86 = 4;
492 else
493 c->x86 = 3;
494
495 for (i = 0; i < X86_VENDOR_NUM; i++)
496 if (cpu_devs[i] && cpu_devs[i]->c_identify) {
497 c->x86_vendor_id[0] = 0;
498 cpu_devs[i]->c_identify(c);
499 if (c->x86_vendor_id[0]) {
500 get_cpu_vendor(c);
501 break;
502 }
503 }
504#endif
505}
506
315/* 507/*
316 * Do minimum CPU detection early. 508 * Do minimum CPU detection early.
317 * Fields really needed: vendor, cpuid_level, family, model, mask, 509 * Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -321,25 +513,61 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
321 * WARNING: this function is only called on the BP. Don't add code here 513 * WARNING: this function is only called on the BP. Don't add code here
322 * that is supposed to run on all CPUs. 514 * that is supposed to run on all CPUs.
323 */ 515 */
324static void __init early_cpu_detect(void) 516static void __init early_identify_cpu(struct cpuinfo_x86 *c)
325{ 517{
326 struct cpuinfo_x86 *c = &boot_cpu_data; 518#ifdef CONFIG_X86_64
327 519 c->x86_clflush_size = 64;
328 c->x86_cache_alignment = 32; 520#else
329 c->x86_clflush_size = 32; 521 c->x86_clflush_size = 32;
522#endif
523 c->x86_cache_alignment = c->x86_clflush_size;
524
525 memset(&c->x86_capability, 0, sizeof c->x86_capability);
526 c->extended_cpuid_level = 0;
330 527
331 if (!have_cpuid_p()) 528 if (!have_cpuid_p())
529 identify_cpu_without_cpuid(c);
530
531 /* cyrix could have cpuid enabled via c_identify()*/
532 if (!have_cpuid_p())
332 return; 533 return;
333 534
334 cpu_detect(c); 535 cpu_detect(c);
335 536
336 get_cpu_vendor(c, 1); 537 get_cpu_vendor(c);
337 538
338 early_get_cap(c); 539 get_cpu_cap(c);
339 540
340 if (c->x86_vendor != X86_VENDOR_UNKNOWN && 541 if (this_cpu->c_early_init)
341 cpu_devs[c->x86_vendor]->c_early_init) 542 this_cpu->c_early_init(c);
342 cpu_devs[c->x86_vendor]->c_early_init(c); 543
544 validate_pat_support(c);
545}
546
547void __init early_cpu_init(void)
548{
549 struct cpu_dev **cdev;
550 int count = 0;
551
552 printk("KERNEL supported cpus:\n");
553 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
554 struct cpu_dev *cpudev = *cdev;
555 unsigned int j;
556
557 if (count >= X86_VENDOR_NUM)
558 break;
559 cpu_devs[count] = cpudev;
560 count++;
561
562 for (j = 0; j < 2; j++) {
563 if (!cpudev->c_ident[j])
564 continue;
565 printk(" %s %s\n", cpudev->c_vendor,
566 cpudev->c_ident[j]);
567 }
568 }
569
570 early_identify_cpu(&boot_cpu_data);
343} 571}
344 572
345/* 573/*
@@ -357,86 +585,41 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
357 585
358static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 586static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
359{ 587{
360 u32 tfms, xlvl; 588 c->extended_cpuid_level = 0;
361 unsigned int ebx;
362
363 if (have_cpuid_p()) {
364 /* Get vendor name */
365 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
366 (unsigned int *)&c->x86_vendor_id[0],
367 (unsigned int *)&c->x86_vendor_id[8],
368 (unsigned int *)&c->x86_vendor_id[4]);
369
370 get_cpu_vendor(c, 0);
371 /* Initialize the standard set of capabilities */
372 /* Note that the vendor-specific code below might override */
373 /* Intel-defined flags: level 0x00000001 */
374 if (c->cpuid_level >= 0x00000001) {
375 u32 capability, excap;
376 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
377 c->x86_capability[0] = capability;
378 c->x86_capability[4] = excap;
379 c->x86 = (tfms >> 8) & 15;
380 c->x86_model = (tfms >> 4) & 15;
381 if (c->x86 == 0xf)
382 c->x86 += (tfms >> 20) & 0xff;
383 if (c->x86 >= 0x6)
384 c->x86_model += ((tfms >> 16) & 0xF) << 4;
385 c->x86_mask = tfms & 15;
386 c->initial_apicid = (ebx >> 24) & 0xFF;
387#ifdef CONFIG_X86_HT
388 c->apicid = phys_pkg_id(c->initial_apicid, 0);
389 c->phys_proc_id = c->initial_apicid;
390#else
391 c->apicid = c->initial_apicid;
392#endif
393 if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
394 c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
395 } else {
396 /* Have CPUID level 0 only - unheard of */
397 c->x86 = 4;
398 }
399 589
400 /* AMD-defined flags: level 0x80000001 */ 590 if (!have_cpuid_p())
401 xlvl = cpuid_eax(0x80000000); 591 identify_cpu_without_cpuid(c);
402 if ((xlvl & 0xffff0000) == 0x80000000) {
403 if (xlvl >= 0x80000001) {
404 c->x86_capability[1] = cpuid_edx(0x80000001);
405 c->x86_capability[6] = cpuid_ecx(0x80000001);
406 }
407 if (xlvl >= 0x80000004)
408 get_model_name(c); /* Default name */
409 }
410 592
411 init_scattered_cpuid_features(c); 593 /* cyrix could have cpuid enabled via c_identify()*/
412 detect_nopl(c); 594 if (!have_cpuid_p())
413 } 595 return;
414}
415 596
416static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 597 cpu_detect(c);
417{
418 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
419 /* Disable processor serial number */
420 unsigned long lo, hi;
421 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
422 lo |= 0x200000;
423 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
424 printk(KERN_NOTICE "CPU serial number disabled.\n");
425 clear_cpu_cap(c, X86_FEATURE_PN);
426 598
427 /* Disabling the serial number may affect the cpuid level */ 599 get_cpu_vendor(c);
428 c->cpuid_level = cpuid_eax(0);
429 }
430}
431 600
432static int __init x86_serial_nr_setup(char *s) 601 get_cpu_cap(c);
433{
434 disable_x86_serial_nr = 0;
435 return 1;
436}
437__setup("serialnumber", x86_serial_nr_setup);
438 602
603 if (c->cpuid_level >= 0x00000001) {
604 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
605#ifdef CONFIG_X86_32
606# ifdef CONFIG_X86_HT
607 c->apicid = phys_pkg_id(c->initial_apicid, 0);
608# else
609 c->apicid = c->initial_apicid;
610# endif
611#endif
439 612
613#ifdef CONFIG_X86_HT
614 c->phys_proc_id = c->initial_apicid;
615#endif
616 }
617
618 get_model_name(c); /* Default name */
619
620 init_scattered_cpuid_features(c);
621 detect_nopl(c);
622}
440 623
441/* 624/*
442 * This does the hard work of actually picking apart the CPU stuff... 625 * This does the hard work of actually picking apart the CPU stuff...
@@ -448,30 +631,29 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
448 c->loops_per_jiffy = loops_per_jiffy; 631 c->loops_per_jiffy = loops_per_jiffy;
449 c->x86_cache_size = -1; 632 c->x86_cache_size = -1;
450 c->x86_vendor = X86_VENDOR_UNKNOWN; 633 c->x86_vendor = X86_VENDOR_UNKNOWN;
451 c->cpuid_level = -1; /* CPUID not detected */
452 c->x86_model = c->x86_mask = 0; /* So far unknown... */ 634 c->x86_model = c->x86_mask = 0; /* So far unknown... */
453 c->x86_vendor_id[0] = '\0'; /* Unset */ 635 c->x86_vendor_id[0] = '\0'; /* Unset */
454 c->x86_model_id[0] = '\0'; /* Unset */ 636 c->x86_model_id[0] = '\0'; /* Unset */
455 c->x86_max_cores = 1; 637 c->x86_max_cores = 1;
638 c->x86_coreid_bits = 0;
639#ifdef CONFIG_X86_64
640 c->x86_clflush_size = 64;
641#else
642 c->cpuid_level = -1; /* CPUID not detected */
456 c->x86_clflush_size = 32; 643 c->x86_clflush_size = 32;
644#endif
645 c->x86_cache_alignment = c->x86_clflush_size;
457 memset(&c->x86_capability, 0, sizeof c->x86_capability); 646 memset(&c->x86_capability, 0, sizeof c->x86_capability);
458 647
459 if (!have_cpuid_p()) {
460 /*
461 * First of all, decide if this is a 486 or higher
462 * It's a 486 if we can modify the AC flag
463 */
464 if (flag_is_changeable_p(X86_EFLAGS_AC))
465 c->x86 = 4;
466 else
467 c->x86 = 3;
468 }
469
470 generic_identify(c); 648 generic_identify(c);
471 649
472 if (this_cpu->c_identify) 650 if (this_cpu->c_identify)
473 this_cpu->c_identify(c); 651 this_cpu->c_identify(c);
474 652
653#ifdef CONFIG_X86_64
654 c->apicid = phys_pkg_id(0);
655#endif
656
475 /* 657 /*
476 * Vendor-specific initialization. In this section we 658 * Vendor-specific initialization. In this section we
477 * canonicalize the feature flags, meaning if there are 659 * canonicalize the feature flags, meaning if there are
@@ -505,6 +687,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
505 c->x86, c->x86_model); 687 c->x86, c->x86_model);
506 } 688 }
507 689
690#ifdef CONFIG_X86_64
691 detect_ht(c);
692#endif
693
508 /* 694 /*
509 * On SMP, boot_cpu_data holds the common feature set between 695 * On SMP, boot_cpu_data holds the common feature set between
510 * all CPUs; so make sure that we indicate which features are 696 * all CPUs; so make sure that we indicate which features are
@@ -513,7 +699,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
513 */ 699 */
514 if (c != &boot_cpu_data) { 700 if (c != &boot_cpu_data) {
515 /* AND the already accumulated flags with these */ 701 /* AND the already accumulated flags with these */
516 for (i = 0 ; i < NCAPINTS ; i++) 702 for (i = 0; i < NCAPINTS; i++)
517 boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; 703 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
518 } 704 }
519 705
@@ -521,72 +707,79 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
521 for (i = 0; i < NCAPINTS; i++) 707 for (i = 0; i < NCAPINTS; i++)
522 c->x86_capability[i] &= ~cleared_cpu_caps[i]; 708 c->x86_capability[i] &= ~cleared_cpu_caps[i];
523 709
710#ifdef CONFIG_X86_MCE
524 /* Init Machine Check Exception if available. */ 711 /* Init Machine Check Exception if available. */
525 mcheck_init(c); 712 mcheck_init(c);
713#endif
526 714
527 select_idle_routine(c); 715 select_idle_routine(c);
716
717#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
718 numa_add_cpu(smp_processor_id());
719#endif
528} 720}
529 721
530void __init identify_boot_cpu(void) 722void __init identify_boot_cpu(void)
531{ 723{
532 identify_cpu(&boot_cpu_data); 724 identify_cpu(&boot_cpu_data);
725#ifdef CONFIG_X86_32
533 sysenter_setup(); 726 sysenter_setup();
534 enable_sep_cpu(); 727 enable_sep_cpu();
728#endif
535} 729}
536 730
537void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 731void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
538{ 732{
539 BUG_ON(c == &boot_cpu_data); 733 BUG_ON(c == &boot_cpu_data);
540 identify_cpu(c); 734 identify_cpu(c);
735#ifdef CONFIG_X86_32
541 enable_sep_cpu(); 736 enable_sep_cpu();
737#endif
542 mtrr_ap_init(); 738 mtrr_ap_init();
543} 739}
544 740
545#ifdef CONFIG_X86_HT 741struct msr_range {
546void __cpuinit detect_ht(struct cpuinfo_x86 *c) 742 unsigned min;
547{ 743 unsigned max;
548 u32 eax, ebx, ecx, edx; 744};
549 int index_msb, core_bits;
550
551 cpuid(1, &eax, &ebx, &ecx, &edx);
552
553 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
554 return;
555
556 smp_num_siblings = (ebx & 0xff0000) >> 16;
557 745
558 if (smp_num_siblings == 1) { 746static struct msr_range msr_range_array[] __cpuinitdata = {
559 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 747 { 0x00000000, 0x00000418},
560 } else if (smp_num_siblings > 1) { 748 { 0xc0000000, 0xc000040b},
749 { 0xc0010000, 0xc0010142},
750 { 0xc0011000, 0xc001103b},
751};
561 752
562 if (smp_num_siblings > NR_CPUS) { 753static void __cpuinit print_cpu_msr(void)
563 printk(KERN_WARNING "CPU: Unsupported number of the " 754{
564 "siblings %d", smp_num_siblings); 755 unsigned index;
565 smp_num_siblings = 1; 756 u64 val;
566 return; 757 int i;
758 unsigned index_min, index_max;
759
760 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
761 index_min = msr_range_array[i].min;
762 index_max = msr_range_array[i].max;
763 for (index = index_min; index < index_max; index++) {
764 if (rdmsrl_amd_safe(index, &val))
765 continue;
766 printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
567 } 767 }
768 }
769}
568 770
569 index_msb = get_count_order(smp_num_siblings); 771static int show_msr __cpuinitdata;
570 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); 772static __init int setup_show_msr(char *arg)
571 773{
572 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 774 int num;
573 c->phys_proc_id);
574
575 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
576
577 index_msb = get_count_order(smp_num_siblings) ;
578 775
579 core_bits = get_count_order(c->x86_max_cores); 776 get_option(&arg, &num);
580 777
581 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & 778 if (num > 0)
582 ((1 << core_bits) - 1); 779 show_msr = num;
583 780 return 1;
584 if (c->x86_max_cores > 1)
585 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
586 c->cpu_core_id);
587 }
588} 781}
589#endif 782__setup("show_msr=", setup_show_msr);
590 783
591static __init int setup_noclflush(char *arg) 784static __init int setup_noclflush(char *arg)
592{ 785{
@@ -605,17 +798,25 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
605 vendor = c->x86_vendor_id; 798 vendor = c->x86_vendor_id;
606 799
607 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) 800 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
608 printk("%s ", vendor); 801 printk(KERN_CONT "%s ", vendor);
609 802
610 if (!c->x86_model_id[0]) 803 if (c->x86_model_id[0])
611 printk("%d86", c->x86); 804 printk(KERN_CONT "%s", c->x86_model_id);
612 else 805 else
613 printk("%s", c->x86_model_id); 806 printk(KERN_CONT "%d86", c->x86);
614 807
615 if (c->x86_mask || c->cpuid_level >= 0) 808 if (c->x86_mask || c->cpuid_level >= 0)
616 printk(" stepping %02x\n", c->x86_mask); 809 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
617 else 810 else
618 printk("\n"); 811 printk(KERN_CONT "\n");
812
813#ifdef CONFIG_SMP
814 if (c->cpu_index < show_msr)
815 print_cpu_msr();
816#else
817 if (show_msr)
818 print_cpu_msr();
819#endif
619} 820}
620 821
621static __init int setup_disablecpuid(char *arg) 822static __init int setup_disablecpuid(char *arg)
@@ -631,19 +832,89 @@ __setup("clearcpuid=", setup_disablecpuid);
631 832
632cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; 833cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
633 834
634void __init early_cpu_init(void) 835#ifdef CONFIG_X86_64
836struct x8664_pda **_cpu_pda __read_mostly;
837EXPORT_SYMBOL(_cpu_pda);
838
839struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
840
841char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
842
843void __cpuinit pda_init(int cpu)
844{
845 struct x8664_pda *pda = cpu_pda(cpu);
846
847 /* Setup up data that may be needed in __get_free_pages early */
848 loadsegment(fs, 0);
849 loadsegment(gs, 0);
850 /* Memory clobbers used to order PDA accessed */
851 mb();
852 wrmsrl(MSR_GS_BASE, pda);
853 mb();
854
855 pda->cpunumber = cpu;
856 pda->irqcount = -1;
857 pda->kernelstack = (unsigned long)stack_thread_info() -
858 PDA_STACKOFFSET + THREAD_SIZE;
859 pda->active_mm = &init_mm;
860 pda->mmu_state = 0;
861
862 if (cpu == 0) {
863 /* others are initialized in smpboot.c */
864 pda->pcurrent = &init_task;
865 pda->irqstackptr = boot_cpu_stack;
866 pda->irqstackptr += IRQSTACKSIZE - 64;
867 } else {
868 if (!pda->irqstackptr) {
869 pda->irqstackptr = (char *)
870 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
871 if (!pda->irqstackptr)
872 panic("cannot allocate irqstack for cpu %d",
873 cpu);
874 pda->irqstackptr += IRQSTACKSIZE - 64;
875 }
876
877 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
878 pda->nodenumber = cpu_to_node(cpu);
879 }
880}
881
882char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
883 DEBUG_STKSZ] __page_aligned_bss;
884
885extern asmlinkage void ignore_sysret(void);
886
887/* May not be marked __init: used by software suspend */
888void syscall_init(void)
635{ 889{
636 struct cpu_vendor_dev *cvdev; 890 /*
891 * LSTAR and STAR live in a bit strange symbiosis.
892 * They both write to the same internal register. STAR allows to
893 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
894 */
895 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
896 wrmsrl(MSR_LSTAR, system_call);
897 wrmsrl(MSR_CSTAR, ignore_sysret);
637 898
638 for (cvdev = __x86cpuvendor_start ; 899#ifdef CONFIG_IA32_EMULATION
639 cvdev < __x86cpuvendor_end ; 900 syscall32_cpu_init();
640 cvdev++) 901#endif
641 cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
642 902
643 early_cpu_detect(); 903 /* Flags to clear on syscall */
644 validate_pat_support(&boot_cpu_data); 904 wrmsrl(MSR_SYSCALL_MASK,
905 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
645} 906}
646 907
908unsigned long kernel_eflags;
909
910/*
911 * Copies of the original ist values from the tss are only accessed during
912 * debugging, no special alignment required.
913 */
914DEFINE_PER_CPU(struct orig_ist, orig_ist);
915
916#else
917
647/* Make sure %fs is initialized properly in idle threads */ 918/* Make sure %fs is initialized properly in idle threads */
648struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 919struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
649{ 920{
@@ -651,25 +922,136 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
651 regs->fs = __KERNEL_PERCPU; 922 regs->fs = __KERNEL_PERCPU;
652 return regs; 923 return regs;
653} 924}
654 925#endif
655/* Current gdt points %fs at the "master" per-cpu area: after this,
656 * it's on the real one. */
657void switch_to_new_gdt(void)
658{
659 struct desc_ptr gdt_descr;
660
661 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
662 gdt_descr.size = GDT_SIZE - 1;
663 load_gdt(&gdt_descr);
664 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
665}
666 926
667/* 927/*
668 * cpu_init() initializes state that is per-CPU. Some data is already 928 * cpu_init() initializes state that is per-CPU. Some data is already
669 * initialized (naturally) in the bootstrap process, such as the GDT 929 * initialized (naturally) in the bootstrap process, such as the GDT
670 * and IDT. We reload them nevertheless, this function acts as a 930 * and IDT. We reload them nevertheless, this function acts as a
671 * 'CPU state barrier', nothing should get across. 931 * 'CPU state barrier', nothing should get across.
932 * A lot of state is already set up in PDA init for 64 bit
672 */ 933 */
934#ifdef CONFIG_X86_64
935void __cpuinit cpu_init(void)
936{
937 int cpu = stack_smp_processor_id();
938 struct tss_struct *t = &per_cpu(init_tss, cpu);
939 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
940 unsigned long v;
941 char *estacks = NULL;
942 struct task_struct *me;
943 int i;
944
945 /* CPU 0 is initialised in head64.c */
946 if (cpu != 0)
947 pda_init(cpu);
948 else
949 estacks = boot_exception_stacks;
950
951 me = current;
952
953 if (cpu_test_and_set(cpu, cpu_initialized))
954 panic("CPU#%d already initialized!\n", cpu);
955
956 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
957
958 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
959
960 /*
961 * Initialize the per-CPU GDT with the boot GDT,
962 * and set up the GDT descriptor:
963 */
964
965 switch_to_new_gdt();
966 load_idt((const struct desc_ptr *)&idt_descr);
967
968 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
969 syscall_init();
970
971 wrmsrl(MSR_FS_BASE, 0);
972 wrmsrl(MSR_KERNEL_GS_BASE, 0);
973 barrier();
974
975 check_efer();
976 if (cpu != 0 && x2apic)
977 enable_x2apic();
978
979 /*
980 * set up and load the per-CPU TSS
981 */
982 if (!orig_ist->ist[0]) {
983 static const unsigned int order[N_EXCEPTION_STACKS] = {
984 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
985 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
986 };
987 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
988 if (cpu) {
989 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
990 if (!estacks)
991 panic("Cannot allocate exception "
992 "stack %ld %d\n", v, cpu);
993 }
994 estacks += PAGE_SIZE << order[v];
995 orig_ist->ist[v] = t->x86_tss.ist[v] =
996 (unsigned long)estacks;
997 }
998 }
999
1000 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
1001 /*
1002 * <= is required because the CPU will access up to
1003 * 8 bits beyond the end of the IO permission bitmap.
1004 */
1005 for (i = 0; i <= IO_BITMAP_LONGS; i++)
1006 t->io_bitmap[i] = ~0UL;
1007
1008 atomic_inc(&init_mm.mm_count);
1009 me->active_mm = &init_mm;
1010 if (me->mm)
1011 BUG();
1012 enter_lazy_tlb(&init_mm, me);
1013
1014 load_sp0(t, &current->thread);
1015 set_tss_desc(cpu, t);
1016 load_TR_desc();
1017 load_LDT(&init_mm.context);
1018
1019#ifdef CONFIG_KGDB
1020 /*
1021 * If the kgdb is connected no debug regs should be altered. This
1022 * is only applicable when KGDB and a KGDB I/O module are built
1023 * into the kernel and you are using early debugging with
1024 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
1025 */
1026 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
1027 arch_kgdb_ops.correct_hw_break();
1028 else {
1029#endif
1030 /*
1031 * Clear all 6 debug registers:
1032 */
1033
1034 set_debugreg(0UL, 0);
1035 set_debugreg(0UL, 1);
1036 set_debugreg(0UL, 2);
1037 set_debugreg(0UL, 3);
1038 set_debugreg(0UL, 6);
1039 set_debugreg(0UL, 7);
1040#ifdef CONFIG_KGDB
1041 /* If the kgdb is connected no debug regs should be altered. */
1042 }
1043#endif
1044
1045 fpu_init();
1046
1047 raw_local_save_flags(kernel_eflags);
1048
1049 if (is_uv_system())
1050 uv_cpu_init();
1051}
1052
1053#else
1054
673void __cpuinit cpu_init(void) 1055void __cpuinit cpu_init(void)
674{ 1056{
675 int cpu = smp_processor_id(); 1057 int cpu = smp_processor_id();
@@ -723,9 +1105,20 @@ void __cpuinit cpu_init(void)
723 /* 1105 /*
724 * Force FPU initialization: 1106 * Force FPU initialization:
725 */ 1107 */
726 current_thread_info()->status = 0; 1108 if (cpu_has_xsave)
1109 current_thread_info()->status = TS_XSAVE;
1110 else
1111 current_thread_info()->status = 0;
727 clear_used_math(); 1112 clear_used_math();
728 mxcsr_feature_mask_init(); 1113 mxcsr_feature_mask_init();
1114
1115 /*
1116 * Boot processor to setup the FP and extended state context info.
1117 */
1118 if (!smp_processor_id())
1119 init_thread_xstate();
1120
1121 xsave_init();
729} 1122}
730 1123
731#ifdef CONFIG_HOTPLUG_CPU 1124#ifdef CONFIG_HOTPLUG_CPU
@@ -739,3 +1132,5 @@ void __cpuinit cpu_uninit(void)
739 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; 1132 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
740} 1133}
741#endif 1134#endif
1135
1136#endif
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
deleted file mode 100644
index 305b465889b0..000000000000
--- a/arch/x86/kernel/cpu/common_64.c
+++ /dev/null
@@ -1,763 +0,0 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/string.h>
5#include <linux/bootmem.h>
6#include <linux/bitops.h>
7#include <linux/module.h>
8#include <linux/kgdb.h>
9#include <linux/topology.h>
10#include <linux/delay.h>
11#include <linux/smp.h>
12#include <linux/percpu.h>
13#include <asm/i387.h>
14#include <asm/msr.h>
15#include <asm/io.h>
16#include <asm/linkage.h>
17#include <asm/mmu_context.h>
18#include <asm/mtrr.h>
19#include <asm/mce.h>
20#include <asm/pat.h>
21#include <asm/asm.h>
22#include <asm/numa.h>
23#ifdef CONFIG_X86_LOCAL_APIC
24#include <asm/mpspec.h>
25#include <asm/apic.h>
26#include <mach_apic.h>
27#endif
28#include <asm/pda.h>
29#include <asm/pgtable.h>
30#include <asm/processor.h>
31#include <asm/desc.h>
32#include <asm/atomic.h>
33#include <asm/proto.h>
34#include <asm/sections.h>
35#include <asm/setup.h>
36#include <asm/genapic.h>
37
38#include "cpu.h"
39
40/* We need valid kernel segments for data and code in long mode too
41 * IRET will check the segment types kkeil 2000/10/28
42 * Also sysret mandates a special GDT layout
43 */
44/* The TLS descriptors are currently at a different place compared to i386.
45 Hopefully nobody expects them at a fixed place (Wine?) */
46DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
47 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
48 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
49 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
50 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
51 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
52 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
53} };
54EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
55
56__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
57
58/* Current gdt points %fs at the "master" per-cpu area: after this,
59 * it's on the real one. */
60void switch_to_new_gdt(void)
61{
62 struct desc_ptr gdt_descr;
63
64 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
65 gdt_descr.size = GDT_SIZE - 1;
66 load_gdt(&gdt_descr);
67}
68
69struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
70
71static void __cpuinit default_init(struct cpuinfo_x86 *c)
72{
73 display_cacheinfo(c);
74}
75
76static struct cpu_dev __cpuinitdata default_cpu = {
77 .c_init = default_init,
78 .c_vendor = "Unknown",
79};
80static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
81
82int __cpuinit get_model_name(struct cpuinfo_x86 *c)
83{
84 unsigned int *v;
85
86 if (c->extended_cpuid_level < 0x80000004)
87 return 0;
88
89 v = (unsigned int *) c->x86_model_id;
90 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
91 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
92 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
93 c->x86_model_id[48] = 0;
94 return 1;
95}
96
97
98void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
99{
100 unsigned int n, dummy, ebx, ecx, edx;
101
102 n = c->extended_cpuid_level;
103
104 if (n >= 0x80000005) {
105 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
106 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
107 "D cache %dK (%d bytes/line)\n",
108 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
109 c->x86_cache_size = (ecx>>24) + (edx>>24);
110 /* On K8 L1 TLB is inclusive, so don't count it */
111 c->x86_tlbsize = 0;
112 }
113
114 if (n >= 0x80000006) {
115 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
116 ecx = cpuid_ecx(0x80000006);
117 c->x86_cache_size = ecx >> 16;
118 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
119
120 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
121 c->x86_cache_size, ecx & 0xFF);
122 }
123}
124
125void __cpuinit detect_ht(struct cpuinfo_x86 *c)
126{
127#ifdef CONFIG_SMP
128 u32 eax, ebx, ecx, edx;
129 int index_msb, core_bits;
130
131 cpuid(1, &eax, &ebx, &ecx, &edx);
132
133
134 if (!cpu_has(c, X86_FEATURE_HT))
135 return;
136 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
137 goto out;
138
139 smp_num_siblings = (ebx & 0xff0000) >> 16;
140
141 if (smp_num_siblings == 1) {
142 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
143 } else if (smp_num_siblings > 1) {
144
145 if (smp_num_siblings > NR_CPUS) {
146 printk(KERN_WARNING "CPU: Unsupported number of "
147 "siblings %d", smp_num_siblings);
148 smp_num_siblings = 1;
149 return;
150 }
151
152 index_msb = get_count_order(smp_num_siblings);
153 c->phys_proc_id = phys_pkg_id(index_msb);
154
155 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
156
157 index_msb = get_count_order(smp_num_siblings);
158
159 core_bits = get_count_order(c->x86_max_cores);
160
161 c->cpu_core_id = phys_pkg_id(index_msb) &
162 ((1 << core_bits) - 1);
163 }
164out:
165 if ((c->x86_max_cores * smp_num_siblings) > 1) {
166 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
167 c->phys_proc_id);
168 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
169 c->cpu_core_id);
170 }
171
172#endif
173}
174
175static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
176{
177 char *v = c->x86_vendor_id;
178 int i;
179 static int printed;
180
181 for (i = 0; i < X86_VENDOR_NUM; i++) {
182 if (cpu_devs[i]) {
183 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
184 (cpu_devs[i]->c_ident[1] &&
185 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
186 c->x86_vendor = i;
187 this_cpu = cpu_devs[i];
188 return;
189 }
190 }
191 }
192 if (!printed) {
193 printed++;
194 printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
195 printk(KERN_ERR "CPU: Your system may be unstable.\n");
196 }
197 c->x86_vendor = X86_VENDOR_UNKNOWN;
198}
199
200static void __init early_cpu_support_print(void)
201{
202 int i,j;
203 struct cpu_dev *cpu_devx;
204
205 printk("KERNEL supported cpus:\n");
206 for (i = 0; i < X86_VENDOR_NUM; i++) {
207 cpu_devx = cpu_devs[i];
208 if (!cpu_devx)
209 continue;
210 for (j = 0; j < 2; j++) {
211 if (!cpu_devx->c_ident[j])
212 continue;
213 printk(" %s %s\n", cpu_devx->c_vendor,
214 cpu_devx->c_ident[j]);
215 }
216 }
217}
218
219/*
220 * The NOPL instruction is supposed to exist on all CPUs with
221 * family >= 6, unfortunately, that's not true in practice because
222 * of early VIA chips and (more importantly) broken virtualizers that
223 * are not easy to detect. Hence, probe for it based on first
224 * principles.
225 *
226 * Note: no 64-bit chip is known to lack these, but put the code here
227 * for consistency with 32 bits, and to make it utterly trivial to
228 * diagnose the problem should it ever surface.
229 */
230static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
231{
232 const u32 nopl_signature = 0x888c53b1; /* Random number */
233 u32 has_nopl = nopl_signature;
234
235 clear_cpu_cap(c, X86_FEATURE_NOPL);
236 if (c->x86 >= 6) {
237 asm volatile("\n"
238 "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
239 "2:\n"
240 " .section .fixup,\"ax\"\n"
241 "3: xor %0,%0\n"
242 " jmp 2b\n"
243 " .previous\n"
244 _ASM_EXTABLE(1b,3b)
245 : "+a" (has_nopl));
246
247 if (has_nopl == nopl_signature)
248 set_cpu_cap(c, X86_FEATURE_NOPL);
249 }
250}
251
252static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
253
254void __init early_cpu_init(void)
255{
256 struct cpu_vendor_dev *cvdev;
257
258 for (cvdev = __x86cpuvendor_start ;
259 cvdev < __x86cpuvendor_end ;
260 cvdev++)
261 cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
262 early_cpu_support_print();
263 early_identify_cpu(&boot_cpu_data);
264}
265
266/* Do some early cpuid on the boot CPU to get some parameter that are
267 needed before check_bugs. Everything advanced is in identify_cpu
268 below. */
269static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
270{
271 u32 tfms, xlvl;
272
273 c->loops_per_jiffy = loops_per_jiffy;
274 c->x86_cache_size = -1;
275 c->x86_vendor = X86_VENDOR_UNKNOWN;
276 c->x86_model = c->x86_mask = 0; /* So far unknown... */
277 c->x86_vendor_id[0] = '\0'; /* Unset */
278 c->x86_model_id[0] = '\0'; /* Unset */
279 c->x86_clflush_size = 64;
280 c->x86_cache_alignment = c->x86_clflush_size;
281 c->x86_max_cores = 1;
282 c->x86_coreid_bits = 0;
283 c->extended_cpuid_level = 0;
284 memset(&c->x86_capability, 0, sizeof c->x86_capability);
285
286 /* Get vendor name */
287 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
288 (unsigned int *)&c->x86_vendor_id[0],
289 (unsigned int *)&c->x86_vendor_id[8],
290 (unsigned int *)&c->x86_vendor_id[4]);
291
292 get_cpu_vendor(c);
293
294 /* Initialize the standard set of capabilities */
295 /* Note that the vendor-specific code below might override */
296
297 /* Intel-defined flags: level 0x00000001 */
298 if (c->cpuid_level >= 0x00000001) {
299 __u32 misc;
300 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
301 &c->x86_capability[0]);
302 c->x86 = (tfms >> 8) & 0xf;
303 c->x86_model = (tfms >> 4) & 0xf;
304 c->x86_mask = tfms & 0xf;
305 if (c->x86 == 0xf)
306 c->x86 += (tfms >> 20) & 0xff;
307 if (c->x86 >= 0x6)
308 c->x86_model += ((tfms >> 16) & 0xF) << 4;
309 if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
310 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
311 } else {
312 /* Have CPUID level 0 only - unheard of */
313 c->x86 = 4;
314 }
315
316 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
317#ifdef CONFIG_SMP
318 c->phys_proc_id = c->initial_apicid;
319#endif
320 /* AMD-defined flags: level 0x80000001 */
321 xlvl = cpuid_eax(0x80000000);
322 c->extended_cpuid_level = xlvl;
323 if ((xlvl & 0xffff0000) == 0x80000000) {
324 if (xlvl >= 0x80000001) {
325 c->x86_capability[1] = cpuid_edx(0x80000001);
326 c->x86_capability[6] = cpuid_ecx(0x80000001);
327 }
328 if (xlvl >= 0x80000004)
329 get_model_name(c); /* Default name */
330 }
331
332 /* Transmeta-defined flags: level 0x80860001 */
333 xlvl = cpuid_eax(0x80860000);
334 if ((xlvl & 0xffff0000) == 0x80860000) {
335 /* Don't set x86_cpuid_level here for now to not confuse. */
336 if (xlvl >= 0x80860001)
337 c->x86_capability[2] = cpuid_edx(0x80860001);
338 }
339
340 if (c->extended_cpuid_level >= 0x80000007)
341 c->x86_power = cpuid_edx(0x80000007);
342
343 if (c->extended_cpuid_level >= 0x80000008) {
344 u32 eax = cpuid_eax(0x80000008);
345
346 c->x86_virt_bits = (eax >> 8) & 0xff;
347 c->x86_phys_bits = eax & 0xff;
348 }
349
350 detect_nopl(c);
351
352 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
353 cpu_devs[c->x86_vendor]->c_early_init)
354 cpu_devs[c->x86_vendor]->c_early_init(c);
355
356 validate_pat_support(c);
357}
358
359/*
360 * This does the hard work of actually picking apart the CPU stuff...
361 */
362static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
363{
364 int i;
365
366 early_identify_cpu(c);
367
368 init_scattered_cpuid_features(c);
369
370 c->apicid = phys_pkg_id(0);
371
372 /*
373 * Vendor-specific initialization. In this section we
374 * canonicalize the feature flags, meaning if there are
375 * features a certain CPU supports which CPUID doesn't
376 * tell us, CPUID claiming incorrect flags, or other bugs,
377 * we handle them here.
378 *
379 * At the end of this section, c->x86_capability better
380 * indicate the features this CPU genuinely supports!
381 */
382 if (this_cpu->c_init)
383 this_cpu->c_init(c);
384
385 detect_ht(c);
386
387 /*
388 * On SMP, boot_cpu_data holds the common feature set between
389 * all CPUs; so make sure that we indicate which features are
390 * common between the CPUs. The first time this routine gets
391 * executed, c == &boot_cpu_data.
392 */
393 if (c != &boot_cpu_data) {
394 /* AND the already accumulated flags with these */
395 for (i = 0; i < NCAPINTS; i++)
396 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
397 }
398
399 /* Clear all flags overriden by options */
400 for (i = 0; i < NCAPINTS; i++)
401 c->x86_capability[i] &= ~cleared_cpu_caps[i];
402
403#ifdef CONFIG_X86_MCE
404 mcheck_init(c);
405#endif
406 select_idle_routine(c);
407
408#ifdef CONFIG_NUMA
409 numa_add_cpu(smp_processor_id());
410#endif
411
412}
413
414void __cpuinit identify_boot_cpu(void)
415{
416 identify_cpu(&boot_cpu_data);
417}
418
419void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
420{
421 BUG_ON(c == &boot_cpu_data);
422 identify_cpu(c);
423 mtrr_ap_init();
424}
425
426static __init int setup_noclflush(char *arg)
427{
428 setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
429 return 1;
430}
431__setup("noclflush", setup_noclflush);
432
433struct msr_range {
434 unsigned min;
435 unsigned max;
436};
437
438static struct msr_range msr_range_array[] __cpuinitdata = {
439 { 0x00000000, 0x00000418},
440 { 0xc0000000, 0xc000040b},
441 { 0xc0010000, 0xc0010142},
442 { 0xc0011000, 0xc001103b},
443};
444
445static void __cpuinit print_cpu_msr(void)
446{
447 unsigned index;
448 u64 val;
449 int i;
450 unsigned index_min, index_max;
451
452 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
453 index_min = msr_range_array[i].min;
454 index_max = msr_range_array[i].max;
455 for (index = index_min; index < index_max; index++) {
456 if (rdmsrl_amd_safe(index, &val))
457 continue;
458 printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
459 }
460 }
461}
462
463static int show_msr __cpuinitdata;
464static __init int setup_show_msr(char *arg)
465{
466 int num;
467
468 get_option(&arg, &num);
469
470 if (num > 0)
471 show_msr = num;
472 return 1;
473}
474__setup("show_msr=", setup_show_msr);
475
476void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
477{
478 if (c->x86_model_id[0])
479 printk(KERN_CONT "%s", c->x86_model_id);
480
481 if (c->x86_mask || c->cpuid_level >= 0)
482 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
483 else
484 printk(KERN_CONT "\n");
485
486#ifdef CONFIG_SMP
487 if (c->cpu_index < show_msr)
488 print_cpu_msr();
489#else
490 if (show_msr)
491 print_cpu_msr();
492#endif
493}
494
495static __init int setup_disablecpuid(char *arg)
496{
497 int bit;
498 if (get_option(&arg, &bit) && bit < NCAPINTS*32)
499 setup_clear_cpu_cap(bit);
500 else
501 return 0;
502 return 1;
503}
504__setup("clearcpuid=", setup_disablecpuid);
505
506cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
507
508struct x8664_pda **_cpu_pda __read_mostly;
509EXPORT_SYMBOL(_cpu_pda);
510
511struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
512
513char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
514
515unsigned long __supported_pte_mask __read_mostly = ~0UL;
516EXPORT_SYMBOL_GPL(__supported_pte_mask);
517
518static int do_not_nx __cpuinitdata;
519
520/* noexec=on|off
521Control non executable mappings for 64bit processes.
522
523on Enable(default)
524off Disable
525*/
526static int __init nonx_setup(char *str)
527{
528 if (!str)
529 return -EINVAL;
530 if (!strncmp(str, "on", 2)) {
531 __supported_pte_mask |= _PAGE_NX;
532 do_not_nx = 0;
533 } else if (!strncmp(str, "off", 3)) {
534 do_not_nx = 1;
535 __supported_pte_mask &= ~_PAGE_NX;
536 }
537 return 0;
538}
539early_param("noexec", nonx_setup);
540
541int force_personality32;
542
543/* noexec32=on|off
544Control non executable heap for 32bit processes.
545To control the stack too use noexec=off
546
547on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
548off PROT_READ implies PROT_EXEC
549*/
550static int __init nonx32_setup(char *str)
551{
552 if (!strcmp(str, "on"))
553 force_personality32 &= ~READ_IMPLIES_EXEC;
554 else if (!strcmp(str, "off"))
555 force_personality32 |= READ_IMPLIES_EXEC;
556 return 1;
557}
558__setup("noexec32=", nonx32_setup);
559
560void pda_init(int cpu)
561{
562 struct x8664_pda *pda = cpu_pda(cpu);
563
564 /* Setup up data that may be needed in __get_free_pages early */
565 loadsegment(fs, 0);
566 loadsegment(gs, 0);
567 /* Memory clobbers used to order PDA accessed */
568 mb();
569 wrmsrl(MSR_GS_BASE, pda);
570 mb();
571
572 pda->cpunumber = cpu;
573 pda->irqcount = -1;
574 pda->kernelstack = (unsigned long)stack_thread_info() -
575 PDA_STACKOFFSET + THREAD_SIZE;
576 pda->active_mm = &init_mm;
577 pda->mmu_state = 0;
578
579 if (cpu == 0) {
580 /* others are initialized in smpboot.c */
581 pda->pcurrent = &init_task;
582 pda->irqstackptr = boot_cpu_stack;
583 pda->irqstackptr += IRQSTACKSIZE - 64;
584 } else {
585 if (!pda->irqstackptr) {
586 pda->irqstackptr = (char *)
587 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
588 if (!pda->irqstackptr)
589 panic("cannot allocate irqstack for cpu %d",
590 cpu);
591 pda->irqstackptr += IRQSTACKSIZE - 64;
592 }
593
594 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
595 pda->nodenumber = cpu_to_node(cpu);
596 }
597}
598
599char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
600 DEBUG_STKSZ] __page_aligned_bss;
601
602extern asmlinkage void ignore_sysret(void);
603
604/* May not be marked __init: used by software suspend */
605void syscall_init(void)
606{
607 /*
608 * LSTAR and STAR live in a bit strange symbiosis.
609 * They both write to the same internal register. STAR allows to
610 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
611 */
612 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
613 wrmsrl(MSR_LSTAR, system_call);
614 wrmsrl(MSR_CSTAR, ignore_sysret);
615
616#ifdef CONFIG_IA32_EMULATION
617 syscall32_cpu_init();
618#endif
619
620 /* Flags to clear on syscall */
621 wrmsrl(MSR_SYSCALL_MASK,
622 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
623}
624
625void __cpuinit check_efer(void)
626{
627 unsigned long efer;
628
629 rdmsrl(MSR_EFER, efer);
630 if (!(efer & EFER_NX) || do_not_nx)
631 __supported_pte_mask &= ~_PAGE_NX;
632}
633
634unsigned long kernel_eflags;
635
636/*
637 * Copies of the original ist values from the tss are only accessed during
638 * debugging, no special alignment required.
639 */
640DEFINE_PER_CPU(struct orig_ist, orig_ist);
641
642/*
643 * cpu_init() initializes state that is per-CPU. Some data is already
644 * initialized (naturally) in the bootstrap process, such as the GDT
645 * and IDT. We reload them nevertheless, this function acts as a
646 * 'CPU state barrier', nothing should get across.
647 * A lot of state is already set up in PDA init.
648 */
649void __cpuinit cpu_init(void)
650{
651 int cpu = stack_smp_processor_id();
652 struct tss_struct *t = &per_cpu(init_tss, cpu);
653 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
654 unsigned long v;
655 char *estacks = NULL;
656 struct task_struct *me;
657 int i;
658
659 /* CPU 0 is initialised in head64.c */
660 if (cpu != 0)
661 pda_init(cpu);
662 else
663 estacks = boot_exception_stacks;
664
665 me = current;
666
667 if (cpu_test_and_set(cpu, cpu_initialized))
668 panic("CPU#%d already initialized!\n", cpu);
669
670 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
671
672 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
673
674 /*
675 * Initialize the per-CPU GDT with the boot GDT,
676 * and set up the GDT descriptor:
677 */
678
679 switch_to_new_gdt();
680 load_idt((const struct desc_ptr *)&idt_descr);
681
682 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
683 syscall_init();
684
685 wrmsrl(MSR_FS_BASE, 0);
686 wrmsrl(MSR_KERNEL_GS_BASE, 0);
687 barrier();
688
689 check_efer();
690
691 /*
692 * set up and load the per-CPU TSS
693 */
694 if (!orig_ist->ist[0]) {
695 static const unsigned int order[N_EXCEPTION_STACKS] = {
696 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
697 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
698 };
699 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
700 if (cpu) {
701 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
702 if (!estacks)
703 panic("Cannot allocate exception "
704 "stack %ld %d\n", v, cpu);
705 }
706 estacks += PAGE_SIZE << order[v];
707 orig_ist->ist[v] = t->x86_tss.ist[v] =
708 (unsigned long)estacks;
709 }
710 }
711
712 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
713 /*
714 * <= is required because the CPU will access up to
715 * 8 bits beyond the end of the IO permission bitmap.
716 */
717 for (i = 0; i <= IO_BITMAP_LONGS; i++)
718 t->io_bitmap[i] = ~0UL;
719
720 atomic_inc(&init_mm.mm_count);
721 me->active_mm = &init_mm;
722 if (me->mm)
723 BUG();
724 enter_lazy_tlb(&init_mm, me);
725
726 load_sp0(t, &current->thread);
727 set_tss_desc(cpu, t);
728 load_TR_desc();
729 load_LDT(&init_mm.context);
730
731#ifdef CONFIG_KGDB
732 /*
733 * If the kgdb is connected no debug regs should be altered. This
734 * is only applicable when KGDB and a KGDB I/O module are built
735 * into the kernel and you are using early debugging with
736 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
737 */
738 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
739 arch_kgdb_ops.correct_hw_break();
740 else {
741#endif
742 /*
743 * Clear all 6 debug registers:
744 */
745
746 set_debugreg(0UL, 0);
747 set_debugreg(0UL, 1);
748 set_debugreg(0UL, 2);
749 set_debugreg(0UL, 3);
750 set_debugreg(0UL, 6);
751 set_debugreg(0UL, 7);
752#ifdef CONFIG_KGDB
753 /* If the kgdb is connected no debug regs should be altered. */
754 }
755#endif
756
757 fpu_init();
758
759 raw_local_save_flags(kernel_eflags);
760
761 if (is_uv_system())
762 uv_cpu_init();
763}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 4d894e8565fe..de4094a39210 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -21,23 +21,16 @@ struct cpu_dev {
21 void (*c_init)(struct cpuinfo_x86 * c); 21 void (*c_init)(struct cpuinfo_x86 * c);
22 void (*c_identify)(struct cpuinfo_x86 * c); 22 void (*c_identify)(struct cpuinfo_x86 * c);
23 unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); 23 unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
24 int c_x86_vendor;
24}; 25};
25 26
26extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; 27#define cpu_dev_register(cpu_devX) \
28 static struct cpu_dev *__cpu_dev_##cpu_devX __used \
29 __attribute__((__section__(".x86_cpu_dev.init"))) = \
30 &cpu_devX;
27 31
28struct cpu_vendor_dev { 32extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[];
29 int vendor;
30 struct cpu_dev *cpu_dev;
31};
32
33#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \
34 static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \
35 __attribute__((__section__(".x86cpuvendor.init"))) = \
36 { cpu_vendor_id, cpu_dev }
37
38extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
39 33
40extern int get_model_name(struct cpuinfo_x86 *c);
41extern void display_cacheinfo(struct cpuinfo_x86 *c); 34extern void display_cacheinfo(struct cpuinfo_x86 *c);
42 35
43#endif 36#endif
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 898a5a2002ed..ffd0f5ed071a 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -121,7 +121,7 @@ static void __cpuinit set_cx86_reorder(void)
121 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 121 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
122 122
123 /* Load/Store Serialize to mem access disable (=reorder it) */ 123 /* Load/Store Serialize to mem access disable (=reorder it) */
124 setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); 124 setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
125 /* set load/store serialize from 1GB to 4GB */ 125 /* set load/store serialize from 1GB to 4GB */
126 ccr3 |= 0xe0; 126 ccr3 |= 0xe0;
127 setCx86(CX86_CCR3, ccr3); 127 setCx86(CX86_CCR3, ccr3);
@@ -132,11 +132,11 @@ static void __cpuinit set_cx86_memwb(void)
132 printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); 132 printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
133 133
134 /* CCR2 bit 2: unlock NW bit */ 134 /* CCR2 bit 2: unlock NW bit */
135 setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); 135 setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
136 /* set 'Not Write-through' */ 136 /* set 'Not Write-through' */
137 write_cr0(read_cr0() | X86_CR0_NW); 137 write_cr0(read_cr0() | X86_CR0_NW);
138 /* CCR2 bit 2: lock NW bit and set WT1 */ 138 /* CCR2 bit 2: lock NW bit and set WT1 */
139 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); 139 setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
140} 140}
141 141
142/* 142/*
@@ -150,14 +150,14 @@ static void __cpuinit geode_configure(void)
150 local_irq_save(flags); 150 local_irq_save(flags);
151 151
152 /* Suspend on halt power saving and enable #SUSP pin */ 152 /* Suspend on halt power saving and enable #SUSP pin */
153 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); 153 setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
154 154
155 ccr3 = getCx86(CX86_CCR3); 155 ccr3 = getCx86(CX86_CCR3);
156 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 156 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
157 157
158 158
159 /* FPU fast, DTE cache, Mem bypass */ 159 /* FPU fast, DTE cache, Mem bypass */
160 setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); 160 setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
161 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ 161 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
162 162
163 set_cx86_memwb(); 163 set_cx86_memwb();
@@ -291,7 +291,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
291 /* GXm supports extended cpuid levels 'ala' AMD */ 291 /* GXm supports extended cpuid levels 'ala' AMD */
292 if (c->cpuid_level == 2) { 292 if (c->cpuid_level == 2) {
293 /* Enable cxMMX extensions (GX1 Datasheet 54) */ 293 /* Enable cxMMX extensions (GX1 Datasheet 54) */
294 setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); 294 setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
295 295
296 /* 296 /*
297 * GXm : 0x30 ... 0x5f GXm datasheet 51 297 * GXm : 0x30 ... 0x5f GXm datasheet 51
@@ -301,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
301 */ 301 */
302 if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) 302 if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f))
303 geode_configure(); 303 geode_configure();
304 get_model_name(c); /* get CPU marketing name */
305 return; 304 return;
306 } else { /* MediaGX */ 305 } else { /* MediaGX */
307 Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; 306 Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
@@ -314,7 +313,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
314 if (dir1 > 7) { 313 if (dir1 > 7) {
315 dir0_msn++; /* M II */ 314 dir0_msn++; /* M II */
316 /* Enable MMX extensions (App note 108) */ 315 /* Enable MMX extensions (App note 108) */
317 setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); 316 setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
318 } else { 317 } else {
319 c->coma_bug = 1; /* 6x86MX, it has the bug. */ 318 c->coma_bug = 1; /* 6x86MX, it has the bug. */
320 } 319 }
@@ -429,7 +428,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
429 local_irq_save(flags); 428 local_irq_save(flags);
430 ccr3 = getCx86(CX86_CCR3); 429 ccr3 = getCx86(CX86_CCR3);
431 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 430 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
432 setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ 431 setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */
433 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ 432 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
434 local_irq_restore(flags); 433 local_irq_restore(flags);
435 } 434 }
@@ -442,14 +441,16 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
442 .c_early_init = early_init_cyrix, 441 .c_early_init = early_init_cyrix,
443 .c_init = init_cyrix, 442 .c_init = init_cyrix,
444 .c_identify = cyrix_identify, 443 .c_identify = cyrix_identify,
444 .c_x86_vendor = X86_VENDOR_CYRIX,
445}; 445};
446 446
447cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); 447cpu_dev_register(cyrix_cpu_dev);
448 448
449static struct cpu_dev nsc_cpu_dev __cpuinitdata = { 449static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
450 .c_vendor = "NSC", 450 .c_vendor = "NSC",
451 .c_ident = { "Geode by NSC" }, 451 .c_ident = { "Geode by NSC" },
452 .c_init = init_nsc, 452 .c_init = init_nsc,
453 .c_x86_vendor = X86_VENDOR_NSC,
453}; 454};
454 455
455cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); 456cpu_dev_register(nsc_cpu_dev);
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
deleted file mode 100644
index c9017799497c..000000000000
--- a/arch/x86/kernel/cpu/feature_names.c
+++ /dev/null
@@ -1,84 +0,0 @@
1/*
2 * Strings for the various x86 capability flags.
3 *
4 * This file must not contain any executable code.
5 */
6
7#include <asm/cpufeature.h>
8
9/*
10 * These flag bits must match the definitions in <asm/cpufeature.h>.
11 * NULL means this bit is undefined or reserved; either way it doesn't
12 * have meaning as far as Linux is concerned. Note that it's important
13 * to realize there is a difference between this table and CPUID -- if
14 * applications want to get the raw CPUID data, they should access
15 * /dev/cpu/<cpu_nr>/cpuid instead.
16 */
17const char * const x86_cap_flags[NCAPINTS*32] = {
18 /* Intel-defined */
19 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
20 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
21 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
22 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
23
24 /* AMD-defined */
25 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
26 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
27 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
28 NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
29 "3dnowext", "3dnow",
30
31 /* Transmeta-defined */
32 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
33 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
34 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
35 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
36
37 /* Other (Linux-defined) */
38 "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
39 NULL, NULL, NULL, NULL,
40 "constant_tsc", "up", NULL, "arch_perfmon",
41 "pebs", "bts", NULL, NULL,
42 "rep_good", NULL, NULL, NULL,
43 "nopl", NULL, NULL, NULL,
44 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
45
46 /* Intel-defined (#2) */
47 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
48 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
49 NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
50 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
51
52 /* VIA/Cyrix/Centaur-defined */
53 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
54 "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
55 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
56 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
57
58 /* AMD-defined (#2) */
59 "lahf_lm", "cmp_legacy", "svm", "extapic",
60 "cr8_legacy", "abm", "sse4a", "misalignsse",
61 "3dnowprefetch", "osvw", "ibs", "sse5",
62 "skinit", "wdt", NULL, NULL,
63 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
64 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
65
66 /* Auxiliary (Linux-defined) */
67 "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
68 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
69 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
70 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
71};
72
73const char *const x86_power_flags[32] = {
74 "ts", /* temperature sensor */
75 "fid", /* frequency id control */
76 "vid", /* voltage id control */
77 "ttp", /* thermal trip */
78 "tm",
79 "stc",
80 "100mhzsteps",
81 "hwpstate",
82 "", /* tsc invariant mapped to constant_tsc */
83 /* nothing */
84};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f113ef4595f6..99468dbd08da 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -15,6 +15,11 @@
15#include <asm/ds.h> 15#include <asm/ds.h>
16#include <asm/bugs.h> 16#include <asm/bugs.h>
17 17
18#ifdef CONFIG_X86_64
19#include <asm/topology.h>
20#include <asm/numa_64.h>
21#endif
22
18#include "cpu.h" 23#include "cpu.h"
19 24
20#ifdef CONFIG_X86_LOCAL_APIC 25#ifdef CONFIG_X86_LOCAL_APIC
@@ -23,23 +28,22 @@
23#include <mach_apic.h> 28#include <mach_apic.h>
24#endif 29#endif
25 30
26#ifdef CONFIG_X86_INTEL_USERCOPY
27/*
28 * Alignment at which movsl is preferred for bulk memory copies.
29 */
30struct movsl_mask movsl_mask __read_mostly;
31#endif
32
33static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 31static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
34{ 32{
35 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
36 if (c->x86 == 15 && c->x86_cache_alignment == 64)
37 c->x86_cache_alignment = 128;
38 if ((c->x86 == 0xf && c->x86_model >= 0x03) || 33 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
39 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 34 (c->x86 == 0x6 && c->x86_model >= 0x0e))
40 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 35 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
36
37#ifdef CONFIG_X86_64
38 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
39#else
40 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
41 if (c->x86 == 15 && c->x86_cache_alignment == 64)
42 c->x86_cache_alignment = 128;
43#endif
41} 44}
42 45
46#ifdef CONFIG_X86_32
43/* 47/*
44 * Early probe support logic for ppro memory erratum #50 48 * Early probe support logic for ppro memory erratum #50
45 * 49 *
@@ -59,15 +63,54 @@ int __cpuinit ppro_with_ram_bug(void)
59 return 0; 63 return 0;
60} 64}
61 65
66#ifdef CONFIG_X86_F00F_BUG
67static void __cpuinit trap_init_f00f_bug(void)
68{
69 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
62 70
63/* 71 /*
64 * P4 Xeon errata 037 workaround. 72 * Update the IDT descriptor and reload the IDT so that
65 * Hardware prefetcher may cause stale data to be loaded into the cache. 73 * it uses the read-only mapped virtual address.
66 */ 74 */
67static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) 75 idt_descr.address = fix_to_virt(FIX_F00F_IDT);
76 load_idt(&idt_descr);
77}
78#endif
79
80static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
68{ 81{
69 unsigned long lo, hi; 82 unsigned long lo, hi;
70 83
84#ifdef CONFIG_X86_F00F_BUG
85 /*
86 * All current models of Pentium and Pentium with MMX technology CPUs
87 * have the F0 0F bug, which lets nonprivileged users lock up the system.
88 * Note that the workaround only should be initialized once...
89 */
90 c->f00f_bug = 0;
91 if (!paravirt_enabled() && c->x86 == 5) {
92 static int f00f_workaround_enabled;
93
94 c->f00f_bug = 1;
95 if (!f00f_workaround_enabled) {
96 trap_init_f00f_bug();
97 printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
98 f00f_workaround_enabled = 1;
99 }
100 }
101#endif
102
103 /*
104 * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
105 * model 3 mask 3
106 */
107 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
108 clear_cpu_cap(c, X86_FEATURE_SEP);
109
110 /*
111 * P4 Xeon errata 037 workaround.
112 * Hardware prefetcher may cause stale data to be loaded into the cache.
113 */
71 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { 114 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
72 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); 115 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
73 if ((lo & (1<<9)) == 0) { 116 if ((lo & (1<<9)) == 0) {
@@ -77,13 +120,68 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
77 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); 120 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
78 } 121 }
79 } 122 }
123
124 /*
125 * See if we have a good local APIC by checking for buggy Pentia,
126 * i.e. all B steppings and the C2 stepping of P54C when using their
127 * integrated APIC (see 11AP erratum in "Pentium Processor
128 * Specification Update").
129 */
130 if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
131 (c->x86_mask < 0x6 || c->x86_mask == 0xb))
132 set_cpu_cap(c, X86_FEATURE_11AP);
133
134
135#ifdef CONFIG_X86_INTEL_USERCOPY
136 /*
137 * Set up the preferred alignment for movsl bulk memory moves
138 */
139 switch (c->x86) {
140 case 4: /* 486: untested */
141 break;
142 case 5: /* Old Pentia: untested */
143 break;
144 case 6: /* PII/PIII only like movsl with 8-byte alignment */
145 movsl_mask.mask = 7;
146 break;
147 case 15: /* P4 is OK down to 8-byte alignment */
148 movsl_mask.mask = 7;
149 break;
150 }
151#endif
152
153#ifdef CONFIG_X86_NUMAQ
154 numaq_tsc_disable();
155#endif
80} 156}
157#else
158static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
159{
160}
161#endif
81 162
163static void __cpuinit srat_detect_node(void)
164{
165#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
166 unsigned node;
167 int cpu = smp_processor_id();
168 int apicid = hard_smp_processor_id();
169
170 /* Don't do the funky fallback heuristics the AMD version employs
171 for now. */
172 node = apicid_to_node[apicid];
173 if (node == NUMA_NO_NODE || !node_online(node))
174 node = first_node(node_online_map);
175 numa_set_node(cpu, node);
176
177 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
178#endif
179}
82 180
83/* 181/*
84 * find out the number of processor cores on the die 182 * find out the number of processor cores on the die
85 */ 183 */
86static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) 184static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
87{ 185{
88 unsigned int eax, ebx, ecx, edx; 186 unsigned int eax, ebx, ecx, edx;
89 187
@@ -98,45 +196,51 @@ static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c)
98 return 1; 196 return 1;
99} 197}
100 198
101#ifdef CONFIG_X86_F00F_BUG 199static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c)
102static void __cpuinit trap_init_f00f_bug(void)
103{ 200{
104 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); 201 /* Intel VMX MSR indicated features */
105 202#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
106 /* 203#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
107 * Update the IDT descriptor and reload the IDT so that 204#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
108 * it uses the read-only mapped virtual address. 205#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
109 */ 206#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
110 idt_descr.address = fix_to_virt(FIX_F00F_IDT); 207#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
111 load_idt(&idt_descr); 208
209 u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
210
211 clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
212 clear_cpu_cap(c, X86_FEATURE_VNMI);
213 clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
214 clear_cpu_cap(c, X86_FEATURE_EPT);
215 clear_cpu_cap(c, X86_FEATURE_VPID);
216
217 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
218 msr_ctl = vmx_msr_high | vmx_msr_low;
219 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
220 set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
221 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
222 set_cpu_cap(c, X86_FEATURE_VNMI);
223 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
224 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
225 vmx_msr_low, vmx_msr_high);
226 msr_ctl2 = vmx_msr_high | vmx_msr_low;
227 if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
228 (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
229 set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
230 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
231 set_cpu_cap(c, X86_FEATURE_EPT);
232 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
233 set_cpu_cap(c, X86_FEATURE_VPID);
234 }
112} 235}
113#endif
114 236
115static void __cpuinit init_intel(struct cpuinfo_x86 *c) 237static void __cpuinit init_intel(struct cpuinfo_x86 *c)
116{ 238{
117 unsigned int l2 = 0; 239 unsigned int l2 = 0;
118 char *p = NULL;
119 240
120 early_init_intel(c); 241 early_init_intel(c);
121 242
122#ifdef CONFIG_X86_F00F_BUG 243 intel_workarounds(c);
123 /*
124 * All current models of Pentium and Pentium with MMX technology CPUs
125 * have the F0 0F bug, which lets nonprivileged users lock up the system.
126 * Note that the workaround only should be initialized once...
127 */
128 c->f00f_bug = 0;
129 if (!paravirt_enabled() && c->x86 == 5) {
130 static int f00f_workaround_enabled;
131
132 c->f00f_bug = 1;
133 if (!f00f_workaround_enabled) {
134 trap_init_f00f_bug();
135 printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
136 f00f_workaround_enabled = 1;
137 }
138 }
139#endif
140 244
141 l2 = init_intel_cacheinfo(c); 245 l2 = init_intel_cacheinfo(c);
142 if (c->cpuid_level > 9) { 246 if (c->cpuid_level > 9) {
@@ -146,16 +250,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
146 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); 250 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
147 } 251 }
148 252
149 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ 253 if (cpu_has_xmm2)
150 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) 254 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
151 clear_cpu_cap(c, X86_FEATURE_SEP); 255 if (cpu_has_ds) {
256 unsigned int l1;
257 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
258 if (!(l1 & (1<<11)))
259 set_cpu_cap(c, X86_FEATURE_BTS);
260 if (!(l1 & (1<<12)))
261 set_cpu_cap(c, X86_FEATURE_PEBS);
262 ds_init_intel(c);
263 }
152 264
265#ifdef CONFIG_X86_64
266 if (c->x86 == 15)
267 c->x86_cache_alignment = c->x86_clflush_size * 2;
268 if (c->x86 == 6)
269 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
270#else
153 /* 271 /*
154 * Names for the Pentium II/Celeron processors 272 * Names for the Pentium II/Celeron processors
155 * detectable only by also checking the cache size. 273 * detectable only by also checking the cache size.
156 * Dixon is NOT a Celeron. 274 * Dixon is NOT a Celeron.
157 */ 275 */
158 if (c->x86 == 6) { 276 if (c->x86 == 6) {
277 char *p = NULL;
278
159 switch (c->x86_model) { 279 switch (c->x86_model) {
160 case 5: 280 case 5:
161 if (c->x86_mask == 0) { 281 if (c->x86_mask == 0) {
@@ -178,71 +298,41 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
178 p = "Celeron (Coppermine)"; 298 p = "Celeron (Coppermine)";
179 break; 299 break;
180 } 300 }
181 }
182
183 if (p)
184 strcpy(c->x86_model_id, p);
185
186 c->x86_max_cores = num_cpu_cores(c);
187
188 detect_ht(c);
189 301
190 /* Work around errata */ 302 if (p)
191 Intel_errata_workarounds(c); 303 strcpy(c->x86_model_id, p);
192
193#ifdef CONFIG_X86_INTEL_USERCOPY
194 /*
195 * Set up the preferred alignment for movsl bulk memory moves
196 */
197 switch (c->x86) {
198 case 4: /* 486: untested */
199 break;
200 case 5: /* Old Pentia: untested */
201 break;
202 case 6: /* PII/PIII only like movsl with 8-byte alignment */
203 movsl_mask.mask = 7;
204 break;
205 case 15: /* P4 is OK down to 8-byte alignment */
206 movsl_mask.mask = 7;
207 break;
208 } 304 }
209#endif
210 305
211 if (cpu_has_xmm2) 306 if (c->x86 == 15)
212 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
213 if (c->x86 == 15) {
214 set_cpu_cap(c, X86_FEATURE_P4); 307 set_cpu_cap(c, X86_FEATURE_P4);
215 }
216 if (c->x86 == 6) 308 if (c->x86 == 6)
217 set_cpu_cap(c, X86_FEATURE_P3); 309 set_cpu_cap(c, X86_FEATURE_P3);
218 if (cpu_has_ds) {
219 unsigned int l1;
220 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
221 if (!(l1 & (1<<11)))
222 set_cpu_cap(c, X86_FEATURE_BTS);
223 if (!(l1 & (1<<12)))
224 set_cpu_cap(c, X86_FEATURE_PEBS);
225 ds_init_intel(c);
226 }
227 310
228 if (cpu_has_bts) 311 if (cpu_has_bts)
229 ptrace_bts_init_intel(c); 312 ptrace_bts_init_intel(c);
230 313
231 /* 314#endif
232 * See if we have a good local APIC by checking for buggy Pentia,
233 * i.e. all B steppings and the C2 stepping of P54C when using their
234 * integrated APIC (see 11AP erratum in "Pentium Processor
235 * Specification Update").
236 */
237 if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
238 (c->x86_mask < 0x6 || c->x86_mask == 0xb))
239 set_cpu_cap(c, X86_FEATURE_11AP);
240 315
241#ifdef CONFIG_X86_NUMAQ 316 detect_extended_topology(c);
242 numaq_tsc_disable(); 317 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
318 /*
319 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
320 * detection.
321 */
322 c->x86_max_cores = intel_num_cpu_cores(c);
323#ifdef CONFIG_X86_32
324 detect_ht(c);
243#endif 325#endif
326 }
327
328 /* Work around errata */
329 srat_detect_node();
330
331 if (cpu_has(c, X86_FEATURE_VMX))
332 detect_vmx_virtcap(c);
244} 333}
245 334
335#ifdef CONFIG_X86_32
246static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) 336static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
247{ 337{
248 /* 338 /*
@@ -255,10 +345,12 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
255 size = 256; 345 size = 256;
256 return size; 346 return size;
257} 347}
348#endif
258 349
259static struct cpu_dev intel_cpu_dev __cpuinitdata = { 350static struct cpu_dev intel_cpu_dev __cpuinitdata = {
260 .c_vendor = "Intel", 351 .c_vendor = "Intel",
261 .c_ident = { "GenuineIntel" }, 352 .c_ident = { "GenuineIntel" },
353#ifdef CONFIG_X86_32
262 .c_models = { 354 .c_models = {
263 { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = 355 { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
264 { 356 {
@@ -308,76 +400,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
308 } 400 }
309 }, 401 },
310 }, 402 },
403 .c_size_cache = intel_size_cache,
404#endif
311 .c_early_init = early_init_intel, 405 .c_early_init = early_init_intel,
312 .c_init = init_intel, 406 .c_init = init_intel,
313 .c_size_cache = intel_size_cache, 407 .c_x86_vendor = X86_VENDOR_INTEL,
314}; 408};
315 409
316cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); 410cpu_dev_register(intel_cpu_dev);
317
318#ifndef CONFIG_X86_CMPXCHG
319unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
320{
321 u8 prev;
322 unsigned long flags;
323
324 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
325 local_irq_save(flags);
326 prev = *(u8 *)ptr;
327 if (prev == old)
328 *(u8 *)ptr = new;
329 local_irq_restore(flags);
330 return prev;
331}
332EXPORT_SYMBOL(cmpxchg_386_u8);
333
334unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
335{
336 u16 prev;
337 unsigned long flags;
338
339 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
340 local_irq_save(flags);
341 prev = *(u16 *)ptr;
342 if (prev == old)
343 *(u16 *)ptr = new;
344 local_irq_restore(flags);
345 return prev;
346}
347EXPORT_SYMBOL(cmpxchg_386_u16);
348
349unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
350{
351 u32 prev;
352 unsigned long flags;
353
354 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
355 local_irq_save(flags);
356 prev = *(u32 *)ptr;
357 if (prev == old)
358 *(u32 *)ptr = new;
359 local_irq_restore(flags);
360 return prev;
361}
362EXPORT_SYMBOL(cmpxchg_386_u32);
363#endif
364
365#ifndef CONFIG_X86_CMPXCHG64
366unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
367{
368 u64 prev;
369 unsigned long flags;
370
371 /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
372 local_irq_save(flags);
373 prev = *(u64 *)ptr;
374 if (prev == old)
375 *(u64 *)ptr = new;
376 local_irq_restore(flags);
377 return prev;
378}
379EXPORT_SYMBOL(cmpxchg_486_u64);
380#endif
381
382/* arch_initcall(intel_cpu_init); */
383 411
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
deleted file mode 100644
index 1019c58d39f0..000000000000
--- a/arch/x86/kernel/cpu/intel_64.c
+++ /dev/null
@@ -1,95 +0,0 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3#include <asm/processor.h>
4#include <asm/ptrace.h>
5#include <asm/topology.h>
6#include <asm/numa_64.h>
7
8#include "cpu.h"
9
10static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
11{
12 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
13 (c->x86 == 0x6 && c->x86_model >= 0x0e))
14 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15
16 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
17}
18
19/*
20 * find out the number of processor cores on the die
21 */
22static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
23{
24 unsigned int eax, t;
25
26 if (c->cpuid_level < 4)
27 return 1;
28
29 cpuid_count(4, 0, &eax, &t, &t, &t);
30
31 if (eax & 0x1f)
32 return ((eax >> 26) + 1);
33 else
34 return 1;
35}
36
37static void __cpuinit srat_detect_node(void)
38{
39#ifdef CONFIG_NUMA
40 unsigned node;
41 int cpu = smp_processor_id();
42 int apicid = hard_smp_processor_id();
43
44 /* Don't do the funky fallback heuristics the AMD version employs
45 for now. */
46 node = apicid_to_node[apicid];
47 if (node == NUMA_NO_NODE || !node_online(node))
48 node = first_node(node_online_map);
49 numa_set_node(cpu, node);
50
51 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
52#endif
53}
54
55static void __cpuinit init_intel(struct cpuinfo_x86 *c)
56{
57 init_intel_cacheinfo(c);
58 if (c->cpuid_level > 9) {
59 unsigned eax = cpuid_eax(10);
60 /* Check for version and the number of counters */
61 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
62 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
63 }
64
65 if (cpu_has_ds) {
66 unsigned int l1, l2;
67 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
68 if (!(l1 & (1<<11)))
69 set_cpu_cap(c, X86_FEATURE_BTS);
70 if (!(l1 & (1<<12)))
71 set_cpu_cap(c, X86_FEATURE_PEBS);
72 }
73
74
75 if (cpu_has_bts)
76 ds_init_intel(c);
77
78 if (c->x86 == 15)
79 c->x86_cache_alignment = c->x86_clflush_size * 2;
80 if (c->x86 == 6)
81 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
82 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
83 c->x86_max_cores = intel_num_cpu_cores(c);
84
85 srat_detect_node();
86}
87
88static struct cpu_dev intel_cpu_dev __cpuinitdata = {
89 .c_vendor = "Intel",
90 .c_ident = { "GenuineIntel" },
91 .c_early_init = early_init_intel,
92 .c_init = init_intel,
93};
94cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
95
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6b0a10b002f1..3f46afbb1cf1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * Routines to indentify caches on Intel CPU. 2 * Routines to indentify caches on Intel CPU.
3 * 3 *
4 * Changes: 4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4) 5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. 6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. 7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8 */ 8 */
@@ -13,6 +13,7 @@
13#include <linux/compiler.h> 13#include <linux/compiler.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/pci.h>
16 17
17#include <asm/processor.h> 18#include <asm/processor.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
@@ -130,9 +131,18 @@ struct _cpuid4_info {
130 union _cpuid4_leaf_ebx ebx; 131 union _cpuid4_leaf_ebx ebx;
131 union _cpuid4_leaf_ecx ecx; 132 union _cpuid4_leaf_ecx ecx;
132 unsigned long size; 133 unsigned long size;
134 unsigned long can_disable;
133 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 135 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
134}; 136};
135 137
138#ifdef CONFIG_PCI
139static struct pci_device_id k8_nb_id[] = {
140 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
141 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
142 {}
143};
144#endif
145
136unsigned short num_cache_leaves; 146unsigned short num_cache_leaves;
137 147
138/* AMD doesn't have CPUID4. Emulate it here to report the same 148/* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -182,9 +192,10 @@ static unsigned short assocs[] __cpuinitdata = {
182static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; 192static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
183static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; 193static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
184 194
185static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, 195static void __cpuinit
186 union _cpuid4_leaf_ebx *ebx, 196amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
187 union _cpuid4_leaf_ecx *ecx) 197 union _cpuid4_leaf_ebx *ebx,
198 union _cpuid4_leaf_ecx *ecx)
188{ 199{
189 unsigned dummy; 200 unsigned dummy;
190 unsigned line_size, lines_per_tag, assoc, size_in_kb; 201 unsigned line_size, lines_per_tag, assoc, size_in_kb;
@@ -251,27 +262,40 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
251 (ebx->split.ways_of_associativity + 1) - 1; 262 (ebx->split.ways_of_associativity + 1) - 1;
252} 263}
253 264
254static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 265static void __cpuinit
266amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
267{
268 if (index < 3)
269 return;
270 this_leaf->can_disable = 1;
271}
272
273static int
274__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
255{ 275{
256 union _cpuid4_leaf_eax eax; 276 union _cpuid4_leaf_eax eax;
257 union _cpuid4_leaf_ebx ebx; 277 union _cpuid4_leaf_ebx ebx;
258 union _cpuid4_leaf_ecx ecx; 278 union _cpuid4_leaf_ecx ecx;
259 unsigned edx; 279 unsigned edx;
260 280
261 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 281 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
262 amd_cpuid4(index, &eax, &ebx, &ecx); 282 amd_cpuid4(index, &eax, &ebx, &ecx);
263 else 283 if (boot_cpu_data.x86 >= 0x10)
264 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 284 amd_check_l3_disable(index, this_leaf);
285 } else {
286 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
287 }
288
265 if (eax.split.type == CACHE_TYPE_NULL) 289 if (eax.split.type == CACHE_TYPE_NULL)
266 return -EIO; /* better error ? */ 290 return -EIO; /* better error ? */
267 291
268 this_leaf->eax = eax; 292 this_leaf->eax = eax;
269 this_leaf->ebx = ebx; 293 this_leaf->ebx = ebx;
270 this_leaf->ecx = ecx; 294 this_leaf->ecx = ecx;
271 this_leaf->size = (ecx.split.number_of_sets + 1) * 295 this_leaf->size = (ecx.split.number_of_sets + 1) *
272 (ebx.split.coherency_line_size + 1) * 296 (ebx.split.coherency_line_size + 1) *
273 (ebx.split.physical_line_partition + 1) * 297 (ebx.split.physical_line_partition + 1) *
274 (ebx.split.ways_of_associativity + 1); 298 (ebx.split.ways_of_associativity + 1);
275 return 0; 299 return 0;
276} 300}
277 301
@@ -453,7 +477,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
453 477
454/* pointer to _cpuid4_info array (for each cache leaf) */ 478/* pointer to _cpuid4_info array (for each cache leaf) */
455static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); 479static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
456#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) 480#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
457 481
458#ifdef CONFIG_SMP 482#ifdef CONFIG_SMP
459static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 483static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@ -490,7 +514,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
490 514
491 this_leaf = CPUID4_INFO_IDX(cpu, index); 515 this_leaf = CPUID4_INFO_IDX(cpu, index);
492 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 516 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
493 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 517 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
494 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 518 cpu_clear(cpu, sibling_leaf->shared_cpu_map);
495 } 519 }
496} 520}
@@ -572,7 +596,7 @@ struct _index_kobject {
572 596
573/* pointer to array of kobjects for cpuX/cache/indexY */ 597/* pointer to array of kobjects for cpuX/cache/indexY */
574static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); 598static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
575#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) 599#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
576 600
577#define show_one_plus(file_name, object, val) \ 601#define show_one_plus(file_name, object, val) \
578static ssize_t show_##file_name \ 602static ssize_t show_##file_name \
@@ -637,6 +661,99 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
637 } 661 }
638} 662}
639 663
664#define to_object(k) container_of(k, struct _index_kobject, kobj)
665#define to_attr(a) container_of(a, struct _cache_attr, attr)
666
667#ifdef CONFIG_PCI
668static struct pci_dev *get_k8_northbridge(int node)
669{
670 struct pci_dev *dev = NULL;
671 int i;
672
673 for (i = 0; i <= node; i++) {
674 do {
675 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
676 if (!dev)
677 break;
678 } while (!pci_match_id(&k8_nb_id[0], dev));
679 if (!dev)
680 break;
681 }
682 return dev;
683}
684#else
685static struct pci_dev *get_k8_northbridge(int node)
686{
687 return NULL;
688}
689#endif
690
691static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
692{
693 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
694 struct pci_dev *dev = NULL;
695 ssize_t ret = 0;
696 int i;
697
698 if (!this_leaf->can_disable)
699 return sprintf(buf, "Feature not enabled\n");
700
701 dev = get_k8_northbridge(node);
702 if (!dev) {
703 printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
704 return -EINVAL;
705 }
706
707 for (i = 0; i < 2; i++) {
708 unsigned int reg;
709
710 pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
711
712 ret += sprintf(buf, "%sEntry: %d\n", buf, i);
713 ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
714 buf,
715 reg & 0x80000000 ? "Disabled" : "Allowed",
716 reg & 0x40000000 ? "Disabled" : "Allowed");
717 ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
718 buf, (reg & 0x30000) >> 16, reg & 0xfff);
719 }
720 return ret;
721}
722
723static ssize_t
724store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
725 size_t count)
726{
727 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
728 struct pci_dev *dev = NULL;
729 unsigned int ret, index, val;
730
731 if (!this_leaf->can_disable)
732 return 0;
733
734 if (strlen(buf) > 15)
735 return -EINVAL;
736
737 ret = sscanf(buf, "%x %x", &index, &val);
738 if (ret != 2)
739 return -EINVAL;
740 if (index > 1)
741 return -EINVAL;
742
743 val |= 0xc0000000;
744 dev = get_k8_northbridge(node);
745 if (!dev) {
746 printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
747 return -EINVAL;
748 }
749
750 pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
751 wbinvd();
752 pci_write_config_dword(dev, 0x1BC + index * 4, val);
753
754 return 1;
755}
756
640struct _cache_attr { 757struct _cache_attr {
641 struct attribute attr; 758 struct attribute attr;
642 ssize_t (*show)(struct _cpuid4_info *, char *); 759 ssize_t (*show)(struct _cpuid4_info *, char *);
@@ -657,6 +774,8 @@ define_one_ro(size);
657define_one_ro(shared_cpu_map); 774define_one_ro(shared_cpu_map);
658define_one_ro(shared_cpu_list); 775define_one_ro(shared_cpu_list);
659 776
777static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
778
660static struct attribute * default_attrs[] = { 779static struct attribute * default_attrs[] = {
661 &type.attr, 780 &type.attr,
662 &level.attr, 781 &level.attr,
@@ -667,12 +786,10 @@ static struct attribute * default_attrs[] = {
667 &size.attr, 786 &size.attr,
668 &shared_cpu_map.attr, 787 &shared_cpu_map.attr,
669 &shared_cpu_list.attr, 788 &shared_cpu_list.attr,
789 &cache_disable.attr,
670 NULL 790 NULL
671}; 791};
672 792
673#define to_object(k) container_of(k, struct _index_kobject, kobj)
674#define to_attr(a) container_of(a, struct _cache_attr, attr)
675
676static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) 793static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
677{ 794{
678 struct _cache_attr *fattr = to_attr(attr); 795 struct _cache_attr *fattr = to_attr(attr);
@@ -682,14 +799,22 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
682 ret = fattr->show ? 799 ret = fattr->show ?
683 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), 800 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
684 buf) : 801 buf) :
685 0; 802 0;
686 return ret; 803 return ret;
687} 804}
688 805
689static ssize_t store(struct kobject * kobj, struct attribute * attr, 806static ssize_t store(struct kobject * kobj, struct attribute * attr,
690 const char * buf, size_t count) 807 const char * buf, size_t count)
691{ 808{
692 return 0; 809 struct _cache_attr *fattr = to_attr(attr);
810 struct _index_kobject *this_leaf = to_object(kobj);
811 ssize_t ret;
812
813 ret = fattr->store ?
814 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
815 buf, count) :
816 0;
817 return ret;
693} 818}
694 819
695static struct sysfs_ops sysfs_ops = { 820static struct sysfs_ops sysfs_ops = {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 726a5fcdf341..4b031a4ac856 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -860,7 +860,7 @@ error:
860 return err; 860 return err;
861} 861}
862 862
863static void mce_remove_device(unsigned int cpu) 863static __cpuinit void mce_remove_device(unsigned int cpu)
864{ 864{
865 int i; 865 int i;
866 866
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
new file mode 100644
index 000000000000..dfea390e1608
--- /dev/null
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -0,0 +1,32 @@
#!/usr/bin/perl
#
# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
#
# Usage: mkcapflags.pl <input header> <output C file>
#

use strict;
use warnings;

my ($in, $out) = @ARGV;

# Three-argument open() with an undefined name opens an anonymous temporary
# file instead of failing, so insist on both file names up front.
die "Usage: $0 <cpufeature.h> <output.c>\n"
	unless defined($in) && defined($out);

# Three-argument open() takes the file name literally, so no "\0" suffix is
# needed to protect it from mode or whitespace interpretation.
open(my $ifh, '<', $in)  or die "$0: cannot open: $in: $!\n";
open(my $ofh, '>', $out) or die "$0: cannot create: $out: $!\n";

print $ofh "#include <asm/cpufeature.h>\n\n";
print $ofh "const char * const x86_cap_flags[NCAPINTS*32] = {\n";

while (defined(my $line = <$ifh>)) {
	# Only "#define X86_FEATURE_<NAME> <rest of line>" lines are of interest.
	next unless $line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/;

	my ($macro, $feature, $tail) = ($1, $2, $3);

	# A double-quoted string opening the trailing /* ... */ comment replaces
	# the name taken from the macro; an empty string suppresses the entry.
	if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
		$feature = $1;
	}

	if ($feature ne '') {
		printf $ofh "\t%-32s = \"%s\",\n", "[$macro]", lc($feature);
	}
}
print $ofh "};\n";

close($ifh);
# Buffered output is flushed on close, so a failure here means a bad file.
close($ofh) or die "$0: error writing: $out: $!\n";
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
new file mode 100644
index 000000000000..5abbea297e0c
--- /dev/null
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -0,0 +1,20 @@
1/*
2 * Strings for the various x86 power flags
3 *
4 * This file must not contain any executable code.
5 */
6
7#include <asm/cpufeature.h>
8
/*
 * Names of the x86 power flags, indexed by bit position.  Slots that are
 * not listed stay NULL.
 */
const char *const x86_power_flags[32] = {
	[0] = "ts",		/* temperature sensor */
	[1] = "fid",		/* frequency id control */
	[2] = "vid",		/* voltage id control */
	[3] = "ttp",		/* thermal trip */
	[4] = "tm",
	[5] = "stc",
	[6] = "100mhzsteps",
	[7] = "hwpstate",
	[8] = "",		/* tsc invariant mapped to constant_tsc */
	/* [9] .. [31]: nothing */
};
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index b911a2c61b8f..52b3fefbd5af 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -5,6 +5,18 @@
5#include <asm/msr.h> 5#include <asm/msr.h>
6#include "cpu.h" 6#include "cpu.h"
7 7
8static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c)
9{
10 u32 xlvl;
11
12 /* Transmeta-defined flags: level 0x80860001 */
13 xlvl = cpuid_eax(0x80860000);
14 if ((xlvl & 0xffff0000) == 0x80860000) {
15 if (xlvl >= 0x80860001)
16 c->x86_capability[2] = cpuid_edx(0x80860001);
17 }
18}
19
8static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) 20static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
9{ 21{
10 unsigned int cap_mask, uk, max, dummy; 22 unsigned int cap_mask, uk, max, dummy;
@@ -12,7 +24,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
12 unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; 24 unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev;
13 char cpu_info[65]; 25 char cpu_info[65];
14 26
15 get_model_name(c); /* Same as AMD/Cyrix */ 27 early_init_transmeta(c);
28
16 display_cacheinfo(c); 29 display_cacheinfo(c);
17 30
18 /* Print CMS and CPU revision */ 31 /* Print CMS and CPU revision */
@@ -85,23 +98,12 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
85#endif 98#endif
86} 99}
87 100
88static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c)
89{
90 u32 xlvl;
91
92 /* Transmeta-defined flags: level 0x80860001 */
93 xlvl = cpuid_eax(0x80860000);
94 if ((xlvl & 0xffff0000) == 0x80860000) {
95 if (xlvl >= 0x80860001)
96 c->x86_capability[2] = cpuid_edx(0x80860001);
97 }
98}
99
100static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { 101static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
101 .c_vendor = "Transmeta", 102 .c_vendor = "Transmeta",
102 .c_ident = { "GenuineTMx86", "TransmetaCPU" }, 103 .c_ident = { "GenuineTMx86", "TransmetaCPU" },
104 .c_early_init = early_init_transmeta,
103 .c_init = init_transmeta, 105 .c_init = init_transmeta,
104 .c_identify = transmeta_identify, 106 .c_x86_vendor = X86_VENDOR_TRANSMETA,
105}; 107};
106 108
107cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); 109cpu_dev_register(transmeta_cpu_dev);
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index b1fc90989d75..e777f79e0960 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -19,7 +19,8 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = {
19 } 19 }
20 }, 20 },
21 }, 21 },
22 .c_x86_vendor = X86_VENDOR_UMC,
22}; 23};
23 24
24cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); 25cpu_dev_register(umc_cpu_dev);
25 26
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 66e48aa2dd1b..78e642feac30 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -148,6 +148,9 @@ void __init e820_print_map(char *who)
148 case E820_NVS: 148 case E820_NVS:
149 printk(KERN_CONT "(ACPI NVS)\n"); 149 printk(KERN_CONT "(ACPI NVS)\n");
150 break; 150 break;
151 case E820_UNUSABLE:
152 printk("(unusable)\n");
153 break;
151 default: 154 default:
152 printk(KERN_CONT "type %u\n", e820.map[i].type); 155 printk(KERN_CONT "type %u\n", e820.map[i].type);
153 break; 156 break;
@@ -1260,6 +1263,7 @@ static inline const char *e820_type_to_string(int e820_type)
1260 case E820_RAM: return "System RAM"; 1263 case E820_RAM: return "System RAM";
1261 case E820_ACPI: return "ACPI Tables"; 1264 case E820_ACPI: return "ACPI Tables";
1262 case E820_NVS: return "ACPI Non-volatile Storage"; 1265 case E820_NVS: return "ACPI Non-volatile Storage";
1266 case E820_UNUSABLE: return "Unusable memory";
1263 default: return "reserved"; 1267 default: return "reserved";
1264 } 1268 }
1265} 1269}
@@ -1267,6 +1271,7 @@ static inline const char *e820_type_to_string(int e820_type)
1267/* 1271/*
1268 * Mark e820 reserved areas as busy for the resource manager. 1272 * Mark e820 reserved areas as busy for the resource manager.
1269 */ 1273 */
1274static struct resource __initdata *e820_res;
1270void __init e820_reserve_resources(void) 1275void __init e820_reserve_resources(void)
1271{ 1276{
1272 int i; 1277 int i;
@@ -1274,6 +1279,7 @@ void __init e820_reserve_resources(void)
1274 u64 end; 1279 u64 end;
1275 1280
1276 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); 1281 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
1282 e820_res = res;
1277 for (i = 0; i < e820.nr_map; i++) { 1283 for (i = 0; i < e820.nr_map; i++) {
1278 end = e820.map[i].addr + e820.map[i].size - 1; 1284 end = e820.map[i].addr + e820.map[i].size - 1;
1279#ifndef CONFIG_RESOURCES_64BIT 1285#ifndef CONFIG_RESOURCES_64BIT
@@ -1287,7 +1293,14 @@ void __init e820_reserve_resources(void)
1287 res->end = end; 1293 res->end = end;
1288 1294
1289 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; 1295 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1290 insert_resource(&iomem_resource, res); 1296
1297 /*
1298 * don't register the region that could be conflicted with
1299 * pci device BAR resource and insert them later in
1300 * pcibios_resource_survey()
1301 */
1302 if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20))
1303 insert_resource(&iomem_resource, res);
1291 res++; 1304 res++;
1292 } 1305 }
1293 1306
@@ -1299,6 +1312,19 @@ void __init e820_reserve_resources(void)
1299 } 1312 }
1300} 1313}
1301 1314
1315void __init e820_reserve_resources_late(void)
1316{
1317 int i;
1318 struct resource *res;
1319
1320 res = e820_res;
1321 for (i = 0; i < e820.nr_map; i++) {
1322 if (!res->parent && res->end)
1323 reserve_region_with_split(&iomem_resource, res->start, res->end, res->name);
1324 res++;
1325 }
1326}
1327
1302char *__init default_machine_specific_memory_setup(void) 1328char *__init default_machine_specific_memory_setup(void)
1303{ 1329{
1304 char *who = "BIOS-e820"; 1330 char *who = "BIOS-e820";
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/kernel/es7000_32.c
index 50189af14b85..849e5cd485b8 100644
--- a/arch/x86/mach-es7000/es7000plat.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -39,10 +39,93 @@
39#include <asm/nmi.h> 39#include <asm/nmi.h>
40#include <asm/smp.h> 40#include <asm/smp.h>
41#include <asm/apicdef.h> 41#include <asm/apicdef.h>
42#include "es7000.h"
43#include <mach_mpparse.h> 42#include <mach_mpparse.h>
44 43
45/* 44/*
45 * ES7000 chipsets
46 */
47
48#define NON_UNISYS 0
49#define ES7000_CLASSIC 1
50#define ES7000_ZORRO 2
51
52
#define MIP_REG			1
#define MIP_PSAI_REG		4

#define MIP_BUSY		1
#define MIP_SPIN		0xf0000
#define MIP_VALID		0x0100000000000000ULL
/*
 * The argument is parenthesized so that a compound expression such as
 * MIP_PORT(a | b) is shifted/masked as a whole.
 */
#define MIP_PORT(VALUE)		(((VALUE) >> 32) & 0xffff)

#define MIP_RD_LO(VALUE)	((VALUE) & 0xffffffff)
62
/* Four consecutive 64-bit fields. */
struct mip_reg_info {
	unsigned long long	mip_info;
	unsigned long long	delivery_info;
	unsigned long long	host_reg;
	unsigned long long	mip_reg;
};
69
/* Four single-byte fields, an unsigned long and three fixed-size char arrays. */
struct part_info {
	unsigned char	type;
	unsigned char	length;
	unsigned char	part_id;
	unsigned char	apic_mode;
	unsigned long	snum;
	char		ptype[16];
	char		sname[64];
	char		pname[64];
};
80
/* Three consecutive 64-bit fields. */
struct psai {
	unsigned long long	entry_type;
	unsigned long long	addr;
	unsigned long long	bep_addr;
};
86
/* Two header bytes and six bytes in resv[], then 64-bit start and size. */
struct es7000_mem_info {
	unsigned char		type;
	unsigned char		length;
	unsigned char		resv[6];
	unsigned long long	start;
	unsigned long long	size;
};
94
95struct es7000_oem_table {
96 unsigned long long hdr;
97 struct mip_reg_info mip;
98 struct part_info pif;
99 struct es7000_mem_info shm;
100 struct psai psai;
101};
102
103#ifdef CONFIG_ACPI
104
105struct oem_table {
106 struct acpi_table_header Header;
107 u32 OEMTableAddr;
108 u32 OEMTableSize;
109};
110
111extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
112#endif
113
/* Eight consecutive 64-bit slots; each field is named after its hex byte offset. */
struct mip_reg {
	unsigned long long	off_0;
	unsigned long long	off_8;
	unsigned long long	off_10;
	unsigned long long	off_18;
	unsigned long long	off_20;
	unsigned long long	off_28;
	unsigned long long	off_30;
	unsigned long long	off_38;
};
124
#define MIP_SW_APIC		0x1020b
/* Low byte of VALUE; the argument is parenthesized so any expression is masked as a whole. */
#define MIP_FUNC(VALUE)		((VALUE) & 0xff)
127
128/*
46 * ES7000 Globals 129 * ES7000 Globals
47 */ 130 */
48 131
@@ -72,7 +155,7 @@ es7000_rename_gsi(int ioapic, int gsi)
72 base += nr_ioapic_registers[i]; 155 base += nr_ioapic_registers[i];
73 } 156 }
74 157
75 if (!ioapic && (gsi < 16)) 158 if (!ioapic && (gsi < 16))
76 gsi += base; 159 gsi += base;
77 return gsi; 160 return gsi;
78} 161}
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index eaff0bbb1444..6c9bfc9e1e95 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -16,87 +16,63 @@
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h> 18#include <linux/hardirq.h>
19#include <linux/dmar.h>
19 20
20#include <asm/smp.h> 21#include <asm/smp.h>
21#include <asm/ipi.h> 22#include <asm/ipi.h>
22#include <asm/genapic.h> 23#include <asm/genapic.h>
23 24
24#ifdef CONFIG_ACPI 25extern struct genapic apic_flat;
25#include <acpi/acpi_bus.h> 26extern struct genapic apic_physflat;
26#endif 27extern struct genapic apic_x2xpic_uv_x;
27 28extern struct genapic apic_x2apic_phys;
28DEFINE_PER_CPU(int, x2apic_extra_bits); 29extern struct genapic apic_x2apic_cluster;
29 30
30struct genapic __read_mostly *genapic = &apic_flat; 31struct genapic __read_mostly *genapic = &apic_flat;
31 32
32static enum uv_system_type uv_system_type; 33static struct genapic *apic_probe[] __initdata = {
34 &apic_x2apic_uv_x,
35 &apic_x2apic_phys,
36 &apic_x2apic_cluster,
37 &apic_physflat,
38 NULL,
39};
33 40
34/* 41/*
35 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 42 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
36 */ 43 */
37void __init setup_apic_routing(void) 44void __init setup_apic_routing(void)
38{ 45{
39 if (uv_system_type == UV_NON_UNIQUE_APIC) 46 if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
40 genapic = &apic_x2apic_uv_x; 47 if (!intr_remapping_enabled)
41 else 48 genapic = &apic_flat;
42#ifdef CONFIG_ACPI 49 }
43 /*
44 * Quirk: some x86_64 machines can only use physical APIC mode
45 * regardless of how many processors are present (x86_64 ES7000
46 * is an example).
47 */
48 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
49 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
50 genapic = &apic_physflat;
51 else
52#endif
53
54 if (max_physical_apicid < 8)
55 genapic = &apic_flat;
56 else
57 genapic = &apic_physflat;
58 50
59 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 51 if (genapic == &apic_flat) {
52 if (max_physical_apicid >= 8)
53 genapic = &apic_physflat;
54 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
55 }
60} 56}
61 57
62/* Same for both flat and physical. */ 58/* Same for both flat and physical. */
63 59
64void send_IPI_self(int vector) 60void apic_send_IPI_self(int vector)
65{ 61{
66 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); 62 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
67} 63}
68 64
69int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 65int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
70{ 66{
71 if (!strcmp(oem_id, "SGI")) { 67 int i;
72 if (!strcmp(oem_table_id, "UVL")) 68
73 uv_system_type = UV_LEGACY_APIC; 69 for (i = 0; apic_probe[i]; ++i) {
74 else if (!strcmp(oem_table_id, "UVX")) 70 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
75 uv_system_type = UV_X2APIC; 71 genapic = apic_probe[i];
76 else if (!strcmp(oem_table_id, "UVH")) 72 printk(KERN_INFO "Setting APIC routing to %s.\n",
77 uv_system_type = UV_NON_UNIQUE_APIC; 73 genapic->name);
74 return 1;
75 }
78 } 76 }
79 return 0; 77 return 0;
80} 78}
81
82unsigned int read_apic_id(void)
83{
84 unsigned int id;
85
86 WARN_ON(preemptible() && num_online_cpus() > 1);
87 id = apic_read(APIC_ID);
88 if (uv_system_type >= UV_X2APIC)
89 id |= __get_cpu_var(x2apic_extra_bits);
90 return id;
91}
92
93enum uv_system_type get_uv_system_type(void)
94{
95 return uv_system_type;
96}
97
98int is_uv_system(void)
99{
100 return uv_system_type != UV_NONE;
101}
102EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a62d38..9eca5ba7a6b1 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
19#include <asm/ipi.h> 20#include <asm/ipi.h>
20#include <asm/genapic.h> 21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23
24#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h>
26#endif
27
28static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29{
30 return 1;
31}
21 32
22static cpumask_t flat_target_cpus(void) 33static cpumask_t flat_target_cpus(void)
23{ 34{
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
95 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
96} 107}
97 108
/* Return the 8-bit field held in bits 31:24 of x. */
static unsigned int get_apic_id(unsigned long x)
{
	return (x >> 24) & 0xFFu;
}
116
/* Place the low 8 bits of id into bits 31:24; all other bits are zero. */
static unsigned long set_apic_id(unsigned int id)
{
	return (id & 0xFFu) << 24;
}
124
125static unsigned int read_xapic_id(void)
126{
127 unsigned int id;
128
129 id = get_apic_id(apic_read(APIC_ID));
130 return id;
131}
132
98static int flat_apic_id_registered(void) 133static int flat_apic_id_registered(void)
99{ 134{
100 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 135 return physid_isset(read_xapic_id(), phys_cpu_present_map);
101} 136}
102 137
103static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
112 147
113struct genapic apic_flat = { 148struct genapic apic_flat = {
114 .name = "flat", 149 .name = "flat",
150 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
115 .int_delivery_mode = dest_LowestPrio, 151 .int_delivery_mode = dest_LowestPrio,
116 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 152 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
117 .target_cpus = flat_target_cpus, 153 .target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat = {
121 .send_IPI_all = flat_send_IPI_all, 157 .send_IPI_all = flat_send_IPI_all,
122 .send_IPI_allbutself = flat_send_IPI_allbutself, 158 .send_IPI_allbutself = flat_send_IPI_allbutself,
123 .send_IPI_mask = flat_send_IPI_mask, 159 .send_IPI_mask = flat_send_IPI_mask,
160 .send_IPI_self = apic_send_IPI_self,
124 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
125 .phys_pkg_id = phys_pkg_id, 162 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id,
165 .apic_id_mask = (0xFFu<<24),
126}; 166};
127 167
128/* 168/*
@@ -130,6 +170,21 @@ struct genapic apic_flat = {
130 * We cannot use logical delivery in this case because the mask 170 * We cannot use logical delivery in this case because the mask
131 * overflows, so use physical mode. 171 * overflows, so use physical mode.
132 */ 172 */
173static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
174{
175#ifdef CONFIG_ACPI
176 /*
177 * Quirk: some x86_64 machines can only use physical APIC mode
178 * regardless of how many processors are present (x86_64 ES7000
179 * is an example).
180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
183 return 1;
184#endif
185
186 return 0;
187}
133 188
134static cpumask_t physflat_target_cpus(void) 189static cpumask_t physflat_target_cpus(void)
135{ 190{
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
176 231
177struct genapic apic_physflat = { 232struct genapic apic_physflat = {
178 .name = "physical flat", 233 .name = "physical flat",
234 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
179 .int_delivery_mode = dest_Fixed, 235 .int_delivery_mode = dest_Fixed,
180 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 236 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
181 .target_cpus = physflat_target_cpus, 237 .target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
185 .send_IPI_all = physflat_send_IPI_all, 241 .send_IPI_all = physflat_send_IPI_all,
186 .send_IPI_allbutself = physflat_send_IPI_allbutself, 242 .send_IPI_allbutself = physflat_send_IPI_allbutself,
187 .send_IPI_mask = physflat_send_IPI_mask, 243 .send_IPI_mask = physflat_send_IPI_mask,
244 .send_IPI_self = apic_send_IPI_self,
188 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 245 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
189 .phys_pkg_id = phys_pkg_id, 246 .phys_pkg_id = phys_pkg_id,
247 .get_apic_id = get_apic_id,
248 .set_apic_id = set_apic_id,
249 .apic_id_mask = (0xFFu<<24),
190}; 250};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 000000000000..e4bf2cc0d743
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,159 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14
15static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{
17 if (cpu_has_x2apic)
18 return 1;
19
20 return 0;
21}
22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24
25static cpumask_t x2apic_target_cpus(void)
26{
27 return cpumask_of_cpu(0);
28}
29
30/*
31 * for now each logical cpu is in its own vector allocation domain.
32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu)
34{
35 cpumask_t domain = CPU_MASK_NONE;
36 cpu_set(cpu, domain);
37 return domain;
38}
39
/*
 * Send one IPI: build the ICR value for (vector, dest) with __prepare_ICR()
 * and write it, together with the destination apicid, via x2apic_icr_write().
 */
static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
				   unsigned int dest)
{
	const unsigned long icr_low = __prepare_ICR(0, vector, dest);

	x2apic_icr_write(icr_low, apicid);
}
52
53/*
54 * for now, we send the IPI's one by one in the cpumask.
55 * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes.
58 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
60{
61 unsigned long flags;
62 unsigned long query_cpu;
63
64 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) {
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL);
68 }
69 local_irq_restore(flags);
70}
71
72static void x2apic_send_IPI_allbutself(int vector)
73{
74 cpumask_t mask = cpu_online_map;
75
76 cpu_clear(smp_processor_id(), mask);
77
78 if (!cpus_empty(mask))
79 x2apic_send_IPI_mask(mask, vector);
80}
81
82static void x2apic_send_IPI_all(int vector)
83{
84 x2apic_send_IPI_mask(cpu_online_map, vector);
85}
86
/* Always reports the APIC ID as registered. */
static int x2apic_apic_id_registered(void)
{
	return 1;
}
91
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
93{
94 int cpu;
95
96 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID.
98 * May as well be the first.
99 */
100 cpu = first_cpu(cpumask);
101 if ((unsigned)cpu < NR_CPUS)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else
104 return BAD_APICID;
105}
106
/* The APIC ID is the value itself, narrowed to unsigned int. */
static unsigned int get_apic_id(unsigned long x)
{
	return (unsigned int)x;
}
114
/* The register value is the ID itself, widened to unsigned long. */
static unsigned long set_apic_id(unsigned int id)
{
	return (unsigned long)id;
}
122
123static unsigned int phys_pkg_id(int index_msb)
124{
125 return current_cpu_data.initial_apicid >> index_msb;
126}
127
128static void x2apic_send_IPI_self(int vector)
129{
130 apic_write(APIC_SELF_IPI, vector);
131}
132
133static void init_x2apic_ldr(void)
134{
135 int cpu = smp_processor_id();
136
137 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
138 return;
139}
140
141struct genapic apic_x2apic_cluster = {
142 .name = "cluster x2apic",
143 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
144 .int_delivery_mode = dest_LowestPrio,
145 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
146 .target_cpus = x2apic_target_cpus,
147 .vector_allocation_domain = x2apic_vector_allocation_domain,
148 .apic_id_registered = x2apic_apic_id_registered,
149 .init_apic_ldr = init_x2apic_ldr,
150 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask,
153 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
155 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id,
158 .apic_id_mask = (0xFFFFFFFFu),
159};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 000000000000..8f1343df2627
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,154 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13static int x2apic_phys;
14
15static int set_x2apic_phys_mode(char *arg)
16{
17 x2apic_phys = 1;
18 return 0;
19}
20early_param("x2apic_phys", set_x2apic_phys_mode);
21
22static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
23{
24 if (cpu_has_x2apic && x2apic_phys)
25 return 1;
26
27 return 0;
28}
29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31
32static cpumask_t x2apic_target_cpus(void)
33{
34 return cpumask_of_cpu(0);
35}
36
37static cpumask_t x2apic_vector_allocation_domain(int cpu)
38{
39 cpumask_t domain = CPU_MASK_NONE;
40 cpu_set(cpu, domain);
41 return domain;
42}
43
/*
 * Send one IPI: build the ICR value for (vector, dest) with __prepare_ICR()
 * and write it, together with the destination apicid, via x2apic_icr_write().
 */
static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
				   unsigned int dest)
{
	const unsigned long icr_low = __prepare_ICR(0, vector, dest);

	x2apic_icr_write(icr_low, apicid);
}
56
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
58{
59 unsigned long flags;
60 unsigned long query_cpu;
61
62 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL);
66 }
67 local_irq_restore(flags);
68}
69
70static void x2apic_send_IPI_allbutself(int vector)
71{
72 cpumask_t mask = cpu_online_map;
73
74 cpu_clear(smp_processor_id(), mask);
75
76 if (!cpus_empty(mask))
77 x2apic_send_IPI_mask(mask, vector);
78}
79
80static void x2apic_send_IPI_all(int vector)
81{
82 x2apic_send_IPI_mask(cpu_online_map, vector);
83}
84
/* Always reports the APIC ID as registered. */
static int x2apic_apic_id_registered(void)
{
	return 1;
}
89
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
91{
92 int cpu;
93
94 /*
95 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first.
97 */
98 cpu = first_cpu(cpumask);
99 if ((unsigned)cpu < NR_CPUS)
100 return per_cpu(x86_cpu_to_apicid, cpu);
101 else
102 return BAD_APICID;
103}
104
/* The APIC ID is the value itself, narrowed to unsigned int. */
static unsigned int get_apic_id(unsigned long x)
{
	return (unsigned int)x;
}
112
/* The register value is the ID itself, widened to unsigned long. */
static unsigned long set_apic_id(unsigned int id)
{
	return (unsigned long)id;
}
120
121static unsigned int phys_pkg_id(int index_msb)
122{
123 return current_cpu_data.initial_apicid >> index_msb;
124}
125
126void x2apic_send_IPI_self(int vector)
127{
128 apic_write(APIC_SELF_IPI, vector);
129}
130
/* Nothing to set up here: this init_apic_ldr callback is intentionally empty. */
void init_x2apic_ldr(void)
{
}
135
136struct genapic apic_x2apic_phys = {
137 .name = "physical x2apic",
138 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
139 .int_delivery_mode = dest_Fixed,
140 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
141 .target_cpus = x2apic_target_cpus,
142 .vector_allocation_domain = x2apic_vector_allocation_domain,
143 .apic_id_registered = x2apic_apic_id_registered,
144 .init_apic_ldr = init_x2apic_ldr,
145 .send_IPI_all = x2apic_send_IPI_all,
146 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
147 .send_IPI_mask = x2apic_send_IPI_mask,
148 .send_IPI_self = x2apic_send_IPI_self,
149 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
150 .phys_pkg_id = phys_pkg_id,
151 .get_apic_id = get_apic_id,
152 .set_apic_id = set_apic_id,
153 .apic_id_mask = (0xFFFFFFFFu),
154};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index bfa837cb16be..ae2ffc8a400c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/sched.h> 17#include <linux/sched.h>
19#include <linux/bootmem.h> 18#include <linux/bootmem.h>
20#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/hardirq.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
@@ -26,6 +26,36 @@
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27#include <asm/uv/bios.h> 27#include <asm/uv/bios.h>
28 28
29DEFINE_PER_CPU(int, x2apic_extra_bits);
30
31static enum uv_system_type uv_system_type;
32
33static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
34{
35 if (!strcmp(oem_id, "SGI")) {
36 if (!strcmp(oem_table_id, "UVL"))
37 uv_system_type = UV_LEGACY_APIC;
38 else if (!strcmp(oem_table_id, "UVX"))
39 uv_system_type = UV_X2APIC;
40 else if (!strcmp(oem_table_id, "UVH")) {
41 uv_system_type = UV_NON_UNIQUE_APIC;
42 return 1;
43 }
44 }
45 return 0;
46}
47
48enum uv_system_type get_uv_system_type(void)
49{
50 return uv_system_type;
51}
52
53int is_uv_system(void)
54{
55 return uv_system_type != UV_NONE;
56}
57EXPORT_SYMBOL_GPL(is_uv_system);
58
29DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 59DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
30EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); 60EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
31 61
@@ -123,6 +153,10 @@ static int uv_apic_id_registered(void)
123 return 1; 153 return 1;
124} 154}
125 155
/* Nothing to set up here: this init_apic_ldr callback is intentionally empty. */
static void uv_init_apic_ldr(void)
{
}
159
126static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 160static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
127{ 161{
128 int cpu; 162 int cpu;
@@ -138,9 +172,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
138 return BAD_APICID; 172 return BAD_APICID;
139} 173}
140 174
175static unsigned int get_apic_id(unsigned long x)
176{
177 unsigned int id;
178
179 WARN_ON(preemptible() && num_online_cpus() > 1);
180 id = x | __get_cpu_var(x2apic_extra_bits);
181
182 return id;
183}
184
/*
 * The register value is the ID itself, widened to unsigned long.
 * Open question carried over from the original: mask out x2apic_extra_bits?
 */
static unsigned long set_apic_id(unsigned int id)
{
	return (unsigned long)id;
}
193
194static unsigned int uv_read_apic_id(void)
195{
196
197 return get_apic_id(apic_read(APIC_ID));
198}
199
141static unsigned int phys_pkg_id(int index_msb) 200static unsigned int phys_pkg_id(int index_msb)
142{ 201{
143 return GET_APIC_ID(read_apic_id()) >> index_msb; 202 return uv_read_apic_id() >> index_msb;
144} 203}
145 204
146#ifdef ZZZ /* Needs x2apic patch */ 205#ifdef ZZZ /* Needs x2apic patch */
@@ -152,17 +211,22 @@ static void uv_send_IPI_self(int vector)
152 211
153struct genapic apic_x2apic_uv_x = { 212struct genapic apic_x2apic_uv_x = {
154 .name = "UV large system", 213 .name = "UV large system",
214 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
155 .int_delivery_mode = dest_Fixed, 215 .int_delivery_mode = dest_Fixed,
156 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 216 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
157 .target_cpus = uv_target_cpus, 217 .target_cpus = uv_target_cpus,
158 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ 218 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
159 .apic_id_registered = uv_apic_id_registered, 219 .apic_id_registered = uv_apic_id_registered,
220 .init_apic_ldr = uv_init_apic_ldr,
160 .send_IPI_all = uv_send_IPI_all, 221 .send_IPI_all = uv_send_IPI_all,
161 .send_IPI_allbutself = uv_send_IPI_allbutself, 222 .send_IPI_allbutself = uv_send_IPI_allbutself,
162 .send_IPI_mask = uv_send_IPI_mask, 223 .send_IPI_mask = uv_send_IPI_mask,
163 /* ZZZ.send_IPI_self = uv_send_IPI_self, */ 224 /* ZZZ.send_IPI_self = uv_send_IPI_self, */
164 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 225 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
165 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 226 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
227 .get_apic_id = get_apic_id,
228 .set_apic_id = set_apic_id,
229 .apic_id_mask = (0xFFFFFFFFu),
166}; 230};
167 231
168static __cpuinit void set_x2apic_extra_bits(int pnode) 232static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -401,3 +465,5 @@ void __cpuinit uv_cpu_init(void)
401 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) 465 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
402 set_x2apic_extra_bits(uv_hub_info->pnode); 466 set_x2apic_extra_bits(uv_hub_info->pnode);
403} 467}
468
469
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index eb9ddd8efb82..45723f1fe198 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -21,9 +21,12 @@
21# include <asm/sigcontext32.h> 21# include <asm/sigcontext32.h>
22# include <asm/user32.h> 22# include <asm/user32.h>
23#else 23#else
24# define save_i387_ia32 save_i387 24# define save_i387_xstate_ia32 save_i387_xstate
25# define restore_i387_ia32 restore_i387 25# define restore_i387_xstate_ia32 restore_i387_xstate
26# define _fpstate_ia32 _fpstate 26# define _fpstate_ia32 _fpstate
27# define _xstate_ia32 _xstate
28# define sig_xstate_ia32_size sig_xstate_size
29# define fx_sw_reserved_ia32 fx_sw_reserved
27# define user_i387_ia32_struct user_i387_struct 30# define user_i387_ia32_struct user_i387_struct
28# define user32_fxsr_struct user_fxsr_struct 31# define user32_fxsr_struct user_fxsr_struct
29#endif 32#endif
@@ -36,6 +39,7 @@
36 39
37static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; 40static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
38unsigned int xstate_size; 41unsigned int xstate_size;
42unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
39static struct i387_fxsave_struct fx_scratch __cpuinitdata; 43static struct i387_fxsave_struct fx_scratch __cpuinitdata;
40 44
41void __cpuinit mxcsr_feature_mask_init(void) 45void __cpuinit mxcsr_feature_mask_init(void)
@@ -61,6 +65,11 @@ void __init init_thread_xstate(void)
61 return; 65 return;
62 } 66 }
63 67
68 if (cpu_has_xsave) {
69 xsave_cntxt_init();
70 return;
71 }
72
64 if (cpu_has_fxsr) 73 if (cpu_has_fxsr)
65 xstate_size = sizeof(struct i387_fxsave_struct); 74 xstate_size = sizeof(struct i387_fxsave_struct);
66#ifdef CONFIG_X86_32 75#ifdef CONFIG_X86_32
@@ -83,9 +92,19 @@ void __cpuinit fpu_init(void)
83 92
84 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ 93 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
85 94
95 /*
96 * Boot processor to setup the FP and extended state context info.
97 */
98 if (!smp_processor_id())
99 init_thread_xstate();
100 xsave_init();
101
86 mxcsr_feature_mask_init(); 102 mxcsr_feature_mask_init();
87 /* clean state in init */ 103 /* clean state in init */
88 current_thread_info()->status = 0; 104 if (cpu_has_xsave)
105 current_thread_info()->status = TS_XSAVE;
106 else
107 current_thread_info()->status = 0;
89 clear_used_math(); 108 clear_used_math();
90} 109}
91#endif /* CONFIG_X86_64 */ 110#endif /* CONFIG_X86_64 */
@@ -195,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
195 */ 214 */
196 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; 215 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
197 216
217 /*
218 * update the header bits in the xsave header, indicating the
219 * presence of FP and SSE state.
220 */
221 if (cpu_has_xsave)
222 target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
223
198 return ret; 224 return ret;
199} 225}
200 226
@@ -395,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
395 if (!ret) 421 if (!ret)
396 convert_to_fxsr(target, &env); 422 convert_to_fxsr(target, &env);
397 423
424 /*
425 * update the header bit in the xsave header, indicating the
426 * presence of FP.
427 */
428 if (cpu_has_xsave)
429 target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
398 return ret; 430 return ret;
399} 431}
400 432
@@ -407,7 +439,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
407 struct task_struct *tsk = current; 439 struct task_struct *tsk = current;
408 struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; 440 struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
409 441
410 unlazy_fpu(tsk);
411 fp->status = fp->swd; 442 fp->status = fp->swd;
412 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) 443 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
413 return -1; 444 return -1;
@@ -421,8 +452,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
421 struct user_i387_ia32_struct env; 452 struct user_i387_ia32_struct env;
422 int err = 0; 453 int err = 0;
423 454
424 unlazy_fpu(tsk);
425
426 convert_from_fxsr(&env, tsk); 455 convert_from_fxsr(&env, tsk);
427 if (__copy_to_user(buf, &env, sizeof(env))) 456 if (__copy_to_user(buf, &env, sizeof(env)))
428 return -1; 457 return -1;
@@ -432,16 +461,40 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
432 if (err) 461 if (err)
433 return -1; 462 return -1;
434 463
435 if (__copy_to_user(&buf->_fxsr_env[0], fx, 464 if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
436 sizeof(struct i387_fxsave_struct)))
437 return -1; 465 return -1;
438 return 1; 466 return 1;
439} 467}
440 468
441int save_i387_ia32(struct _fpstate_ia32 __user *buf) 469static int save_i387_xsave(void __user *buf)
470{
471 struct _fpstate_ia32 __user *fx = buf;
472 int err = 0;
473
474 if (save_i387_fxsave(fx) < 0)
475 return -1;
476
477 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
478 sizeof(struct _fpx_sw_bytes));
479 err |= __put_user(FP_XSTATE_MAGIC2,
480 (__u32 __user *) (buf + sig_xstate_ia32_size
481 - FP_XSTATE_MAGIC2_SIZE));
482 if (err)
483 return -1;
484
485 return 1;
486}
487
488int save_i387_xstate_ia32(void __user *buf)
442{ 489{
490 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
491 struct task_struct *tsk = current;
492
443 if (!used_math()) 493 if (!used_math())
444 return 0; 494 return 0;
495
496 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
497 return -EACCES;
445 /* 498 /*
446 * This will cause a "finit" to be triggered by the next 499 * This will cause a "finit" to be triggered by the next
447 * attempted FPU operation by the 'current' process. 500 * attempted FPU operation by the 'current' process.
@@ -451,13 +504,17 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf)
451 if (!HAVE_HWFP) { 504 if (!HAVE_HWFP) {
452 return fpregs_soft_get(current, NULL, 505 return fpregs_soft_get(current, NULL,
453 0, sizeof(struct user_i387_ia32_struct), 506 0, sizeof(struct user_i387_ia32_struct),
454 NULL, buf) ? -1 : 1; 507 NULL, fp) ? -1 : 1;
455 } 508 }
456 509
510 unlazy_fpu(tsk);
511
512 if (cpu_has_xsave)
513 return save_i387_xsave(fp);
457 if (cpu_has_fxsr) 514 if (cpu_has_fxsr)
458 return save_i387_fxsave(buf); 515 return save_i387_fxsave(fp);
459 else 516 else
460 return save_i387_fsave(buf); 517 return save_i387_fsave(fp);
461} 518}
462 519
463static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) 520static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
@@ -468,14 +525,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
468 sizeof(struct i387_fsave_struct)); 525 sizeof(struct i387_fsave_struct));
469} 526}
470 527
471static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) 528static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
529 unsigned int size)
472{ 530{
473 struct task_struct *tsk = current; 531 struct task_struct *tsk = current;
474 struct user_i387_ia32_struct env; 532 struct user_i387_ia32_struct env;
475 int err; 533 int err;
476 534
477 err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], 535 err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
478 sizeof(struct i387_fxsave_struct)); 536 size);
479 /* mxcsr reserved bits must be masked to zero for security reasons */ 537 /* mxcsr reserved bits must be masked to zero for security reasons */
480 tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; 538 tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
481 if (err || __copy_from_user(&env, buf, sizeof(env))) 539 if (err || __copy_from_user(&env, buf, sizeof(env)))
@@ -485,14 +543,69 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
485 return 0; 543 return 0;
486} 544}
487 545
488int restore_i387_ia32(struct _fpstate_ia32 __user *buf) 546static int restore_i387_xsave(void __user *buf)
547{
548 struct _fpx_sw_bytes fx_sw_user;
549 struct _fpstate_ia32 __user *fx_user =
550 ((struct _fpstate_ia32 __user *) buf);
551 struct i387_fxsave_struct __user *fx =
552 (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
553 struct xsave_hdr_struct *xsave_hdr =
554 &current->thread.xstate->xsave.xsave_hdr;
555 u64 mask;
556 int err;
557
558 if (check_for_xstate(fx, buf, &fx_sw_user))
559 goto fx_only;
560
561 mask = fx_sw_user.xstate_bv;
562
563 err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
564
565 xsave_hdr->xstate_bv &= pcntxt_mask;
566 /*
567 * These bits must be zero.
568 */
569 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
570
571 /*
572 * Init the state that is not present in the memory layout
573 * and enabled by the OS.
574 */
575 mask = ~(pcntxt_mask & ~mask);
576 xsave_hdr->xstate_bv &= mask;
577
578 return err;
579fx_only:
580 /*
581 * Couldn't find the extended state information in the memory
582 * layout. Restore the FP/SSE and init the other extended state
583 * enabled by the OS.
584 */
585 xsave_hdr->xstate_bv = XSTATE_FPSSE;
586 return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
587}
588
589int restore_i387_xstate_ia32(void __user *buf)
489{ 590{
490 int err; 591 int err;
491 struct task_struct *tsk = current; 592 struct task_struct *tsk = current;
593 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
492 594
493 if (HAVE_HWFP) 595 if (HAVE_HWFP)
494 clear_fpu(tsk); 596 clear_fpu(tsk);
495 597
598 if (!buf) {
599 if (used_math()) {
600 clear_fpu(tsk);
601 clear_used_math();
602 }
603
604 return 0;
605 } else
606 if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
607 return -EACCES;
608
496 if (!used_math()) { 609 if (!used_math()) {
497 err = init_fpu(tsk); 610 err = init_fpu(tsk);
498 if (err) 611 if (err)
@@ -500,14 +613,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
500 } 613 }
501 614
502 if (HAVE_HWFP) { 615 if (HAVE_HWFP) {
503 if (cpu_has_fxsr) 616 if (cpu_has_xsave)
504 err = restore_i387_fxsave(buf); 617 err = restore_i387_xsave(buf);
618 else if (cpu_has_fxsr)
619 err = restore_i387_fxsave(fp, sizeof(struct
620 i387_fxsave_struct));
505 else 621 else
506 err = restore_i387_fsave(buf); 622 err = restore_i387_fsave(fp);
507 } else { 623 } else {
508 err = fpregs_soft_set(current, NULL, 624 err = fpregs_soft_set(current, NULL,
509 0, sizeof(struct user_i387_ia32_struct), 625 0, sizeof(struct user_i387_ia32_struct),
510 NULL, buf) != 0; 626 NULL, fp) != 0;
511 } 627 }
512 set_used_math(); 628 set_used_math();
513 629
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d9204..4b8a53d841f7 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
282 282
283device_initcall(i8259A_init_sysfs); 283device_initcall(i8259A_init_sysfs);
284 284
285void mask_8259A(void)
286{
287 unsigned long flags;
288
289 spin_lock_irqsave(&i8259A_lock, flags);
290
291 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
292 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
293
294 spin_unlock_irqrestore(&i8259A_lock, flags);
295}
296
297void unmask_8259A(void)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&i8259A_lock, flags);
302
303 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
304 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
305
306 spin_unlock_irqrestore(&i8259A_lock, flags);
307}
308
285void init_8259A(int auto_eoi) 309void init_8259A(int auto_eoi)
286{ 310{
287 unsigned long flags; 311 unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 09cddb57bec4..e710289f673e 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -46,10 +46,13 @@
46#include <asm/nmi.h> 46#include <asm/nmi.h>
47#include <asm/msidef.h> 47#include <asm/msidef.h>
48#include <asm/hypertransport.h> 48#include <asm/hypertransport.h>
49#include <asm/setup.h>
49 50
50#include <mach_apic.h> 51#include <mach_apic.h>
51#include <mach_apicdef.h> 52#include <mach_apicdef.h>
52 53
54#define __apicdebuginit(type) static type __init
55
53int (*ioapic_renumber_irq)(int ioapic, int irq); 56int (*ioapic_renumber_irq)(int ioapic, int irq);
54atomic_t irq_mis_count; 57atomic_t irq_mis_count;
55 58
@@ -1341,7 +1344,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1341 ioapic_write_entry(apic, pin, entry); 1344 ioapic_write_entry(apic, pin, entry);
1342} 1345}
1343 1346
1344void __init print_IO_APIC(void) 1347
1348__apicdebuginit(void) print_IO_APIC(void)
1345{ 1349{
1346 int apic, i; 1350 int apic, i;
1347 union IO_APIC_reg_00 reg_00; 1351 union IO_APIC_reg_00 reg_00;
@@ -1456,9 +1460,7 @@ void __init print_IO_APIC(void)
1456 return; 1460 return;
1457} 1461}
1458 1462
1459#if 0 1463__apicdebuginit(void) print_APIC_bitfield(int base)
1460
1461static void print_APIC_bitfield(int base)
1462{ 1464{
1463 unsigned int v; 1465 unsigned int v;
1464 int i, j; 1466 int i, j;
@@ -1479,9 +1481,10 @@ static void print_APIC_bitfield(int base)
1479 } 1481 }
1480} 1482}
1481 1483
1482void /*__init*/ print_local_APIC(void *dummy) 1484__apicdebuginit(void) print_local_APIC(void *dummy)
1483{ 1485{
1484 unsigned int v, ver, maxlvt; 1486 unsigned int v, ver, maxlvt;
1487 u64 icr;
1485 1488
1486 if (apic_verbosity == APIC_QUIET) 1489 if (apic_verbosity == APIC_QUIET)
1487 return; 1490 return;
@@ -1490,7 +1493,7 @@ void /*__init*/ print_local_APIC(void *dummy)
1490 smp_processor_id(), hard_smp_processor_id()); 1493 smp_processor_id(), hard_smp_processor_id());
1491 v = apic_read(APIC_ID); 1494 v = apic_read(APIC_ID);
1492 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, 1495 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1493 GET_APIC_ID(read_apic_id())); 1496 GET_APIC_ID(v));
1494 v = apic_read(APIC_LVR); 1497 v = apic_read(APIC_LVR);
1495 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1498 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1496 ver = GET_APIC_VERSION(v); 1499 ver = GET_APIC_VERSION(v);
@@ -1532,10 +1535,9 @@ void /*__init*/ print_local_APIC(void *dummy)
1532 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1535 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1533 } 1536 }
1534 1537
1535 v = apic_read(APIC_ICR); 1538 icr = apic_icr_read();
1536 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1539 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1537 v = apic_read(APIC_ICR2); 1540 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1538 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1539 1541
1540 v = apic_read(APIC_LVTT); 1542 v = apic_read(APIC_LVTT);
1541 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1543 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1563,12 +1565,12 @@ void /*__init*/ print_local_APIC(void *dummy)
1563 printk("\n"); 1565 printk("\n");
1564} 1566}
1565 1567
1566void print_all_local_APICs(void) 1568__apicdebuginit(void) print_all_local_APICs(void)
1567{ 1569{
1568 on_each_cpu(print_local_APIC, NULL, 1); 1570 on_each_cpu(print_local_APIC, NULL, 1);
1569} 1571}
1570 1572
1571void /*__init*/ print_PIC(void) 1573__apicdebuginit(void) print_PIC(void)
1572{ 1574{
1573 unsigned int v; 1575 unsigned int v;
1574 unsigned long flags; 1576 unsigned long flags;
@@ -1600,7 +1602,17 @@ void /*__init*/ print_PIC(void)
1600 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 1602 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1601} 1603}
1602 1604
1603#endif /* 0 */ 1605__apicdebuginit(int) print_all_ICs(void)
1606{
1607 print_PIC();
1608 print_all_local_APICs();
1609 print_IO_APIC();
1610
1611 return 0;
1612}
1613
1614fs_initcall(print_all_ICs);
1615
1604 1616
1605static void __init enable_IO_APIC(void) 1617static void __init enable_IO_APIC(void)
1606{ 1618{
@@ -1698,8 +1710,7 @@ void disable_IO_APIC(void)
1698 entry.dest_mode = 0; /* Physical */ 1710 entry.dest_mode = 0; /* Physical */
1699 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1711 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1700 entry.vector = 0; 1712 entry.vector = 0;
1701 entry.dest.physical.physical_dest = 1713 entry.dest.physical.physical_dest = read_apic_id();
1702 GET_APIC_ID(read_apic_id());
1703 1714
1704 /* 1715 /*
1705 * Add it to the IO-APIC irq-routing table: 1716 * Add it to the IO-APIC irq-routing table:
@@ -1725,10 +1736,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
1725 unsigned char old_id; 1736 unsigned char old_id;
1726 unsigned long flags; 1737 unsigned long flags;
1727 1738
1728#ifdef CONFIG_X86_NUMAQ 1739 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1729 if (found_numaq)
1730 return; 1740 return;
1731#endif
1732 1741
1733 /* 1742 /*
1734 * Don't check I/O APIC IDs for xAPIC systems. They have 1743 * Don't check I/O APIC IDs for xAPIC systems. They have
@@ -2329,8 +2338,6 @@ void __init setup_IO_APIC(void)
2329 setup_IO_APIC_irqs(); 2338 setup_IO_APIC_irqs();
2330 init_IO_APIC_traps(); 2339 init_IO_APIC_traps();
2331 check_timer(); 2340 check_timer();
2332 if (!acpi_ioapic)
2333 print_IO_APIC();
2334} 2341}
2335 2342
2336/* 2343/*
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 61a83b70c18f..a1bec2969c6a 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
37#include <acpi/acpi_bus.h> 37#include <acpi/acpi_bus.h>
38#endif 38#endif
39#include <linux/bootmem.h> 39#include <linux/bootmem.h>
40#include <linux/dmar.h>
40 41
41#include <asm/idle.h> 42#include <asm/idle.h>
42#include <asm/io.h> 43#include <asm/io.h>
@@ -49,10 +50,13 @@
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50#include <asm/msidef.h> 51#include <asm/msidef.h>
51#include <asm/hypertransport.h> 52#include <asm/hypertransport.h>
53#include <asm/irq_remapping.h>
52 54
53#include <mach_ipi.h> 55#include <mach_ipi.h>
54#include <mach_apic.h> 56#include <mach_apic.h>
55 57
58#define __apicdebuginit(type) static type __init
59
56struct irq_cfg { 60struct irq_cfg {
57 cpumask_t domain; 61 cpumask_t domain;
58 cpumask_t old_domain; 62 cpumask_t old_domain;
@@ -87,8 +91,6 @@ int first_system_vector = 0xfe;
87 91
88char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; 92char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
89 93
90#define __apicdebuginit __init
91
92int sis_apic_bug; /* not actually supported, dummy for compile */ 94int sis_apic_bug; /* not actually supported, dummy for compile */
93 95
94static int no_timer_check; 96static int no_timer_check;
@@ -108,6 +110,9 @@ static DEFINE_SPINLOCK(vector_lock);
108 */ 110 */
109int nr_ioapic_registers[MAX_IO_APICS]; 111int nr_ioapic_registers[MAX_IO_APICS];
110 112
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
111/* I/O APIC entries */ 116/* I/O APIC entries */
112struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
113int nr_ioapics; 118int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
303 pin = entry->pin; 308 pin = entry->pin;
304 if (pin == -1) 309 if (pin == -1)
305 break; 310 break;
306 io_apic_write(apic, 0x11 + pin*2, dest); 311 /*
312 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry.
314 */
315 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest);
307 reg = io_apic_read(apic, 0x10 + pin*2); 317 reg = io_apic_read(apic, 0x10 + pin*2);
308 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 318 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
309 reg |= vector; 319 reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
440 clear_IO_APIC_pin(apic, pin); 450 clear_IO_APIC_pin(apic, pin);
441} 451}
442 452
453/*
454 * Saves and masks all the unmasked IO-APIC RTE's
455 */
456int save_mask_IO_APIC_setup(void)
457{
458 union IO_APIC_reg_01 reg_01;
459 unsigned long flags;
460 int apic, pin;
461
462 /*
463 * The number of IO-APIC IRQ registers (== #pins):
464 */
465 for (apic = 0; apic < nr_ioapics; apic++) {
466 spin_lock_irqsave(&ioapic_lock, flags);
467 reg_01.raw = io_apic_read(apic, 1);
468 spin_unlock_irqrestore(&ioapic_lock, flags);
469 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
470 }
471
472 for (apic = 0; apic < nr_ioapics; apic++) {
473 early_ioapic_entries[apic] =
474 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic])
477 return -ENOMEM;
478 }
479
480 for (apic = 0; apic < nr_ioapics; apic++)
481 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
482 struct IO_APIC_route_entry entry;
483
484 entry = early_ioapic_entries[apic][pin] =
485 ioapic_read_entry(apic, pin);
486 if (!entry.mask) {
487 entry.mask = 1;
488 ioapic_write_entry(apic, pin, entry);
489 }
490 }
491 return 0;
492}
493
494void restore_IO_APIC_setup(void)
495{
496 int apic, pin;
497
498 for (apic = 0; apic < nr_ioapics; apic++)
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]);
502}
503
504void reinit_intr_remapped_IO_APIC(int intr_remapping)
505{
506 /*
507 * for now plain restore of previous settings.
508 * TBD: In the case of OS enabling interrupt-remapping,
509 * IO-APIC RTE's need to be setup to point to interrupt-remapping
510 * table entries. for now, do a plain restore, and wait for
511 * the setup_IO_APIC_irqs() to do proper initialization.
512 */
513 restore_IO_APIC_setup();
514}
515
443int skip_ioapic_setup; 516int skip_ioapic_setup;
444int ioapic_force; 517int ioapic_force;
445 518
@@ -839,18 +912,98 @@ void __setup_vector_irq(int cpu)
839} 912}
840 913
841static struct irq_chip ioapic_chip; 914static struct irq_chip ioapic_chip;
915#ifdef CONFIG_INTR_REMAP
916static struct irq_chip ir_ioapic_chip;
917#endif
842 918
843static void ioapic_register_intr(int irq, unsigned long trigger) 919static void ioapic_register_intr(int irq, unsigned long trigger)
844{ 920{
845 if (trigger) { 921 if (trigger)
846 irq_desc[irq].status |= IRQ_LEVEL; 922 irq_desc[irq].status |= IRQ_LEVEL;
847 set_irq_chip_and_handler_name(irq, &ioapic_chip, 923 else
848 handle_fasteoi_irq, "fasteoi");
849 } else {
850 irq_desc[irq].status &= ~IRQ_LEVEL; 924 irq_desc[irq].status &= ~IRQ_LEVEL;
925
926#ifdef CONFIG_INTR_REMAP
927 if (irq_remapped(irq)) {
928 irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
929 if (trigger)
930 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
931 handle_fasteoi_irq,
932 "fasteoi");
933 else
934 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
935 handle_edge_irq, "edge");
936 return;
937 }
938#endif
939 if (trigger)
940 set_irq_chip_and_handler_name(irq, &ioapic_chip,
941 handle_fasteoi_irq,
942 "fasteoi");
943 else
851 set_irq_chip_and_handler_name(irq, &ioapic_chip, 944 set_irq_chip_and_handler_name(irq, &ioapic_chip,
852 handle_edge_irq, "edge"); 945 handle_edge_irq, "edge");
946}
947
948static int setup_ioapic_entry(int apic, int irq,
949 struct IO_APIC_route_entry *entry,
950 unsigned int destination, int trigger,
951 int polarity, int vector)
952{
953 /*
954 * add it to the IO-APIC irq-routing table:
955 */
956 memset(entry,0,sizeof(*entry));
957
958#ifdef CONFIG_INTR_REMAP
959 if (intr_remapping_enabled) {
960 struct intel_iommu *iommu = map_ioapic_to_ir(apic);
961 struct irte irte;
962 struct IR_IO_APIC_route_entry *ir_entry =
963 (struct IR_IO_APIC_route_entry *) entry;
964 int index;
965
966 if (!iommu)
967 panic("No mapping iommu for ioapic %d\n", apic);
968
969 index = alloc_irte(iommu, irq, 1);
970 if (index < 0)
971 panic("Failed to allocate IRTE for ioapic %d\n", apic);
972
973 memset(&irte, 0, sizeof(irte));
974
975 irte.present = 1;
976 irte.dst_mode = INT_DEST_MODE;
977 irte.trigger_mode = trigger;
978 irte.dlvry_mode = INT_DELIVERY_MODE;
979 irte.vector = vector;
980 irte.dest_id = IRTE_DEST(destination);
981
982 modify_irte(irq, &irte);
983
984 ir_entry->index2 = (index >> 15) & 0x1;
985 ir_entry->zero = 0;
986 ir_entry->format = 1;
987 ir_entry->index = (index & 0x7fff);
988 } else
989#endif
990 {
991 entry->delivery_mode = INT_DELIVERY_MODE;
992 entry->dest_mode = INT_DEST_MODE;
993 entry->dest = destination;
853 } 994 }
995
996 entry->mask = 0; /* enable IRQ */
997 entry->trigger = trigger;
998 entry->polarity = polarity;
999 entry->vector = vector;
1000
1001 /* Mask level triggered irqs.
1002 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1003 */
1004 if (trigger)
1005 entry->mask = 1;
1006 return 0;
854} 1007}
855 1008
856static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1009static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -875,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
875 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1028 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
876 irq, trigger, polarity); 1029 irq, trigger, polarity);
877 1030
878 /*
879 * add it to the IO-APIC irq-routing table:
880 */
881 memset(&entry,0,sizeof(entry));
882
883 entry.delivery_mode = INT_DELIVERY_MODE;
884 entry.dest_mode = INT_DEST_MODE;
885 entry.dest = cpu_mask_to_apicid(mask);
886 entry.mask = 0; /* enable IRQ */
887 entry.trigger = trigger;
888 entry.polarity = polarity;
889 entry.vector = cfg->vector;
890 1031
891 /* Mask level triggered irqs. 1032 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
892 * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 1033 cpu_mask_to_apicid(mask), trigger, polarity,
893 */ 1034 cfg->vector)) {
894 if (trigger) 1035 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
895 entry.mask = 1; 1036 mp_ioapics[apic].mp_apicid, pin);
1037 __clear_irq_vector(irq);
1038 return;
1039 }
896 1040
897 ioapic_register_intr(irq, trigger); 1041 ioapic_register_intr(irq, trigger);
898 if (irq < 16) 1042 if (irq < 16)
@@ -944,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
944{ 1088{
945 struct IO_APIC_route_entry entry; 1089 struct IO_APIC_route_entry entry;
946 1090
1091 if (intr_remapping_enabled)
1092 return;
1093
947 memset(&entry, 0, sizeof(entry)); 1094 memset(&entry, 0, sizeof(entry));
948 1095
949 /* 1096 /*
@@ -970,7 +1117,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
970 ioapic_write_entry(apic, pin, entry); 1117 ioapic_write_entry(apic, pin, entry);
971} 1118}
972 1119
973void __apicdebuginit print_IO_APIC(void) 1120
1121__apicdebuginit(void) print_IO_APIC(void)
974{ 1122{
975 int apic, i; 1123 int apic, i;
976 union IO_APIC_reg_00 reg_00; 1124 union IO_APIC_reg_00 reg_00;
@@ -1064,9 +1212,7 @@ void __apicdebuginit print_IO_APIC(void)
1064 return; 1212 return;
1065} 1213}
1066 1214
1067#if 0 1215__apicdebuginit(void) print_APIC_bitfield(int base)
1068
1069static __apicdebuginit void print_APIC_bitfield (int base)
1070{ 1216{
1071 unsigned int v; 1217 unsigned int v;
1072 int i, j; 1218 int i, j;
@@ -1087,9 +1233,10 @@ static __apicdebuginit void print_APIC_bitfield (int base)
1087 } 1233 }
1088} 1234}
1089 1235
1090void __apicdebuginit print_local_APIC(void * dummy) 1236__apicdebuginit(void) print_local_APIC(void *dummy)
1091{ 1237{
1092 unsigned int v, ver, maxlvt; 1238 unsigned int v, ver, maxlvt;
1239 unsigned long icr;
1093 1240
1094 if (apic_verbosity == APIC_QUIET) 1241 if (apic_verbosity == APIC_QUIET)
1095 return; 1242 return;
@@ -1097,7 +1244,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1097 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1244 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1098 smp_processor_id(), hard_smp_processor_id()); 1245 smp_processor_id(), hard_smp_processor_id());
1099 v = apic_read(APIC_ID); 1246 v = apic_read(APIC_ID);
1100 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); 1247 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
1101 v = apic_read(APIC_LVR); 1248 v = apic_read(APIC_LVR);
1102 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1249 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1103 ver = GET_APIC_VERSION(v); 1250 ver = GET_APIC_VERSION(v);
@@ -1133,10 +1280,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
1133 v = apic_read(APIC_ESR); 1280 v = apic_read(APIC_ESR);
1134 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1281 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1135 1282
1136 v = apic_read(APIC_ICR); 1283 icr = apic_icr_read();
1137 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1284 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1138 v = apic_read(APIC_ICR2); 1285 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1139 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1140 1286
1141 v = apic_read(APIC_LVTT); 1287 v = apic_read(APIC_LVTT);
1142 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1288 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1164,12 +1310,12 @@ void __apicdebuginit print_local_APIC(void * dummy)
1164 printk("\n"); 1310 printk("\n");
1165} 1311}
1166 1312
1167void print_all_local_APICs (void) 1313__apicdebuginit(void) print_all_local_APICs(void)
1168{ 1314{
1169 on_each_cpu(print_local_APIC, NULL, 1); 1315 on_each_cpu(print_local_APIC, NULL, 1);
1170} 1316}
1171 1317
1172void __apicdebuginit print_PIC(void) 1318__apicdebuginit(void) print_PIC(void)
1173{ 1319{
1174 unsigned int v; 1320 unsigned int v;
1175 unsigned long flags; 1321 unsigned long flags;
@@ -1201,7 +1347,17 @@ void __apicdebuginit print_PIC(void)
1201 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 1347 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1202} 1348}
1203 1349
1204#endif /* 0 */ 1350__apicdebuginit(int) print_all_ICs(void)
1351{
1352 print_PIC();
1353 print_all_local_APICs();
1354 print_IO_APIC();
1355
1356 return 0;
1357}
1358
1359fs_initcall(print_all_ICs);
1360
1205 1361
1206void __init enable_IO_APIC(void) 1362void __init enable_IO_APIC(void)
1207{ 1363{
@@ -1291,7 +1447,7 @@ void disable_IO_APIC(void)
1291 entry.dest_mode = 0; /* Physical */ 1447 entry.dest_mode = 0; /* Physical */
1292 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1448 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1293 entry.vector = 0; 1449 entry.vector = 0;
1294 entry.dest = GET_APIC_ID(read_apic_id()); 1450 entry.dest = read_apic_id();
1295 1451
1296 /* 1452 /*
1297 * Add it to the IO-APIC irq-routing table: 1453 * Add it to the IO-APIC irq-routing table:
@@ -1397,6 +1553,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
1397 */ 1553 */
1398 1554
1399#ifdef CONFIG_SMP 1555#ifdef CONFIG_SMP
1556
1557#ifdef CONFIG_INTR_REMAP
1558static void ir_irq_migration(struct work_struct *work);
1559
1560static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1561
1562/*
1563 * Migrate the IO-APIC irq in the presence of intr-remapping.
1564 *
1565 * For edge triggered, irq migration is a simple atomic update(of vector
1566 * and cpu destination) of IRTE and flush the hardware cache.
1567 *
1568 * For level triggered, we need to modify the io-apic RTE as well with the updated
1569 * vector information, along with modifying the IRTE with vector and destination.
1570 * So irq migration for level triggered is a little bit more complex compared to
1571 * edge triggered migration. But the good news is, we use the same algorithm
1572 * for level triggered migration as we have today, the only difference being that
1573 * we now initiate the irq migration from process context instead of the
1574 * interrupt context.
1575 *
1576 * In future, when we do a directed EOI (combined with cpu EOI broadcast
1577 * suppression) to the IO-APIC, level triggered irq migration will also be
1578 * as simple as edge triggered migration and we can do the irq migration
1579 * with a simple atomic update to IO-APIC RTE.
1580 */
1581static void migrate_ioapic_irq(int irq, cpumask_t mask)
1582{
1583 struct irq_cfg *cfg = irq_cfg + irq;
1584 struct irq_desc *desc = irq_desc + irq;
1585 cpumask_t tmp, cleanup_mask;
1586 struct irte irte;
1587 int modify_ioapic_rte = desc->status & IRQ_LEVEL;
1588 unsigned int dest;
1589 unsigned long flags;
1590
1591 cpus_and(tmp, mask, cpu_online_map);
1592 if (cpus_empty(tmp))
1593 return;
1594
1595 if (get_irte(irq, &irte))
1596 return;
1597
1598 if (assign_irq_vector(irq, mask))
1599 return;
1600
1601 cpus_and(tmp, cfg->domain, mask);
1602 dest = cpu_mask_to_apicid(tmp);
1603
1604 if (modify_ioapic_rte) {
1605 spin_lock_irqsave(&ioapic_lock, flags);
1606 __target_IO_APIC_irq(irq, dest, cfg->vector);
1607 spin_unlock_irqrestore(&ioapic_lock, flags);
1608 }
1609
1610 irte.vector = cfg->vector;
1611 irte.dest_id = IRTE_DEST(dest);
1612
1613 /*
1614 * Modify the IRTE and flush the interrupt entry cache.
1615 */
1616 modify_irte(irq, &irte);
1617
1618 if (cfg->move_in_progress) {
1619 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1620 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1621 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1622 cfg->move_in_progress = 0;
1623 }
1624
1625 irq_desc[irq].affinity = mask;
1626}
1627
1628static int migrate_irq_remapped_level(int irq)
1629{
1630 int ret = -1;
1631
1632 mask_IO_APIC_irq(irq);
1633
1634 if (io_apic_level_ack_pending(irq)) {
1635 /*
1636 * Interrupt in progress. Migrating irq now will change the
1637 * vector information in the IO-APIC RTE and that will confuse
1638 * the EOI broadcast performed by cpu.
1639 * So, delay the irq migration to the next instance.
1640 */
1641 schedule_delayed_work(&ir_migration_work, 1);
1642 goto unmask;
1643 }
1644
1645 /* everything is clear. we have right of way */
1646 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
1647
1648 ret = 0;
1649 irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
1650 cpus_clear(irq_desc[irq].pending_mask);
1651
1652unmask:
1653 unmask_IO_APIC_irq(irq);
1654 return ret;
1655}
1656
1657static void ir_irq_migration(struct work_struct *work)
1658{
1659 int irq;
1660
1661 for (irq = 0; irq < NR_IRQS; irq++) {
1662 struct irq_desc *desc = irq_desc + irq;
1663 if (desc->status & IRQ_MOVE_PENDING) {
1664 unsigned long flags;
1665
1666 spin_lock_irqsave(&desc->lock, flags);
1667 if (!desc->chip->set_affinity ||
1668 !(desc->status & IRQ_MOVE_PENDING)) {
1669 desc->status &= ~IRQ_MOVE_PENDING;
1670 spin_unlock_irqrestore(&desc->lock, flags);
1671 continue;
1672 }
1673
1674 desc->chip->set_affinity(irq,
1675 irq_desc[irq].pending_mask);
1676 spin_unlock_irqrestore(&desc->lock, flags);
1677 }
1678 }
1679}
1680
1681/*
1682 * Migrates the IRQ destination in the process context.
1683 */
1684static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1685{
1686 if (irq_desc[irq].status & IRQ_LEVEL) {
1687 irq_desc[irq].status |= IRQ_MOVE_PENDING;
1688 irq_desc[irq].pending_mask = mask;
1689 migrate_irq_remapped_level(irq);
1690 return;
1691 }
1692
1693 migrate_ioapic_irq(irq, mask);
1694}
1695#endif
1696
1400asmlinkage void smp_irq_move_cleanup_interrupt(void) 1697asmlinkage void smp_irq_move_cleanup_interrupt(void)
1401{ 1698{
1402 unsigned vector, me; 1699 unsigned vector, me;
@@ -1453,6 +1750,17 @@ static void irq_complete_move(unsigned int irq)
1453#else 1750#else
1454static inline void irq_complete_move(unsigned int irq) {} 1751static inline void irq_complete_move(unsigned int irq) {}
1455#endif 1752#endif
1753#ifdef CONFIG_INTR_REMAP
1754static void ack_x2apic_level(unsigned int irq)
1755{
1756 ack_x2APIC_irq();
1757}
1758
1759static void ack_x2apic_edge(unsigned int irq)
1760{
1761 ack_x2APIC_irq();
1762}
1763#endif
1456 1764
1457static void ack_apic_edge(unsigned int irq) 1765static void ack_apic_edge(unsigned int irq)
1458{ 1766{
@@ -1527,6 +1835,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
1527 .retrigger = ioapic_retrigger_irq, 1835 .retrigger = ioapic_retrigger_irq,
1528}; 1836};
1529 1837
1838#ifdef CONFIG_INTR_REMAP
1839static struct irq_chip ir_ioapic_chip __read_mostly = {
1840 .name = "IR-IO-APIC",
1841 .startup = startup_ioapic_irq,
1842 .mask = mask_IO_APIC_irq,
1843 .unmask = unmask_IO_APIC_irq,
1844 .ack = ack_x2apic_edge,
1845 .eoi = ack_x2apic_level,
1846#ifdef CONFIG_SMP
1847 .set_affinity = set_ir_ioapic_affinity_irq,
1848#endif
1849 .retrigger = ioapic_retrigger_irq,
1850};
1851#endif
1852
1530static inline void init_IO_APIC_traps(void) 1853static inline void init_IO_APIC_traps(void)
1531{ 1854{
1532 int irq; 1855 int irq;
@@ -1712,6 +2035,8 @@ static inline void __init check_timer(void)
1712 * 8259A. 2035 * 8259A.
1713 */ 2036 */
1714 if (pin1 == -1) { 2037 if (pin1 == -1) {
2038 if (intr_remapping_enabled)
2039 panic("BIOS bug: timer not connected to IO-APIC");
1715 pin1 = pin2; 2040 pin1 = pin2;
1716 apic1 = apic2; 2041 apic1 = apic2;
1717 no_pin1 = 1; 2042 no_pin1 = 1;
@@ -1738,6 +2063,8 @@ static inline void __init check_timer(void)
1738 clear_IO_APIC_pin(0, pin1); 2063 clear_IO_APIC_pin(0, pin1);
1739 goto out; 2064 goto out;
1740 } 2065 }
2066 if (intr_remapping_enabled)
2067 panic("timer doesn't work through Interrupt-remapped IO-APIC");
1741 clear_IO_APIC_pin(apic1, pin1); 2068 clear_IO_APIC_pin(apic1, pin1);
1742 if (!no_pin1) 2069 if (!no_pin1)
1743 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2070 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1854,8 +2181,6 @@ void __init setup_IO_APIC(void)
1854 setup_IO_APIC_irqs(); 2181 setup_IO_APIC_irqs();
1855 init_IO_APIC_traps(); 2182 init_IO_APIC_traps();
1856 check_timer(); 2183 check_timer();
1857 if (!acpi_ioapic)
1858 print_IO_APIC();
1859} 2184}
1860 2185
1861struct sysfs_ioapic_data { 2186struct sysfs_ioapic_data {
@@ -1977,6 +2302,9 @@ void destroy_irq(unsigned int irq)
1977 2302
1978 dynamic_irq_cleanup(irq); 2303 dynamic_irq_cleanup(irq);
1979 2304
2305#ifdef CONFIG_INTR_REMAP
2306 free_irte(irq);
2307#endif
1980 spin_lock_irqsave(&vector_lock, flags); 2308 spin_lock_irqsave(&vector_lock, flags);
1981 __clear_irq_vector(irq); 2309 __clear_irq_vector(irq);
1982 spin_unlock_irqrestore(&vector_lock, flags); 2310 spin_unlock_irqrestore(&vector_lock, flags);
@@ -1995,11 +2323,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1995 2323
1996 tmp = TARGET_CPUS; 2324 tmp = TARGET_CPUS;
1997 err = assign_irq_vector(irq, tmp); 2325 err = assign_irq_vector(irq, tmp);
1998 if (!err) { 2326 if (err)
1999 cpus_and(tmp, cfg->domain, tmp); 2327 return err;
2000 dest = cpu_mask_to_apicid(tmp); 2328
2329 cpus_and(tmp, cfg->domain, tmp);
2330 dest = cpu_mask_to_apicid(tmp);
2331
2332#ifdef CONFIG_INTR_REMAP
2333 if (irq_remapped(irq)) {
2334 struct irte irte;
2335 int ir_index;
2336 u16 sub_handle;
2337
2338 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2339 BUG_ON(ir_index == -1);
2340
2341 memset (&irte, 0, sizeof(irte));
2342
2343 irte.present = 1;
2344 irte.dst_mode = INT_DEST_MODE;
2345 irte.trigger_mode = 0; /* edge */
2346 irte.dlvry_mode = INT_DELIVERY_MODE;
2347 irte.vector = cfg->vector;
2348 irte.dest_id = IRTE_DEST(dest);
2349
2350 modify_irte(irq, &irte);
2001 2351
2002 msg->address_hi = MSI_ADDR_BASE_HI; 2352 msg->address_hi = MSI_ADDR_BASE_HI;
2353 msg->data = sub_handle;
2354 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2355 MSI_ADDR_IR_SHV |
2356 MSI_ADDR_IR_INDEX1(ir_index) |
2357 MSI_ADDR_IR_INDEX2(ir_index);
2358 } else
2359#endif
2360 {
2361 msg->address_hi = MSI_ADDR_BASE_HI;
2003 msg->address_lo = 2362 msg->address_lo =
2004 MSI_ADDR_BASE_LO | 2363 MSI_ADDR_BASE_LO |
2005 ((INT_DEST_MODE == 0) ? 2364 ((INT_DEST_MODE == 0) ?
@@ -2049,6 +2408,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2049 write_msi_msg(irq, &msg); 2408 write_msi_msg(irq, &msg);
2050 irq_desc[irq].affinity = mask; 2409 irq_desc[irq].affinity = mask;
2051} 2410}
2411
2412#ifdef CONFIG_INTR_REMAP
2413/*
2414 * Migrate the MSI irq to another cpumask. This migration is
2415 * done in the process context using interrupt-remapping hardware.
2416 */
2417static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2418{
2419 struct irq_cfg *cfg = irq_cfg + irq;
2420 unsigned int dest;
2421 cpumask_t tmp, cleanup_mask;
2422 struct irte irte;
2423
2424 cpus_and(tmp, mask, cpu_online_map);
2425 if (cpus_empty(tmp))
2426 return;
2427
2428 if (get_irte(irq, &irte))
2429 return;
2430
2431 if (assign_irq_vector(irq, mask))
2432 return;
2433
2434 cpus_and(tmp, cfg->domain, mask);
2435 dest = cpu_mask_to_apicid(tmp);
2436
2437 irte.vector = cfg->vector;
2438 irte.dest_id = IRTE_DEST(dest);
2439
2440 /*
2441 * atomically update the IRTE with the new destination and vector.
2442 */
2443 modify_irte(irq, &irte);
2444
2445 /*
2446 * After this point, all the interrupts will start arriving
2447 * at the new destination. So, time to clean up the previous
2448 * vector allocation.
2449 */
2450 if (cfg->move_in_progress) {
2451 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2452 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2453 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2454 cfg->move_in_progress = 0;
2455 }
2456
2457 irq_desc[irq].affinity = mask;
2458}
2459#endif
2052#endif /* CONFIG_SMP */ 2460#endif /* CONFIG_SMP */
2053 2461
2054/* 2462/*
@@ -2066,26 +2474,157 @@ static struct irq_chip msi_chip = {
2066 .retrigger = ioapic_retrigger_irq, 2474 .retrigger = ioapic_retrigger_irq,
2067}; 2475};
2068 2476
2069int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 2477#ifdef CONFIG_INTR_REMAP
2478static struct irq_chip msi_ir_chip = {
2479 .name = "IR-PCI-MSI",
2480 .unmask = unmask_msi_irq,
2481 .mask = mask_msi_irq,
2482 .ack = ack_x2apic_edge,
2483#ifdef CONFIG_SMP
2484 .set_affinity = ir_set_msi_irq_affinity,
2485#endif
2486 .retrigger = ioapic_retrigger_irq,
2487};
2488
2489/*
2490 * Map the PCI dev to the corresponding remapping hardware unit
2491 * and allocate 'nvec' consecutive interrupt-remapping table entries
2492 * in it.
2493 */
2494static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2070{ 2495{
2496 struct intel_iommu *iommu;
2497 int index;
2498
2499 iommu = map_dev_to_ir(dev);
2500 if (!iommu) {
2501 printk(KERN_ERR
2502 "Unable to map PCI %s to iommu\n", pci_name(dev));
2503 return -ENOENT;
2504 }
2505
2506 index = alloc_irte(iommu, irq, nvec);
2507 if (index < 0) {
2508 printk(KERN_ERR
2509 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2510 pci_name(dev));
2511 return -ENOSPC;
2512 }
2513 return index;
2514}
2515#endif
2516
2517static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2518{
2519 int ret;
2071 struct msi_msg msg; 2520 struct msi_msg msg;
2521
2522 ret = msi_compose_msg(dev, irq, &msg);
2523 if (ret < 0)
2524 return ret;
2525
2526 set_irq_msi(irq, desc);
2527 write_msi_msg(irq, &msg);
2528
2529#ifdef CONFIG_INTR_REMAP
2530 if (irq_remapped(irq)) {
2531 struct irq_desc *desc = irq_desc + irq;
2532 /*
2533 * irq migration in process context
2534 */
2535 desc->status |= IRQ_MOVE_PCNTXT;
2536 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2537 } else
2538#endif
2539 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2540
2541 return 0;
2542}
2543
2544int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2545{
2072 int irq, ret; 2546 int irq, ret;
2547
2073 irq = create_irq(); 2548 irq = create_irq();
2074 if (irq < 0) 2549 if (irq < 0)
2075 return irq; 2550 return irq;
2076 2551
2077 ret = msi_compose_msg(dev, irq, &msg); 2552#ifdef CONFIG_INTR_REMAP
2553 if (!intr_remapping_enabled)
2554 goto no_ir;
2555
2556 ret = msi_alloc_irte(dev, irq, 1);
2557 if (ret < 0)
2558 goto error;
2559no_ir:
2560#endif
2561 ret = setup_msi_irq(dev, desc, irq);
2078 if (ret < 0) { 2562 if (ret < 0) {
2079 destroy_irq(irq); 2563 destroy_irq(irq);
2080 return ret; 2564 return ret;
2081 } 2565 }
2566 return 0;
2082 2567
2083 set_irq_msi(irq, desc); 2568#ifdef CONFIG_INTR_REMAP
2084 write_msi_msg(irq, &msg); 2569error:
2570 destroy_irq(irq);
2571 return ret;
2572#endif
2573}
2085 2574
2086 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 2575int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2576{
2577 int irq, ret, sub_handle;
2578 struct msi_desc *desc;
2579#ifdef CONFIG_INTR_REMAP
2580 struct intel_iommu *iommu = 0;
2581 int index = 0;
2582#endif
2583
2584 sub_handle = 0;
2585 list_for_each_entry(desc, &dev->msi_list, list) {
2586 irq = create_irq();
2587 if (irq < 0)
2588 return irq;
2589#ifdef CONFIG_INTR_REMAP
2590 if (!intr_remapping_enabled)
2591 goto no_ir;
2087 2592
2593 if (!sub_handle) {
2594 /*
2595 * allocate the consecutive block of IRTEs
2596 * for 'nvec'
2597 */
2598 index = msi_alloc_irte(dev, irq, nvec);
2599 if (index < 0) {
2600 ret = index;
2601 goto error;
2602 }
2603 } else {
2604 iommu = map_dev_to_ir(dev);
2605 if (!iommu) {
2606 ret = -ENOENT;
2607 goto error;
2608 }
2609 /*
2610 * setup the mapping between the irq and the IRTE
2611 * base index, the sub_handle pointing to the
2612 * appropriate interrupt remap table entry.
2613 */
2614 set_irte_irq(irq, iommu, index, sub_handle);
2615 }
2616no_ir:
2617#endif
2618 ret = setup_msi_irq(dev, desc, irq);
2619 if (ret < 0)
2620 goto error;
2621 sub_handle++;
2622 }
2088 return 0; 2623 return 0;
2624
2625error:
2626 destroy_irq(irq);
2627 return ret;
2089} 2628}
2090 2629
2091void arch_teardown_msi_irq(unsigned int irq) 2630void arch_teardown_msi_irq(unsigned int irq)
@@ -2333,6 +2872,10 @@ void __init setup_ioapic_dest(void)
2333 setup_IO_APIC_irq(ioapic, pin, irq, 2872 setup_IO_APIC_irq(ioapic, pin, irq,
2334 irq_trigger(irq_entry), 2873 irq_trigger(irq_entry),
2335 irq_polarity(irq_entry)); 2874 irq_polarity(irq_entry));
2875#ifdef CONFIG_INTR_REMAP
2876 else if (intr_remapping_enabled)
2877 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
2878#endif
2336 else 2879 else
2337 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2880 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2338 } 2881 }
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index d66914287ee1..9200a1e2752d 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -74,6 +74,15 @@ void __init init_ISA_irqs (void)
74 } 74 }
75} 75}
76 76
77/*
78 * IRQ2 is cascade interrupt to second interrupt controller
79 */
80static struct irqaction irq2 = {
81 .handler = no_action,
82 .mask = CPU_MASK_NONE,
83 .name = "cascade",
84};
85
77/* Overridden in paravirt.c */ 86/* Overridden in paravirt.c */
78void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 87void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
79 88
@@ -98,6 +107,46 @@ void __init native_init_IRQ(void)
98 set_intr_gate(vector, interrupt[i]); 107 set_intr_gate(vector, interrupt[i]);
99 } 108 }
100 109
110#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
111 /*
112 * IRQ0 must be given a fixed assignment and initialized,
113 * because it's used before the IO-APIC is set up.
114 */
115 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
116
117 /*
118 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
119 * IPI, driven by wakeup.
120 */
121 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
122
123 /* IPI for invalidation */
124 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
125
126 /* IPI for generic function call */
127 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
128
129 /* IPI for single call function */
130 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
131#endif
132
133#ifdef CONFIG_X86_LOCAL_APIC
134 /* self generated IPI for local APIC timer */
135 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
136
137 /* IPI vectors for APIC spurious and error interrupts */
138 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
139 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
140#endif
141
142#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
143 /* thermal monitor LVT interrupt */
144 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
145#endif
146
147 if (!acpi_ioapic)
148 setup_irq(2, &irq2);
149
101 /* setup after call gates are initialised (usually add in 150 /* setup after call gates are initialised (usually add in
102 * the architecture specific gates) 151 * the architecture specific gates)
103 */ 152 */
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b3fb430725cb..f98f4e1dba09 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
397 generic_bigsmp_probe(); 397 generic_bigsmp_probe();
398#endif 398#endif
399 399
400#ifdef CONFIG_X86_32
400 setup_apic_routing(); 401 setup_apic_routing();
402#endif
401 if (!num_processors) 403 if (!num_processors)
402 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 404 printk(KERN_ERR "MPTABLE: no processors registered!\n");
403 return num_processors; 405 return num_processors;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index eecc8c18f010..4caff39078e0 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
229 } 229 }
230} 230}
231 231
232static int __init numaq_setup_ioapic_ids(void)
233{
234 /* so can skip it */
235 return 1;
236}
237
232static struct x86_quirks numaq_x86_quirks __initdata = { 238static struct x86_quirks numaq_x86_quirks __initdata = {
233 .arch_pre_time_init = numaq_pre_time_init, 239 .arch_pre_time_init = numaq_pre_time_init,
234 .arch_time_init = NULL, 240 .arch_time_init = NULL,
@@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
243 .mpc_oem_bus_info = mpc_oem_bus_info, 249 .mpc_oem_bus_info = mpc_oem_bus_info,
244 .mpc_oem_pci_bus = mpc_oem_pci_bus, 250 .mpc_oem_pci_bus = mpc_oem_pci_bus,
245 .smp_read_mpc_oem = smp_read_mpc_oem, 251 .smp_read_mpc_oem = smp_read_mpc_oem,
252 .setup_ioapic_ids = numaq_setup_ioapic_ids,
246}; 253};
247 254
248void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, 255void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e2f43768723a..6b0bb73998dd 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -374,8 +374,6 @@ struct pv_cpu_ops pv_cpu_ops = {
374 374
375struct pv_apic_ops pv_apic_ops = { 375struct pv_apic_ops pv_apic_ops = {
376#ifdef CONFIG_X86_LOCAL_APIC 376#ifdef CONFIG_X86_LOCAL_APIC
377 .apic_write = native_apic_write,
378 .apic_read = native_apic_read,
379 .setup_boot_clock = setup_boot_APIC_clock, 377 .setup_boot_clock = setup_boot_APIC_clock,
380 .setup_secondary_clock = setup_secondary_APIC_clock, 378 .setup_secondary_clock = setup_secondary_APIC_clock,
381 .startup_ipi_hook = paravirt_nop, 379 .startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ec7a2ba9bce8..c622772744d8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -15,7 +15,6 @@ unsigned long idle_nomwait;
15EXPORT_SYMBOL(idle_nomwait); 15EXPORT_SYMBOL(idle_nomwait);
16 16
17struct kmem_cache *task_xstate_cachep; 17struct kmem_cache *task_xstate_cachep;
18static int force_mwait __cpuinitdata;
19 18
20int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 19int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
21{ 20{
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 141efab52400..46c98efbbf8d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -758,6 +758,8 @@ void __init setup_arch(char **cmdline_p)
758#else 758#else
759 num_physpages = max_pfn; 759 num_physpages = max_pfn;
760 760
761 if (cpu_has_x2apic)
762 check_x2apic();
761 763
762 /* How many end-of-memory variables you have, grandma! */ 764 /* How many end-of-memory variables you have, grandma! */
763 /* need this before calling reserve_initrd */ 765 /* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 8b4956e800ac..cc673aa55ce4 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -3,9 +3,18 @@ struct sigframe {
3 char __user *pretcode; 3 char __user *pretcode;
4 int sig; 4 int sig;
5 struct sigcontext sc; 5 struct sigcontext sc;
6 struct _fpstate fpstate; 6 /*
7 * fpstate is unused. fpstate is moved/allocated after
8 * retcode[] below. This movement allows the FP state and the
9 * future state extensions (xsave) to stay together.
10 * At the same time, retaining the unused fpstate prevents changing
11 * the offset of extramask[] in the sigframe and thus prevent any
12 * legacy application accessing/modifying it.
13 */
14 struct _fpstate fpstate_unused;
7 unsigned long extramask[_NSIG_WORDS-1]; 15 unsigned long extramask[_NSIG_WORDS-1];
8 char retcode[8]; 16 char retcode[8];
17 /* fp state follows here */
9}; 18};
10 19
11struct rt_sigframe { 20struct rt_sigframe {
@@ -15,14 +24,15 @@ struct rt_sigframe {
15 void __user *puc; 24 void __user *puc;
16 struct siginfo info; 25 struct siginfo info;
17 struct ucontext uc; 26 struct ucontext uc;
18 struct _fpstate fpstate;
19 char retcode[8]; 27 char retcode[8];
28 /* fp state follows here */
20}; 29};
21#else 30#else
22struct rt_sigframe { 31struct rt_sigframe {
23 char __user *pretcode; 32 char __user *pretcode;
24 struct ucontext uc; 33 struct ucontext uc;
25 struct siginfo info; 34 struct siginfo info;
35 /* fp state follows here */
26}; 36};
27 37
28int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 38int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 2a2435d3037d..b21070ea33a4 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -161,28 +161,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
161 } 161 }
162 162
163 { 163 {
164 struct _fpstate __user *buf; 164 void __user *buf;
165 165
166 err |= __get_user(buf, &sc->fpstate); 166 err |= __get_user(buf, &sc->fpstate);
167 if (buf) { 167 err |= restore_i387_xstate(buf);
168 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
169 goto badframe;
170 err |= restore_i387(buf);
171 } else {
172 struct task_struct *me = current;
173
174 if (used_math()) {
175 clear_fpu(me);
176 clear_used_math();
177 }
178 }
179 } 168 }
180 169
181 err |= __get_user(*pax, &sc->ax); 170 err |= __get_user(*pax, &sc->ax);
182 return err; 171 return err;
183
184badframe:
185 return 1;
186} 172}
187 173
188asmlinkage unsigned long sys_sigreturn(unsigned long __unused) 174asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
@@ -264,7 +250,7 @@ badframe:
264 * Set up a signal frame. 250 * Set up a signal frame.
265 */ 251 */
266static int 252static int
267setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, 253setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
268 struct pt_regs *regs, unsigned long mask) 254 struct pt_regs *regs, unsigned long mask)
269{ 255{
270 int tmp, err = 0; 256 int tmp, err = 0;
@@ -291,7 +277,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
291 err |= __put_user(regs->sp, &sc->sp_at_signal); 277 err |= __put_user(regs->sp, &sc->sp_at_signal);
292 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); 278 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
293 279
294 tmp = save_i387(fpstate); 280 tmp = save_i387_xstate(fpstate);
295 if (tmp < 0) 281 if (tmp < 0)
296 err = 1; 282 err = 1;
297 else 283 else
@@ -308,7 +294,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
308 * Determine which stack to use.. 294 * Determine which stack to use..
309 */ 295 */
310static inline void __user * 296static inline void __user *
311get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) 297get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
298 void **fpstate)
312{ 299{
313 unsigned long sp; 300 unsigned long sp;
314 301
@@ -334,6 +321,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
334 sp = (unsigned long) ka->sa.sa_restorer; 321 sp = (unsigned long) ka->sa.sa_restorer;
335 } 322 }
336 323
324 if (used_math()) {
325 sp = sp - sig_xstate_size;
326 *fpstate = (struct _fpstate *) sp;
327 }
328
337 sp -= frame_size; 329 sp -= frame_size;
338 /* 330 /*
339 * Align the stack pointer according to the i386 ABI, 331 * Align the stack pointer according to the i386 ABI,
@@ -352,8 +344,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
352 void __user *restorer; 344 void __user *restorer;
353 int err = 0; 345 int err = 0;
354 int usig; 346 int usig;
347 void __user *fpstate = NULL;
355 348
356 frame = get_sigframe(ka, regs, sizeof(*frame)); 349 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
357 350
358 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 351 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
359 goto give_sigsegv; 352 goto give_sigsegv;
@@ -368,7 +361,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
368 if (err) 361 if (err)
369 goto give_sigsegv; 362 goto give_sigsegv;
370 363
371 err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); 364 err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
372 if (err) 365 if (err)
373 goto give_sigsegv; 366 goto give_sigsegv;
374 367
@@ -429,8 +422,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
429 void __user *restorer; 422 void __user *restorer;
430 int err = 0; 423 int err = 0;
431 int usig; 424 int usig;
425 void __user *fpstate = NULL;
432 426
433 frame = get_sigframe(ka, regs, sizeof(*frame)); 427 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
434 428
435 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 429 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
436 goto give_sigsegv; 430 goto give_sigsegv;
@@ -449,13 +443,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
449 goto give_sigsegv; 443 goto give_sigsegv;
450 444
451 /* Create the ucontext. */ 445 /* Create the ucontext. */
452 err |= __put_user(0, &frame->uc.uc_flags); 446 if (cpu_has_xsave)
447 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
448 else
449 err |= __put_user(0, &frame->uc.uc_flags);
453 err |= __put_user(0, &frame->uc.uc_link); 450 err |= __put_user(0, &frame->uc.uc_link);
454 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 451 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
455 err |= __put_user(sas_ss_flags(regs->sp), 452 err |= __put_user(sas_ss_flags(regs->sp),
456 &frame->uc.uc_stack.ss_flags); 453 &frame->uc.uc_stack.ss_flags);
457 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 454 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
458 err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, 455 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
459 regs, set->sig[0]); 456 regs, set->sig[0]);
460 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 457 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
461 if (err) 458 if (err)
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 694aa888bb19..823a55bf8c39 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -53,69 +53,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
53} 53}
54 54
55/* 55/*
56 * Signal frame handlers.
57 */
58
59static inline int save_i387(struct _fpstate __user *buf)
60{
61 struct task_struct *tsk = current;
62 int err = 0;
63
64 BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
65 sizeof(tsk->thread.xstate->fxsave));
66
67 if ((unsigned long)buf % 16)
68 printk("save_i387: bad fpstate %p\n", buf);
69
70 if (!used_math())
71 return 0;
72 clear_used_math(); /* trigger finit */
73 if (task_thread_info(tsk)->status & TS_USEDFPU) {
74 err = save_i387_checking((struct i387_fxsave_struct __user *)
75 buf);
76 if (err)
77 return err;
78 task_thread_info(tsk)->status &= ~TS_USEDFPU;
79 stts();
80 } else {
81 if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
82 sizeof(struct i387_fxsave_struct)))
83 return -1;
84 }
85 return 1;
86}
87
88/*
89 * This restores directly out of user space. Exceptions are handled.
90 */
91static inline int restore_i387(struct _fpstate __user *buf)
92{
93 struct task_struct *tsk = current;
94 int err;
95
96 if (!used_math()) {
97 err = init_fpu(tsk);
98 if (err)
99 return err;
100 }
101
102 if (!(task_thread_info(current)->status & TS_USEDFPU)) {
103 clts();
104 task_thread_info(current)->status |= TS_USEDFPU;
105 }
106 err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
107 if (unlikely(err)) {
108 /*
109 * Encountered an error while doing the restore from the
110 * user buffer, clear the fpu state.
111 */
112 clear_fpu(tsk);
113 clear_used_math();
114 }
115 return err;
116}
117
118/*
119 * Do a signal return; undo the signal stack. 56 * Do a signal return; undo the signal stack.
120 */ 57 */
121static int 58static int
@@ -159,25 +96,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
159 { 96 {
160 struct _fpstate __user *buf; 97 struct _fpstate __user *buf;
161 err |= __get_user(buf, &sc->fpstate); 98 err |= __get_user(buf, &sc->fpstate);
162 99 err |= restore_i387_xstate(buf);
163 if (buf) {
164 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
165 goto badframe;
166 err |= restore_i387(buf);
167 } else {
168 struct task_struct *me = current;
169 if (used_math()) {
170 clear_fpu(me);
171 clear_used_math();
172 }
173 }
174 } 100 }
175 101
176 err |= __get_user(*pax, &sc->ax); 102 err |= __get_user(*pax, &sc->ax);
177 return err; 103 return err;
178
179badframe:
180 return 1;
181} 104}
182 105
183asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) 106asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
@@ -269,26 +192,23 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
269 sp = current->sas_ss_sp + current->sas_ss_size; 192 sp = current->sas_ss_sp + current->sas_ss_size;
270 } 193 }
271 194
272 return (void __user *)round_down(sp - size, 16); 195 return (void __user *)round_down(sp - size, 64);
273} 196}
274 197
275static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 198static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
276 sigset_t *set, struct pt_regs *regs) 199 sigset_t *set, struct pt_regs *regs)
277{ 200{
278 struct rt_sigframe __user *frame; 201 struct rt_sigframe __user *frame;
279 struct _fpstate __user *fp = NULL; 202 void __user *fp = NULL;
280 int err = 0; 203 int err = 0;
281 struct task_struct *me = current; 204 struct task_struct *me = current;
282 205
283 if (used_math()) { 206 if (used_math()) {
284 fp = get_stack(ka, regs, sizeof(struct _fpstate)); 207 fp = get_stack(ka, regs, sig_xstate_size);
285 frame = (void __user *)round_down( 208 frame = (void __user *)round_down(
286 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; 209 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
287 210
288 if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) 211 if (save_i387_xstate(fp) < 0)
289 goto give_sigsegv;
290
291 if (save_i387(fp) < 0)
292 err |= -1; 212 err |= -1;
293 } else 213 } else
294 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; 214 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
@@ -303,7 +223,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
303 } 223 }
304 224
305 /* Create the ucontext. */ 225 /* Create the ucontext. */
306 err |= __put_user(0, &frame->uc.uc_flags); 226 if (cpu_has_xsave)
227 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
228 else
229 err |= __put_user(0, &frame->uc.uc_flags);
307 err |= __put_user(0, &frame->uc.uc_link); 230 err |= __put_user(0, &frame->uc.uc_link);
308 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 231 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
309 err |= __put_user(sas_ss_flags(regs->sp), 232 err |= __put_user(sas_ss_flags(regs->sp),
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4e7ccb0e2a9b..9056f7e272c0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
123 123
124static atomic_t init_deasserted; 124static atomic_t init_deasserted;
125 125
126static int boot_cpu_logical_apicid;
127 126
128/* representing cpus for which sibling maps can be computed */ 127/* representing cpus for which sibling maps can be computed */
129static cpumask_t cpu_sibling_setup_map; 128static cpumask_t cpu_sibling_setup_map;
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
165#endif 164#endif
166 165
167#ifdef CONFIG_X86_32 166#ifdef CONFIG_X86_32
167static int boot_cpu_logical_apicid;
168
168u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 169u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
169 { [0 ... NR_CPUS-1] = BAD_APICID }; 170 { [0 ... NR_CPUS-1] = BAD_APICID };
170 171
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
210 /* 211 /*
211 * (This works even if the APIC is not enabled.) 212 * (This works even if the APIC is not enabled.)
212 */ 213 */
213 phys_id = GET_APIC_ID(read_apic_id()); 214 phys_id = read_apic_id();
214 cpuid = smp_processor_id(); 215 cpuid = smp_processor_id();
215 if (cpu_isset(cpuid, cpu_callin_map)) { 216 if (cpu_isset(cpuid, cpu_callin_map)) {
216 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -551,8 +552,7 @@ static inline void __inquire_remote_apic(int apicid)
551 printk(KERN_CONT 552 printk(KERN_CONT
552 "a previous APIC delivery may have failed\n"); 553 "a previous APIC delivery may have failed\n");
553 554
554 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 555 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
555 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
556 556
557 timeout = 0; 557 timeout = 0;
558 do { 558 do {
@@ -584,11 +584,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
584 int maxlvt; 584 int maxlvt;
585 585
586 /* Target chip */ 586 /* Target chip */
587 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
588
589 /* Boot on the stack */ 587 /* Boot on the stack */
590 /* Kick the second */ 588 /* Kick the second */
591 apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); 589 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
592 590
593 pr_debug("Waiting for send to finish...\n"); 591 pr_debug("Waiting for send to finish...\n");
594 send_status = safe_apic_wait_icr_idle(); 592 send_status = safe_apic_wait_icr_idle();
@@ -641,13 +639,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
641 /* 639 /*
642 * Turn INIT on target chip 640 * Turn INIT on target chip
643 */ 641 */
644 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
645
646 /* 642 /*
647 * Send IPI 643 * Send IPI
648 */ 644 */
649 apic_write(APIC_ICR, 645 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
650 APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); 646 phys_apicid);
651 647
652 pr_debug("Waiting for send to finish...\n"); 648 pr_debug("Waiting for send to finish...\n");
653 send_status = safe_apic_wait_icr_idle(); 649 send_status = safe_apic_wait_icr_idle();
@@ -657,10 +653,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
657 pr_debug("Deasserting INIT.\n"); 653 pr_debug("Deasserting INIT.\n");
658 654
659 /* Target chip */ 655 /* Target chip */
660 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
661
662 /* Send IPI */ 656 /* Send IPI */
663 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 657 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
664 658
665 pr_debug("Waiting for send to finish...\n"); 659 pr_debug("Waiting for send to finish...\n");
666 send_status = safe_apic_wait_icr_idle(); 660 send_status = safe_apic_wait_icr_idle();
@@ -703,11 +697,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
703 */ 697 */
704 698
705 /* Target chip */ 699 /* Target chip */
706 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
707
708 /* Boot on the stack */ 700 /* Boot on the stack */
709 /* Kick the second */ 701 /* Kick the second */
710 apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); 702 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
703 phys_apicid);
711 704
712 /* 705 /*
713 * Give the other CPU some time to accept the IPI. 706 * Give the other CPU some time to accept the IPI.
@@ -1176,10 +1169,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1176 * Setup boot CPU information 1169 * Setup boot CPU information
1177 */ 1170 */
1178 smp_store_cpu_info(0); /* Final full version of the data */ 1171 smp_store_cpu_info(0); /* Final full version of the data */
1172#ifdef CONFIG_X86_32
1179 boot_cpu_logical_apicid = logical_smp_processor_id(); 1173 boot_cpu_logical_apicid = logical_smp_processor_id();
1174#endif
1180 current_thread_info()->cpu = 0; /* needed? */ 1175 current_thread_info()->cpu = 0; /* needed? */
1181 set_cpu_sibling_map(0); 1176 set_cpu_sibling_map(0);
1182 1177
1178#ifdef CONFIG_X86_64
1179 enable_IR_x2apic();
1180 setup_apic_routing();
1181#endif
1182
1183 if (smp_sanity_check(max_cpus) < 0) { 1183 if (smp_sanity_check(max_cpus) < 0) {
1184 printk(KERN_INFO "SMP disabled\n"); 1184 printk(KERN_INFO "SMP disabled\n");
1185 disable_smp(); 1185 disable_smp();
@@ -1187,9 +1187,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1187 } 1187 }
1188 1188
1189 preempt_disable(); 1189 preempt_disable();
1190 if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { 1190 if (read_apic_id() != boot_cpu_physical_apicid) {
1191 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1191 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1192 GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); 1192 read_apic_id(), boot_cpu_physical_apicid);
1193 /* Or can we switch back to PIC here? */ 1193 /* Or can we switch back to PIC here? */
1194 } 1194 }
1195 preempt_enable(); 1195 preempt_enable();
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index d67ce5f044ba..7b987852e876 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,7 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <asm/io.h> 31#include <asm/io.h>
32#include <asm/bios_ebda.h> 32#include <asm/bios_ebda.h>
33#include <asm/mach-summit/mach_mpparse.h> 33#include <asm/summit/mpparse.h>
34 34
35static struct rio_table_hdr *rio_table_hdr __initdata; 35static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; 36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 03df8e45e5a1..da5a5964fccb 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1228,7 +1228,6 @@ void __init trap_init(void)
1228 1228
1229 set_bit(SYSCALL_VECTOR, used_vectors); 1229 set_bit(SYSCALL_VECTOR, used_vectors);
1230 1230
1231 init_thread_xstate();
1232 /* 1231 /*
1233 * Should be a barrier for any external CPU state: 1232 * Should be a barrier for any external CPU state:
1234 */ 1233 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 7a31f104bef9..2887a789e38f 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1138,7 +1138,7 @@ asmlinkage void math_state_restore(void)
1138 /* 1138 /*
1139 * Paranoid restore. send a SIGSEGV if we fail to restore the state. 1139 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
1140 */ 1140 */
1141 if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { 1141 if (unlikely(restore_fpu_checking(me))) {
1142 stts(); 1142 stts();
1143 force_sig(SIGSEGV, me); 1143 force_sig(SIGSEGV, me);
1144 return; 1144 return;
@@ -1179,10 +1179,6 @@ void __init trap_init(void)
1179 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 1179 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1180#endif 1180#endif
1181 /* 1181 /*
1182 * initialize the per thread extended state:
1183 */
1184 init_thread_xstate();
1185 /*
1186 * Should be a barrier for any external CPU state: 1182 * Should be a barrier for any external CPU state:
1187 */ 1183 */
1188 cpu_init(); 1184 cpu_init();
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8c9ad02af5a2..8b6c393ab9fd 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -905,8 +905,8 @@ static inline int __init activate_vmi(void)
905#endif 905#endif
906 906
907#ifdef CONFIG_X86_LOCAL_APIC 907#ifdef CONFIG_X86_LOCAL_APIC
908 para_fill(pv_apic_ops.apic_read, APICRead); 908 para_fill(apic_ops->read, APICRead);
909 para_fill(pv_apic_ops.apic_write, APICWrite); 909 para_fill(apic_ops->write, APICWrite);
910#endif 910#endif
911 911
912 /* 912 /*
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index af5bdad84604..a9b8560adbc2 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -140,10 +140,10 @@ SECTIONS
140 *(.con_initcall.init) 140 *(.con_initcall.init)
141 __con_initcall_end = .; 141 __con_initcall_end = .;
142 } 142 }
143 .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { 143 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
144 __x86cpuvendor_start = .; 144 __x86_cpu_dev_start = .;
145 *(.x86cpuvendor.init) 145 *(.x86_cpu_dev.init)
146 __x86cpuvendor_end = .; 146 __x86_cpu_dev_end = .;
147 } 147 }
148 SECURITY_INIT 148 SECURITY_INIT
149 . = ALIGN(4); 149 . = ALIGN(4);
@@ -180,6 +180,7 @@ SECTIONS
180 . = ALIGN(PAGE_SIZE); 180 . = ALIGN(PAGE_SIZE);
181 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { 181 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
182 __per_cpu_start = .; 182 __per_cpu_start = .;
183 *(.data.percpu.page_aligned)
183 *(.data.percpu) 184 *(.data.percpu)
184 *(.data.percpu.shared_aligned) 185 *(.data.percpu.shared_aligned)
185 __per_cpu_end = .; 186 __per_cpu_end = .;
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 63e5c1a22e88..201e81a91a95 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -168,13 +168,12 @@ SECTIONS
168 *(.con_initcall.init) 168 *(.con_initcall.init)
169 } 169 }
170 __con_initcall_end = .; 170 __con_initcall_end = .;
171 . = ALIGN(16); 171 __x86_cpu_dev_start = .;
172 __x86cpuvendor_start = .; 172 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
173 .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { 173 *(.x86_cpu_dev.init)
174 *(.x86cpuvendor.init)
175 } 174 }
176 __x86cpuvendor_end = .;
177 SECURITY_INIT 175 SECURITY_INIT
176 __x86_cpu_dev_end = .;
178 177
179 . = ALIGN(8); 178 . = ALIGN(8);
180 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { 179 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
new file mode 100644
index 000000000000..07713d64debe
--- /dev/null
+++ b/arch/x86/kernel/xsave.c
@@ -0,0 +1,316 @@
1/*
2 * xsave/xrstor support.
3 *
4 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
5 */
6#include <linux/bootmem.h>
7#include <linux/compat.h>
8#include <asm/i387.h>
9#ifdef CONFIG_IA32_EMULATION
10#include <asm/sigcontext32.h>
11#endif
12#include <asm/xcr.h>
13
14/*
15 * Supported feature mask by the CPU and the kernel.
16 */
17u64 pcntxt_mask;
18
19struct _fpx_sw_bytes fx_sw_reserved;
20#ifdef CONFIG_IA32_EMULATION
21struct _fpx_sw_bytes fx_sw_reserved_ia32;
22#endif
23
24/*
25 * Check for the presence of extended state information in the
26 * user fpstate pointer in the sigcontext.
27 */
28int check_for_xstate(struct i387_fxsave_struct __user *buf,
29 void __user *fpstate,
30 struct _fpx_sw_bytes *fx_sw_user)
31{
32 int min_xstate_size = sizeof(struct i387_fxsave_struct) +
33 sizeof(struct xsave_hdr_struct);
34 unsigned int magic2;
35 int err;
36
37 err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
38 sizeof(struct _fpx_sw_bytes));
39
40 if (err)
41 return err;
42
43 /*
44 * First Magic check failed.
45 */
46 if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
47 return -1;
48
49 /*
50 * Check for error scenarios.
51 */
52 if (fx_sw_user->xstate_size < min_xstate_size ||
53 fx_sw_user->xstate_size > xstate_size ||
54 fx_sw_user->xstate_size > fx_sw_user->extended_size)
55 return -1;
56
57 err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
58 fx_sw_user->extended_size -
59 FP_XSTATE_MAGIC2_SIZE));
60 /*
61 * Check for the presence of second magic word at the end of memory
62 * layout. This detects the case where the user just copied the legacy
63 * fpstate layout with out copying the extended state information
64 * in the memory layout.
65 */
66 if (err || magic2 != FP_XSTATE_MAGIC2)
67 return -1;
68
69 return 0;
70}
71
72#ifdef CONFIG_X86_64
73/*
74 * Signal frame handlers.
75 */
76
77int save_i387_xstate(void __user *buf)
78{
79 struct task_struct *tsk = current;
80 int err = 0;
81
82 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
83 return -EACCES;
84
85 BUG_ON(sig_xstate_size < xstate_size);
86
87 if ((unsigned long)buf % 64)
88 printk("save_i387_xstate: bad fpstate %p\n", buf);
89
90 if (!used_math())
91 return 0;
92 clear_used_math(); /* trigger finit */
93 if (task_thread_info(tsk)->status & TS_USEDFPU) {
94 /*
95 * Start with clearing the user buffer. This will present a
96 * clean context for the bytes not touched by the fxsave/xsave.
97 */
98 __clear_user(buf, sig_xstate_size);
99
100 if (task_thread_info(tsk)->status & TS_XSAVE)
101 err = xsave_user(buf);
102 else
103 err = fxsave_user(buf);
104
105 if (err)
106 return err;
107 task_thread_info(tsk)->status &= ~TS_USEDFPU;
108 stts();
109 } else {
110 if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
111 xstate_size))
112 return -1;
113 }
114
115 if (task_thread_info(tsk)->status & TS_XSAVE) {
116 struct _fpstate __user *fx = buf;
117
118 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
119 sizeof(struct _fpx_sw_bytes));
120
121 err |= __put_user(FP_XSTATE_MAGIC2,
122 (__u32 __user *) (buf + sig_xstate_size
123 - FP_XSTATE_MAGIC2_SIZE));
124 }
125
126 return 1;
127}
128
129/*
130 * Restore the extended state if present. Otherwise, restore the FP/SSE
131 * state.
132 */
133int restore_user_xstate(void __user *buf)
134{
135 struct _fpx_sw_bytes fx_sw_user;
136 u64 mask;
137 int err;
138
139 if (((unsigned long)buf % 64) ||
140 check_for_xstate(buf, buf, &fx_sw_user))
141 goto fx_only;
142
143 mask = fx_sw_user.xstate_bv;
144
145 /*
146 * restore the state passed by the user.
147 */
148 err = xrestore_user(buf, mask);
149 if (err)
150 return err;
151
152 /*
153 * init the state skipped by the user.
154 */
155 mask = pcntxt_mask & ~mask;
156
157 xrstor_state(init_xstate_buf, mask);
158
159 return 0;
160
161fx_only:
162 /*
163 * couldn't find the extended state information in the
164 * memory layout. Restore just the FP/SSE and init all
165 * the other extended state.
166 */
167 xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
168 return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
169}
170
171/*
172 * This restores directly out of user space. Exceptions are handled.
173 */
174int restore_i387_xstate(void __user *buf)
175{
176 struct task_struct *tsk = current;
177 int err = 0;
178
179 if (!buf) {
180 if (used_math())
181 goto clear;
182 return 0;
183 } else
184 if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
185 return -EACCES;
186
187 if (!used_math()) {
188 err = init_fpu(tsk);
189 if (err)
190 return err;
191 }
192
193 if (!(task_thread_info(current)->status & TS_USEDFPU)) {
194 clts();
195 task_thread_info(current)->status |= TS_USEDFPU;
196 }
197 if (task_thread_info(tsk)->status & TS_XSAVE)
198 err = restore_user_xstate(buf);
199 else
200 err = fxrstor_checking((__force struct i387_fxsave_struct *)
201 buf);
202 if (unlikely(err)) {
203 /*
204 * Encountered an error while doing the restore from the
205 * user buffer, clear the fpu state.
206 */
207clear:
208 clear_fpu(tsk);
209 clear_used_math();
210 }
211 return err;
212}
213#endif
214
215/*
216 * Prepare the SW reserved portion of the fxsave memory layout, indicating
217 * the presence of the extended state information in the memory layout
218 * pointed by the fpstate pointer in the sigcontext.
219 * This will be saved when ever the FP and extended state context is
220 * saved on the user stack during the signal handler delivery to the user.
221 */
222void prepare_fx_sw_frame(void)
223{
224 int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
225 FP_XSTATE_MAGIC2_SIZE;
226
227 sig_xstate_size = sizeof(struct _fpstate) + size_extended;
228
229#ifdef CONFIG_IA32_EMULATION
230 sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
231#endif
232
233 memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
234
235 fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
236 fx_sw_reserved.extended_size = sig_xstate_size;
237 fx_sw_reserved.xstate_bv = pcntxt_mask;
238 fx_sw_reserved.xstate_size = xstate_size;
239#ifdef CONFIG_IA32_EMULATION
240 memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
241 sizeof(struct _fpx_sw_bytes));
242 fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
243#endif
244}
245
246/*
247 * Represents init state for the supported extended state.
248 */
249struct xsave_struct *init_xstate_buf;
250
251#ifdef CONFIG_X86_64
252unsigned int sig_xstate_size = sizeof(struct _fpstate);
253#endif
254
255/*
256 * Enable the extended processor state save/restore feature
257 */
258void __cpuinit xsave_init(void)
259{
260 if (!cpu_has_xsave)
261 return;
262
263 set_in_cr4(X86_CR4_OSXSAVE);
264
265 /*
266 * Enable all the features that the HW is capable of
267 * and the Linux kernel is aware of.
268 */
269 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
270}
271
272/*
273 * setup the xstate image representing the init state
274 */
275void setup_xstate_init(void)
276{
277 init_xstate_buf = alloc_bootmem(xstate_size);
278 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
279}
280
281/*
282 * Enable and initialize the xsave feature.
283 */
284void __init xsave_cntxt_init(void)
285{
286 unsigned int eax, ebx, ecx, edx;
287
288 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
289 pcntxt_mask = eax + ((u64)edx << 32);
290
291 if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
292 printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
293 pcntxt_mask);
294 BUG();
295 }
296
297 /*
298 * for now OS knows only about FP/SSE
299 */
300 pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
301 xsave_init();
302
303 /*
304 * Recompute the context size for enabled features
305 */
306 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
307 xstate_size = ebx;
308
309 prepare_fx_sw_frame();
310
311 setup_xstate_init();
312
313 printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
314 "cntxt size 0x%x\n",
315 pcntxt_mask, xstate_size);
316}
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 23e8373507ad..17e25995b65b 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -331,21 +331,6 @@ enum vmcs_field {
331 331
332#define AR_RESERVD_MASK 0xfffe0f00 332#define AR_RESERVD_MASK 0xfffe0f00
333 333
334#define MSR_IA32_VMX_BASIC 0x480
335#define MSR_IA32_VMX_PINBASED_CTLS 0x481
336#define MSR_IA32_VMX_PROCBASED_CTLS 0x482
337#define MSR_IA32_VMX_EXIT_CTLS 0x483
338#define MSR_IA32_VMX_ENTRY_CTLS 0x484
339#define MSR_IA32_VMX_MISC 0x485
340#define MSR_IA32_VMX_CR0_FIXED0 0x486
341#define MSR_IA32_VMX_CR0_FIXED1 0x487
342#define MSR_IA32_VMX_CR4_FIXED0 0x488
343#define MSR_IA32_VMX_CR4_FIXED1 0x489
344#define MSR_IA32_VMX_VMCS_ENUM 0x48a
345#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b
346#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c
347
348#define MSR_IA32_FEATURE_CONTROL 0x3a
349#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 334#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
350#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 335#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
351 336
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d9249a882aa5..65f0b8a47bed 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -55,6 +55,7 @@
55#include <linux/lguest_launcher.h> 55#include <linux/lguest_launcher.h>
56#include <linux/virtio_console.h> 56#include <linux/virtio_console.h>
57#include <linux/pm.h> 57#include <linux/pm.h>
58#include <asm/apic.h>
58#include <asm/lguest.h> 59#include <asm/lguest.h>
59#include <asm/paravirt.h> 60#include <asm/paravirt.h>
60#include <asm/param.h> 61#include <asm/param.h>
@@ -783,14 +784,44 @@ static void lguest_wbinvd(void)
783 * code qualifies for Advanced. It will also never interrupt anything. It 784 * code qualifies for Advanced. It will also never interrupt anything. It
784 * does, however, allow us to get through the Linux boot code. */ 785 * does, however, allow us to get through the Linux boot code. */
785#ifdef CONFIG_X86_LOCAL_APIC 786#ifdef CONFIG_X86_LOCAL_APIC
786static void lguest_apic_write(unsigned long reg, u32 v) 787static void lguest_apic_write(u32 reg, u32 v)
787{ 788{
788} 789}
789 790
790static u32 lguest_apic_read(unsigned long reg) 791static u32 lguest_apic_read(u32 reg)
791{ 792{
792 return 0; 793 return 0;
793} 794}
795
796static u64 lguest_apic_icr_read(void)
797{
798 return 0;
799}
800
801static void lguest_apic_icr_write(u32 low, u32 id)
802{
803 /* Warn to see if there's any stray references */
804 WARN_ON(1);
805}
806
807static void lguest_apic_wait_icr_idle(void)
808{
809 return;
810}
811
812static u32 lguest_apic_safe_wait_icr_idle(void)
813{
814 return 0;
815}
816
817static struct apic_ops lguest_basic_apic_ops = {
818 .read = lguest_apic_read,
819 .write = lguest_apic_write,
820 .icr_read = lguest_apic_icr_read,
821 .icr_write = lguest_apic_icr_write,
822 .wait_icr_idle = lguest_apic_wait_icr_idle,
823 .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
824};
794#endif 825#endif
795 826
796/* STOP! Until an interrupt comes in. */ 827/* STOP! Until an interrupt comes in. */
@@ -990,8 +1021,7 @@ __init void lguest_init(void)
990 1021
991#ifdef CONFIG_X86_LOCAL_APIC 1022#ifdef CONFIG_X86_LOCAL_APIC
992 /* apic read/write intercepts */ 1023 /* apic read/write intercepts */
993 pv_apic_ops.apic_write = lguest_apic_write; 1024 apic_ops = &lguest_basic_apic_ops;
994 pv_apic_ops.apic_read = lguest_apic_read;
995#endif 1025#endif
996 1026
997 /* time operations */ 1027 /* time operations */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index aa3fa4119424..55e11aa6d66c 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -17,9 +17,6 @@ ifeq ($(CONFIG_X86_32),y)
17 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o 17 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
18else 18else
19 obj-y += io_64.o iomap_copy_64.o 19 obj-y += io_64.o iomap_copy_64.o
20
21 CFLAGS_csum-partial_64.o := -funroll-loops
22
23 lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o 20 lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
24 lib-y += thunk_64.o clear_page_64.o copy_page_64.o 21 lib-y += thunk_64.o clear_page_64.o copy_page_64.o
25 lib-y += memmove_64.o memset_64.o 22 lib-y += memmove_64.o memset_64.o
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 24e60944971a..9e68075544f6 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -14,6 +14,13 @@
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15#include <asm/mmx.h> 15#include <asm/mmx.h>
16 16
17#ifdef CONFIG_X86_INTEL_USERCOPY
18/*
19 * Alignment at which movsl is preferred for bulk memory copies.
20 */
21struct movsl_mask movsl_mask __read_mostly;
22#endif
23
17static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) 24static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
18{ 25{
19#ifdef CONFIG_X86_INTEL_USERCOPY 26#ifdef CONFIG_X86_INTEL_USERCOPY
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 3f2cf11f201a..37b9ae4d44c5 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -38,15 +38,6 @@ void __init pre_intr_init_hook(void)
38 init_ISA_irqs(); 38 init_ISA_irqs();
39} 39}
40 40
41/*
42 * IRQ2 is cascade interrupt to second interrupt controller
43 */
44static struct irqaction irq2 = {
45 .handler = no_action,
46 .mask = CPU_MASK_NONE,
47 .name = "cascade",
48};
49
50/** 41/**
51 * intr_init_hook - post gate setup interrupt initialisation 42 * intr_init_hook - post gate setup interrupt initialisation
52 * 43 *
@@ -62,12 +53,6 @@ void __init intr_init_hook(void)
62 if (x86_quirks->arch_intr_init()) 53 if (x86_quirks->arch_intr_init())
63 return; 54 return;
64 } 55 }
65#ifdef CONFIG_X86_LOCAL_APIC
66 apic_intr_init();
67#endif
68
69 if (!acpi_ioapic)
70 setup_irq(2, &irq2);
71} 56}
72 57
73/** 58/**
diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile
deleted file mode 100644
index 3ef8b43b62fc..000000000000
--- a/arch/x86/mach-es7000/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
1#
2# Makefile for the linux kernel.
3#
4
5obj-$(CONFIG_X86_ES7000) := es7000plat.o
diff --git a/arch/x86/mach-es7000/es7000.h b/arch/x86/mach-es7000/es7000.h
deleted file mode 100644
index c8d5aa132fa0..000000000000
--- a/arch/x86/mach-es7000/es7000.h
+++ /dev/null
@@ -1,114 +0,0 @@
1/*
2 * Written by: Garry Forsgren, Unisys Corporation
3 * Natalie Protasevich, Unisys Corporation
4 * This file contains the code to configure and interface
5 * with Unisys ES7000 series hardware system manager.
6 *
7 * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it would be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write the Free Software Foundation, Inc., 59
19 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
20 *
21 * Contact information: Unisys Corporation, Township Line & Union Meeting
22 * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
23 *
24 * http://www.unisys.com
25 */
26
27/*
28 * ES7000 chipsets
29 */
30
31#define NON_UNISYS 0
32#define ES7000_CLASSIC 1
33#define ES7000_ZORRO 2
34
35
36#define MIP_REG 1
37#define MIP_PSAI_REG 4
38
39#define MIP_BUSY 1
40#define MIP_SPIN 0xf0000
41#define MIP_VALID 0x0100000000000000ULL
42#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
43
44#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
45
46struct mip_reg_info {
47 unsigned long long mip_info;
48 unsigned long long delivery_info;
49 unsigned long long host_reg;
50 unsigned long long mip_reg;
51};
52
53struct part_info {
54 unsigned char type;
55 unsigned char length;
56 unsigned char part_id;
57 unsigned char apic_mode;
58 unsigned long snum;
59 char ptype[16];
60 char sname[64];
61 char pname[64];
62};
63
64struct psai {
65 unsigned long long entry_type;
66 unsigned long long addr;
67 unsigned long long bep_addr;
68};
69
70struct es7000_mem_info {
71 unsigned char type;
72 unsigned char length;
73 unsigned char resv[6];
74 unsigned long long start;
75 unsigned long long size;
76};
77
78struct es7000_oem_table {
79 unsigned long long hdr;
80 struct mip_reg_info mip;
81 struct part_info pif;
82 struct es7000_mem_info shm;
83 struct psai psai;
84};
85
86#ifdef CONFIG_ACPI
87
88struct oem_table {
89 struct acpi_table_header Header;
90 u32 OEMTableAddr;
91 u32 OEMTableSize;
92};
93
94extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
95#endif
96
97struct mip_reg {
98 unsigned long long off_0;
99 unsigned long long off_8;
100 unsigned long long off_10;
101 unsigned long long off_18;
102 unsigned long long off_20;
103 unsigned long long off_28;
104 unsigned long long off_30;
105 unsigned long long off_38;
106};
107
108#define MIP_SW_APIC 0x1020b
109#define MIP_FUNC(VALUE) (VALUE & 0xff)
110
111extern int parse_unisys_oem (char *oemptr);
112extern void setup_unisys(void);
113extern int es7000_start_cpu(int cpu, unsigned long eip);
114extern void es7000_sw_apic(void);
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile
index 0dbd7803a1d5..6730f4e7c744 100644
--- a/arch/x86/mach-generic/Makefile
+++ b/arch/x86/mach-generic/Makefile
@@ -9,4 +9,3 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o
9obj-$(CONFIG_X86_SUMMIT) += summit.o 9obj-$(CONFIG_X86_SUMMIT) += summit.o
10obj-$(CONFIG_X86_BIGSMP) += bigsmp.o 10obj-$(CONFIG_X86_BIGSMP) += bigsmp.o
11obj-$(CONFIG_X86_ES7000) += es7000.o 11obj-$(CONFIG_X86_ES7000) += es7000.o
12obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 59d771714559..df37fc9d6a26 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -5,18 +5,17 @@
5#define APIC_DEFINITION 1 5#define APIC_DEFINITION 1
6#include <linux/threads.h> 6#include <linux/threads.h>
7#include <linux/cpumask.h> 7#include <linux/cpumask.h>
8#include <asm/smp.h>
9#include <asm/mpspec.h> 8#include <asm/mpspec.h>
10#include <asm/genapic.h> 9#include <asm/genapic.h>
11#include <asm/fixmap.h> 10#include <asm/fixmap.h>
12#include <asm/apicdef.h> 11#include <asm/apicdef.h>
13#include <linux/kernel.h> 12#include <linux/kernel.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <linux/dmi.h> 14#include <linux/dmi.h>
17#include <asm/mach-bigsmp/mach_apic.h> 15#include <asm/bigsmp/apicdef.h>
18#include <asm/mach-bigsmp/mach_apicdef.h> 16#include <linux/smp.h>
19#include <asm/mach-bigsmp/mach_ipi.h> 17#include <asm/bigsmp/apic.h>
18#include <asm/bigsmp/ipi.h>
20#include <asm/mach-default/mach_mpparse.h> 19#include <asm/mach-default/mach_mpparse.h>
21 20
22static int dmi_bigsmp; /* can be set by dmi scanners */ 21static int dmi_bigsmp; /* can be set by dmi scanners */
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 4742626f08c4..520cca0ee04e 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -4,20 +4,19 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-es7000/mach_apicdef.h> 14#include <asm/es7000/apicdef.h>
17#include <asm/mach-es7000/mach_apic.h> 15#include <linux/smp.h>
18#include <asm/mach-es7000/mach_ipi.h> 16#include <asm/es7000/apic.h>
19#include <asm/mach-es7000/mach_mpparse.h> 17#include <asm/es7000/ipi.h>
20#include <asm/mach-es7000/mach_wakecpu.h> 18#include <asm/es7000/mpparse.h>
19#include <asm/es7000/wakecpu.h>
21 20
22static int probe_es7000(void) 21static int probe_es7000(void)
23{ 22{
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8091e68764c4..8cf58394975e 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -4,7 +4,6 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <linux/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
@@ -12,11 +11,12 @@
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/init.h> 13#include <linux/init.h>
15#include <asm/mach-numaq/mach_apic.h> 14#include <asm/numaq/apicdef.h>
16#include <asm/mach-numaq/mach_apicdef.h> 15#include <linux/smp.h>
17#include <asm/mach-numaq/mach_ipi.h> 16#include <asm/numaq/apic.h>
18#include <asm/mach-numaq/mach_mpparse.h> 17#include <asm/numaq/ipi.h>
19#include <asm/mach-numaq/mach_wakecpu.h> 18#include <asm/numaq/mpparse.h>
19#include <asm/numaq/wakecpu.h>
20#include <asm/numaq.h> 20#include <asm/numaq.h>
21 21
22static int mps_oem_check(struct mp_config_table *mpc, char *oem, 22static int mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index a97ea0f35b1e..6ad6b67a723d 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -4,19 +4,18 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-summit/mach_apic.h> 14#include <asm/summit/apicdef.h>
17#include <asm/mach-summit/mach_apicdef.h> 15#include <linux/smp.h>
18#include <asm/mach-summit/mach_ipi.h> 16#include <asm/summit/apic.h>
19#include <asm/mach-summit/mach_mpparse.h> 17#include <asm/summit/ipi.h>
18#include <asm/summit/mpparse.h>
20 19
21static int probe_summit(void) 20static int probe_summit(void)
22{ 21{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index fb30486c82f7..83e13f2d53d2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -88,6 +88,62 @@ early_param("gbpages", parse_direct_gbpages_on);
88 88
89int after_bootmem; 89int after_bootmem;
90 90
91unsigned long __supported_pte_mask __read_mostly = ~0UL;
92EXPORT_SYMBOL_GPL(__supported_pte_mask);
93
94static int do_not_nx __cpuinitdata;
95
96/*
97 * noexec=on|off
98 * Control non-executable mappings for 64-bit processes.
99 *
100 * on Enable (default)
101 * off Disable
102 */
103static int __init nonx_setup(char *str)
104{
105 if (!str)
106 return -EINVAL;
107 if (!strncmp(str, "on", 2)) {
108 __supported_pte_mask |= _PAGE_NX;
109 do_not_nx = 0;
110 } else if (!strncmp(str, "off", 3)) {
111 do_not_nx = 1;
112 __supported_pte_mask &= ~_PAGE_NX;
113 }
114 return 0;
115}
116early_param("noexec", nonx_setup);
117
118void __cpuinit check_efer(void)
119{
120 unsigned long efer;
121
122 rdmsrl(MSR_EFER, efer);
123 if (!(efer & EFER_NX) || do_not_nx)
124 __supported_pte_mask &= ~_PAGE_NX;
125}
126
127int force_personality32;
128
129/*
130 * noexec32=on|off
131 * Control non executable heap for 32bit processes.
132 * To control the stack too use noexec=off
133 *
134 * on PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
135 * off PROT_READ implies PROT_EXEC
136 */
137static int __init nonx32_setup(char *str)
138{
139 if (!strcmp(str, "on"))
140 force_personality32 &= ~READ_IMPLIES_EXEC;
141 else if (!strcmp(str, "off"))
142 force_personality32 |= READ_IMPLIES_EXEC;
143 return 1;
144}
145__setup("noexec32=", nonx32_setup);
146
91/* 147/*
92 * NOTE: This function is marked __ref because it calls __init function 148 * NOTE: This function is marked __ref because it calls __init function
93 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. 149 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 19af06927fbc..1d88d2b39771 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -250,10 +250,5 @@ int __init pci_acpi_init(void)
250 acpi_pci_irq_enable(dev); 250 acpi_pci_irq_enable(dev);
251 } 251 }
252 252
253#ifdef CONFIG_X86_IO_APIC
254 if (acpi_ioapic)
255 print_IO_APIC();
256#endif
257
258 return 0; 253 return 0;
259} 254}
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 8791fc55e715..844df0cbbd3e 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -33,6 +33,7 @@
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34 34
35#include <asm/pat.h> 35#include <asm/pat.h>
36#include <asm/e820.h>
36 37
37#include "pci.h" 38#include "pci.h"
38 39
@@ -227,6 +228,8 @@ void __init pcibios_resource_survey(void)
227 pcibios_allocate_bus_resources(&pci_root_buses); 228 pcibios_allocate_bus_resources(&pci_root_buses);
228 pcibios_allocate_resources(0); 229 pcibios_allocate_resources(0);
229 pcibios_allocate_resources(1); 230 pcibios_allocate_resources(1);
231
232 e820_reserve_resources_late();
230} 233}
231 234
232/** 235/**
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index d9635764ce3d..654a2234f8f3 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -209,7 +209,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
209 return name != NULL; 209 return name != NULL;
210} 210}
211 211
212static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) 212static void __init pci_mmcfg_insert_resources(void)
213{ 213{
214#define PCI_MMCFG_RESOURCE_NAME_LEN 19 214#define PCI_MMCFG_RESOURCE_NAME_LEN 19
215 int i; 215 int i;
@@ -233,7 +233,7 @@ static void __init pci_mmcfg_insert_resources(unsigned long resource_flags)
233 cfg->pci_segment); 233 cfg->pci_segment);
234 res->start = cfg->address; 234 res->start = cfg->address;
235 res->end = res->start + (num_buses << 20) - 1; 235 res->end = res->start + (num_buses << 20) - 1;
236 res->flags = IORESOURCE_MEM | resource_flags; 236 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
237 insert_resource(&iomem_resource, res); 237 insert_resource(&iomem_resource, res);
238 names += PCI_MMCFG_RESOURCE_NAME_LEN; 238 names += PCI_MMCFG_RESOURCE_NAME_LEN;
239 } 239 }
@@ -434,11 +434,9 @@ static void __init __pci_mmcfg_init(int early)
434 (pci_mmcfg_config[0].address == 0)) 434 (pci_mmcfg_config[0].address == 0))
435 return; 435 return;
436 436
437 if (pci_mmcfg_arch_init()) { 437 if (pci_mmcfg_arch_init())
438 if (known_bridge)
439 pci_mmcfg_insert_resources(IORESOURCE_BUSY);
440 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; 438 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
441 } else { 439 else {
442 /* 440 /*
443 * Signal not to attempt to insert mmcfg resources because 441 * Signal not to attempt to insert mmcfg resources because
444 * the architecture mmcfg setup could not initialize. 442 * the architecture mmcfg setup could not initialize.
@@ -475,7 +473,7 @@ static int __init pci_mmcfg_late_insert_resources(void)
475 * marked so it won't cause request errors when __request_region is 473 * marked so it won't cause request errors when __request_region is
476 * called. 474 * called.
477 */ 475 */
478 pci_mmcfg_insert_resources(0); 476 pci_mmcfg_insert_resources();
479 477
480 return 0; 478 return 0;
481} 479}
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index d3e083dea720..274d06082f48 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -11,6 +11,7 @@
11#include <linux/suspend.h> 11#include <linux/suspend.h>
12#include <asm/mtrr.h> 12#include <asm/mtrr.h>
13#include <asm/mce.h> 13#include <asm/mce.h>
14#include <asm/xcr.h>
14 15
15static struct saved_context saved_context; 16static struct saved_context saved_context;
16 17
@@ -126,6 +127,12 @@ static void __restore_processor_state(struct saved_context *ctxt)
126 if (boot_cpu_has(X86_FEATURE_SEP)) 127 if (boot_cpu_has(X86_FEATURE_SEP))
127 enable_sep_cpu(); 128 enable_sep_cpu();
128 129
130 /*
131 * restore XCR0 for xsave capable cpu's.
132 */
133 if (cpu_has_xsave)
134 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
135
129 fix_processor_context(); 136 fix_processor_context();
130 do_fpu_end(); 137 do_fpu_end();
131 mtrr_ap_init(); 138 mtrr_ap_init();
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
index 66bdfb591fd8..e3b6cf70d62c 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -14,6 +14,7 @@
14#include <asm/page.h> 14#include <asm/page.h>
15#include <asm/pgtable.h> 15#include <asm/pgtable.h>
16#include <asm/mtrr.h> 16#include <asm/mtrr.h>
17#include <asm/xcr.h>
17 18
18static void fix_processor_context(void); 19static void fix_processor_context(void);
19 20
@@ -122,6 +123,12 @@ static void __restore_processor_state(struct saved_context *ctxt)
122 wrmsrl(MSR_GS_BASE, ctxt->gs_base); 123 wrmsrl(MSR_GS_BASE, ctxt->gs_base);
123 wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 124 wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
124 125
126 /*
127 * restore XCR0 for xsave capable cpu's.
128 */
129 if (cpu_has_xsave)
130 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
131
125 fix_processor_context(); 132 fix_processor_context();
126 133
127 do_fpu_end(); 134 do_fpu_end();
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 7dcd321a0508..a27d562a9744 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -36,6 +36,7 @@
36#include <xen/hvc-console.h> 36#include <xen/hvc-console.h>
37 37
38#include <asm/paravirt.h> 38#include <asm/paravirt.h>
39#include <asm/apic.h>
39#include <asm/page.h> 40#include <asm/page.h>
40#include <asm/xen/hypercall.h> 41#include <asm/xen/hypercall.h>
41#include <asm/xen/hypervisor.h> 42#include <asm/xen/hypervisor.h>
@@ -580,16 +581,47 @@ static void xen_io_delay(void)
580} 581}
581 582
582#ifdef CONFIG_X86_LOCAL_APIC 583#ifdef CONFIG_X86_LOCAL_APIC
583static u32 xen_apic_read(unsigned long reg) 584static u32 xen_apic_read(u32 reg)
584{ 585{
585 return 0; 586 return 0;
586} 587}
587 588
588static void xen_apic_write(unsigned long reg, u32 val) 589static void xen_apic_write(u32 reg, u32 val)
589{ 590{
590 /* Warn to see if there's any stray references */ 591 /* Warn to see if there's any stray references */
591 WARN_ON(1); 592 WARN_ON(1);
592} 593}
594
595static u64 xen_apic_icr_read(void)
596{
597 return 0;
598}
599
600static void xen_apic_icr_write(u32 low, u32 id)
601{
602 /* Warn to see if there's any stray references */
603 WARN_ON(1);
604}
605
606static void xen_apic_wait_icr_idle(void)
607{
608 return;
609}
610
611static u32 xen_safe_apic_wait_icr_idle(void)
612{
613 return 0;
614}
615
616static struct apic_ops xen_basic_apic_ops = {
617 .read = xen_apic_read,
618 .write = xen_apic_write,
619 .icr_read = xen_apic_icr_read,
620 .icr_write = xen_apic_icr_write,
621 .wait_icr_idle = xen_apic_wait_icr_idle,
622 .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
623};
624
593#endif 625#endif
594 626
595static void xen_flush_tlb(void) 627static void xen_flush_tlb(void)
@@ -1273,8 +1305,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
1273 1305
1274static const struct pv_apic_ops xen_apic_ops __initdata = { 1306static const struct pv_apic_ops xen_apic_ops __initdata = {
1275#ifdef CONFIG_X86_LOCAL_APIC 1307#ifdef CONFIG_X86_LOCAL_APIC
1276 .apic_write = xen_apic_write,
1277 .apic_read = xen_apic_read,
1278 .setup_boot_clock = paravirt_nop, 1308 .setup_boot_clock = paravirt_nop,
1279 .setup_secondary_clock = paravirt_nop, 1309 .setup_secondary_clock = paravirt_nop,
1280 .startup_ipi_hook = paravirt_nop, 1310 .startup_ipi_hook = paravirt_nop,
@@ -1677,6 +1707,13 @@ asmlinkage void __init xen_start_kernel(void)
1677 pv_apic_ops = xen_apic_ops; 1707 pv_apic_ops = xen_apic_ops;
1678 pv_mmu_ops = xen_mmu_ops; 1708 pv_mmu_ops = xen_mmu_ops;
1679 1709
1710#ifdef CONFIG_X86_LOCAL_APIC
1711 /*
1712 * set up the basic apic ops.
1713 */
1714 apic_ops = &xen_basic_apic_ops;
1715#endif
1716
1680 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { 1717 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1681 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; 1718 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1682 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; 1719 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 8dd3336efd7e..3c578ef78c48 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -369,7 +369,6 @@ static int __init acpi_rtc_init(void)
369 DBG("RTC unavailable?\n"); 369 DBG("RTC unavailable?\n");
370 return 0; 370 return 0;
371} 371}
372/* do this between RTC subsys_initcall() and rtc_cmos driver_initcall() */ 372module_init(acpi_rtc_init);
373fs_initcall(acpi_rtc_init);
374 373
375#endif 374#endif
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 59fe051957ef..5d312dc9be9f 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -503,7 +503,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg)
503 scsi_cmd[0] = ATA_16; 503 scsi_cmd[0] = ATA_16;
504 504
505 scsi_cmd[4] = args[2]; 505 scsi_cmd[4] = args[2];
506 if (args[0] == WIN_SMART) { /* hack -- ide driver does this too... */ 506 if (args[0] == ATA_CMD_SMART) { /* hack -- ide driver does this too */
507 scsi_cmd[6] = args[3]; 507 scsi_cmd[6] = args[3];
508 scsi_cmd[8] = args[1]; 508 scsi_cmd[8] = args[1];
509 scsi_cmd[10] = 0x4f; 509 scsi_cmd[10] = 0x4f;
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 682243bf2e46..482c0c4b964f 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -39,6 +39,7 @@
39#include <linux/ioport.h> 39#include <linux/ioport.h>
40#include <linux/init.h> 40#include <linux/init.h>
41#include <linux/blkpg.h> 41#include <linux/blkpg.h>
42#include <linux/ata.h>
42#include <linux/hdreg.h> 43#include <linux/hdreg.h>
43 44
44#define REALLY_SLOW_IO 45#define REALLY_SLOW_IO
@@ -370,7 +371,7 @@ repeat:
370 struct hd_i_struct *disk = &hd_info[i]; 371 struct hd_i_struct *disk = &hd_info[i];
371 disk->special_op = disk->recalibrate = 1; 372 disk->special_op = disk->recalibrate = 1;
372 hd_out(disk, disk->sect, disk->sect, disk->head-1, 373 hd_out(disk, disk->sect, disk->sect, disk->head-1,
373 disk->cyl, WIN_SPECIFY, &reset_hd); 374 disk->cyl, ATA_CMD_INIT_DEV_PARAMS, &reset_hd);
374 if (reset) 375 if (reset)
375 goto repeat; 376 goto repeat;
376 } else 377 } else
@@ -558,7 +559,7 @@ static int do_special_op(struct hd_i_struct *disk, struct request *req)
558{ 559{
559 if (disk->recalibrate) { 560 if (disk->recalibrate) {
560 disk->recalibrate = 0; 561 disk->recalibrate = 0;
561 hd_out(disk, disk->sect, 0, 0, 0, WIN_RESTORE, &recal_intr); 562 hd_out(disk, disk->sect, 0, 0, 0, ATA_CMD_RESTORE, &recal_intr);
562 return reset; 563 return reset;
563 } 564 }
564 if (disk->head > 16) { 565 if (disk->head > 16) {
@@ -631,13 +632,13 @@ repeat:
631 if (blk_fs_request(req)) { 632 if (blk_fs_request(req)) {
632 switch (rq_data_dir(req)) { 633 switch (rq_data_dir(req)) {
633 case READ: 634 case READ:
634 hd_out(disk, nsect, sec, head, cyl, WIN_READ, 635 hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ,
635 &read_intr); 636 &read_intr);
636 if (reset) 637 if (reset)
637 goto repeat; 638 goto repeat;
638 break; 639 break;
639 case WRITE: 640 case WRITE:
640 hd_out(disk, nsect, sec, head, cyl, WIN_WRITE, 641 hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE,
641 &write_intr); 642 &write_intr);
642 if (reset) 643 if (reset)
643 goto repeat; 644 goto repeat;
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 052879a6f853..b50b5dac95b0 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -131,29 +131,6 @@ config BLK_DEV_IDEDISK
131 131
132 If unsure, say Y. 132 If unsure, say Y.
133 133
134config IDEDISK_MULTI_MODE
135 bool "Use multiple sector mode for Programmed Input/Output by default"
136 help
137 This setting is irrelevant for most IDE disks, with direct memory
138 access, to which multiple sector mode does not apply. Multiple sector
139 mode is a feature of most modern IDE hard drives, permitting the
140 transfer of multiple sectors per Programmed Input/Output interrupt,
141 rather than the usual one sector per interrupt. When this feature is
142 enabled, it can reduce operating system overhead for disk Programmed
143 Input/Output. On some systems, it also can increase the data
144 throughput of Programmed Input/Output. Some drives, however, seemed
145 to run slower with multiple sector mode enabled. Some drives claimed
146 to support multiple sector mode, but lost data at some settings.
147 Under rare circumstances, such failures could result in massive
148 filesystem corruption.
149
150 If you get the following error, try to say Y here:
151
152 hda: set_multmode: status=0x51 { DriveReady SeekComplete Error }
153 hda: set_multmode: error=0x04 { DriveStatusError }
154
155 If in doubt, say N.
156
157config BLK_DEV_IDECS 134config BLK_DEV_IDECS
158 tristate "PCMCIA IDE support" 135 tristate "PCMCIA IDE support"
159 depends on PCMCIA 136 depends on PCMCIA
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 64e0ecdc4ed5..308b8a12f314 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -4,8 +4,8 @@
4 4
5EXTRA_CFLAGS += -Idrivers/ide 5EXTRA_CFLAGS += -Idrivers/ide
6 6
7ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o \ 7ide-core-y += ide.o ide-ioctls.o ide-io.o ide-iops.o ide-lib.o ide-probe.o \
8 ide-pio-blacklist.o 8 ide-taskfile.o ide-pio-blacklist.o
9 9
10# core IDE code 10# core IDE code
11ide-core-$(CONFIG_IDE_TIMINGS) += ide-timings.o 11ide-core-$(CONFIG_IDE_TIMINGS) += ide-timings.o
@@ -37,11 +37,12 @@ obj-$(CONFIG_IDE_GENERIC) += ide-generic.o
37obj-$(CONFIG_BLK_DEV_IDEPNP) += ide-pnp.o 37obj-$(CONFIG_BLK_DEV_IDEPNP) += ide-pnp.o
38 38
39ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o 39ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
40ide-floppy_mod-y += ide-floppy.o ide-floppy_ioctl.o
40 41
41obj-$(CONFIG_BLK_DEV_IDEDISK) += ide-disk.o 42obj-$(CONFIG_BLK_DEV_IDEDISK) += ide-disk.o
42obj-$(CONFIG_BLK_DEV_IDECD) += ide-cd_mod.o 43obj-$(CONFIG_BLK_DEV_IDECD) += ide-cd_mod.o
44obj-$(CONFIG_BLK_DEV_IDEFLOPPY) += ide-floppy_mod.o
43obj-$(CONFIG_BLK_DEV_IDETAPE) += ide-tape.o 45obj-$(CONFIG_BLK_DEV_IDETAPE) += ide-tape.o
44obj-$(CONFIG_BLK_DEV_IDEFLOPPY) += ide-floppy.o
45 46
46ifeq ($(CONFIG_BLK_DEV_IDECS), y) 47ifeq ($(CONFIG_BLK_DEV_IDECS), y)
47 ide-cs-core-y += legacy/ide-cs.o 48 ide-cs-core-y += legacy/ide-cs.o
diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c
index df4af4083954..70f5b164828b 100644
--- a/drivers/ide/arm/icside.c
+++ b/drivers/ide/arm/icside.c
@@ -10,7 +10,6 @@
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/blkdev.h> 11#include <linux/blkdev.h>
12#include <linux/errno.h> 12#include <linux/errno.h>
13#include <linux/hdreg.h>
14#include <linux/ide.h> 13#include <linux/ide.h>
15#include <linux/dma-mapping.h> 14#include <linux/dma-mapping.h>
16#include <linux/device.h> 15#include <linux/device.h>
@@ -265,8 +264,8 @@ static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode)
265 * If we're going to be doing MW_DMA_1 or MW_DMA_2, we should 264 * If we're going to be doing MW_DMA_1 or MW_DMA_2, we should
266 * take care to note the values in the ID... 265 * take care to note the values in the ID...
267 */ 266 */
268 if (use_dma_info && drive->id->eide_dma_time > cycle_time) 267 if (use_dma_info && drive->id[ATA_ID_EIDE_DMA_TIME] > cycle_time)
269 cycle_time = drive->id->eide_dma_time; 268 cycle_time = drive->id[ATA_ID_EIDE_DMA_TIME];
270 269
271 drive->drive_data = cycle_time; 270 drive->drive_data = cycle_time;
272 271
diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c
index 4fd91dcf1dc2..122ed3c072fd 100644
--- a/drivers/ide/arm/palm_bk3710.c
+++ b/drivers/ide/arm/palm_bk3710.c
@@ -27,7 +27,6 @@
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/kernel.h> 28#include <linux/kernel.h>
29#include <linux/ioport.h> 29#include <linux/ioport.h>
30#include <linux/hdreg.h>
31#include <linux/ide.h> 30#include <linux/ide.h>
32#include <linux/delay.h> 31#include <linux/delay.h>
33#include <linux/init.h> 32#include <linux/init.h>
@@ -180,7 +179,7 @@ static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate,
180 val32 |= (t2i << (dev ? 8 : 0)); 179 val32 |= (t2i << (dev ? 8 : 0));
181 writel(val32, base + BK3710_DATRCVR); 180 writel(val32, base + BK3710_DATRCVR);
182 181
183 if (mate && mate->present) { 182 if (mate) {
184 u8 mode2 = ide_get_best_pio_mode(mate, 255, 4); 183 u8 mode2 = ide_get_best_pio_mode(mate, 255, 4);
185 184
186 if (mode2 < mode) 185 if (mode2 < mode)
@@ -213,7 +212,8 @@ static void palm_bk3710_set_dma_mode(ide_drive_t *drive, u8 xferspeed)
213 palm_bk3710_setudmamode(base, is_slave, 212 palm_bk3710_setudmamode(base, is_slave,
214 xferspeed - XFER_UDMA_0); 213 xferspeed - XFER_UDMA_0);
215 } else { 214 } else {
216 palm_bk3710_setdmamode(base, is_slave, drive->id->eide_dma_min, 215 palm_bk3710_setdmamode(base, is_slave,
216 drive->id[ATA_ID_EIDE_DMA_MIN],
217 xferspeed); 217 xferspeed);
218 } 218 }
219} 219}
@@ -229,7 +229,7 @@ static void palm_bk3710_set_pio_mode(ide_drive_t *drive, u8 pio)
229 * Obtain the drive PIO data for tuning the Palm Chip registers 229 * Obtain the drive PIO data for tuning the Palm Chip registers
230 */ 230 */
231 cycle_time = ide_pio_cycle_time(drive, pio); 231 cycle_time = ide_pio_cycle_time(drive, pio);
232 mate = ide_get_paired_drive(drive); 232 mate = ide_get_pair_dev(drive);
233 palm_bk3710_setpiomode(base, mate, is_slave, cycle_time, pio); 233 palm_bk3710_setpiomode(base, mate, is_slave, cycle_time, pio);
234} 234}
235 235
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index 6f704628c27d..2427c380b3dc 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -584,7 +584,7 @@ void ide_acpi_get_timing(ide_hwif_t *hwif)
584 * This function executes the _STM ACPI method for the target channel. 584 * This function executes the _STM ACPI method for the target channel.
585 * 585 *
586 * _STM requires Identify Drive data, which has to passed as an argument. 586 * _STM requires Identify Drive data, which has to passed as an argument.
587 * Unfortunately hd_driveid is a mangled version which we can't readily 587 * Unfortunately drive->id is a mangled version which we can't readily
588 * use; hence we'll get the information afresh. 588 * use; hence we'll get the information afresh.
589 */ 589 */
590void ide_acpi_push_timing(ide_hwif_t *hwif) 590void ide_acpi_push_timing(ide_hwif_t *hwif)
@@ -614,10 +614,10 @@ void ide_acpi_push_timing(ide_hwif_t *hwif)
614 in_params[0].buffer.length = sizeof(struct GTM_buffer); 614 in_params[0].buffer.length = sizeof(struct GTM_buffer);
615 in_params[0].buffer.pointer = (u8 *)&hwif->acpidata->gtm; 615 in_params[0].buffer.pointer = (u8 *)&hwif->acpidata->gtm;
616 in_params[1].type = ACPI_TYPE_BUFFER; 616 in_params[1].type = ACPI_TYPE_BUFFER;
617 in_params[1].buffer.length = sizeof(struct hd_driveid); 617 in_params[1].buffer.length = sizeof(ATA_ID_WORDS * 2);
618 in_params[1].buffer.pointer = (u8 *)&master->idbuff; 618 in_params[1].buffer.pointer = (u8 *)&master->idbuff;
619 in_params[2].type = ACPI_TYPE_BUFFER; 619 in_params[2].type = ACPI_TYPE_BUFFER;
620 in_params[2].buffer.length = sizeof(struct hd_driveid); 620 in_params[2].buffer.length = sizeof(ATA_ID_WORDS * 2);
621 in_params[2].buffer.pointer = (u8 *)&slave->idbuff; 621 in_params[2].buffer.pointer = (u8 *)&slave->idbuff;
622 /* Output buffer: _STM has no output */ 622 /* Output buffer: _STM has no output */
623 623
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index adf04f99cdeb..608c5bade929 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -14,12 +14,201 @@
14#define debug_log(fmt, args...) do {} while (0) 14#define debug_log(fmt, args...) do {} while (0)
15#endif 15#endif
16 16
17/*
18 * Check whether we can support a device,
19 * based on the ATAPI IDENTIFY command results.
20 */
21int ide_check_atapi_device(ide_drive_t *drive, const char *s)
22{
23 u16 *id = drive->id;
24 u8 gcw[2], protocol, device_type, removable, drq_type, packet_size;
25
26 *((u16 *)&gcw) = id[ATA_ID_CONFIG];
27
28 protocol = (gcw[1] & 0xC0) >> 6;
29 device_type = gcw[1] & 0x1F;
30 removable = (gcw[0] & 0x80) >> 7;
31 drq_type = (gcw[0] & 0x60) >> 5;
32 packet_size = gcw[0] & 0x03;
33
34#ifdef CONFIG_PPC
35 /* kludge for Apple PowerBook internal zip */
36 if (drive->media == ide_floppy && device_type == 5 &&
37 !strstr((char *)&id[ATA_ID_PROD], "CD-ROM") &&
38 strstr((char *)&id[ATA_ID_PROD], "ZIP"))
39 device_type = 0;
40#endif
41
42 if (protocol != 2)
43 printk(KERN_ERR "%s: %s: protocol (0x%02x) is not ATAPI\n",
44 s, drive->name, protocol);
45 else if ((drive->media == ide_floppy && device_type != 0) ||
46 (drive->media == ide_tape && device_type != 1))
47 printk(KERN_ERR "%s: %s: invalid device type (0x%02x)\n",
48 s, drive->name, device_type);
49 else if (removable == 0)
50 printk(KERN_ERR "%s: %s: the removable flag is not set\n",
51 s, drive->name);
52 else if (drive->media == ide_floppy && drq_type == 3)
53 printk(KERN_ERR "%s: %s: sorry, DRQ type (0x%02x) not "
54 "supported\n", s, drive->name, drq_type);
55 else if (packet_size != 0)
56 printk(KERN_ERR "%s: %s: packet size (0x%02x) is not 12 "
57 "bytes\n", s, drive->name, packet_size);
58 else
59 return 1;
60 return 0;
61}
62EXPORT_SYMBOL_GPL(ide_check_atapi_device);
63
64/* PIO data transfer routine using the scatter gather table. */
65int ide_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
66 unsigned int bcount, int write)
67{
68 ide_hwif_t *hwif = drive->hwif;
69 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
70 xfer_func_t *xf = write ? tp_ops->output_data : tp_ops->input_data;
71 struct scatterlist *sg = pc->sg;
72 char *buf;
73 int count, done = 0;
74
75 while (bcount) {
76 count = min(sg->length - pc->b_count, bcount);
77
78 if (PageHighMem(sg_page(sg))) {
79 unsigned long flags;
80
81 local_irq_save(flags);
82 buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
83 xf(drive, NULL, buf + pc->b_count, count);
84 kunmap_atomic(buf - sg->offset, KM_IRQ0);
85 local_irq_restore(flags);
86 } else {
87 buf = sg_virt(sg);
88 xf(drive, NULL, buf + pc->b_count, count);
89 }
90
91 bcount -= count;
92 pc->b_count += count;
93 done += count;
94
95 if (pc->b_count == sg->length) {
96 if (!--pc->sg_cnt)
97 break;
98 pc->sg = sg = sg_next(sg);
99 pc->b_count = 0;
100 }
101 }
102
103 if (bcount) {
104 printk(KERN_ERR "%s: %d leftover bytes, %s\n", drive->name,
105 bcount, write ? "padding with zeros"
106 : "discarding data");
107 ide_pad_transfer(drive, write, bcount);
108 }
109
110 return done;
111}
112EXPORT_SYMBOL_GPL(ide_io_buffers);
113
114void ide_init_pc(struct ide_atapi_pc *pc)
115{
116 memset(pc, 0, sizeof(*pc));
117 pc->buf = pc->pc_buf;
118 pc->buf_size = IDE_PC_BUFFER_SIZE;
119}
120EXPORT_SYMBOL_GPL(ide_init_pc);
121
122/*
123 * Generate a new packet command request in front of the request queue, before
124 * the current request, so that it will be processed immediately, on the next
125 * pass through the driver.
126 */
127void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
128 struct ide_atapi_pc *pc, struct request *rq)
129{
130 blk_rq_init(NULL, rq);
131 rq->cmd_type = REQ_TYPE_SPECIAL;
132 rq->cmd_flags |= REQ_PREEMPT;
133 rq->buffer = (char *)pc;
134 rq->rq_disk = disk;
135 memcpy(rq->cmd, pc->c, 12);
136 if (drive->media == ide_tape)
137 rq->cmd[13] = REQ_IDETAPE_PC1;
138 ide_do_drive_cmd(drive, rq);
139}
140EXPORT_SYMBOL_GPL(ide_queue_pc_head);
141
142/*
143 * Add a special packet command request to the tail of the request queue,
144 * and wait for it to be serviced.
145 */
146int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
147 struct ide_atapi_pc *pc)
148{
149 struct request *rq;
150 int error;
151
152 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
153 rq->cmd_type = REQ_TYPE_SPECIAL;
154 rq->buffer = (char *)pc;
155 memcpy(rq->cmd, pc->c, 12);
156 if (drive->media == ide_tape)
157 rq->cmd[13] = REQ_IDETAPE_PC1;
158 error = blk_execute_rq(drive->queue, disk, rq, 0);
159 blk_put_request(rq);
160
161 return error;
162}
163EXPORT_SYMBOL_GPL(ide_queue_pc_tail);
164
165int ide_do_test_unit_ready(ide_drive_t *drive, struct gendisk *disk)
166{
167 struct ide_atapi_pc pc;
168
169 ide_init_pc(&pc);
170 pc.c[0] = TEST_UNIT_READY;
171
172 return ide_queue_pc_tail(drive, disk, &pc);
173}
174EXPORT_SYMBOL_GPL(ide_do_test_unit_ready);
175
176int ide_do_start_stop(ide_drive_t *drive, struct gendisk *disk, int start)
177{
178 struct ide_atapi_pc pc;
179
180 ide_init_pc(&pc);
181 pc.c[0] = START_STOP;
182 pc.c[4] = start;
183
184 if (drive->media == ide_tape)
185 pc.flags |= PC_FLAG_WAIT_FOR_DSC;
186
187 return ide_queue_pc_tail(drive, disk, &pc);
188}
189EXPORT_SYMBOL_GPL(ide_do_start_stop);
190
191int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on)
192{
193 struct ide_atapi_pc pc;
194
195 if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK)
196 return 0;
197
198 ide_init_pc(&pc);
199 pc.c[0] = ALLOW_MEDIUM_REMOVAL;
200 pc.c[4] = on;
201
202 return ide_queue_pc_tail(drive, disk, &pc);
203}
204EXPORT_SYMBOL_GPL(ide_set_media_lock);
205
17/* TODO: unify the code thus making some arguments go away */ 206/* TODO: unify the code thus making some arguments go away */
18ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, 207ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc,
19 ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, 208 ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry,
20 void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), 209 void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *),
21 void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *), 210 void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *),
22 void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned, int)) 211 int (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned, int))
23{ 212{
24 ide_hwif_t *hwif = drive->hwif; 213 ide_hwif_t *hwif = drive->hwif;
25 struct request *rq = hwif->hwgroup->rq; 214 struct request *rq = hwif->hwgroup->rq;
@@ -41,7 +230,7 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc,
41 230
42 if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) { 231 if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
43 if (hwif->dma_ops->dma_end(drive) || 232 if (hwif->dma_ops->dma_end(drive) ||
44 (drive->media == ide_tape && !scsi && (stat & ERR_STAT))) { 233 (drive->media == ide_tape && !scsi && (stat & ATA_ERR))) {
45 if (drive->media == ide_floppy && !scsi) 234 if (drive->media == ide_floppy && !scsi)
46 printk(KERN_ERR "%s: DMA %s error\n", 235 printk(KERN_ERR "%s: DMA %s error\n",
47 drive->name, rq_data_dir(pc->rq) 236 drive->name, rq_data_dir(pc->rq)
@@ -56,7 +245,7 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc,
56 } 245 }
57 246
58 /* No more interrupts */ 247 /* No more interrupts */
59 if ((stat & DRQ_STAT) == 0) { 248 if ((stat & ATA_DRQ) == 0) {
60 debug_log("Packet command completed, %d bytes transferred\n", 249 debug_log("Packet command completed, %d bytes transferred\n",
61 pc->xferred); 250 pc->xferred);
62 251
@@ -65,10 +254,10 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc,
65 local_irq_enable_in_hardirq(); 254 local_irq_enable_in_hardirq();
66 255
67 if (drive->media == ide_tape && !scsi && 256 if (drive->media == ide_tape && !scsi &&
68 (stat & ERR_STAT) && rq->cmd[0] == REQUEST_SENSE) 257 (stat & ATA_ERR) && rq->cmd[0] == REQUEST_SENSE)
69 stat &= ~ERR_STAT; 258 stat &= ~ATA_ERR;
70 259
71 if ((stat & ERR_STAT) || (pc->flags & PC_FLAG_DMA_ERROR)) { 260 if ((stat & ATA_ERR) || (pc->flags & PC_FLAG_DMA_ERROR)) {
72 /* Error detected */ 261 /* Error detected */
73 debug_log("%s: I/O error\n", drive->name); 262 debug_log("%s: I/O error\n", drive->name);
74 263
@@ -95,7 +284,7 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc,
95cmd_finished: 284cmd_finished:
96 pc->error = 0; 285 pc->error = 0;
97 if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && 286 if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) &&
98 (stat & SEEK_STAT) == 0) { 287 (stat & ATA_DSC) == 0) {
99 dsc_handle(drive); 288 dsc_handle(drive);
100 return ide_stopped; 289 return ide_stopped;
101 } 290 }
@@ -117,17 +306,18 @@ cmd_finished:
117 /* Get the number of bytes to transfer on this interrupt. */ 306 /* Get the number of bytes to transfer on this interrupt. */
118 ide_read_bcount_and_ireason(drive, &bcount, &ireason); 307 ide_read_bcount_and_ireason(drive, &bcount, &ireason);
119 308
120 if (ireason & CD) { 309 if (ireason & ATAPI_COD) {
121 printk(KERN_ERR "%s: CoD != 0 in %s\n", drive->name, __func__); 310 printk(KERN_ERR "%s: CoD != 0 in %s\n", drive->name, __func__);
122 return ide_do_reset(drive); 311 return ide_do_reset(drive);
123 } 312 }
124 313
125 if (((ireason & IO) == IO) == !!(pc->flags & PC_FLAG_WRITING)) { 314 if (((ireason & ATAPI_IO) == ATAPI_IO) ==
315 !!(pc->flags & PC_FLAG_WRITING)) {
126 /* Hopefully, we will never get here */ 316 /* Hopefully, we will never get here */
127 printk(KERN_ERR "%s: We wanted to %s, but the device wants us " 317 printk(KERN_ERR "%s: We wanted to %s, but the device wants us "
128 "to %s!\n", drive->name, 318 "to %s!\n", drive->name,
129 (ireason & IO) ? "Write" : "Read", 319 (ireason & ATAPI_IO) ? "Write" : "Read",
130 (ireason & IO) ? "Read" : "Write"); 320 (ireason & ATAPI_IO) ? "Read" : "Write");
131 return ide_do_reset(drive); 321 return ide_do_reset(drive);
132 } 322 }
133 323
@@ -171,9 +361,14 @@ cmd_finished:
171 361
172 if ((drive->media == ide_floppy && !scsi && !pc->buf) || 362 if ((drive->media == ide_floppy && !scsi && !pc->buf) ||
173 (drive->media == ide_tape && !scsi && pc->bh) || 363 (drive->media == ide_tape && !scsi && pc->bh) ||
174 (scsi && pc->sg)) 364 (scsi && pc->sg)) {
175 io_buffers(drive, pc, bcount, !!(pc->flags & PC_FLAG_WRITING)); 365 int done = io_buffers(drive, pc, bcount,
176 else 366 !!(pc->flags & PC_FLAG_WRITING));
367
368 /* FIXME: don't do partial completions */
369 if (drive->media == ide_floppy && !scsi)
370 ide_end_request(drive, 1, done >> 9);
371 } else
177 xferfunc(drive, NULL, pc->cur_pos, bcount); 372 xferfunc(drive, NULL, pc->cur_pos, bcount);
178 373
179 /* Update the current position */ 374 /* Update the current position */
@@ -205,7 +400,8 @@ static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason)
205{ 400{
206 int retries = 100; 401 int retries = 100;
207 402
208 while (retries-- && ((ireason & CD) == 0 || (ireason & IO))) { 403 while (retries-- && ((ireason & ATAPI_COD) == 0 ||
404 (ireason & ATAPI_IO))) {
209 printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing " 405 printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing "
210 "a packet command, retrying\n", drive->name); 406 "a packet command, retrying\n", drive->name);
211 udelay(100); 407 udelay(100);
@@ -214,8 +410,8 @@ static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason)
214 printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing " 410 printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing "
215 "a packet command, ignoring\n", 411 "a packet command, ignoring\n",
216 drive->name); 412 drive->name);
217 ireason |= CD; 413 ireason |= ATAPI_COD;
218 ireason &= ~IO; 414 ireason &= ~ATAPI_IO;
219 } 415 }
220 } 416 }
221 417
@@ -231,7 +427,7 @@ ide_startstop_t ide_transfer_pc(ide_drive_t *drive, struct ide_atapi_pc *pc,
231 ide_startstop_t startstop; 427 ide_startstop_t startstop;
232 u8 ireason; 428 u8 ireason;
233 429
234 if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) { 430 if (ide_wait_stat(&startstop, drive, ATA_DRQ, ATA_BUSY, WAIT_READY)) {
235 printk(KERN_ERR "%s: Strange, packet command initiated yet " 431 printk(KERN_ERR "%s: Strange, packet command initiated yet "
236 "DRQ isn't asserted\n", drive->name); 432 "DRQ isn't asserted\n", drive->name);
237 return startstop; 433 return startstop;
@@ -241,7 +437,7 @@ ide_startstop_t ide_transfer_pc(ide_drive_t *drive, struct ide_atapi_pc *pc,
241 if (drive->media == ide_tape && !drive->scsi) 437 if (drive->media == ide_tape && !drive->scsi)
242 ireason = ide_wait_ireason(drive, ireason); 438 ireason = ide_wait_ireason(drive, ireason);
243 439
244 if ((ireason & CD) == 0 || (ireason & IO)) { 440 if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) {
245 printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing " 441 printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
246 "a packet command\n", drive->name); 442 "a packet command\n", drive->name);
247 return ide_do_reset(drive); 443 return ide_do_reset(drive);
@@ -303,7 +499,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_atapi_pc *pc,
303 499
304 /* Issue the packet command */ 500 /* Issue the packet command */
305 if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) { 501 if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
306 ide_execute_command(drive, WIN_PACKETCMD, handler, 502 ide_execute_command(drive, ATA_CMD_PACKET, handler,
307 timeout, NULL); 503 timeout, NULL);
308 return ide_started; 504 return ide_started;
309 } else { 505 } else {
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 03c2cb6a58bc..465a92ca0179 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -436,7 +436,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
436 ide_dump_status_no_sense(drive, "media error (blank)", 436 ide_dump_status_no_sense(drive, "media error (blank)",
437 stat); 437 stat);
438 do_end_request = 1; 438 do_end_request = 1;
439 } else if ((err & ~ABRT_ERR) != 0) { 439 } else if ((err & ~ATA_ABORTED) != 0) {
440 /* go to the default handler for other errors */ 440 /* go to the default handler for other errors */
441 ide_error(drive, "cdrom_decode_status", stat); 441 ide_error(drive, "cdrom_decode_status", stat);
442 return 1; 442 return 1;
@@ -457,7 +457,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
457 * If we got a CHECK_CONDITION status, queue 457 * If we got a CHECK_CONDITION status, queue
458 * a request sense command. 458 * a request sense command.
459 */ 459 */
460 if (stat & ERR_STAT) 460 if (stat & ATA_ERR)
461 cdrom_queue_request_sense(drive, NULL, NULL); 461 cdrom_queue_request_sense(drive, NULL, NULL);
462 } else { 462 } else {
463 blk_dump_rq_flags(rq, "ide-cd: bad rq"); 463 blk_dump_rq_flags(rq, "ide-cd: bad rq");
@@ -468,7 +468,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
468 return 1; 468 return 1;
469 469
470end_request: 470end_request:
471 if (stat & ERR_STAT) { 471 if (stat & ATA_ERR) {
472 unsigned long flags; 472 unsigned long flags;
473 473
474 spin_lock_irqsave(&ide_lock, flags); 474 spin_lock_irqsave(&ide_lock, flags);
@@ -541,7 +541,7 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
541 drive->waiting_for_dma = 0; 541 drive->waiting_for_dma = 0;
542 542
543 /* packet command */ 543 /* packet command */
544 ide_execute_command(drive, WIN_PACKETCMD, handler, 544 ide_execute_command(drive, ATA_CMD_PACKET, handler,
545 ATAPI_WAIT_PC, cdrom_timer_expiry); 545 ATAPI_WAIT_PC, cdrom_timer_expiry);
546 return ide_started; 546 return ide_started;
547 } else { 547 } else {
@@ -574,7 +574,7 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive,
574 */ 574 */
575 575
576 /* check for errors */ 576 /* check for errors */
577 if (cdrom_decode_status(drive, DRQ_STAT, NULL)) 577 if (cdrom_decode_status(drive, ATA_DRQ, NULL))
578 return ide_stopped; 578 return ide_stopped;
579 579
580 /* ok, next interrupt will be DMA interrupt */ 580 /* ok, next interrupt will be DMA interrupt */
@@ -582,8 +582,8 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive,
582 drive->waiting_for_dma = 1; 582 drive->waiting_for_dma = 1;
583 } else { 583 } else {
584 /* otherwise, we must wait for DRQ to get set */ 584 /* otherwise, we must wait for DRQ to get set */
585 if (ide_wait_stat(&startstop, drive, DRQ_STAT, 585 if (ide_wait_stat(&startstop, drive, ATA_DRQ,
586 BUSY_STAT, WAIT_READY)) 586 ATA_BUSY, WAIT_READY))
587 return startstop; 587 return startstop;
588 } 588 }
589 589
@@ -938,7 +938,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
938 thislen = len; 938 thislen = len;
939 939
940 /* If DRQ is clear, the command has completed. */ 940 /* If DRQ is clear, the command has completed. */
941 if ((stat & DRQ_STAT) == 0) { 941 if ((stat & ATA_DRQ) == 0) {
942 if (blk_fs_request(rq)) { 942 if (blk_fs_request(rq)) {
943 /* 943 /*
944 * If we're not done reading/writing, complain. 944 * If we're not done reading/writing, complain.
@@ -1164,13 +1164,12 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
1164 if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { 1164 if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) {
1165 struct request_queue *q = drive->queue; 1165 struct request_queue *q = drive->queue;
1166 unsigned int alignment; 1166 unsigned int alignment;
1167 unsigned long addr; 1167 char *buf;
1168 unsigned long stack_mask = ~(THREAD_SIZE - 1);
1169 1168
1170 if (rq->bio) 1169 if (rq->bio)
1171 addr = (unsigned long)bio_data(rq->bio); 1170 buf = bio_data(rq->bio);
1172 else 1171 else
1173 addr = (unsigned long)rq->data; 1172 buf = rq->data;
1174 1173
1175 info->dma = drive->using_dma; 1174 info->dma = drive->using_dma;
1176 1175
@@ -1181,11 +1180,8 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
1181 * separate masks. 1180 * separate masks.
1182 */ 1181 */
1183 alignment = queue_dma_alignment(q) | q->dma_pad_mask; 1182 alignment = queue_dma_alignment(q) | q->dma_pad_mask;
1184 if (addr & alignment || rq->data_len & alignment) 1183 if ((unsigned long)buf & alignment || rq->data_len & alignment
1185 info->dma = 0; 1184 || object_is_on_stack(buf))
1186
1187 if (!((addr & stack_mask) ^
1188 ((unsigned long)current->stack & stack_mask)))
1189 info->dma = 0; 1185 info->dma = 0;
1190 } 1186 }
1191} 1187}
@@ -1206,7 +1202,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
1206 unsigned long elapsed = jiffies - info->start_seek; 1202 unsigned long elapsed = jiffies - info->start_seek;
1207 int stat = hwif->tp_ops->read_status(hwif); 1203 int stat = hwif->tp_ops->read_status(hwif);
1208 1204
1209 if ((stat & SEEK_STAT) != SEEK_STAT) { 1205 if ((stat & ATA_DSC) != ATA_DSC) {
1210 if (elapsed < IDECD_SEEK_TIMEOUT) { 1206 if (elapsed < IDECD_SEEK_TIMEOUT) {
1211 ide_stall_queue(drive, 1207 ide_stall_queue(drive,
1212 IDECD_SEEK_TIMER); 1208 IDECD_SEEK_TIMER);
@@ -1813,13 +1809,12 @@ static ide_proc_entry_t idecd_proc[] = {
1813 { NULL, 0, NULL, NULL } 1809 { NULL, 0, NULL, NULL }
1814}; 1810};
1815 1811
1816static void ide_cdrom_add_settings(ide_drive_t *drive) 1812ide_devset_rw_field(dsc_overlap, dsc_overlap);
1817{ 1813
1818 ide_add_setting(drive, "dsc_overlap", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, 1814static const struct ide_proc_devset idecd_settings[] = {
1819 &drive->dsc_overlap, NULL); 1815 IDE_PROC_DEVSET(dsc_overlap, 0, 1),
1820} 1816 { 0 },
1821#else 1817};
1822static inline void ide_cdrom_add_settings(ide_drive_t *drive) { ; }
1823#endif 1818#endif
1824 1819
1825static const struct cd_list_entry ide_cd_quirks_list[] = { 1820static const struct cd_list_entry ide_cd_quirks_list[] = {
@@ -1866,14 +1861,14 @@ static const struct cd_list_entry ide_cd_quirks_list[] = {
1866 { NULL, NULL, 0 } 1861 { NULL, NULL, 0 }
1867}; 1862};
1868 1863
1869static unsigned int ide_cd_flags(struct hd_driveid *id) 1864static unsigned int ide_cd_flags(u16 *id)
1870{ 1865{
1871 const struct cd_list_entry *cle = ide_cd_quirks_list; 1866 const struct cd_list_entry *cle = ide_cd_quirks_list;
1872 1867
1873 while (cle->id_model) { 1868 while (cle->id_model) {
1874 if (strcmp(cle->id_model, id->model) == 0 && 1869 if (strcmp(cle->id_model, (char *)&id[ATA_ID_PROD]) == 0 &&
1875 (cle->id_firmware == NULL || 1870 (cle->id_firmware == NULL ||
1876 strstr(id->fw_rev, cle->id_firmware))) 1871 strstr((char *)&id[ATA_ID_FW_REV], cle->id_firmware)))
1877 return cle->cd_flags; 1872 return cle->cd_flags;
1878 cle++; 1873 cle++;
1879 } 1874 }
@@ -1885,7 +1880,8 @@ static int ide_cdrom_setup(ide_drive_t *drive)
1885{ 1880{
1886 struct cdrom_info *cd = drive->driver_data; 1881 struct cdrom_info *cd = drive->driver_data;
1887 struct cdrom_device_info *cdi = &cd->devinfo; 1882 struct cdrom_device_info *cdi = &cd->devinfo;
1888 struct hd_driveid *id = drive->id; 1883 u16 *id = drive->id;
1884 char *fw_rev = (char *)&id[ATA_ID_FW_REV];
1889 int nslots; 1885 int nslots;
1890 1886
1891 blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn); 1887 blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn);
@@ -1900,15 +1896,15 @@ static int ide_cdrom_setup(ide_drive_t *drive)
1900 drive->atapi_flags = IDE_AFLAG_MEDIA_CHANGED | IDE_AFLAG_NO_EJECT | 1896 drive->atapi_flags = IDE_AFLAG_MEDIA_CHANGED | IDE_AFLAG_NO_EJECT |
1901 ide_cd_flags(id); 1897 ide_cd_flags(id);
1902 1898
1903 if ((id->config & 0x0060) == 0x20) 1899 if ((id[ATA_ID_CONFIG] & 0x0060) == 0x20)
1904 drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT; 1900 drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
1905 1901
1906 if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) && 1902 if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) &&
1907 id->fw_rev[4] == '1' && id->fw_rev[6] <= '2') 1903 fw_rev[4] == '1' && fw_rev[6] <= '2')
1908 drive->atapi_flags |= (IDE_AFLAG_TOCTRACKS_AS_BCD | 1904 drive->atapi_flags |= (IDE_AFLAG_TOCTRACKS_AS_BCD |
1909 IDE_AFLAG_TOCADDR_AS_BCD); 1905 IDE_AFLAG_TOCADDR_AS_BCD);
1910 else if ((drive->atapi_flags & IDE_AFLAG_VERTOS_600_ESD) && 1906 else if ((drive->atapi_flags & IDE_AFLAG_VERTOS_600_ESD) &&
1911 id->fw_rev[4] == '1' && id->fw_rev[6] <= '2') 1907 fw_rev[4] == '1' && fw_rev[6] <= '2')
1912 drive->atapi_flags |= IDE_AFLAG_TOCTRACKS_AS_BCD; 1908 drive->atapi_flags |= IDE_AFLAG_TOCTRACKS_AS_BCD;
1913 else if (drive->atapi_flags & IDE_AFLAG_SANYO_3CD) 1909 else if (drive->atapi_flags & IDE_AFLAG_SANYO_3CD)
1914 /* 3 => use CD in slot 0 */ 1910 /* 3 => use CD in slot 0 */
@@ -1927,7 +1923,8 @@ static int ide_cdrom_setup(ide_drive_t *drive)
1927 cd->devinfo.handle = NULL; 1923 cd->devinfo.handle = NULL;
1928 return 1; 1924 return 1;
1929 } 1925 }
1930 ide_cdrom_add_settings(drive); 1926
1927 ide_proc_register_driver(drive, cd->driver);
1931 return 0; 1928 return 0;
1932} 1929}
1933 1930
@@ -1972,12 +1969,12 @@ static ide_driver_t ide_cdrom_driver = {
1972 .remove = ide_cd_remove, 1969 .remove = ide_cd_remove,
1973 .version = IDECD_VERSION, 1970 .version = IDECD_VERSION,
1974 .media = ide_cdrom, 1971 .media = ide_cdrom,
1975 .supports_dsc_overlap = 1,
1976 .do_request = ide_cd_do_request, 1972 .do_request = ide_cd_do_request,
1977 .end_request = ide_end_request, 1973 .end_request = ide_end_request,
1978 .error = __ide_error, 1974 .error = __ide_error,
1979#ifdef CONFIG_IDE_PROC_FS 1975#ifdef CONFIG_IDE_PROC_FS
1980 .proc = idecd_proc, 1976 .proc = idecd_proc,
1977 .settings = idecd_settings,
1981#endif 1978#endif
1982}; 1979};
1983 1980
@@ -2112,10 +2109,10 @@ static int ide_cd_probe(ide_drive_t *drive)
2112 2109
2113 if (!strstr("ide-cdrom", drive->driver_req)) 2110 if (!strstr("ide-cdrom", drive->driver_req))
2114 goto failed; 2111 goto failed;
2115 if (!drive->present) 2112
2116 goto failed;
2117 if (drive->media != ide_cdrom && drive->media != ide_optical) 2113 if (drive->media != ide_cdrom && drive->media != ide_optical)
2118 goto failed; 2114 goto failed;
2115
2119 /* skip drives that we were told to ignore */ 2116 /* skip drives that we were told to ignore */
2120 if (ignore != NULL) { 2117 if (ignore != NULL) {
2121 if (strstr(ignore, drive->name)) { 2118 if (strstr(ignore, drive->name)) {
@@ -2137,8 +2134,6 @@ static int ide_cd_probe(ide_drive_t *drive)
2137 2134
2138 ide_init_disk(g, drive); 2135 ide_init_disk(g, drive);
2139 2136
2140 ide_proc_register_driver(drive, &ide_cdrom_driver);
2141
2142 kref_init(&info->kref); 2137 kref_init(&info->kref);
2143 2138
2144 info->drive = drive; 2139 info->drive = drive;
@@ -2153,7 +2148,6 @@ static int ide_cd_probe(ide_drive_t *drive)
2153 g->driverfs_dev = &drive->gendev; 2148 g->driverfs_dev = &drive->gendev;
2154 g->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE; 2149 g->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE;
2155 if (ide_cdrom_setup(drive)) { 2150 if (ide_cdrom_setup(drive)) {
2156 ide_proc_unregister_driver(drive, &ide_cdrom_driver);
2157 ide_cd_release(&info->kref); 2151 ide_cd_release(&info->kref);
2158 goto failed; 2152 goto failed;
2159 } 2153 }
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 33ea8c048717..01846f244b40 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -30,10 +30,8 @@
30#include <linux/delay.h> 30#include <linux/delay.h>
31#include <linux/mutex.h> 31#include <linux/mutex.h>
32#include <linux/leds.h> 32#include <linux/leds.h>
33
34#define _IDE_DISK
35
36#include <linux/ide.h> 33#include <linux/ide.h>
34#include <linux/hdreg.h>
37 35
38#include <asm/byteorder.h> 36#include <asm/byteorder.h>
39#include <asm/irq.h> 37#include <asm/irq.h>
@@ -90,68 +88,19 @@ static void ide_disk_put(struct ide_disk_obj *idkp)
90 mutex_unlock(&idedisk_ref_mutex); 88 mutex_unlock(&idedisk_ref_mutex);
91} 89}
92 90
93/*
94 * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity"
95 * value for this drive (from its reported identification information).
96 *
97 * Returns: 1 if lba_capacity looks sensible
98 * 0 otherwise
99 *
100 * It is called only once for each drive.
101 */
102static int lba_capacity_is_ok(struct hd_driveid *id)
103{
104 unsigned long lba_sects, chs_sects, head, tail;
105
106 /* No non-LBA info .. so valid! */
107 if (id->cyls == 0)
108 return 1;
109
110 /*
111 * The ATA spec tells large drives to return
112 * C/H/S = 16383/16/63 independent of their size.
113 * Some drives can be jumpered to use 15 heads instead of 16.
114 * Some drives can be jumpered to use 4092 cyls instead of 16383.
115 */
116 if ((id->cyls == 16383
117 || (id->cyls == 4092 && id->cur_cyls == 16383)) &&
118 id->sectors == 63 &&
119 (id->heads == 15 || id->heads == 16) &&
120 (id->lba_capacity >= 16383*63*id->heads))
121 return 1;
122
123 lba_sects = id->lba_capacity;
124 chs_sects = id->cyls * id->heads * id->sectors;
125
126 /* perform a rough sanity check on lba_sects: within 10% is OK */
127 if ((lba_sects - chs_sects) < chs_sects/10)
128 return 1;
129
130 /* some drives have the word order reversed */
131 head = ((lba_sects >> 16) & 0xffff);
132 tail = (lba_sects & 0xffff);
133 lba_sects = (head | (tail << 16));
134 if ((lba_sects - chs_sects) < chs_sects/10) {
135 id->lba_capacity = lba_sects;
136 return 1; /* lba_capacity is (now) good */
137 }
138
139 return 0; /* lba_capacity value may be bad */
140}
141
142static const u8 ide_rw_cmds[] = { 91static const u8 ide_rw_cmds[] = {
143 WIN_MULTREAD, 92 ATA_CMD_READ_MULTI,
144 WIN_MULTWRITE, 93 ATA_CMD_WRITE_MULTI,
145 WIN_MULTREAD_EXT, 94 ATA_CMD_READ_MULTI_EXT,
146 WIN_MULTWRITE_EXT, 95 ATA_CMD_WRITE_MULTI_EXT,
147 WIN_READ, 96 ATA_CMD_PIO_READ,
148 WIN_WRITE, 97 ATA_CMD_PIO_WRITE,
149 WIN_READ_EXT, 98 ATA_CMD_PIO_READ_EXT,
150 WIN_WRITE_EXT, 99 ATA_CMD_PIO_WRITE_EXT,
151 WIN_READDMA, 100 ATA_CMD_READ,
152 WIN_WRITEDMA, 101 ATA_CMD_WRITE,
153 WIN_READDMA_EXT, 102 ATA_CMD_READ_EXT,
154 WIN_WRITEDMA_EXT, 103 ATA_CMD_WRITE_EXT,
155}; 104};
156 105
157static const u8 ide_data_phases[] = { 106static const u8 ide_data_phases[] = {
@@ -322,9 +271,9 @@ static u64 idedisk_read_native_max_address(ide_drive_t *drive, int lba48)
322 /* Create IDE/ATA command request structure */ 271 /* Create IDE/ATA command request structure */
323 memset(&args, 0, sizeof(ide_task_t)); 272 memset(&args, 0, sizeof(ide_task_t));
324 if (lba48) 273 if (lba48)
325 tf->command = WIN_READ_NATIVE_MAX_EXT; 274 tf->command = ATA_CMD_READ_NATIVE_MAX_EXT;
326 else 275 else
327 tf->command = WIN_READ_NATIVE_MAX; 276 tf->command = ATA_CMD_READ_NATIVE_MAX;
328 tf->device = ATA_LBA; 277 tf->device = ATA_LBA;
329 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; 278 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
330 if (lba48) 279 if (lba48)
@@ -359,10 +308,10 @@ static u64 idedisk_set_max_address(ide_drive_t *drive, u64 addr_req, int lba48)
359 tf->hob_lbal = (addr_req >>= 8) & 0xff; 308 tf->hob_lbal = (addr_req >>= 8) & 0xff;
360 tf->hob_lbam = (addr_req >>= 8) & 0xff; 309 tf->hob_lbam = (addr_req >>= 8) & 0xff;
361 tf->hob_lbah = (addr_req >>= 8) & 0xff; 310 tf->hob_lbah = (addr_req >>= 8) & 0xff;
362 tf->command = WIN_SET_MAX_EXT; 311 tf->command = ATA_CMD_SET_MAX_EXT;
363 } else { 312 } else {
364 tf->device = (addr_req >>= 8) & 0x0f; 313 tf->device = (addr_req >>= 8) & 0x0f;
365 tf->command = WIN_SET_MAX; 314 tf->command = ATA_CMD_SET_MAX;
366 } 315 }
367 tf->device |= ATA_LBA; 316 tf->device |= ATA_LBA;
368 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; 317 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
@@ -385,25 +334,6 @@ static unsigned long long sectors_to_MB(unsigned long long n)
385} 334}
386 335
387/* 336/*
388 * Bits 10 of command_set_1 and cfs_enable_1 must be equal,
389 * so on non-buggy drives we need test only one.
390 * However, we should also check whether these fields are valid.
391 */
392static inline int idedisk_supports_hpa(const struct hd_driveid *id)
393{
394 return (id->command_set_1 & 0x0400) && (id->cfs_enable_1 & 0x0400);
395}
396
397/*
398 * The same here.
399 */
400static inline int idedisk_supports_lba48(const struct hd_driveid *id)
401{
402 return (id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)
403 && id->lba_capacity_2;
404}
405
406/*
407 * Some disks report total number of sectors instead of 337 * Some disks report total number of sectors instead of
408 * maximum sector address. We list them here. 338 * maximum sector address. We list them here.
409 */ 339 */
@@ -417,7 +347,7 @@ static const struct drive_list_entry hpa_list[] = {
417static void idedisk_check_hpa(ide_drive_t *drive) 347static void idedisk_check_hpa(ide_drive_t *drive)
418{ 348{
419 unsigned long long capacity, set_max; 349 unsigned long long capacity, set_max;
420 int lba48 = idedisk_supports_lba48(drive->id); 350 int lba48 = ata_id_lba48_enabled(drive->id);
421 351
422 capacity = drive->capacity64; 352 capacity = drive->capacity64;
423 353
@@ -453,23 +383,23 @@ static void idedisk_check_hpa(ide_drive_t *drive)
453 383
454static void init_idedisk_capacity(ide_drive_t *drive) 384static void init_idedisk_capacity(ide_drive_t *drive)
455{ 385{
456 struct hd_driveid *id = drive->id; 386 u16 *id = drive->id;
457 /* 387 /*
458 * If this drive supports the Host Protected Area feature set, 388 * If this drive supports the Host Protected Area feature set,
459 * then we may need to change our opinion about the drive's capacity. 389 * then we may need to change our opinion about the drive's capacity.
460 */ 390 */
461 int hpa = idedisk_supports_hpa(id); 391 int hpa = ata_id_hpa_enabled(id);
462 392
463 if (idedisk_supports_lba48(id)) { 393 if (ata_id_lba48_enabled(id)) {
464 /* drive speaks 48-bit LBA */ 394 /* drive speaks 48-bit LBA */
465 drive->select.b.lba = 1; 395 drive->select.b.lba = 1;
466 drive->capacity64 = id->lba_capacity_2; 396 drive->capacity64 = ata_id_u64(id, ATA_ID_LBA_CAPACITY_2);
467 if (hpa) 397 if (hpa)
468 idedisk_check_hpa(drive); 398 idedisk_check_hpa(drive);
469 } else if ((id->capability & 2) && lba_capacity_is_ok(id)) { 399 } else if (ata_id_has_lba(id) && ata_id_is_lba_capacity_ok(id)) {
470 /* drive speaks 28-bit LBA */ 400 /* drive speaks 28-bit LBA */
471 drive->select.b.lba = 1; 401 drive->select.b.lba = 1;
472 drive->capacity64 = id->lba_capacity; 402 drive->capacity64 = ata_id_u32(id, ATA_ID_LBA_CAPACITY);
473 if (hpa) 403 if (hpa)
474 idedisk_check_hpa(drive); 404 idedisk_check_hpa(drive);
475 } else { 405 } else {
@@ -480,7 +410,7 @@ static void init_idedisk_capacity(ide_drive_t *drive)
480 410
481static sector_t idedisk_capacity(ide_drive_t *drive) 411static sector_t idedisk_capacity(ide_drive_t *drive)
482{ 412{
483 return drive->capacity64 - drive->sect0; 413 return drive->capacity64;
484} 414}
485 415
486#ifdef CONFIG_IDE_PROC_FS 416#ifdef CONFIG_IDE_PROC_FS
@@ -490,10 +420,10 @@ static int smart_enable(ide_drive_t *drive)
490 struct ide_taskfile *tf = &args.tf; 420 struct ide_taskfile *tf = &args.tf;
491 421
492 memset(&args, 0, sizeof(ide_task_t)); 422 memset(&args, 0, sizeof(ide_task_t));
493 tf->feature = SMART_ENABLE; 423 tf->feature = ATA_SMART_ENABLE;
494 tf->lbam = SMART_LCYL_PASS; 424 tf->lbam = ATA_SMART_LBAM_PASS;
495 tf->lbah = SMART_HCYL_PASS; 425 tf->lbah = ATA_SMART_LBAH_PASS;
496 tf->command = WIN_SMART; 426 tf->command = ATA_CMD_SMART;
497 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; 427 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
498 return ide_no_data_taskfile(drive, &args); 428 return ide_no_data_taskfile(drive, &args);
499} 429}
@@ -506,9 +436,9 @@ static int get_smart_data(ide_drive_t *drive, u8 *buf, u8 sub_cmd)
506 memset(&args, 0, sizeof(ide_task_t)); 436 memset(&args, 0, sizeof(ide_task_t));
507 tf->feature = sub_cmd; 437 tf->feature = sub_cmd;
508 tf->nsect = 0x01; 438 tf->nsect = 0x01;
509 tf->lbam = SMART_LCYL_PASS; 439 tf->lbam = ATA_SMART_LBAM_PASS;
510 tf->lbah = SMART_HCYL_PASS; 440 tf->lbah = ATA_SMART_LBAH_PASS;
511 tf->command = WIN_SMART; 441 tf->command = ATA_CMD_SMART;
512 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; 442 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
513 args.data_phase = TASKFILE_IN; 443 args.data_phase = TASKFILE_IN;
514 (void) smart_enable(drive); 444 (void) smart_enable(drive);
@@ -523,7 +453,7 @@ static int proc_idedisk_read_cache
523 int len; 453 int len;
524 454
525 if (drive->id_read) 455 if (drive->id_read)
526 len = sprintf(out, "%i\n", drive->id->buf_size / 2); 456 len = sprintf(out, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2);
527 else 457 else
528 len = sprintf(out, "(none)\n"); 458 len = sprintf(out, "(none)\n");
529 459
@@ -549,13 +479,14 @@ static int proc_idedisk_read_smart(char *page, char **start, off_t off,
549 479
550 if (get_smart_data(drive, page, sub_cmd) == 0) { 480 if (get_smart_data(drive, page, sub_cmd) == 0) {
551 unsigned short *val = (unsigned short *) page; 481 unsigned short *val = (unsigned short *) page;
552 char *out = ((char *)val) + (SECTOR_WORDS * 4); 482 char *out = (char *)val + SECTOR_SIZE;
483
553 page = out; 484 page = out;
554 do { 485 do {
555 out += sprintf(out, "%04x%c", le16_to_cpu(*val), 486 out += sprintf(out, "%04x%c", le16_to_cpu(*val),
556 (++i & 7) ? ' ' : '\n'); 487 (++i & 7) ? ' ' : '\n');
557 val += 1; 488 val += 1;
558 } while (i < (SECTOR_WORDS * 2)); 489 } while (i < SECTOR_SIZE / 2);
559 len = out - page; 490 len = out - page;
560 } 491 }
561 492
@@ -566,14 +497,14 @@ static int proc_idedisk_read_sv
566 (char *page, char **start, off_t off, int count, int *eof, void *data) 497 (char *page, char **start, off_t off, int count, int *eof, void *data)
567{ 498{
568 return proc_idedisk_read_smart(page, start, off, count, eof, data, 499 return proc_idedisk_read_smart(page, start, off, count, eof, data,
569 SMART_READ_VALUES); 500 ATA_SMART_READ_VALUES);
570} 501}
571 502
572static int proc_idedisk_read_st 503static int proc_idedisk_read_st
573 (char *page, char **start, off_t off, int count, int *eof, void *data) 504 (char *page, char **start, off_t off, int count, int *eof, void *data)
574{ 505{
575 return proc_idedisk_read_smart(page, start, off, count, eof, data, 506 return proc_idedisk_read_smart(page, start, off, count, eof, data,
576 SMART_READ_THRESHOLDS); 507 ATA_SMART_READ_THRESHOLDS);
577} 508}
578 509
579static ide_proc_entry_t idedisk_proc[] = { 510static ide_proc_entry_t idedisk_proc[] = {
@@ -595,11 +526,11 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
595 BUG_ON(task == NULL); 526 BUG_ON(task == NULL);
596 527
597 memset(task, 0, sizeof(*task)); 528 memset(task, 0, sizeof(*task));
598 if (ide_id_has_flush_cache_ext(drive->id) && 529 if (ata_id_flush_ext_enabled(drive->id) &&
599 (drive->capacity64 >= (1UL << 28))) 530 (drive->capacity64 >= (1UL << 28)))
600 task->tf.command = WIN_FLUSH_CACHE_EXT; 531 task->tf.command = ATA_CMD_FLUSH_EXT;
601 else 532 else
602 task->tf.command = WIN_FLUSH_CACHE; 533 task->tf.command = ATA_CMD_FLUSH;
603 task->tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE | 534 task->tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE |
604 IDE_TFLAG_DYN; 535 IDE_TFLAG_DYN;
605 task->data_phase = TASKFILE_NO_DATA; 536 task->data_phase = TASKFILE_NO_DATA;
@@ -609,6 +540,8 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
609 rq->special = task; 540 rq->special = task;
610} 541}
611 542
543ide_devset_get(multcount, mult_count);
544
612/* 545/*
613 * This is tightly woven into the driver->do_special can not touch. 546 * This is tightly woven into the driver->do_special can not touch.
614 * DON'T do it again until a total personality rewrite is committed. 547 * DON'T do it again until a total personality rewrite is committed.
@@ -618,7 +551,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
618 struct request *rq; 551 struct request *rq;
619 int error; 552 int error;
620 553
621 if (arg < 0 || arg > drive->id->max_multsect) 554 if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
622 return -EINVAL; 555 return -EINVAL;
623 556
624 if (drive->special.b.set_multmode) 557 if (drive->special.b.set_multmode)
@@ -635,22 +568,21 @@ static int set_multcount(ide_drive_t *drive, int arg)
635 return (drive->mult_count == arg) ? 0 : -EIO; 568 return (drive->mult_count == arg) ? 0 : -EIO;
636} 569}
637 570
571ide_devset_get(nowerr, nowerr);
572
638static int set_nowerr(ide_drive_t *drive, int arg) 573static int set_nowerr(ide_drive_t *drive, int arg)
639{ 574{
640 if (arg < 0 || arg > 1) 575 if (arg < 0 || arg > 1)
641 return -EINVAL; 576 return -EINVAL;
642 577
643 if (ide_spin_wait_hwgroup(drive))
644 return -EBUSY;
645 drive->nowerr = arg; 578 drive->nowerr = arg;
646 drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT; 579 drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT;
647 spin_unlock_irq(&ide_lock);
648 return 0; 580 return 0;
649} 581}
650 582
651static void update_ordered(ide_drive_t *drive) 583static void update_ordered(ide_drive_t *drive)
652{ 584{
653 struct hd_driveid *id = drive->id; 585 u16 *id = drive->id;
654 unsigned ordered = QUEUE_ORDERED_NONE; 586 unsigned ordered = QUEUE_ORDERED_NONE;
655 prepare_flush_fn *prep_fn = NULL; 587 prepare_flush_fn *prep_fn = NULL;
656 588
@@ -666,9 +598,9 @@ static void update_ordered(ide_drive_t *drive)
666 * not available so we don't need to recheck that. 598 * not available so we don't need to recheck that.
667 */ 599 */
668 capacity = idedisk_capacity(drive); 600 capacity = idedisk_capacity(drive);
669 barrier = ide_id_has_flush_cache(id) && !drive->noflush && 601 barrier = ata_id_flush_enabled(id) && !drive->noflush &&
670 (drive->addressing == 0 || capacity <= (1ULL << 28) || 602 (drive->addressing == 0 || capacity <= (1ULL << 28) ||
671 ide_id_has_flush_cache_ext(id)); 603 ata_id_flush_ext_enabled(id));
672 604
673 printk(KERN_INFO "%s: cache flushes %ssupported\n", 605 printk(KERN_INFO "%s: cache flushes %ssupported\n",
674 drive->name, barrier ? "" : "not "); 606 drive->name, barrier ? "" : "not ");
@@ -683,7 +615,9 @@ static void update_ordered(ide_drive_t *drive)
683 blk_queue_ordered(drive->queue, ordered, prep_fn); 615 blk_queue_ordered(drive->queue, ordered, prep_fn);
684} 616}
685 617
686static int write_cache(ide_drive_t *drive, int arg) 618ide_devset_get(wcache, wcache);
619
620static int set_wcache(ide_drive_t *drive, int arg)
687{ 621{
688 ide_task_t args; 622 ide_task_t args;
689 int err = 1; 623 int err = 1;
@@ -691,11 +625,11 @@ static int write_cache(ide_drive_t *drive, int arg)
691 if (arg < 0 || arg > 1) 625 if (arg < 0 || arg > 1)
692 return -EINVAL; 626 return -EINVAL;
693 627
694 if (ide_id_has_flush_cache(drive->id)) { 628 if (ata_id_flush_enabled(drive->id)) {
695 memset(&args, 0, sizeof(ide_task_t)); 629 memset(&args, 0, sizeof(ide_task_t));
696 args.tf.feature = arg ? 630 args.tf.feature = arg ?
697 SETFEATURES_EN_WCACHE : SETFEATURES_DIS_WCACHE; 631 SETFEATURES_WC_ON : SETFEATURES_WC_OFF;
698 args.tf.command = WIN_SETFEATURES; 632 args.tf.command = ATA_CMD_SET_FEATURES;
699 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; 633 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
700 err = ide_no_data_taskfile(drive, &args); 634 err = ide_no_data_taskfile(drive, &args);
701 if (err == 0) 635 if (err == 0)
@@ -712,14 +646,16 @@ static int do_idedisk_flushcache(ide_drive_t *drive)
712 ide_task_t args; 646 ide_task_t args;
713 647
714 memset(&args, 0, sizeof(ide_task_t)); 648 memset(&args, 0, sizeof(ide_task_t));
715 if (ide_id_has_flush_cache_ext(drive->id)) 649 if (ata_id_flush_ext_enabled(drive->id))
716 args.tf.command = WIN_FLUSH_CACHE_EXT; 650 args.tf.command = ATA_CMD_FLUSH_EXT;
717 else 651 else
718 args.tf.command = WIN_FLUSH_CACHE; 652 args.tf.command = ATA_CMD_FLUSH;
719 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; 653 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
720 return ide_no_data_taskfile(drive, &args); 654 return ide_no_data_taskfile(drive, &args);
721} 655}
722 656
657ide_devset_get(acoustic, acoustic);
658
723static int set_acoustic(ide_drive_t *drive, int arg) 659static int set_acoustic(ide_drive_t *drive, int arg)
724{ 660{
725 ide_task_t args; 661 ide_task_t args;
@@ -728,22 +664,24 @@ static int set_acoustic(ide_drive_t *drive, int arg)
728 return -EINVAL; 664 return -EINVAL;
729 665
730 memset(&args, 0, sizeof(ide_task_t)); 666 memset(&args, 0, sizeof(ide_task_t));
731 args.tf.feature = arg ? SETFEATURES_EN_AAM : SETFEATURES_DIS_AAM; 667 args.tf.feature = arg ? SETFEATURES_AAM_ON : SETFEATURES_AAM_OFF;
732 args.tf.nsect = arg; 668 args.tf.nsect = arg;
733 args.tf.command = WIN_SETFEATURES; 669 args.tf.command = ATA_CMD_SET_FEATURES;
734 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; 670 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
735 ide_no_data_taskfile(drive, &args); 671 ide_no_data_taskfile(drive, &args);
736 drive->acoustic = arg; 672 drive->acoustic = arg;
737 return 0; 673 return 0;
738} 674}
739 675
676ide_devset_get(addressing, addressing);
677
740/* 678/*
741 * drive->addressing: 679 * drive->addressing:
742 * 0: 28-bit 680 * 0: 28-bit
743 * 1: 48-bit 681 * 1: 48-bit
744 * 2: 48-bit capable doing 28-bit 682 * 2: 48-bit capable doing 28-bit
745 */ 683 */
746static int set_lba_addressing(ide_drive_t *drive, int arg) 684static int set_addressing(ide_drive_t *drive, int arg)
747{ 685{
748 if (arg < 0 || arg > 2) 686 if (arg < 0 || arg > 2)
749 return -EINVAL; 687 return -EINVAL;
@@ -753,52 +691,54 @@ static int set_lba_addressing(ide_drive_t *drive, int arg)
753 if (drive->hwif->host_flags & IDE_HFLAG_NO_LBA48) 691 if (drive->hwif->host_flags & IDE_HFLAG_NO_LBA48)
754 return 0; 692 return 0;
755 693
756 if (!idedisk_supports_lba48(drive->id)) 694 if (ata_id_lba48_enabled(drive->id) == 0)
757 return -EIO; 695 return -EIO;
696
758 drive->addressing = arg; 697 drive->addressing = arg;
698
759 return 0; 699 return 0;
760} 700}
761 701
702ide_devset_rw(acoustic, acoustic);
703ide_devset_rw(address, addressing);
704ide_devset_rw(multcount, multcount);
705ide_devset_rw(wcache, wcache);
706
707ide_devset_rw_sync(nowerr, nowerr);
708
762#ifdef CONFIG_IDE_PROC_FS 709#ifdef CONFIG_IDE_PROC_FS
763static void idedisk_add_settings(ide_drive_t *drive) 710ide_devset_rw_field(bios_cyl, bios_cyl);
764{ 711ide_devset_rw_field(bios_head, bios_head);
765 struct hd_driveid *id = drive->id; 712ide_devset_rw_field(bios_sect, bios_sect);
766 713ide_devset_rw_field(failures, failures);
767 ide_add_setting(drive, "bios_cyl", SETTING_RW, TYPE_INT, 0, 65535, 1, 1, 714ide_devset_rw_field(lun, lun);
768 &drive->bios_cyl, NULL); 715ide_devset_rw_field(max_failures, max_failures);
769 ide_add_setting(drive, "bios_head", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, 716
770 &drive->bios_head, NULL); 717static const struct ide_proc_devset idedisk_settings[] = {
771 ide_add_setting(drive, "bios_sect", SETTING_RW, TYPE_BYTE, 0, 63, 1, 1, 718 IDE_PROC_DEVSET(acoustic, 0, 254),
772 &drive->bios_sect, NULL); 719 IDE_PROC_DEVSET(address, 0, 2),
773 ide_add_setting(drive, "address", SETTING_RW, TYPE_BYTE, 0, 2, 1, 1, 720 IDE_PROC_DEVSET(bios_cyl, 0, 65535),
774 &drive->addressing, set_lba_addressing); 721 IDE_PROC_DEVSET(bios_head, 0, 255),
775 ide_add_setting(drive, "multcount", SETTING_RW, TYPE_BYTE, 0, 722 IDE_PROC_DEVSET(bios_sect, 0, 63),
776 id->max_multsect, 1, 1, &drive->mult_count, 723 IDE_PROC_DEVSET(failures, 0, 65535),
777 set_multcount); 724 IDE_PROC_DEVSET(lun, 0, 7),
778 ide_add_setting(drive, "nowerr", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, 725 IDE_PROC_DEVSET(max_failures, 0, 65535),
779 &drive->nowerr, set_nowerr); 726 IDE_PROC_DEVSET(multcount, 0, 16),
780 ide_add_setting(drive, "lun", SETTING_RW, TYPE_INT, 0, 7, 1, 1, 727 IDE_PROC_DEVSET(nowerr, 0, 1),
781 &drive->lun, NULL); 728 IDE_PROC_DEVSET(wcache, 0, 1),
782 ide_add_setting(drive, "wcache", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, 729 { 0 },
783 &drive->wcache, write_cache); 730};
784 ide_add_setting(drive, "acoustic", SETTING_RW, TYPE_BYTE, 0, 254, 1, 1,
785 &drive->acoustic, set_acoustic);
786 ide_add_setting(drive, "failures", SETTING_RW, TYPE_INT, 0, 65535, 1, 1,
787 &drive->failures, NULL);
788 ide_add_setting(drive, "max_failures", SETTING_RW, TYPE_INT, 0, 65535,
789 1, 1, &drive->max_failures, NULL);
790}
791#else
792static inline void idedisk_add_settings(ide_drive_t *drive) { ; }
793#endif 731#endif
794 732
795static void idedisk_setup(ide_drive_t *drive) 733static void idedisk_setup(ide_drive_t *drive)
796{ 734{
735 struct ide_disk_obj *idkp = drive->driver_data;
797 ide_hwif_t *hwif = drive->hwif; 736 ide_hwif_t *hwif = drive->hwif;
798 struct hd_driveid *id = drive->id; 737 u16 *id = drive->id;
738 char *m = (char *)&id[ATA_ID_PROD];
799 unsigned long long capacity; 739 unsigned long long capacity;
800 740
801 idedisk_add_settings(drive); 741 ide_proc_register_driver(drive, idkp->driver);
802 742
803 if (drive->id_read == 0) 743 if (drive->id_read == 0)
804 return; 744 return;
@@ -807,11 +747,11 @@ static void idedisk_setup(ide_drive_t *drive)
807 /* 747 /*
808 * Removable disks (eg. SYQUEST); ignore 'WD' drives 748 * Removable disks (eg. SYQUEST); ignore 'WD' drives
809 */ 749 */
810 if (id->model[0] != 'W' || id->model[1] != 'D') 750 if (m[0] != 'W' || m[1] != 'D')
811 drive->doorlocking = 1; 751 drive->doorlocking = 1;
812 } 752 }
813 753
814 (void)set_lba_addressing(drive, 1); 754 (void)set_addressing(drive, 1);
815 755
816 if (drive->addressing == 1) { 756 if (drive->addressing == 1) {
817 int max_s = 2048; 757 int max_s = 2048;
@@ -853,8 +793,7 @@ static void idedisk_setup(ide_drive_t *drive)
853 capacity = idedisk_capacity(drive); 793 capacity = idedisk_capacity(drive);
854 794
855 if (!drive->forced_geom) { 795 if (!drive->forced_geom) {
856 796 if (ata_id_lba48_enabled(drive->id)) {
857 if (idedisk_supports_lba48(drive->id)) {
858 /* compatibility */ 797 /* compatibility */
859 drive->bios_sect = 63; 798 drive->bios_sect = 63;
860 drive->bios_head = 255; 799 drive->bios_head = 255;
@@ -880,22 +819,22 @@ static void idedisk_setup(ide_drive_t *drive)
880 drive->name, capacity, sectors_to_MB(capacity)); 819 drive->name, capacity, sectors_to_MB(capacity));
881 820
882 /* Only print cache size when it was specified */ 821 /* Only print cache size when it was specified */
883 if (id->buf_size) 822 if (id[ATA_ID_BUF_SIZE])
884 printk(KERN_CONT " w/%dKiB Cache", id->buf_size / 2); 823 printk(KERN_CONT " w/%dKiB Cache", id[ATA_ID_BUF_SIZE] / 2);
885 824
886 printk(KERN_CONT ", CHS=%d/%d/%d\n", 825 printk(KERN_CONT ", CHS=%d/%d/%d\n",
887 drive->bios_cyl, drive->bios_head, drive->bios_sect); 826 drive->bios_cyl, drive->bios_head, drive->bios_sect);
888 827
889 /* write cache enabled? */ 828 /* write cache enabled? */
890 if ((id->csfo & 1) || (id->cfs_enable_1 & (1 << 5))) 829 if ((id[ATA_ID_CSFO] & 1) || ata_id_wcache_enabled(id))
891 drive->wcache = 1; 830 drive->wcache = 1;
892 831
893 write_cache(drive, 1); 832 set_wcache(drive, 1);
894} 833}
895 834
896static void ide_cacheflush_p(ide_drive_t *drive) 835static void ide_cacheflush_p(ide_drive_t *drive)
897{ 836{
898 if (!drive->wcache || !ide_id_has_flush_cache(drive->id)) 837 if (!drive->wcache || ata_id_flush_enabled(drive->id) == 0)
899 return; 838 return;
900 839
901 if (do_idedisk_flushcache(drive)) 840 if (do_idedisk_flushcache(drive))
@@ -937,7 +876,7 @@ static int ide_disk_probe(ide_drive_t *drive);
937 */ 876 */
938static void ide_disk_resume(ide_drive_t *drive) 877static void ide_disk_resume(ide_drive_t *drive)
939{ 878{
940 if (idedisk_supports_hpa(drive->id)) 879 if (ata_id_hpa_enabled(drive->id))
941 init_idedisk_capacity(drive); 880 init_idedisk_capacity(drive);
942} 881}
943 882
@@ -980,12 +919,12 @@ static ide_driver_t idedisk_driver = {
980 .shutdown = ide_device_shutdown, 919 .shutdown = ide_device_shutdown,
981 .version = IDEDISK_VERSION, 920 .version = IDEDISK_VERSION,
982 .media = ide_disk, 921 .media = ide_disk,
983 .supports_dsc_overlap = 0,
984 .do_request = ide_do_rw_disk, 922 .do_request = ide_do_rw_disk,
985 .end_request = ide_end_request, 923 .end_request = ide_end_request,
986 .error = __ide_error, 924 .error = __ide_error,
987#ifdef CONFIG_IDE_PROC_FS 925#ifdef CONFIG_IDE_PROC_FS
988 .proc = idedisk_proc, 926 .proc = idedisk_proc,
927 .settings = idedisk_settings,
989#endif 928#endif
990}; 929};
991 930
@@ -994,7 +933,7 @@ static int idedisk_set_doorlock(ide_drive_t *drive, int on)
994 ide_task_t task; 933 ide_task_t task;
995 934
996 memset(&task, 0, sizeof(task)); 935 memset(&task, 0, sizeof(task));
997 task.tf.command = on ? WIN_DOORLOCK : WIN_DOORUNLOCK; 936 task.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
998 task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; 937 task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
999 938
1000 return ide_no_data_taskfile(drive, &task); 939 return ide_no_data_taskfile(drive, &task);
@@ -1059,52 +998,28 @@ static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1059 return 0; 998 return 0;
1060} 999}
1061 1000
1001static const struct ide_ioctl_devset ide_disk_ioctl_settings[] = {
1002{ HDIO_GET_ADDRESS, HDIO_SET_ADDRESS, &ide_devset_address },
1003{ HDIO_GET_MULTCOUNT, HDIO_SET_MULTCOUNT, &ide_devset_multcount },
1004{ HDIO_GET_NOWERR, HDIO_SET_NOWERR, &ide_devset_nowerr },
1005{ HDIO_GET_WCACHE, HDIO_SET_WCACHE, &ide_devset_wcache },
1006{ HDIO_GET_ACOUSTIC, HDIO_SET_ACOUSTIC, &ide_devset_acoustic },
1007{ 0 }
1008};
1009
1062static int idedisk_ioctl(struct inode *inode, struct file *file, 1010static int idedisk_ioctl(struct inode *inode, struct file *file,
1063 unsigned int cmd, unsigned long arg) 1011 unsigned int cmd, unsigned long arg)
1064{ 1012{
1065 unsigned long flags;
1066 struct block_device *bdev = inode->i_bdev; 1013 struct block_device *bdev = inode->i_bdev;
1067 struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk); 1014 struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
1068 ide_drive_t *drive = idkp->drive; 1015 ide_drive_t *drive = idkp->drive;
1069 int err, (*setfunc)(ide_drive_t *, int); 1016 int err;
1070 u8 *val;
1071
1072 switch (cmd) {
1073 case HDIO_GET_ADDRESS: val = &drive->addressing; goto read_val;
1074 case HDIO_GET_MULTCOUNT: val = &drive->mult_count; goto read_val;
1075 case HDIO_GET_NOWERR: val = &drive->nowerr; goto read_val;
1076 case HDIO_GET_WCACHE: val = &drive->wcache; goto read_val;
1077 case HDIO_GET_ACOUSTIC: val = &drive->acoustic; goto read_val;
1078 case HDIO_SET_ADDRESS: setfunc = set_lba_addressing; goto set_val;
1079 case HDIO_SET_MULTCOUNT: setfunc = set_multcount; goto set_val;
1080 case HDIO_SET_NOWERR: setfunc = set_nowerr; goto set_val;
1081 case HDIO_SET_WCACHE: setfunc = write_cache; goto set_val;
1082 case HDIO_SET_ACOUSTIC: setfunc = set_acoustic; goto set_val;
1083 }
1084 1017
1085 return generic_ide_ioctl(drive, file, bdev, cmd, arg); 1018 err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings);
1019 if (err != -EOPNOTSUPP)
1020 return err;
1086 1021
1087read_val: 1022 return generic_ide_ioctl(drive, file, bdev, cmd, arg);
1088 mutex_lock(&ide_setting_mtx);
1089 spin_lock_irqsave(&ide_lock, flags);
1090 err = *val;
1091 spin_unlock_irqrestore(&ide_lock, flags);
1092 mutex_unlock(&ide_setting_mtx);
1093 return err >= 0 ? put_user(err, (long __user *)arg) : err;
1094
1095set_val:
1096 if (bdev != bdev->bd_contains)
1097 err = -EINVAL;
1098 else {
1099 if (!capable(CAP_SYS_ADMIN))
1100 err = -EACCES;
1101 else {
1102 mutex_lock(&ide_setting_mtx);
1103 err = setfunc(drive, arg);
1104 mutex_unlock(&ide_setting_mtx);
1105 }
1106 }
1107 return err;
1108} 1023}
1109 1024
1110static int idedisk_media_changed(struct gendisk *disk) 1025static int idedisk_media_changed(struct gendisk *disk)
@@ -1148,8 +1063,7 @@ static int ide_disk_probe(ide_drive_t *drive)
1148 /* strstr("foo", "") is non-NULL */ 1063 /* strstr("foo", "") is non-NULL */
1149 if (!strstr("ide-disk", drive->driver_req)) 1064 if (!strstr("ide-disk", drive->driver_req))
1150 goto failed; 1065 goto failed;
1151 if (!drive->present) 1066
1152 goto failed;
1153 if (drive->media != ide_disk) 1067 if (drive->media != ide_disk)
1154 goto failed; 1068 goto failed;
1155 1069
@@ -1163,8 +1077,6 @@ static int ide_disk_probe(ide_drive_t *drive)
1163 1077
1164 ide_init_disk(g, drive); 1078 ide_init_disk(g, drive);
1165 1079
1166 ide_proc_register_driver(drive, &idedisk_driver);
1167
1168 kref_init(&idkp->kref); 1080 kref_init(&idkp->kref);
1169 1081
1170 idkp->drive = drive; 1082 idkp->drive = drive;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 3fa07c0aeaa4..ef2f1504c0d5 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -106,7 +106,7 @@ ide_startstop_t ide_dma_intr (ide_drive_t *drive)
106 dma_stat = hwif->dma_ops->dma_end(drive); 106 dma_stat = hwif->dma_ops->dma_end(drive);
107 stat = hwif->tp_ops->read_status(hwif); 107 stat = hwif->tp_ops->read_status(hwif);
108 108
109 if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) { 109 if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) {
110 if (!dma_stat) { 110 if (!dma_stat) {
111 struct request *rq = HWGROUP(drive)->rq; 111 struct request *rq = HWGROUP(drive)->rq;
112 112
@@ -288,7 +288,7 @@ EXPORT_SYMBOL_GPL(ide_destroy_dmatable);
288static int config_drive_for_dma (ide_drive_t *drive) 288static int config_drive_for_dma (ide_drive_t *drive)
289{ 289{
290 ide_hwif_t *hwif = drive->hwif; 290 ide_hwif_t *hwif = drive->hwif;
291 struct hd_driveid *id = drive->id; 291 u16 *id = drive->id;
292 292
293 if (drive->media != ide_disk) { 293 if (drive->media != ide_disk) {
294 if (hwif->host_flags & IDE_HFLAG_NO_ATAPI_DMA) 294 if (hwif->host_flags & IDE_HFLAG_NO_ATAPI_DMA)
@@ -299,16 +299,17 @@ static int config_drive_for_dma (ide_drive_t *drive)
299 * Enable DMA on any drive that has 299 * Enable DMA on any drive that has
300 * UltraDMA (mode 0/1/2/3/4/5/6) enabled 300 * UltraDMA (mode 0/1/2/3/4/5/6) enabled
301 */ 301 */
302 if ((id->field_valid & 4) && ((id->dma_ultra >> 8) & 0x7f)) 302 if ((id[ATA_ID_FIELD_VALID] & 4) &&
303 ((id[ATA_ID_UDMA_MODES] >> 8) & 0x7f))
303 return 1; 304 return 1;
304 305
305 /* 306 /*
306 * Enable DMA on any drive that has mode2 DMA 307 * Enable DMA on any drive that has mode2 DMA
307 * (multi or single) enabled 308 * (multi or single) enabled
308 */ 309 */
309 if (id->field_valid & 2) /* regular DMA */ 310 if (id[ATA_ID_FIELD_VALID] & 2) /* regular DMA */
310 if ((id->dma_mword & 0x404) == 0x404 || 311 if ((id[ATA_ID_MWDMA_MODES] & 0x404) == 0x404 ||
311 (id->dma_1word & 0x404) == 0x404) 312 (id[ATA_ID_SWDMA_MODES] & 0x404) == 0x404)
312 return 1; 313 return 1;
313 314
314 /* Consult the list of known "good" drives */ 315 /* Consult the list of known "good" drives */
@@ -591,12 +592,12 @@ static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; }
591 592
592int __ide_dma_bad_drive (ide_drive_t *drive) 593int __ide_dma_bad_drive (ide_drive_t *drive)
593{ 594{
594 struct hd_driveid *id = drive->id; 595 u16 *id = drive->id;
595 596
596 int blacklist = ide_in_drive_list(id, drive_blacklist); 597 int blacklist = ide_in_drive_list(id, drive_blacklist);
597 if (blacklist) { 598 if (blacklist) {
598 printk(KERN_WARNING "%s: Disabling (U)DMA for %s (blacklisted)\n", 599 printk(KERN_WARNING "%s: Disabling (U)DMA for %s (blacklisted)\n",
599 drive->name, id->model); 600 drive->name, (char *)&id[ATA_ID_PROD]);
600 return blacklist; 601 return blacklist;
601 } 602 }
602 return 0; 603 return 0;
@@ -612,21 +613,21 @@ static const u8 xfer_mode_bases[] = {
612 613
613static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode) 614static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode)
614{ 615{
615 struct hd_driveid *id = drive->id; 616 u16 *id = drive->id;
616 ide_hwif_t *hwif = drive->hwif; 617 ide_hwif_t *hwif = drive->hwif;
617 const struct ide_port_ops *port_ops = hwif->port_ops; 618 const struct ide_port_ops *port_ops = hwif->port_ops;
618 unsigned int mask = 0; 619 unsigned int mask = 0;
619 620
620 switch(base) { 621 switch(base) {
621 case XFER_UDMA_0: 622 case XFER_UDMA_0:
622 if ((id->field_valid & 4) == 0) 623 if ((id[ATA_ID_FIELD_VALID] & 4) == 0)
623 break; 624 break;
624 625
625 if (port_ops && port_ops->udma_filter) 626 if (port_ops && port_ops->udma_filter)
626 mask = port_ops->udma_filter(drive); 627 mask = port_ops->udma_filter(drive);
627 else 628 else
628 mask = hwif->ultra_mask; 629 mask = hwif->ultra_mask;
629 mask &= id->dma_ultra; 630 mask &= id[ATA_ID_UDMA_MODES];
630 631
631 /* 632 /*
632 * avoid false cable warning from eighty_ninty_three() 633 * avoid false cable warning from eighty_ninty_three()
@@ -637,19 +638,19 @@ static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode)
637 } 638 }
638 break; 639 break;
639 case XFER_MW_DMA_0: 640 case XFER_MW_DMA_0:
640 if ((id->field_valid & 2) == 0) 641 if ((id[ATA_ID_FIELD_VALID] & 2) == 0)
641 break; 642 break;
642 if (port_ops && port_ops->mdma_filter) 643 if (port_ops && port_ops->mdma_filter)
643 mask = port_ops->mdma_filter(drive); 644 mask = port_ops->mdma_filter(drive);
644 else 645 else
645 mask = hwif->mwdma_mask; 646 mask = hwif->mwdma_mask;
646 mask &= id->dma_mword; 647 mask &= id[ATA_ID_MWDMA_MODES];
647 break; 648 break;
648 case XFER_SW_DMA_0: 649 case XFER_SW_DMA_0:
649 if (id->field_valid & 2) { 650 if (id[ATA_ID_FIELD_VALID] & 2) {
650 mask = id->dma_1word & hwif->swdma_mask; 651 mask = id[ATA_ID_SWDMA_MODES] & hwif->swdma_mask;
651 } else if (id->tDMA) { 652 } else if (id[ATA_ID_OLD_DMA_MODES] >> 8) {
652 u8 mode = id->tDMA; 653 u8 mode = id[ATA_ID_OLD_DMA_MODES] >> 8;
653 654
654 /* 655 /*
655 * if the mode is valid convert it to the mask 656 * if the mode is valid convert it to the mask
@@ -706,7 +707,8 @@ u8 ide_find_dma_mode(ide_drive_t *drive, u8 req_mode)
706 /* 707 /*
707 * is this correct? 708 * is this correct?
708 */ 709 */
709 if (ide_dma_good_drive(drive) && drive->id->eide_dma_time < 150) 710 if (ide_dma_good_drive(drive) &&
711 drive->id[ATA_ID_EIDE_DMA_TIME] < 150)
710 mode = XFER_MW_DMA_1; 712 mode = XFER_MW_DMA_1;
711 } 713 }
712 714
@@ -725,7 +727,7 @@ static int ide_tune_dma(ide_drive_t *drive)
725 ide_hwif_t *hwif = drive->hwif; 727 ide_hwif_t *hwif = drive->hwif;
726 u8 speed; 728 u8 speed;
727 729
728 if (drive->nodma || (drive->id->capability & 1) == 0) 730 if (drive->nodma || ata_id_has_dma(drive->id) == 0)
729 return 0; 731 return 0;
730 732
731 /* consult the list of known "bad" drives */ 733 /* consult the list of known "bad" drives */
@@ -767,13 +769,15 @@ static int ide_dma_check(ide_drive_t *drive)
767 769
768int ide_id_dma_bug(ide_drive_t *drive) 770int ide_id_dma_bug(ide_drive_t *drive)
769{ 771{
770 struct hd_driveid *id = drive->id; 772 u16 *id = drive->id;
771 773
772 if (id->field_valid & 4) { 774 if (id[ATA_ID_FIELD_VALID] & 4) {
773 if ((id->dma_ultra >> 8) && (id->dma_mword >> 8)) 775 if ((id[ATA_ID_UDMA_MODES] >> 8) &&
776 (id[ATA_ID_MWDMA_MODES] >> 8))
774 goto err_out; 777 goto err_out;
775 } else if (id->field_valid & 2) { 778 } else if (id[ATA_ID_FIELD_VALID] & 2) {
776 if ((id->dma_mword >> 8) && (id->dma_1word >> 8)) 779 if ((id[ATA_ID_MWDMA_MODES] >> 8) &&
780 (id[ATA_ID_SWDMA_MODES] >> 8))
777 goto err_out; 781 goto err_out;
778 } 782 }
779 return 0; 783 return 0;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index e9034c0125f3..d36f155470a4 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -15,6 +15,8 @@
15 * Documentation/ide/ChangeLog.ide-floppy.1996-2002 15 * Documentation/ide/ChangeLog.ide-floppy.1996-2002
16 */ 16 */
17 17
18#define DRV_NAME "ide-floppy"
19
18#define IDEFLOPPY_VERSION "1.00" 20#define IDEFLOPPY_VERSION "1.00"
19 21
20#include <linux/module.h> 22#include <linux/module.h>
@@ -31,8 +33,10 @@
31#include <linux/slab.h> 33#include <linux/slab.h>
32#include <linux/cdrom.h> 34#include <linux/cdrom.h>
33#include <linux/ide.h> 35#include <linux/ide.h>
36#include <linux/hdreg.h>
34#include <linux/bitops.h> 37#include <linux/bitops.h>
35#include <linux/mutex.h> 38#include <linux/mutex.h>
39#include <linux/scatterlist.h>
36 40
37#include <scsi/scsi_ioctl.h> 41#include <scsi/scsi_ioctl.h>
38 42
@@ -42,6 +46,8 @@
42#include <linux/io.h> 46#include <linux/io.h>
43#include <asm/unaligned.h> 47#include <asm/unaligned.h>
44 48
49#include "ide-floppy.h"
50
45/* define to see debug info */ 51/* define to see debug info */
46#define IDEFLOPPY_DEBUG_LOG 0 52#define IDEFLOPPY_DEBUG_LOG 0
47 53
@@ -55,102 +61,23 @@
55#define debug_log(fmt, args...) do {} while (0) 61#define debug_log(fmt, args...) do {} while (0)
56#endif 62#endif
57 63
58
59/* Some drives require a longer irq timeout. */
60#define IDEFLOPPY_WAIT_CMD (5 * WAIT_CMD)
61
62/* 64/*
63 * After each failed packet command we issue a request sense command and retry 65 * After each failed packet command we issue a request sense command and retry
64 * the packet command IDEFLOPPY_MAX_PC_RETRIES times. 66 * the packet command IDEFLOPPY_MAX_PC_RETRIES times.
65 */ 67 */
66#define IDEFLOPPY_MAX_PC_RETRIES 3 68#define IDEFLOPPY_MAX_PC_RETRIES 3
67 69
68/*
69 * With each packet command, we allocate a buffer of IDEFLOPPY_PC_BUFFER_SIZE
70 * bytes.
71 */
72#define IDEFLOPPY_PC_BUFFER_SIZE 256
73
74/*
75 * In various places in the driver, we need to allocate storage for packet
76 * commands and requests, which will remain valid while we leave the driver to
77 * wait for an interrupt or a timeout event.
78 */
79#define IDEFLOPPY_PC_STACK (10 + IDEFLOPPY_MAX_PC_RETRIES)
80
81/* format capacities descriptor codes */ 70/* format capacities descriptor codes */
82#define CAPACITY_INVALID 0x00 71#define CAPACITY_INVALID 0x00
83#define CAPACITY_UNFORMATTED 0x01 72#define CAPACITY_UNFORMATTED 0x01
84#define CAPACITY_CURRENT 0x02 73#define CAPACITY_CURRENT 0x02
85#define CAPACITY_NO_CARTRIDGE 0x03 74#define CAPACITY_NO_CARTRIDGE 0x03
86 75
87/*
88 * Most of our global data which we need to save even as we leave the driver
89 * due to an interrupt or a timer event is stored in a variable of type
90 * idefloppy_floppy_t, defined below.
91 */
92typedef struct ide_floppy_obj {
93 ide_drive_t *drive;
94 ide_driver_t *driver;
95 struct gendisk *disk;
96 struct kref kref;
97 unsigned int openers; /* protected by BKL for now */
98
99 /* Current packet command */
100 struct ide_atapi_pc *pc;
101 /* Last failed packet command */
102 struct ide_atapi_pc *failed_pc;
103 /* Packet command stack */
104 struct ide_atapi_pc pc_stack[IDEFLOPPY_PC_STACK];
105 /* Next free packet command storage space */
106 int pc_stack_index;
107 struct request rq_stack[IDEFLOPPY_PC_STACK];
108 /* We implement a circular array */
109 int rq_stack_index;
110
111 /* Last error information */
112 u8 sense_key, asc, ascq;
113 /* delay this long before sending packet command */
114 u8 ticks;
115 int progress_indication;
116
117 /* Device information */
118 /* Current format */
119 int blocks, block_size, bs_factor;
120 /* Last format capacity descriptor */
121 u8 cap_desc[8];
122 /* Copy of the flexible disk page */
123 u8 flexible_disk_page[32];
124 /* Write protect */
125 int wp;
126 /* Supports format progress report */
127 int srfp;
128} idefloppy_floppy_t;
129
130#define IDEFLOPPY_TICKS_DELAY HZ/20 /* default delay for ZIP 100 (50ms) */ 76#define IDEFLOPPY_TICKS_DELAY HZ/20 /* default delay for ZIP 100 (50ms) */
131 77
132/* Defines for the MODE SENSE command */
133#define MODE_SENSE_CURRENT 0x00
134#define MODE_SENSE_CHANGEABLE 0x01
135#define MODE_SENSE_DEFAULT 0x02
136#define MODE_SENSE_SAVED 0x03
137
138/* IOCTLs used in low-level formatting. */
139#define IDEFLOPPY_IOCTL_FORMAT_SUPPORTED 0x4600
140#define IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY 0x4601
141#define IDEFLOPPY_IOCTL_FORMAT_START 0x4602
142#define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS 0x4603
143
144/* Error code returned in rq->errors to the higher part of the driver. */ 78/* Error code returned in rq->errors to the higher part of the driver. */
145#define IDEFLOPPY_ERROR_GENERAL 101 79#define IDEFLOPPY_ERROR_GENERAL 101
146 80
147/*
148 * Pages of the SELECT SENSE / MODE SENSE packet commands.
149 * See SFF-8070i spec.
150 */
151#define IDEFLOPPY_CAPABILITIES_PAGE 0x1b
152#define IDEFLOPPY_FLEXIBLE_DISK_PAGE 0x05
153
154static DEFINE_MUTEX(idefloppy_ref_mutex); 81static DEFINE_MUTEX(idefloppy_ref_mutex);
155 82
156#define to_ide_floppy(obj) container_of(obj, struct ide_floppy_obj, kref) 83#define to_ide_floppy(obj) container_of(obj, struct ide_floppy_obj, kref)
@@ -219,44 +146,6 @@ static int idefloppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
219 return 0; 146 return 0;
220} 147}
221 148
222static void ide_floppy_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
223 unsigned int bcount, int direction)
224{
225 ide_hwif_t *hwif = drive->hwif;
226 struct request *rq = pc->rq;
227 struct req_iterator iter;
228 struct bio_vec *bvec;
229 unsigned long flags;
230 int count, done = 0;
231 char *data;
232
233 rq_for_each_segment(bvec, rq, iter) {
234 if (!bcount)
235 break;
236
237 count = min(bvec->bv_len, bcount);
238
239 data = bvec_kmap_irq(bvec, &flags);
240 if (direction)
241 hwif->tp_ops->output_data(drive, NULL, data, count);
242 else
243 hwif->tp_ops->input_data(drive, NULL, data, count);
244 bvec_kunmap_irq(data, &flags);
245
246 bcount -= count;
247 pc->b_count += count;
248 done += count;
249 }
250
251 idefloppy_end_request(drive, 1, done >> 9);
252
253 if (bcount) {
254 printk(KERN_ERR "%s: leftover data in %s, bcount == %d\n",
255 drive->name, __func__, bcount);
256 ide_pad_transfer(drive, direction, bcount);
257 }
258}
259
260static void idefloppy_update_buffers(ide_drive_t *drive, 149static void idefloppy_update_buffers(ide_drive_t *drive,
261 struct ide_atapi_pc *pc) 150 struct ide_atapi_pc *pc)
262{ 151{
@@ -267,43 +156,6 @@ static void idefloppy_update_buffers(ide_drive_t *drive,
267 idefloppy_end_request(drive, 1, 0); 156 idefloppy_end_request(drive, 1, 0);
268} 157}
269 158
270/*
271 * Generate a new packet command request in front of the request queue, before
272 * the current request so that it will be processed immediately, on the next
273 * pass through the driver.
274 */
275static void idefloppy_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc,
276 struct request *rq)
277{
278 struct ide_floppy_obj *floppy = drive->driver_data;
279
280 blk_rq_init(NULL, rq);
281 rq->buffer = (char *) pc;
282 rq->cmd_type = REQ_TYPE_SPECIAL;
283 rq->cmd_flags |= REQ_PREEMPT;
284 rq->rq_disk = floppy->disk;
285 memcpy(rq->cmd, pc->c, 12);
286 ide_do_drive_cmd(drive, rq);
287}
288
289static struct ide_atapi_pc *idefloppy_next_pc_storage(ide_drive_t *drive)
290{
291 idefloppy_floppy_t *floppy = drive->driver_data;
292
293 if (floppy->pc_stack_index == IDEFLOPPY_PC_STACK)
294 floppy->pc_stack_index = 0;
295 return (&floppy->pc_stack[floppy->pc_stack_index++]);
296}
297
298static struct request *idefloppy_next_rq_storage(ide_drive_t *drive)
299{
300 idefloppy_floppy_t *floppy = drive->driver_data;
301
302 if (floppy->rq_stack_index == IDEFLOPPY_PC_STACK)
303 floppy->rq_stack_index = 0;
304 return (&floppy->rq_stack[floppy->rq_stack_index++]);
305}
306
307static void ide_floppy_callback(ide_drive_t *drive) 159static void ide_floppy_callback(ide_drive_t *drive)
308{ 160{
309 idefloppy_floppy_t *floppy = drive->driver_data; 161 idefloppy_floppy_t *floppy = drive->driver_data;
@@ -341,16 +193,9 @@ static void ide_floppy_callback(ide_drive_t *drive)
341 idefloppy_end_request(drive, uptodate, 0); 193 idefloppy_end_request(drive, uptodate, 0);
342} 194}
343 195
344static void idefloppy_init_pc(struct ide_atapi_pc *pc) 196void ide_floppy_create_request_sense_cmd(struct ide_atapi_pc *pc)
345{
346 memset(pc, 0, sizeof(*pc));
347 pc->buf = pc->pc_buf;
348 pc->buf_size = IDEFLOPPY_PC_BUFFER_SIZE;
349}
350
351static void idefloppy_create_request_sense_cmd(struct ide_atapi_pc *pc)
352{ 197{
353 idefloppy_init_pc(pc); 198 ide_init_pc(pc);
354 pc->c[0] = GPCMD_REQUEST_SENSE; 199 pc->c[0] = GPCMD_REQUEST_SENSE;
355 pc->c[4] = 255; 200 pc->c[4] = 255;
356 pc->req_xfer = 18; 201 pc->req_xfer = 18;
@@ -362,14 +207,13 @@ static void idefloppy_create_request_sense_cmd(struct ide_atapi_pc *pc)
362 */ 207 */
363static void idefloppy_retry_pc(ide_drive_t *drive) 208static void idefloppy_retry_pc(ide_drive_t *drive)
364{ 209{
365 struct ide_atapi_pc *pc; 210 struct ide_floppy_obj *floppy = drive->driver_data;
366 struct request *rq; 211 struct request *rq = &floppy->request_sense_rq;
212 struct ide_atapi_pc *pc = &floppy->request_sense_pc;
367 213
368 (void)ide_read_error(drive); 214 (void)ide_read_error(drive);
369 pc = idefloppy_next_pc_storage(drive); 215 ide_floppy_create_request_sense_cmd(pc);
370 rq = idefloppy_next_rq_storage(drive); 216 ide_queue_pc_head(drive, floppy->disk, pc, rq);
371 idefloppy_create_request_sense_cmd(pc);
372 idefloppy_queue_pc_head(drive, pc, rq);
373} 217}
374 218
375/* The usual interrupt handler called during a packet command. */ 219/* The usual interrupt handler called during a packet command. */
@@ -378,8 +222,8 @@ static ide_startstop_t idefloppy_pc_intr(ide_drive_t *drive)
378 idefloppy_floppy_t *floppy = drive->driver_data; 222 idefloppy_floppy_t *floppy = drive->driver_data;
379 223
380 return ide_pc_intr(drive, floppy->pc, idefloppy_pc_intr, 224 return ide_pc_intr(drive, floppy->pc, idefloppy_pc_intr,
381 IDEFLOPPY_WAIT_CMD, NULL, idefloppy_update_buffers, 225 WAIT_FLOPPY_CMD, NULL, idefloppy_update_buffers,
382 idefloppy_retry_pc, NULL, ide_floppy_io_buffers); 226 idefloppy_retry_pc, NULL, ide_io_buffers);
383} 227}
384 228
385/* 229/*
@@ -396,10 +240,9 @@ static int idefloppy_transfer_pc(ide_drive_t *drive)
396 drive->hwif->tp_ops->output_data(drive, NULL, floppy->pc->c, 12); 240 drive->hwif->tp_ops->output_data(drive, NULL, floppy->pc->c, 12);
397 241
398 /* Timeout for the packet command */ 242 /* Timeout for the packet command */
399 return IDEFLOPPY_WAIT_CMD; 243 return WAIT_FLOPPY_CMD;
400} 244}
401 245
402
403/* 246/*
404 * Called as an interrupt (or directly). When the device says it's ready for a 247 * Called as an interrupt (or directly). When the device says it's ready for a
405 * packet, we schedule the packet transfer to occur about 2-3 ticks later in 248 * packet, we schedule the packet transfer to occur about 2-3 ticks later in
@@ -424,7 +267,7 @@ static ide_startstop_t idefloppy_start_pc_transfer(ide_drive_t *drive)
424 timeout = floppy->ticks; 267 timeout = floppy->ticks;
425 expiry = &idefloppy_transfer_pc; 268 expiry = &idefloppy_transfer_pc;
426 } else { 269 } else {
427 timeout = IDEFLOPPY_WAIT_CMD; 270 timeout = WAIT_FLOPPY_CMD;
428 expiry = NULL; 271 expiry = NULL;
429 } 272 }
430 273
@@ -474,58 +317,27 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
474 pc->retries++; 317 pc->retries++;
475 318
476 return ide_issue_pc(drive, pc, idefloppy_start_pc_transfer, 319 return ide_issue_pc(drive, pc, idefloppy_start_pc_transfer,
477 IDEFLOPPY_WAIT_CMD, NULL); 320 WAIT_FLOPPY_CMD, NULL);
478}
479
480static void idefloppy_create_prevent_cmd(struct ide_atapi_pc *pc, int prevent)
481{
482 debug_log("creating prevent removal command, prevent = %d\n", prevent);
483
484 idefloppy_init_pc(pc);
485 pc->c[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
486 pc->c[4] = prevent;
487} 321}
488 322
489static void idefloppy_create_read_capacity_cmd(struct ide_atapi_pc *pc) 323void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *pc)
490{ 324{
491 idefloppy_init_pc(pc); 325 ide_init_pc(pc);
492 pc->c[0] = GPCMD_READ_FORMAT_CAPACITIES; 326 pc->c[0] = GPCMD_READ_FORMAT_CAPACITIES;
493 pc->c[7] = 255; 327 pc->c[7] = 255;
494 pc->c[8] = 255; 328 pc->c[8] = 255;
495 pc->req_xfer = 255; 329 pc->req_xfer = 255;
496} 330}
497 331
498static void idefloppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b,
499 int l, int flags)
500{
501 idefloppy_init_pc(pc);
502 pc->c[0] = GPCMD_FORMAT_UNIT;
503 pc->c[1] = 0x17;
504
505 memset(pc->buf, 0, 12);
506 pc->buf[1] = 0xA2;
507 /* Default format list header, u8 1: FOV/DCRT/IMM bits set */
508
509 if (flags & 1) /* Verify bit on... */
510 pc->buf[1] ^= 0x20; /* ... turn off DCRT bit */
511 pc->buf[3] = 8;
512
513 put_unaligned(cpu_to_be32(b), (unsigned int *)(&pc->buf[4]));
514 put_unaligned(cpu_to_be32(l), (unsigned int *)(&pc->buf[8]));
515 pc->buf_size = 12;
516 pc->flags |= PC_FLAG_WRITING;
517}
518
519/* A mode sense command is used to "sense" floppy parameters. */ 332/* A mode sense command is used to "sense" floppy parameters. */
520static void idefloppy_create_mode_sense_cmd(struct ide_atapi_pc *pc, 333void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code)
521 u8 page_code, u8 type)
522{ 334{
523 u16 length = 8; /* sizeof(Mode Parameter Header) = 8 Bytes */ 335 u16 length = 8; /* sizeof(Mode Parameter Header) = 8 Bytes */
524 336
525 idefloppy_init_pc(pc); 337 ide_init_pc(pc);
526 pc->c[0] = GPCMD_MODE_SENSE_10; 338 pc->c[0] = GPCMD_MODE_SENSE_10;
527 pc->c[1] = 0; 339 pc->c[1] = 0;
528 pc->c[2] = page_code + (type << 6); 340 pc->c[2] = page_code;
529 341
530 switch (page_code) { 342 switch (page_code) {
531 case IDEFLOPPY_CAPABILITIES_PAGE: 343 case IDEFLOPPY_CAPABILITIES_PAGE:
@@ -542,13 +354,6 @@ static void idefloppy_create_mode_sense_cmd(struct ide_atapi_pc *pc,
542 pc->req_xfer = length; 354 pc->req_xfer = length;
543} 355}
544 356
545static void idefloppy_create_start_stop_cmd(struct ide_atapi_pc *pc, int start)
546{
547 idefloppy_init_pc(pc);
548 pc->c[0] = GPCMD_START_STOP_UNIT;
549 pc->c[4] = start;
550}
551
552static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, 357static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy,
553 struct ide_atapi_pc *pc, struct request *rq, 358 struct ide_atapi_pc *pc, struct request *rq,
554 unsigned long sector) 359 unsigned long sector)
@@ -560,7 +365,7 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy,
560 debug_log("create_rw10_cmd: block == %d, blocks == %d\n", 365 debug_log("create_rw10_cmd: block == %d, blocks == %d\n",
561 block, blocks); 366 block, blocks);
562 367
563 idefloppy_init_pc(pc); 368 ide_init_pc(pc);
564 pc->c[0] = cmd == READ ? GPCMD_READ_10 : GPCMD_WRITE_10; 369 pc->c[0] = cmd == READ ? GPCMD_READ_10 : GPCMD_WRITE_10;
565 put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]); 370 put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]);
566 put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]); 371 put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]);
@@ -568,7 +373,7 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy,
568 memcpy(rq->cmd, pc->c, 12); 373 memcpy(rq->cmd, pc->c, 12);
569 374
570 pc->rq = rq; 375 pc->rq = rq;
571 pc->b_count = cmd == READ ? 0 : rq->bio->bi_size; 376 pc->b_count = 0;
572 if (rq->cmd_flags & REQ_RW) 377 if (rq->cmd_flags & REQ_RW)
573 pc->flags |= PC_FLAG_WRITING; 378 pc->flags |= PC_FLAG_WRITING;
574 pc->buf = NULL; 379 pc->buf = NULL;
@@ -579,10 +384,10 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy,
579static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy, 384static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
580 struct ide_atapi_pc *pc, struct request *rq) 385 struct ide_atapi_pc *pc, struct request *rq)
581{ 386{
582 idefloppy_init_pc(pc); 387 ide_init_pc(pc);
583 memcpy(pc->c, rq->cmd, sizeof(pc->c)); 388 memcpy(pc->c, rq->cmd, sizeof(pc->c));
584 pc->rq = rq; 389 pc->rq = rq;
585 pc->b_count = rq->data_len; 390 pc->b_count = 0;
586 if (rq->data_len && rq_data_dir(rq) == WRITE) 391 if (rq->data_len && rq_data_dir(rq) == WRITE)
587 pc->flags |= PC_FLAG_WRITING; 392 pc->flags |= PC_FLAG_WRITING;
588 pc->buf = rq->data; 393 pc->buf = rq->data;
@@ -599,15 +404,17 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
599 struct request *rq, sector_t block_s) 404 struct request *rq, sector_t block_s)
600{ 405{
601 idefloppy_floppy_t *floppy = drive->driver_data; 406 idefloppy_floppy_t *floppy = drive->driver_data;
407 ide_hwif_t *hwif = drive->hwif;
602 struct ide_atapi_pc *pc; 408 struct ide_atapi_pc *pc;
603 unsigned long block = (unsigned long)block_s; 409 unsigned long block = (unsigned long)block_s;
604 410
605 debug_log("dev: %s, cmd_type: %x, errors: %d\n", 411 debug_log("%s: dev: %s, cmd: 0x%x, cmd_type: %x, errors: %d\n",
606 rq->rq_disk ? rq->rq_disk->disk_name : "?", 412 __func__, rq->rq_disk ? rq->rq_disk->disk_name : "?",
607 rq->cmd_type, rq->errors); 413 rq->cmd[0], rq->cmd_type, rq->errors);
608 debug_log("sector: %ld, nr_sectors: %ld, " 414
609 "current_nr_sectors: %d\n", (long)rq->sector, 415 debug_log("%s: sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",
610 rq->nr_sectors, rq->current_nr_sectors); 416 __func__, (long)rq->sector, rq->nr_sectors,
417 rq->current_nr_sectors);
611 418
612 if (rq->errors >= ERROR_MAX) { 419 if (rq->errors >= ERROR_MAX) {
613 if (floppy->failed_pc) 420 if (floppy->failed_pc)
@@ -626,12 +433,12 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
626 idefloppy_end_request(drive, 0, 0); 433 idefloppy_end_request(drive, 0, 0);
627 return ide_stopped; 434 return ide_stopped;
628 } 435 }
629 pc = idefloppy_next_pc_storage(drive); 436 pc = &floppy->queued_pc;
630 idefloppy_create_rw_cmd(floppy, pc, rq, block); 437 idefloppy_create_rw_cmd(floppy, pc, rq, block);
631 } else if (blk_special_request(rq)) { 438 } else if (blk_special_request(rq)) {
632 pc = (struct ide_atapi_pc *) rq->buffer; 439 pc = (struct ide_atapi_pc *) rq->buffer;
633 } else if (blk_pc_request(rq)) { 440 } else if (blk_pc_request(rq)) {
634 pc = idefloppy_next_pc_storage(drive); 441 pc = &floppy->queued_pc;
635 idefloppy_blockpc_cmd(floppy, pc, rq); 442 idefloppy_blockpc_cmd(floppy, pc, rq);
636 } else { 443 } else {
637 blk_dump_rq_flags(rq, 444 blk_dump_rq_flags(rq,
@@ -640,29 +447,15 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
640 return ide_stopped; 447 return ide_stopped;
641 } 448 }
642 449
643 pc->rq = rq; 450 ide_init_sg_cmd(drive, rq);
451 ide_map_sg(drive, rq);
644 452
645 return idefloppy_issue_pc(drive, pc); 453 pc->sg = hwif->sg_table;
646} 454 pc->sg_cnt = hwif->sg_nents;
647 455
648/* 456 pc->rq = rq;
649 * Add a special packet command request to the tail of the request queue,
650 * and wait for it to be serviced.
651 */
652static int idefloppy_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc)
653{
654 struct ide_floppy_obj *floppy = drive->driver_data;
655 struct request *rq;
656 int error;
657
658 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
659 rq->buffer = (char *) pc;
660 rq->cmd_type = REQ_TYPE_SPECIAL;
661 memcpy(rq->cmd, pc->c, 12);
662 error = blk_execute_rq(drive->queue, floppy->disk, rq, 0);
663 blk_put_request(rq);
664 457
665 return error; 458 return idefloppy_issue_pc(drive, pc);
666} 459}
667 460
668/* 461/*
@@ -672,22 +465,28 @@ static int idefloppy_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc)
672static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive) 465static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
673{ 466{
674 idefloppy_floppy_t *floppy = drive->driver_data; 467 idefloppy_floppy_t *floppy = drive->driver_data;
468 struct gendisk *disk = floppy->disk;
675 struct ide_atapi_pc pc; 469 struct ide_atapi_pc pc;
676 u8 *page; 470 u8 *page;
677 int capacity, lba_capacity; 471 int capacity, lba_capacity;
678 u16 transfer_rate, sector_size, cyls, rpm; 472 u16 transfer_rate, sector_size, cyls, rpm;
679 u8 heads, sectors; 473 u8 heads, sectors;
680 474
681 idefloppy_create_mode_sense_cmd(&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE, 475 ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE);
682 MODE_SENSE_CURRENT);
683 476
684 if (idefloppy_queue_pc_tail(drive, &pc)) { 477 if (ide_queue_pc_tail(drive, disk, &pc)) {
685 printk(KERN_ERR "ide-floppy: Can't get flexible disk page" 478 printk(KERN_ERR "ide-floppy: Can't get flexible disk page"
686 " parameters\n"); 479 " parameters\n");
687 return 1; 480 return 1;
688 } 481 }
689 floppy->wp = !!(pc.buf[3] & 0x80); 482
690 set_disk_ro(floppy->disk, floppy->wp); 483 if (pc.buf[3] & 0x80)
484 drive->atapi_flags |= IDE_AFLAG_WP;
485 else
486 drive->atapi_flags &= ~IDE_AFLAG_WP;
487
488 set_disk_ro(disk, !!(drive->atapi_flags & IDE_AFLAG_WP));
489
691 page = &pc.buf[8]; 490 page = &pc.buf[8];
692 491
693 transfer_rate = be16_to_cpup((__be16 *)&pc.buf[8 + 2]); 492 transfer_rate = be16_to_cpup((__be16 *)&pc.buf[8 + 2]);
@@ -721,23 +520,6 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive)
721 return 0; 520 return 0;
722} 521}
723 522
724static int idefloppy_get_sfrp_bit(ide_drive_t *drive)
725{
726 idefloppy_floppy_t *floppy = drive->driver_data;
727 struct ide_atapi_pc pc;
728
729 floppy->srfp = 0;
730 idefloppy_create_mode_sense_cmd(&pc, IDEFLOPPY_CAPABILITIES_PAGE,
731 MODE_SENSE_CURRENT);
732
733 pc.flags |= PC_FLAG_SUPPRESS_ERROR;
734 if (idefloppy_queue_pc_tail(drive, &pc))
735 return 1;
736
737 floppy->srfp = pc.buf[8 + 2] & 0x40;
738 return (0);
739}
740
741/* 523/*
742 * Determine if a media is present in the floppy drive, and if so, its LBA 524 * Determine if a media is present in the floppy drive, and if so, its LBA
743 * capacity. 525 * capacity.
@@ -745,6 +527,7 @@ static int idefloppy_get_sfrp_bit(ide_drive_t *drive)
745static int ide_floppy_get_capacity(ide_drive_t *drive) 527static int ide_floppy_get_capacity(ide_drive_t *drive)
746{ 528{
747 idefloppy_floppy_t *floppy = drive->driver_data; 529 idefloppy_floppy_t *floppy = drive->driver_data;
530 struct gendisk *disk = floppy->disk;
748 struct ide_atapi_pc pc; 531 struct ide_atapi_pc pc;
749 u8 *cap_desc; 532 u8 *cap_desc;
750 u8 header_len, desc_cnt; 533 u8 header_len, desc_cnt;
@@ -756,8 +539,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
756 floppy->bs_factor = 1; 539 floppy->bs_factor = 1;
757 set_capacity(floppy->disk, 0); 540 set_capacity(floppy->disk, 0);
758 541
759 idefloppy_create_read_capacity_cmd(&pc); 542 ide_floppy_create_read_capacity_cmd(&pc);
760 if (idefloppy_queue_pc_tail(drive, &pc)) { 543 if (ide_queue_pc_tail(drive, disk, &pc)) {
761 printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); 544 printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n");
762 return 1; 545 return 1;
763 } 546 }
@@ -832,202 +615,55 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
832 if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) 615 if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
833 (void) ide_floppy_get_flexible_disk_page(drive); 616 (void) ide_floppy_get_flexible_disk_page(drive);
834 617
835 set_capacity(floppy->disk, floppy->blocks * floppy->bs_factor); 618 set_capacity(disk, floppy->blocks * floppy->bs_factor);
619
836 return rc; 620 return rc;
837} 621}
838 622
839/* 623static sector_t idefloppy_capacity(ide_drive_t *drive)
840 * Obtain the list of formattable capacities.
841 * Very similar to ide_floppy_get_capacity, except that we push the capacity
842 * descriptors to userland, instead of our own structures.
843 *
844 * Userland gives us the following structure:
845 *
846 * struct idefloppy_format_capacities {
847 * int nformats;
848 * struct {
849 * int nblocks;
850 * int blocksize;
851 * } formats[];
852 * };
853 *
854 * userland initializes nformats to the number of allocated formats[] records.
855 * On exit we set nformats to the number of records we've actually initialized.
856 */
857
858static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
859{ 624{
860 struct ide_atapi_pc pc; 625 idefloppy_floppy_t *floppy = drive->driver_data;
861 u8 header_len, desc_cnt; 626 unsigned long capacity = floppy->blocks * floppy->bs_factor;
862 int i, blocks, length, u_array_size, u_index;
863 int __user *argp;
864
865 if (get_user(u_array_size, arg))
866 return (-EFAULT);
867
868 if (u_array_size <= 0)
869 return (-EINVAL);
870
871 idefloppy_create_read_capacity_cmd(&pc);
872 if (idefloppy_queue_pc_tail(drive, &pc)) {
873 printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n");
874 return (-EIO);
875 }
876 header_len = pc.buf[3];
877 desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */
878
879 u_index = 0;
880 argp = arg + 1;
881
882 /*
883 * We always skip the first capacity descriptor. That's the current
884 * capacity. We are interested in the remaining descriptors, the
885 * formattable capacities.
886 */
887 for (i = 1; i < desc_cnt; i++) {
888 unsigned int desc_start = 4 + i*8;
889
890 if (u_index >= u_array_size)
891 break; /* User-supplied buffer too small */
892
893 blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]);
894 length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]);
895
896 if (put_user(blocks, argp))
897 return(-EFAULT);
898 ++argp;
899
900 if (put_user(length, argp))
901 return (-EFAULT);
902 ++argp;
903
904 ++u_index;
905 }
906 627
907 if (put_user(u_index, arg)) 628 return capacity;
908 return (-EFAULT);
909 return (0);
910} 629}
911 630
912/* 631#ifdef CONFIG_IDE_PROC_FS
913 * Get ATAPI_FORMAT_UNIT progress indication. 632ide_devset_rw_field(bios_cyl, bios_cyl);
914 * 633ide_devset_rw_field(bios_head, bios_head);
915 * Userland gives a pointer to an int. The int is set to a progress 634ide_devset_rw_field(bios_sect, bios_sect);
916 * indicator 0-65536, with 65536=100%.
917 *
918 * If the drive does not support format progress indication, we just check
919 * the dsc bit, and return either 0 or 65536.
920 */
921 635
922static int idefloppy_get_format_progress(ide_drive_t *drive, int __user *arg) 636static int get_ticks(ide_drive_t *drive)
923{ 637{
924 idefloppy_floppy_t *floppy = drive->driver_data; 638 idefloppy_floppy_t *floppy = drive->driver_data;
925 struct ide_atapi_pc pc; 639 return floppy->ticks;
926 int progress_indication = 0x10000;
927
928 if (floppy->srfp) {
929 idefloppy_create_request_sense_cmd(&pc);
930 if (idefloppy_queue_pc_tail(drive, &pc))
931 return (-EIO);
932
933 if (floppy->sense_key == 2 &&
934 floppy->asc == 4 &&
935 floppy->ascq == 4)
936 progress_indication = floppy->progress_indication;
937
938 /* Else assume format_unit has finished, and we're at 0x10000 */
939 } else {
940 ide_hwif_t *hwif = drive->hwif;
941 unsigned long flags;
942 u8 stat;
943
944 local_irq_save(flags);
945 stat = hwif->tp_ops->read_status(hwif);
946 local_irq_restore(flags);
947
948 progress_indication = ((stat & SEEK_STAT) == 0) ? 0 : 0x10000;
949 }
950 if (put_user(progress_indication, arg))
951 return (-EFAULT);
952
953 return (0);
954} 640}
955 641
956static sector_t idefloppy_capacity(ide_drive_t *drive) 642static int set_ticks(ide_drive_t *drive, int arg)
957{ 643{
958 idefloppy_floppy_t *floppy = drive->driver_data; 644 idefloppy_floppy_t *floppy = drive->driver_data;
959 unsigned long capacity = floppy->blocks * floppy->bs_factor; 645 floppy->ticks = arg;
960
961 return capacity;
962}
963
964/*
965 * Check whether we can support a drive, based on the ATAPI IDENTIFY command
966 * results.
967 */
968static int idefloppy_identify_device(ide_drive_t *drive, struct hd_driveid *id)
969{
970 u8 gcw[2];
971 u8 device_type, protocol, removable, drq_type, packet_size;
972
973 *((u16 *) &gcw) = id->config;
974
975 device_type = gcw[1] & 0x1F;
976 removable = (gcw[0] & 0x80) >> 7;
977 protocol = (gcw[1] & 0xC0) >> 6;
978 drq_type = (gcw[0] & 0x60) >> 5;
979 packet_size = gcw[0] & 0x03;
980
981#ifdef CONFIG_PPC
982 /* kludge for Apple PowerBook internal zip */
983 if (device_type == 5 &&
984 !strstr(id->model, "CD-ROM") && strstr(id->model, "ZIP"))
985 device_type = 0;
986#endif
987
988 if (protocol != 2)
989 printk(KERN_ERR "ide-floppy: Protocol (0x%02x) is not ATAPI\n",
990 protocol);
991 else if (device_type != 0)
992 printk(KERN_ERR "ide-floppy: Device type (0x%02x) is not set "
993 "to floppy\n", device_type);
994 else if (!removable)
995 printk(KERN_ERR "ide-floppy: The removable flag is not set\n");
996 else if (drq_type == 3)
997 printk(KERN_ERR "ide-floppy: Sorry, DRQ type (0x%02x) not "
998 "supported\n", drq_type);
999 else if (packet_size != 0)
1000 printk(KERN_ERR "ide-floppy: Packet size (0x%02x) is not 12 "
1001 "bytes\n", packet_size);
1002 else
1003 return 1;
1004 return 0; 646 return 0;
1005} 647}
1006 648
1007#ifdef CONFIG_IDE_PROC_FS 649IDE_DEVSET(ticks, DS_SYNC, get_ticks, set_ticks);
1008static void idefloppy_add_settings(ide_drive_t *drive)
1009{
1010 idefloppy_floppy_t *floppy = drive->driver_data;
1011 650
1012 ide_add_setting(drive, "bios_cyl", SETTING_RW, TYPE_INT, 0, 1023, 1, 1, 651static const struct ide_proc_devset idefloppy_settings[] = {
1013 &drive->bios_cyl, NULL); 652 IDE_PROC_DEVSET(bios_cyl, 0, 1023),
1014 ide_add_setting(drive, "bios_head", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, 653 IDE_PROC_DEVSET(bios_head, 0, 255),
1015 &drive->bios_head, NULL); 654 IDE_PROC_DEVSET(bios_sect, 0, 63),
1016 ide_add_setting(drive, "bios_sect", SETTING_RW, TYPE_BYTE, 0, 63, 1, 1, 655 IDE_PROC_DEVSET(ticks, 0, 255),
1017 &drive->bios_sect, NULL); 656 { 0 },
1018 ide_add_setting(drive, "ticks", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, 657};
1019 &floppy->ticks, NULL);
1020}
1021#else
1022static inline void idefloppy_add_settings(ide_drive_t *drive) { ; }
1023#endif 658#endif
1024 659
1025static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy) 660static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
1026{ 661{
662 u16 *id = drive->id;
1027 u8 gcw[2]; 663 u8 gcw[2];
1028 664
1029 *((u16 *) &gcw) = drive->id->config; 665 *((u16 *)&gcw) = id[ATA_ID_CONFIG];
1030 floppy->pc = floppy->pc_stack; 666
1031 drive->pc_callback = ide_floppy_callback; 667 drive->pc_callback = ide_floppy_callback;
1032 668
1033 if (((gcw[0] & 0x60) >> 5) == 1) 669 if (((gcw[0] & 0x60) >> 5) == 1)
@@ -1041,7 +677,7 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
1041 * it. It should be fixed as of version 1.9, but to be on the safe side 677 * it. It should be fixed as of version 1.9, but to be on the safe side
1042 * we'll leave the limitation below for the 2.2.x tree. 678 * we'll leave the limitation below for the 2.2.x tree.
1043 */ 679 */
1044 if (!strncmp(drive->id->model, "IOMEGA ZIP 100 ATAPI", 20)) { 680 if (!strncmp((char *)&id[ATA_ID_PROD], "IOMEGA ZIP 100 ATAPI", 20)) {
1045 drive->atapi_flags |= IDE_AFLAG_ZIP_DRIVE; 681 drive->atapi_flags |= IDE_AFLAG_ZIP_DRIVE;
1046 /* This value will be visible in the /proc/ide/hdx/settings */ 682 /* This value will be visible in the /proc/ide/hdx/settings */
1047 floppy->ticks = IDEFLOPPY_TICKS_DELAY; 683 floppy->ticks = IDEFLOPPY_TICKS_DELAY;
@@ -1052,13 +688,16 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy)
1052 * Guess what? The IOMEGA Clik! drive also needs the above fix. It makes 688 * Guess what? The IOMEGA Clik! drive also needs the above fix. It makes
1053 * nasty clicking noises without it, so please don't remove this. 689 * nasty clicking noises without it, so please don't remove this.
1054 */ 690 */
1055 if (strncmp(drive->id->model, "IOMEGA Clik!", 11) == 0) { 691 if (strncmp((char *)&id[ATA_ID_PROD], "IOMEGA Clik!", 11) == 0) {
1056 blk_queue_max_sectors(drive->queue, 64); 692 blk_queue_max_sectors(drive->queue, 64);
1057 drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE; 693 drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE;
694 /* IOMEGA Clik! drives do not support lock/unlock commands */
695 drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
1058 } 696 }
1059 697
1060 (void) ide_floppy_get_capacity(drive); 698 (void) ide_floppy_get_capacity(drive);
1061 idefloppy_add_settings(drive); 699
700 ide_proc_register_driver(drive, floppy->driver);
1062} 701}
1063 702
1064static void ide_floppy_remove(ide_drive_t *drive) 703static void ide_floppy_remove(ide_drive_t *drive)
@@ -1115,12 +754,12 @@ static ide_driver_t idefloppy_driver = {
1115 .remove = ide_floppy_remove, 754 .remove = ide_floppy_remove,
1116 .version = IDEFLOPPY_VERSION, 755 .version = IDEFLOPPY_VERSION,
1117 .media = ide_floppy, 756 .media = ide_floppy,
1118 .supports_dsc_overlap = 0,
1119 .do_request = idefloppy_do_request, 757 .do_request = idefloppy_do_request,
1120 .end_request = idefloppy_end_request, 758 .end_request = idefloppy_end_request,
1121 .error = __ide_error, 759 .error = __ide_error,
1122#ifdef CONFIG_IDE_PROC_FS 760#ifdef CONFIG_IDE_PROC_FS
1123 .proc = idefloppy_proc, 761 .proc = idefloppy_proc,
762 .settings = idefloppy_settings,
1124#endif 763#endif
1125}; 764};
1126 765
@@ -1129,7 +768,6 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
1129 struct gendisk *disk = inode->i_bdev->bd_disk; 768 struct gendisk *disk = inode->i_bdev->bd_disk;
1130 struct ide_floppy_obj *floppy; 769 struct ide_floppy_obj *floppy;
1131 ide_drive_t *drive; 770 ide_drive_t *drive;
1132 struct ide_atapi_pc pc;
1133 int ret = 0; 771 int ret = 0;
1134 772
1135 debug_log("Reached %s\n", __func__); 773 debug_log("Reached %s\n", __func__);
@@ -1146,13 +784,8 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
1146 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; 784 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
1147 /* Just in case */ 785 /* Just in case */
1148 786
1149 idefloppy_init_pc(&pc); 787 if (ide_do_test_unit_ready(drive, disk))
1150 pc.c[0] = GPCMD_TEST_UNIT_READY; 788 ide_do_start_stop(drive, disk, 1);
1151
1152 if (idefloppy_queue_pc_tail(drive, &pc)) {
1153 idefloppy_create_start_stop_cmd(&pc, 1);
1154 (void) idefloppy_queue_pc_tail(drive, &pc);
1155 }
1156 789
1157 if (ide_floppy_get_capacity(drive) 790 if (ide_floppy_get_capacity(drive)
1158 && (filp->f_flags & O_NDELAY) == 0 791 && (filp->f_flags & O_NDELAY) == 0
@@ -1166,16 +799,13 @@ static int idefloppy_open(struct inode *inode, struct file *filp)
1166 goto out_put_floppy; 799 goto out_put_floppy;
1167 } 800 }
1168 801
1169 if (floppy->wp && (filp->f_mode & 2)) { 802 if ((drive->atapi_flags & IDE_AFLAG_WP) && (filp->f_mode & 2)) {
1170 ret = -EROFS; 803 ret = -EROFS;
1171 goto out_put_floppy; 804 goto out_put_floppy;
1172 } 805 }
806
1173 drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED; 807 drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED;
1174 /* IOMEGA Clik! drives do not support lock/unlock commands */ 808 ide_set_media_lock(drive, disk, 1);
1175 if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) {
1176 idefloppy_create_prevent_cmd(&pc, 1);
1177 (void) idefloppy_queue_pc_tail(drive, &pc);
1178 }
1179 check_disk_change(inode->i_bdev); 809 check_disk_change(inode->i_bdev);
1180 } else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) { 810 } else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) {
1181 ret = -EBUSY; 811 ret = -EBUSY;
@@ -1194,17 +824,11 @@ static int idefloppy_release(struct inode *inode, struct file *filp)
1194 struct gendisk *disk = inode->i_bdev->bd_disk; 824 struct gendisk *disk = inode->i_bdev->bd_disk;
1195 struct ide_floppy_obj *floppy = ide_floppy_g(disk); 825 struct ide_floppy_obj *floppy = ide_floppy_g(disk);
1196 ide_drive_t *drive = floppy->drive; 826 ide_drive_t *drive = floppy->drive;
1197 struct ide_atapi_pc pc;
1198 827
1199 debug_log("Reached %s\n", __func__); 828 debug_log("Reached %s\n", __func__);
1200 829
1201 if (floppy->openers == 1) { 830 if (floppy->openers == 1) {
1202 /* IOMEGA Clik! drives do not support lock/unlock commands */ 831 ide_set_media_lock(drive, disk, 0);
1203 if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) {
1204 idefloppy_create_prevent_cmd(&pc, 0);
1205 (void) idefloppy_queue_pc_tail(drive, &pc);
1206 }
1207
1208 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; 832 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
1209 } 833 }
1210 834
@@ -1230,80 +854,20 @@ static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc,
1230 unsigned long arg, unsigned int cmd) 854 unsigned long arg, unsigned int cmd)
1231{ 855{
1232 idefloppy_floppy_t *floppy = drive->driver_data; 856 idefloppy_floppy_t *floppy = drive->driver_data;
857 struct gendisk *disk = floppy->disk;
858 int prevent = (arg && cmd != CDROMEJECT) ? 1 : 0;
1233 859
1234 if (floppy->openers > 1) 860 if (floppy->openers > 1)
1235 return -EBUSY; 861 return -EBUSY;
1236 862
1237 /* The IOMEGA Clik! Drive doesn't support this command - 863 ide_set_media_lock(drive, disk, prevent);
1238 * no room for an eject mechanism */
1239 if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) {
1240 int prevent = arg ? 1 : 0;
1241
1242 if (cmd == CDROMEJECT)
1243 prevent = 0;
1244 864
1245 idefloppy_create_prevent_cmd(pc, prevent); 865 if (cmd == CDROMEJECT)
1246 (void) idefloppy_queue_pc_tail(floppy->drive, pc); 866 ide_do_start_stop(drive, disk, 2);
1247 }
1248
1249 if (cmd == CDROMEJECT) {
1250 idefloppy_create_start_stop_cmd(pc, 2);
1251 (void) idefloppy_queue_pc_tail(floppy->drive, pc);
1252 }
1253 867
1254 return 0; 868 return 0;
1255} 869}
1256 870
1257static int ide_floppy_format_unit(idefloppy_floppy_t *floppy,
1258 int __user *arg)
1259{
1260 struct ide_atapi_pc pc;
1261 ide_drive_t *drive = floppy->drive;
1262 int blocks, length, flags, err = 0;
1263
1264 if (floppy->openers > 1) {
1265 /* Don't format if someone is using the disk */
1266 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
1267 return -EBUSY;
1268 }
1269
1270 drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS;
1271
1272 /*
1273 * Send ATAPI_FORMAT_UNIT to the drive.
1274 *
1275 * Userland gives us the following structure:
1276 *
1277 * struct idefloppy_format_command {
1278 * int nblocks;
1279 * int blocksize;
1280 * int flags;
1281 * } ;
1282 *
1283 * flags is a bitmask, currently, the only defined flag is:
1284 *
1285 * 0x01 - verify media after format.
1286 */
1287 if (get_user(blocks, arg) ||
1288 get_user(length, arg+1) ||
1289 get_user(flags, arg+2)) {
1290 err = -EFAULT;
1291 goto out;
1292 }
1293
1294 (void) idefloppy_get_sfrp_bit(drive);
1295 idefloppy_create_format_unit_cmd(&pc, blocks, length, flags);
1296
1297 if (idefloppy_queue_pc_tail(drive, &pc))
1298 err = -EIO;
1299
1300out:
1301 if (err)
1302 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
1303 return err;
1304}
1305
1306
1307static int idefloppy_ioctl(struct inode *inode, struct file *file, 871static int idefloppy_ioctl(struct inode *inode, struct file *file,
1308 unsigned int cmd, unsigned long arg) 872 unsigned int cmd, unsigned long arg)
1309{ 873{
@@ -1314,23 +878,12 @@ static int idefloppy_ioctl(struct inode *inode, struct file *file,
1314 void __user *argp = (void __user *)arg; 878 void __user *argp = (void __user *)arg;
1315 int err; 879 int err;
1316 880
1317 switch (cmd) { 881 if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR)
1318 case CDROMEJECT:
1319 /* fall through */
1320 case CDROM_LOCKDOOR:
1321 return ide_floppy_lockdoor(drive, &pc, arg, cmd); 882 return ide_floppy_lockdoor(drive, &pc, arg, cmd);
1322 case IDEFLOPPY_IOCTL_FORMAT_SUPPORTED: 883
1323 return 0; 884 err = ide_floppy_format_ioctl(drive, file, cmd, argp);
1324 case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY: 885 if (err != -ENOTTY)
1325 return ide_floppy_get_format_capacities(drive, argp); 886 return err;
1326 case IDEFLOPPY_IOCTL_FORMAT_START:
1327 if (!(file->f_mode & 2))
1328 return -EPERM;
1329
1330 return ide_floppy_format_unit(floppy, (int __user *)arg);
1331 case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS:
1332 return idefloppy_get_format_progress(drive, argp);
1333 }
1334 887
1335 /* 888 /*
1336 * skip SCSI_IOCTL_SEND_COMMAND (deprecated) 889 * skip SCSI_IOCTL_SEND_COMMAND (deprecated)
@@ -1339,8 +892,6 @@ static int idefloppy_ioctl(struct inode *inode, struct file *file,
1339 if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND) 892 if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND)
1340 err = scsi_cmd_ioctl(file, bdev->bd_disk->queue, 893 err = scsi_cmd_ioctl(file, bdev->bd_disk->queue,
1341 bdev->bd_disk, cmd, argp); 894 bdev->bd_disk, cmd, argp);
1342 else
1343 err = -ENOTTY;
1344 895
1345 if (err == -ENOTTY) 896 if (err == -ENOTTY)
1346 err = generic_ide_ioctl(drive, file, bdev, cmd, arg); 897 err = generic_ide_ioctl(drive, file, bdev, cmd, arg);
@@ -1388,11 +939,11 @@ static int ide_floppy_probe(ide_drive_t *drive)
1388 939
1389 if (!strstr("ide-floppy", drive->driver_req)) 940 if (!strstr("ide-floppy", drive->driver_req))
1390 goto failed; 941 goto failed;
1391 if (!drive->present) 942
1392 goto failed;
1393 if (drive->media != ide_floppy) 943 if (drive->media != ide_floppy)
1394 goto failed; 944 goto failed;
1395 if (!idefloppy_identify_device(drive, drive->id)) { 945
946 if (!ide_check_atapi_device(drive, DRV_NAME)) {
1396 printk(KERN_ERR "ide-floppy: %s: not supported by this version" 947 printk(KERN_ERR "ide-floppy: %s: not supported by this version"
1397 " of ide-floppy\n", drive->name); 948 " of ide-floppy\n", drive->name);
1398 goto failed; 949 goto failed;
@@ -1410,8 +961,6 @@ static int ide_floppy_probe(ide_drive_t *drive)
1410 961
1411 ide_init_disk(g, drive); 962 ide_init_disk(g, drive);
1412 963
1413 ide_proc_register_driver(drive, &idefloppy_driver);
1414
1415 kref_init(&floppy->kref); 964 kref_init(&floppy->kref);
1416 965
1417 floppy->drive = drive; 966 floppy->drive = drive;
@@ -1450,6 +999,7 @@ static int __init idefloppy_init(void)
1450} 999}
1451 1000
1452MODULE_ALIAS("ide:*m-floppy*"); 1001MODULE_ALIAS("ide:*m-floppy*");
1002MODULE_ALIAS("ide-floppy");
1453module_init(idefloppy_init); 1003module_init(idefloppy_init);
1454module_exit(idefloppy_exit); 1004module_exit(idefloppy_exit);
1455MODULE_LICENSE("GPL"); 1005MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
new file mode 100644
index 000000000000..ecadc2bc322d
--- /dev/null
+++ b/drivers/ide/ide-floppy.h
@@ -0,0 +1,63 @@
1#ifndef __IDE_FLOPPY_H
2#define __IDE_FLOPPY_H
3
4/*
5 * Most of our global data which we need to save even as we leave the driver
6 * due to an interrupt or a timer event is stored in a variable of type
7 * idefloppy_floppy_t, defined below.
8 */
/*
 * Per-drive state of the ide-floppy driver; reached by the ioctl code through
 * drive->driver_data.
 */
9typedef struct ide_floppy_obj {
/* NOTE(review): presumably back-pointers to the owning drive/driver/gendisk */
10 ide_drive_t *drive;
11 ide_driver_t *driver;
12 struct gendisk *disk;
/* reference count; set up with kref_init() in ide_floppy_probe() */
13 struct kref kref;
/* the format ioctl refuses to start (-EBUSY) while this is greater than 1 */
14 unsigned int openers; /* protected by BKL for now */
15
16 /* Current packet command */
17 struct ide_atapi_pc *pc;
18 /* Last failed packet command */
19 struct ide_atapi_pc *failed_pc;
20 /* used for blk_{fs,pc}_request() requests */
21 struct ide_atapi_pc queued_pc;
22
/* NOTE(review): looks like storage for an internally issued REQUEST SENSE - confirm */
23 struct ide_atapi_pc request_sense_pc;
24 struct request request_sense_rq;
25
26 /* Last error information */
27 u8 sense_key, asc, ascq;
28 /* delay this long before sending packet command */
29 u8 ticks;
/* value reported by the FORMAT_GET_PROGRESS ioctl while sense is 2/4/4 */
30 int progress_indication;
31
32 /* Device information */
33 /* Current format */
34 int blocks, block_size, bs_factor;
35 /* Last format capacity descriptor */
36 u8 cap_desc[8];
37 /* Copy of the flexible disk page */
38 u8 flexible_disk_page[32];
39} idefloppy_floppy_t;
40
41/*
42 * Pages of the MODE SELECT / MODE SENSE packet commands.
43 * See SFF-8070i spec.
44 */
45#define IDEFLOPPY_CAPABILITIES_PAGE 0x1b
46#define IDEFLOPPY_FLEXIBLE_DISK_PAGE 0x05
47
48/* IOCTLs used in low-level formatting. */
49#define IDEFLOPPY_IOCTL_FORMAT_SUPPORTED 0x4600
50#define IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY 0x4601
51#define IDEFLOPPY_IOCTL_FORMAT_START 0x4602
52#define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS 0x4603
53
54/* ide-floppy.c */
55void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
56void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *);
57void ide_floppy_create_request_sense_cmd(struct ide_atapi_pc *);
58
59/* ide-floppy_ioctl.c */
60int ide_floppy_format_ioctl(ide_drive_t *, struct file *, unsigned int,
61 void __user *);
62
63#endif /*__IDE_FLOPPY_H */
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
new file mode 100644
index 000000000000..5ffc4512d14b
--- /dev/null
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -0,0 +1,243 @@
1/*
2 * ide-floppy IOCTLs handling.
3 */
4
5#include <linux/kernel.h>
6#include <linux/ide.h>
7#include <linux/cdrom.h>
8
9#include <asm/unaligned.h>
10
11#include <scsi/scsi_ioctl.h>
12
13#include "ide-floppy.h"
14
15/*
16 * Obtain the list of formattable capacities.
17 * Very similar to ide_floppy_get_capacity, except that we push the capacity
18 * descriptors to userland, instead of our own structures.
19 *
20 * Userland gives us the following structure:
21 *
22 * struct idefloppy_format_capacities {
23 * int nformats;
24 * struct {
25 * int nblocks;
26 * int blocksize;
27 * } formats[];
28 * };
29 *
30 * userland initializes nformats to the number of allocated formats[] records.
31 * On exit we set nformats to the number of records we've actually initialized.
32 */
33
34static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg)
35{
36 struct ide_floppy_obj *floppy = drive->driver_data;
37 struct ide_atapi_pc pc;
38 u8 header_len, desc_cnt;
39 int i, blocks, length, u_array_size, u_index;
40 int __user *argp;
41
42 if (get_user(u_array_size, arg))
43 return -EFAULT;
44
45 if (u_array_size <= 0)
46 return -EINVAL;
47
48 ide_floppy_create_read_capacity_cmd(&pc);
49 if (ide_queue_pc_tail(drive, floppy->disk, &pc)) {
50 printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n");
51 return -EIO;
52 }
53
54 header_len = pc.buf[3];
55 desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */
56
57 u_index = 0;
58 argp = arg + 1;
59
60 /*
61 * We always skip the first capacity descriptor. That's the current
62 * capacity. We are interested in the remaining descriptors, the
63 * formattable capacities.
64 */
65 for (i = 1; i < desc_cnt; i++) {
66 unsigned int desc_start = 4 + i*8;
67
68 if (u_index >= u_array_size)
69 break; /* User-supplied buffer too small */
70
71 blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]);
72 length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]);
73
74 if (put_user(blocks, argp))
75 return -EFAULT;
76
77 ++argp;
78
79 if (put_user(length, argp))
80 return -EFAULT;
81
82 ++argp;
83
84 ++u_index;
85 }
86
87 if (put_user(u_index, arg))
88 return -EFAULT;
89
90 return 0;
91}
92
93static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b,
94 int l, int flags)
95{
96 ide_init_pc(pc);
97 pc->c[0] = GPCMD_FORMAT_UNIT;
98 pc->c[1] = 0x17;
99
100 memset(pc->buf, 0, 12);
101 pc->buf[1] = 0xA2;
102 /* Default format list header, u8 1: FOV/DCRT/IMM bits set */
103
104 if (flags & 1) /* Verify bit on... */
105 pc->buf[1] ^= 0x20; /* ... turn off DCRT bit */
106 pc->buf[3] = 8;
107
108 put_unaligned(cpu_to_be32(b), (unsigned int *)(&pc->buf[4]));
109 put_unaligned(cpu_to_be32(l), (unsigned int *)(&pc->buf[8]));
110 pc->buf_size = 12;
111 pc->flags |= PC_FLAG_WRITING;
112}
113
114static int ide_floppy_get_sfrp_bit(ide_drive_t *drive)
115{
116 idefloppy_floppy_t *floppy = drive->driver_data;
117 struct ide_atapi_pc pc;
118
119 drive->atapi_flags &= ~IDE_AFLAG_SRFP;
120
121 ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_CAPABILITIES_PAGE);
122 pc.flags |= PC_FLAG_SUPPRESS_ERROR;
123
124 if (ide_queue_pc_tail(drive, floppy->disk, &pc))
125 return 1;
126
127 if (pc.buf[8 + 2] & 0x40)
128 drive->atapi_flags |= IDE_AFLAG_SRFP;
129
130 return 0;
131}
132
133static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg)
134{
135 idefloppy_floppy_t *floppy = drive->driver_data;
136 struct ide_atapi_pc pc;
137 int blocks, length, flags, err = 0;
138
139 if (floppy->openers > 1) {
140 /* Don't format if someone is using the disk */
141 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
142 return -EBUSY;
143 }
144
145 drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS;
146
147 /*
148 * Send ATAPI_FORMAT_UNIT to the drive.
149 *
150 * Userland gives us the following structure:
151 *
152 * struct idefloppy_format_command {
153 * int nblocks;
154 * int blocksize;
155 * int flags;
156 * } ;
157 *
158 * flags is a bitmask, currently, the only defined flag is:
159 *
160 * 0x01 - verify media after format.
161 */
162 if (get_user(blocks, arg) ||
163 get_user(length, arg+1) ||
164 get_user(flags, arg+2)) {
165 err = -EFAULT;
166 goto out;
167 }
168
169 (void)ide_floppy_get_sfrp_bit(drive);
170 ide_floppy_create_format_unit_cmd(&pc, blocks, length, flags);
171
172 if (ide_queue_pc_tail(drive, floppy->disk, &pc))
173 err = -EIO;
174
175out:
176 if (err)
177 drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS;
178 return err;
179}
180
181/*
182 * Get ATAPI_FORMAT_UNIT progress indication.
183 *
184 * Userland gives a pointer to an int. The int is set to a progress
185 * indicator 0-65536, with 65536=100%.
186 *
187 * If the drive does not support format progress indication, we just check
188 * the dsc bit, and return either 0 or 65536.
189 */
190
191static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg)
192{
193 idefloppy_floppy_t *floppy = drive->driver_data;
194 struct ide_atapi_pc pc;
195 int progress_indication = 0x10000;
196
197 if (drive->atapi_flags & IDE_AFLAG_SRFP) {
198 ide_floppy_create_request_sense_cmd(&pc);
199 if (ide_queue_pc_tail(drive, floppy->disk, &pc))
200 return -EIO;
201
202 if (floppy->sense_key == 2 &&
203 floppy->asc == 4 &&
204 floppy->ascq == 4)
205 progress_indication = floppy->progress_indication;
206
207 /* Else assume format_unit has finished, and we're at 0x10000 */
208 } else {
209 ide_hwif_t *hwif = drive->hwif;
210 unsigned long flags;
211 u8 stat;
212
213 local_irq_save(flags);
214 stat = hwif->tp_ops->read_status(hwif);
215 local_irq_restore(flags);
216
217 progress_indication = ((stat & ATA_DSC) == 0) ? 0 : 0x10000;
218 }
219
220 if (put_user(progress_indication, arg))
221 return -EFAULT;
222
223 return 0;
224}
225
226int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file,
227 unsigned int cmd, void __user *argp)
228{
229 switch (cmd) {
230 case IDEFLOPPY_IOCTL_FORMAT_SUPPORTED:
231 return 0;
232 case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY:
233 return ide_floppy_get_format_capacities(drive, argp);
234 case IDEFLOPPY_IOCTL_FORMAT_START:
235 if (!(file->f_mode & 2))
236 return -EPERM;
237 return ide_floppy_format_unit(drive, (int __user *)argp);
238 case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS:
239 return ide_floppy_get_format_progress(drive, argp);
240 default:
241 return -ENOTTY;
242 }
243}
diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 8fe8b5b9cf7d..0a3cb0c33ae5 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -19,6 +19,7 @@
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/ide.h> 21#include <linux/ide.h>
22#include <linux/pci_ids.h>
22 23
23/* FIXME: convert m32r to use ide_platform host driver */ 24/* FIXME: convert m32r to use ide_platform host driver */
24#ifdef CONFIG_M32R 25#ifdef CONFIG_M32R
@@ -27,7 +28,7 @@
27 28
28#define DRV_NAME "ide_generic" 29#define DRV_NAME "ide_generic"
29 30
30static int probe_mask = 0x03; 31static int probe_mask;
31module_param(probe_mask, int, 0); 32module_param(probe_mask, int, 0);
32MODULE_PARM_DESC(probe_mask, "probe mask for legacy ISA IDE ports"); 33MODULE_PARM_DESC(probe_mask, "probe mask for legacy ISA IDE ports");
33 34
@@ -100,19 +101,65 @@ static const u16 legacy_bases[] = { 0x1f0, 0x170, 0x1e8, 0x168, 0x1e0, 0x160 };
100static const int legacy_irqs[] = { 14, 15, 11, 10, 8, 12 }; 101static const int legacy_irqs[] = { 14, 15, 11, 10, 8, 12 };
101#endif 102#endif
102 103
104static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary)
105{
106 struct pci_dev *p = NULL;
107 u16 val;
108
109 for_each_pci_dev(p) {
110
111 if (pci_resource_start(p, 0) == 0x1f0)
112 *primary = 1;
113 if (pci_resource_start(p, 2) == 0x170)
114 *secondary = 1;
115
116 /* Cyrix CS55{1,2}0 pre SFF MWDMA ATA on the bridge */
117 if (p->vendor == PCI_VENDOR_ID_CYRIX &&
118 (p->device == PCI_DEVICE_ID_CYRIX_5510 ||
119 p->device == PCI_DEVICE_ID_CYRIX_5520))
120 *primary = *secondary = 1;
121
122 /* Intel MPIIX - PIO ATA on non PCI side of bridge */
123 if (p->vendor == PCI_VENDOR_ID_INTEL &&
124 p->device == PCI_DEVICE_ID_INTEL_82371MX) {
125
126 pci_read_config_word(p, 0x6C, &val);
127 if (val & 0x8000) {
128 /* ATA port enabled */
129 if (val & 0x4000)
130 *secondary = 1;
131 else
132 *primary = 1;
133 }
134 }
135 }
136}
137
103static int __init ide_generic_init(void) 138static int __init ide_generic_init(void)
104{ 139{
105 hw_regs_t hw[MAX_HWIFS], *hws[MAX_HWIFS]; 140 hw_regs_t hw[MAX_HWIFS], *hws[MAX_HWIFS];
106 struct ide_host *host; 141 struct ide_host *host;
107 unsigned long io_addr; 142 unsigned long io_addr;
108 int i, rc; 143 int i, rc, primary = 0, secondary = 0;
109 144
110#ifdef CONFIG_MIPS 145#ifdef CONFIG_MIPS
111 if (!ide_probe_legacy()) 146 if (!ide_probe_legacy())
112 return -ENODEV; 147 return -ENODEV;
113#endif 148#endif
114 printk(KERN_INFO DRV_NAME ": please use \"probe_mask=0x3f\" module " 149 ide_generic_check_pci_legacy_iobases(&primary, &secondary);
115 "parameter for probing all legacy ISA IDE ports\n"); 150
151 if (!probe_mask) {
152 printk(KERN_INFO DRV_NAME ": please use \"probe_mask=0x3f\" "
153 "module parameter for probing all legacy ISA IDE ports\n");
154
155 if (primary == 0)
156 probe_mask |= 0x1;
157
158 if (secondary == 0)
159 probe_mask |= 0x2;
160 } else
161 printk(KERN_INFO DRV_NAME ": enforcing probing of I/O ports "
162 "upon user request\n");
116 163
117 memset(hws, 0, sizeof(hw_regs_t *) * MAX_HWIFS); 164 memset(hws, 0, sizeof(hw_regs_t *) * MAX_HWIFS);
118 165
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index a896a283f27f..1c51949833be 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -40,6 +40,7 @@
40#include <linux/pci.h> 40#include <linux/pci.h>
41#include <linux/delay.h> 41#include <linux/delay.h>
42#include <linux/ide.h> 42#include <linux/ide.h>
43#include <linux/hdreg.h>
43#include <linux/completion.h> 44#include <linux/completion.h>
44#include <linux/reboot.h> 45#include <linux/reboot.h>
45#include <linux/cdrom.h> 46#include <linux/cdrom.h>
@@ -183,18 +184,18 @@ static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *
183 if (drive->media != ide_disk) 184 if (drive->media != ide_disk)
184 break; 185 break;
185 /* Not supported? Switch to next step now. */ 186 /* Not supported? Switch to next step now. */
186 if (!drive->wcache || !ide_id_has_flush_cache(drive->id)) { 187 if (!drive->wcache || ata_id_flush_enabled(drive->id) == 0) {
187 ide_complete_power_step(drive, rq, 0, 0); 188 ide_complete_power_step(drive, rq, 0, 0);
188 return ide_stopped; 189 return ide_stopped;
189 } 190 }
190 if (ide_id_has_flush_cache_ext(drive->id)) 191 if (ata_id_flush_ext_enabled(drive->id))
191 args->tf.command = WIN_FLUSH_CACHE_EXT; 192 args->tf.command = ATA_CMD_FLUSH_EXT;
192 else 193 else
193 args->tf.command = WIN_FLUSH_CACHE; 194 args->tf.command = ATA_CMD_FLUSH;
194 goto out_do_tf; 195 goto out_do_tf;
195 196
196 case idedisk_pm_standby: /* Suspend step 2 (standby) */ 197 case idedisk_pm_standby: /* Suspend step 2 (standby) */
197 args->tf.command = WIN_STANDBYNOW1; 198 args->tf.command = ATA_CMD_STANDBYNOW1;
198 goto out_do_tf; 199 goto out_do_tf;
199 200
200 case idedisk_pm_restore_pio: /* Resume step 1 (restore PIO) */ 201 case idedisk_pm_restore_pio: /* Resume step 1 (restore PIO) */
@@ -209,7 +210,7 @@ static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *
209 return ide_stopped; 210 return ide_stopped;
210 211
211 case idedisk_pm_idle: /* Resume step 2 (idle) */ 212 case idedisk_pm_idle: /* Resume step 2 (idle) */
212 args->tf.command = WIN_IDLEIMMEDIATE; 213 args->tf.command = ATA_CMD_IDLEIMMEDIATE;
213 goto out_do_tf; 214 goto out_do_tf;
214 215
215 case ide_pm_restore_dma: /* Resume step 3 (restore DMA) */ 216 case ide_pm_restore_dma: /* Resume step 3 (restore DMA) */
@@ -322,7 +323,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
322 ide_task_t *task = (ide_task_t *)rq->special; 323 ide_task_t *task = (ide_task_t *)rq->special;
323 324
324 if (rq->errors == 0) 325 if (rq->errors == 0)
325 rq->errors = !OK_STAT(stat, READY_STAT, BAD_STAT); 326 rq->errors = !OK_STAT(stat, ATA_DRDY, BAD_STAT);
326 327
327 if (task) { 328 if (task) {
328 struct ide_taskfile *tf = &task->tf; 329 struct ide_taskfile *tf = &task->tf;
@@ -373,29 +374,29 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, u8
373{ 374{
374 ide_hwif_t *hwif = drive->hwif; 375 ide_hwif_t *hwif = drive->hwif;
375 376
376 if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { 377 if ((stat & ATA_BUSY) || ((stat & ATA_DF) && !drive->nowerr)) {
377 /* other bits are useless when BUSY */ 378 /* other bits are useless when BUSY */
378 rq->errors |= ERROR_RESET; 379 rq->errors |= ERROR_RESET;
379 } else if (stat & ERR_STAT) { 380 } else if (stat & ATA_ERR) {
380 /* err has different meaning on cdrom and tape */ 381 /* err has different meaning on cdrom and tape */
381 if (err == ABRT_ERR) { 382 if (err == ATA_ABORTED) {
382 if (drive->select.b.lba && 383 if (drive->select.b.lba &&
383 /* some newer drives don't support WIN_SPECIFY */ 384 /* some newer drives don't support ATA_CMD_INIT_DEV_PARAMS */
384 hwif->tp_ops->read_status(hwif) == WIN_SPECIFY) 385 hwif->tp_ops->read_status(hwif) == ATA_CMD_INIT_DEV_PARAMS)
385 return ide_stopped; 386 return ide_stopped;
386 } else if ((err & BAD_CRC) == BAD_CRC) { 387 } else if ((err & BAD_CRC) == BAD_CRC) {
387 /* UDMA crc error, just retry the operation */ 388 /* UDMA crc error, just retry the operation */
388 drive->crc_count++; 389 drive->crc_count++;
389 } else if (err & (BBD_ERR | ECC_ERR)) { 390 } else if (err & (ATA_BBK | ATA_UNC)) {
390 /* retries won't help these */ 391 /* retries won't help these */
391 rq->errors = ERROR_MAX; 392 rq->errors = ERROR_MAX;
392 } else if (err & TRK0_ERR) { 393 } else if (err & ATA_TRK0NF) {
393 /* help it find track zero */ 394 /* help it find track zero */
394 rq->errors |= ERROR_RECAL; 395 rq->errors |= ERROR_RECAL;
395 } 396 }
396 } 397 }
397 398
398 if ((stat & DRQ_STAT) && rq_data_dir(rq) == READ && 399 if ((stat & ATA_DRQ) && rq_data_dir(rq) == READ &&
399 (hwif->host_flags & IDE_HFLAG_ERROR_STOPS_FIFO) == 0) { 400 (hwif->host_flags & IDE_HFLAG_ERROR_STOPS_FIFO) == 0) {
400 int nsect = drive->mult_count ? drive->mult_count : 1; 401 int nsect = drive->mult_count ? drive->mult_count : 1;
401 402
@@ -407,7 +408,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, u8
407 return ide_stopped; 408 return ide_stopped;
408 } 409 }
409 410
410 if (hwif->tp_ops->read_status(hwif) & (BUSY_STAT | DRQ_STAT)) 411 if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ))
411 rq->errors |= ERROR_RESET; 412 rq->errors |= ERROR_RESET;
412 413
413 if ((rq->errors & ERROR_RESET) == ERROR_RESET) { 414 if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
@@ -427,16 +428,16 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, u
427{ 428{
428 ide_hwif_t *hwif = drive->hwif; 429 ide_hwif_t *hwif = drive->hwif;
429 430
430 if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { 431 if ((stat & ATA_BUSY) || ((stat & ATA_DF) && !drive->nowerr)) {
431 /* other bits are useless when BUSY */ 432 /* other bits are useless when BUSY */
432 rq->errors |= ERROR_RESET; 433 rq->errors |= ERROR_RESET;
433 } else { 434 } else {
434 /* add decoding error stuff */ 435 /* add decoding error stuff */
435 } 436 }
436 437
437 if (hwif->tp_ops->read_status(hwif) & (BUSY_STAT | DRQ_STAT)) 438 if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ))
438 /* force an abort */ 439 /* force an abort */
439 hwif->tp_ops->exec_command(hwif, WIN_IDLEIMMEDIATE); 440 hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE);
440 441
441 if (rq->errors >= ERROR_MAX) { 442 if (rq->errors >= ERROR_MAX) {
442 ide_kill_rq(drive, rq); 443 ide_kill_rq(drive, rq);
@@ -509,19 +510,19 @@ static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
509 tf->lbam = drive->cyl; 510 tf->lbam = drive->cyl;
510 tf->lbah = drive->cyl >> 8; 511 tf->lbah = drive->cyl >> 8;
511 tf->device = ((drive->head - 1) | drive->select.all) & ~ATA_LBA; 512 tf->device = ((drive->head - 1) | drive->select.all) & ~ATA_LBA;
512 tf->command = WIN_SPECIFY; 513 tf->command = ATA_CMD_INIT_DEV_PARAMS;
513} 514}
514 515
515static void ide_tf_set_restore_cmd(ide_drive_t *drive, struct ide_taskfile *tf) 516static void ide_tf_set_restore_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
516{ 517{
517 tf->nsect = drive->sect; 518 tf->nsect = drive->sect;
518 tf->command = WIN_RESTORE; 519 tf->command = ATA_CMD_RESTORE;
519} 520}
520 521
521static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf) 522static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
522{ 523{
523 tf->nsect = drive->mult_req; 524 tf->nsect = drive->mult_req;
524 tf->command = WIN_SETMULT; 525 tf->command = ATA_CMD_SET_MULTI;
525} 526}
526 527
527static ide_startstop_t ide_disk_special(ide_drive_t *drive) 528static ide_startstop_t ide_disk_special(ide_drive_t *drive)
@@ -540,8 +541,6 @@ static ide_startstop_t ide_disk_special(ide_drive_t *drive)
540 ide_tf_set_restore_cmd(drive, &args.tf); 541 ide_tf_set_restore_cmd(drive, &args.tf);
541 } else if (s->b.set_multmode) { 542 } else if (s->b.set_multmode) {
542 s->b.set_multmode = 0; 543 s->b.set_multmode = 0;
543 if (drive->mult_req > drive->id->max_multsect)
544 drive->mult_req = drive->id->max_multsect;
545 ide_tf_set_setmult_cmd(drive, &args.tf); 544 ide_tf_set_setmult_cmd(drive, &args.tf);
546 } else if (s->all) { 545 } else if (s->all) {
547 int special = s->all; 546 int special = s->all;
@@ -586,9 +585,10 @@ static int set_pio_mode_abuse(ide_hwif_t *hwif, u8 req_pio)
586 * do_special - issue some special commands 585 * do_special - issue some special commands
587 * @drive: drive the command is for 586 * @drive: drive the command is for
588 * 587 *
589 * do_special() is used to issue WIN_SPECIFY, WIN_RESTORE, and WIN_SETMULT 588 * do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS,
590 * commands to a drive. It used to do much more, but has been scaled 589 * ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive.
591 * back. 590 *
591 * It used to do much more, but has been scaled back.
592 */ 592 */
593 593
594static ide_startstop_t do_special (ide_drive_t *drive) 594static ide_startstop_t do_special (ide_drive_t *drive)
@@ -716,9 +716,49 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
716 return ide_stopped; 716 return ide_stopped;
717} 717}
718 718
719int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
720 int arg)
721{
722 struct request_queue *q = drive->queue;
723 struct request *rq;
724 int ret = 0;
725
726 if (!(setting->flags & DS_SYNC))
727 return setting->set(drive, arg);
728
729 rq = blk_get_request(q, READ, GFP_KERNEL);
730 if (!rq)
731 return -ENOMEM;
732
733 rq->cmd_type = REQ_TYPE_SPECIAL;
734 rq->cmd_len = 5;
735 rq->cmd[0] = REQ_DEVSET_EXEC;
736 *(int *)&rq->cmd[1] = arg;
737 rq->special = setting->set;
738
739 if (blk_execute_rq(q, NULL, rq, 0))
740 ret = rq->errors;
741 blk_put_request(rq);
742
743 return ret;
744}
745EXPORT_SYMBOL_GPL(ide_devset_execute);
746
719static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq) 747static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq)
720{ 748{
721 switch (rq->cmd[0]) { 749 switch (rq->cmd[0]) {
750 case REQ_DEVSET_EXEC:
751 {
752 int err, (*setfunc)(ide_drive_t *, int) = rq->special;
753
754 err = setfunc(drive, *(int *)&rq->cmd[1]);
755 if (err)
756 rq->errors = err;
757 else
758 err = 1;
759 ide_end_request(drive, err, 0);
760 return ide_stopped;
761 }
722 case REQ_DRIVE_RESET: 762 case REQ_DRIVE_RESET:
723 return ide_do_reset(drive); 763 return ide_do_reset(drive);
724 default: 764 default:
@@ -766,9 +806,7 @@ static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
766 * start_request - start of I/O and command issuing for IDE 806 * start_request - start of I/O and command issuing for IDE
767 * 807 *
768 * start_request() initiates handling of a new I/O request. It 808 * start_request() initiates handling of a new I/O request. It
769 * accepts commands and I/O (read/write) requests. It also does 809 * accepts commands and I/O (read/write) requests.
770 * the final remapping for weird stuff like EZDrive. Once
771 * device mapper can work sector level the EZDrive stuff can go away
772 * 810 *
773 * FIXME: this function needs a rename 811 * FIXME: this function needs a rename
774 */ 812 */
@@ -776,7 +814,6 @@ static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
776static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) 814static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
777{ 815{
778 ide_startstop_t startstop; 816 ide_startstop_t startstop;
779 sector_t block;
780 817
781 BUG_ON(!blk_rq_started(rq)); 818 BUG_ON(!blk_rq_started(rq));
782 819
@@ -791,21 +828,12 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
791 goto kill_rq; 828 goto kill_rq;
792 } 829 }
793 830
794 block = rq->sector;
795 if (blk_fs_request(rq) &&
796 (drive->media == ide_disk || drive->media == ide_floppy)) {
797 block += drive->sect0;
798 }
799 /* Yecch - this will shift the entire interval,
800 possibly killing some innocent following sector */
801 if (block == 0 && drive->remap_0_to_1 == 1)
802 block = 1; /* redirect MBR access to EZ-Drive partn table */
803
804 if (blk_pm_request(rq)) 831 if (blk_pm_request(rq))
805 ide_check_pm_state(drive, rq); 832 ide_check_pm_state(drive, rq);
806 833
807 SELECT_DRIVE(drive); 834 SELECT_DRIVE(drive);
808 if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { 835 if (ide_wait_stat(&startstop, drive, drive->ready_stat,
836 ATA_BUSY | ATA_DRQ, WAIT_READY)) {
809 printk(KERN_ERR "%s: drive not ready for command\n", drive->name); 837 printk(KERN_ERR "%s: drive not ready for command\n", drive->name);
810 return startstop; 838 return startstop;
811 } 839 }
@@ -844,7 +872,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
844 return ide_special_rq(drive, rq); 872 return ide_special_rq(drive, rq);
845 873
846 drv = *(ide_driver_t **)rq->rq_disk->private_data; 874 drv = *(ide_driver_t **)rq->rq_disk->private_data;
847 return drv->do_request(drive, rq, block); 875
876 return drv->do_request(drive, rq, rq->sector);
848 } 877 }
849 return do_special(drive); 878 return do_special(drive);
850kill_rq: 879kill_rq:
@@ -1325,7 +1354,7 @@ static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup)
1325 if (hwif->irq == irq) { 1354 if (hwif->irq == irq) {
1326 stat = hwif->tp_ops->read_status(hwif); 1355 stat = hwif->tp_ops->read_status(hwif);
1327 1356
1328 if (!OK_STAT(stat, READY_STAT, BAD_STAT)) { 1357 if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
1329 /* Try to not flood the console with msgs */ 1358 /* Try to not flood the console with msgs */
1330 static unsigned long last_msgtime, count; 1359 static unsigned long last_msgtime, count;
1331 ++count; 1360 ++count;
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
new file mode 100644
index 000000000000..cf01564901af
--- /dev/null
+++ b/drivers/ide/ide-ioctls.c
@@ -0,0 +1,290 @@
1/*
2 * IDE ioctls handling.
3 */
4
5#include <linux/hdreg.h>
6#include <linux/ide.h>
7
8static const struct ide_ioctl_devset ide_ioctl_settings[] = {
9{ HDIO_GET_32BIT, HDIO_SET_32BIT, &ide_devset_io_32bit },
10{ HDIO_GET_KEEPSETTINGS, HDIO_SET_KEEPSETTINGS, &ide_devset_keepsettings },
11{ HDIO_GET_UNMASKINTR, HDIO_SET_UNMASKINTR, &ide_devset_unmaskirq },
12{ HDIO_GET_DMA, HDIO_SET_DMA, &ide_devset_using_dma },
13{ -1, HDIO_SET_PIO_MODE, &ide_devset_pio_mode },
14{ 0 }
15};
16
17int ide_setting_ioctl(ide_drive_t *drive, struct block_device *bdev,
18 unsigned int cmd, unsigned long arg,
19 const struct ide_ioctl_devset *s)
20{
21 const struct ide_devset *ds;
22 unsigned long flags;
23 int err = -EOPNOTSUPP;
24
25 for (; (ds = s->setting); s++) {
26 if (ds->get && s->get_ioctl == cmd)
27 goto read_val;
28 else if (ds->set && s->set_ioctl == cmd)
29 goto set_val;
30 }
31
32 return err;
33
34read_val:
35 mutex_lock(&ide_setting_mtx);
36 spin_lock_irqsave(&ide_lock, flags);
37 err = ds->get(drive);
38 spin_unlock_irqrestore(&ide_lock, flags);
39 mutex_unlock(&ide_setting_mtx);
40 return err >= 0 ? put_user(err, (long __user *)arg) : err;
41
42set_val:
43 if (bdev != bdev->bd_contains)
44 err = -EINVAL;
45 else {
46 if (!capable(CAP_SYS_ADMIN))
47 err = -EACCES;
48 else {
49 mutex_lock(&ide_setting_mtx);
50 err = ide_devset_execute(drive, ds, arg);
51 mutex_unlock(&ide_setting_mtx);
52 }
53 }
54 return err;
55}
56EXPORT_SYMBOL_GPL(ide_setting_ioctl);
57
58static int ide_get_identity_ioctl(ide_drive_t *drive, unsigned int cmd,
59 unsigned long arg)
60{
61 u16 *id = NULL;
62 int size = (cmd == HDIO_GET_IDENTITY) ? (ATA_ID_WORDS * 2) : 142;
63 int rc = 0;
64
65 if (drive->id_read == 0) {
66 rc = -ENOMSG;
67 goto out;
68 }
69
70 id = kmalloc(size, GFP_KERNEL);
71 if (id == NULL) {
72 rc = -ENOMEM;
73 goto out;
74 }
75
76 memcpy(id, drive->id, size);
77 ata_id_to_hd_driveid(id);
78
79 if (copy_to_user((void __user *)arg, id, size))
80 rc = -EFAULT;
81
82 kfree(id);
83out:
84 return rc;
85}
86
87static int ide_get_nice_ioctl(ide_drive_t *drive, unsigned long arg)
88{
89 return put_user((drive->dsc_overlap << IDE_NICE_DSC_OVERLAP) |
90 (drive->nice1 << IDE_NICE_1), (long __user *)arg);
91}
92
93static int ide_set_nice_ioctl(ide_drive_t *drive, unsigned long arg)
94{
95 if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1))))
96 return -EPERM;
97
98 if (((arg >> IDE_NICE_DSC_OVERLAP) & 1) &&
99 (drive->media == ide_disk || drive->media == ide_floppy ||
100 drive->scsi))
101 return -EPERM;
102
103 drive->dsc_overlap = (arg >> IDE_NICE_DSC_OVERLAP) & 1;
104 drive->nice1 = (arg >> IDE_NICE_1) & 1;
105
106 return 0;
107}
108
109static int ide_cmd_ioctl(ide_drive_t *drive, unsigned cmd, unsigned long arg)
110{
111 u8 *buf = NULL;
112 int bufsize = 0, err = 0;
113 u8 args[4], xfer_rate = 0;
114 ide_task_t tfargs;
115 struct ide_taskfile *tf = &tfargs.tf;
116 u16 *id = drive->id;
117
118 if (NULL == (void *) arg) {
119 struct request *rq;
120
121 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
122 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
123 err = blk_execute_rq(drive->queue, NULL, rq, 0);
124 blk_put_request(rq);
125
126 return err;
127 }
128
129 if (copy_from_user(args, (void __user *)arg, 4))
130 return -EFAULT;
131
132 memset(&tfargs, 0, sizeof(ide_task_t));
133 tf->feature = args[2];
134 if (args[0] == ATA_CMD_SMART) {
135 tf->nsect = args[3];
136 tf->lbal = args[1];
137 tf->lbam = 0x4f;
138 tf->lbah = 0xc2;
139 tfargs.tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_IN_NSECT;
140 } else {
141 tf->nsect = args[1];
142 tfargs.tf_flags = IDE_TFLAG_OUT_FEATURE |
143 IDE_TFLAG_OUT_NSECT | IDE_TFLAG_IN_NSECT;
144 }
145 tf->command = args[0];
146 tfargs.data_phase = args[3] ? TASKFILE_IN : TASKFILE_NO_DATA;
147
148 if (args[3]) {
149 tfargs.tf_flags |= IDE_TFLAG_IO_16BIT;
150 bufsize = SECTOR_SIZE * args[3];
151 buf = kzalloc(bufsize, GFP_KERNEL);
152 if (buf == NULL)
153 return -ENOMEM;
154 }
155
156 if (tf->command == ATA_CMD_SET_FEATURES &&
157 tf->feature == SETFEATURES_XFER &&
158 tf->nsect >= XFER_SW_DMA_0 &&
159 (id[ATA_ID_UDMA_MODES] ||
160 id[ATA_ID_MWDMA_MODES] ||
161 id[ATA_ID_SWDMA_MODES])) {
162 xfer_rate = args[1];
163 if (tf->nsect > XFER_UDMA_2 && !eighty_ninty_three(drive)) {
164 printk(KERN_WARNING "%s: UDMA speeds >UDMA33 cannot "
165 "be set\n", drive->name);
166 goto abort;
167 }
168 }
169
170 err = ide_raw_taskfile(drive, &tfargs, buf, args[3]);
171
172 args[0] = tf->status;
173 args[1] = tf->error;
174 args[2] = tf->nsect;
175
176 if (!err && xfer_rate) {
177 /* active-retuning-calls future */
178 ide_set_xfer_rate(drive, xfer_rate);
179 ide_driveid_update(drive);
180 }
181abort:
182 if (copy_to_user((void __user *)arg, &args, 4))
183 err = -EFAULT;
184 if (buf) {
185 if (copy_to_user((void __user *)(arg + 4), buf, bufsize))
186 err = -EFAULT;
187 kfree(buf);
188 }
189 return err;
190}
191
192static int ide_task_ioctl(ide_drive_t *drive, unsigned cmd, unsigned long arg)
193{
194 void __user *p = (void __user *)arg;
195 int err = 0;
196 u8 args[7];
197 ide_task_t task;
198
199 if (copy_from_user(args, p, 7))
200 return -EFAULT;
201
202 memset(&task, 0, sizeof(task));
203 memcpy(&task.tf_array[7], &args[1], 6);
204 task.tf.command = args[0];
205 task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
206
207 err = ide_no_data_taskfile(drive, &task);
208
209 args[0] = task.tf.command;
210 memcpy(&args[1], &task.tf_array[7], 6);
211
212 if (copy_to_user(p, args, 7))
213 err = -EFAULT;
214
215 return err;
216}
217
218static int generic_drive_reset(ide_drive_t *drive)
219{
220 struct request *rq;
221 int ret = 0;
222
223 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
224 rq->cmd_type = REQ_TYPE_SPECIAL;
225 rq->cmd_len = 1;
226 rq->cmd[0] = REQ_DRIVE_RESET;
227 rq->cmd_flags |= REQ_SOFTBARRIER;
228 if (blk_execute_rq(drive->queue, NULL, rq, 1))
229 ret = rq->errors;
230 blk_put_request(rq);
231 return ret;
232}
233
234int generic_ide_ioctl(ide_drive_t *drive, struct file *file,
235 struct block_device *bdev,
236 unsigned int cmd, unsigned long arg)
237{
238 int err;
239
240 err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_ioctl_settings);
241 if (err != -EOPNOTSUPP)
242 return err;
243
244 switch (cmd) {
245 case HDIO_OBSOLETE_IDENTITY:
246 case HDIO_GET_IDENTITY:
247 if (bdev != bdev->bd_contains)
248 return -EINVAL;
249 return ide_get_identity_ioctl(drive, cmd, arg);
250 case HDIO_GET_NICE:
251 return ide_get_nice_ioctl(drive, arg);
252 case HDIO_SET_NICE:
253 if (!capable(CAP_SYS_ADMIN))
254 return -EACCES;
255 return ide_set_nice_ioctl(drive, arg);
256#ifdef CONFIG_IDE_TASK_IOCTL
257 case HDIO_DRIVE_TASKFILE:
258 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
259 return -EACCES;
260 if (drive->media == ide_disk)
261 return ide_taskfile_ioctl(drive, cmd, arg);
262 return -ENOMSG;
263#endif
264 case HDIO_DRIVE_CMD:
265 if (!capable(CAP_SYS_RAWIO))
266 return -EACCES;
267 return ide_cmd_ioctl(drive, cmd, arg);
268 case HDIO_DRIVE_TASK:
269 if (!capable(CAP_SYS_RAWIO))
270 return -EACCES;
271 return ide_task_ioctl(drive, cmd, arg);
272 case HDIO_DRIVE_RESET:
273 if (!capable(CAP_SYS_ADMIN))
274 return -EACCES;
275 return generic_drive_reset(drive);
276 case HDIO_GET_BUSSTATE:
277 if (!capable(CAP_SYS_ADMIN))
278 return -EACCES;
279 if (put_user(BUSSTATE_ON, (long __user *)arg))
280 return -EFAULT;
281 return 0;
282 case HDIO_SET_BUSSTATE:
283 if (!capable(CAP_SYS_ADMIN))
284 return -EACCES;
285 return -EOPNOTSUPP;
286 default:
287 return -EINVAL;
288 }
289}
290EXPORT_SYMBOL(generic_ide_ioctl);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 2cbadffe922e..0a2fd3b37ac4 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -18,7 +18,6 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/pci.h> 19#include <linux/pci.h>
20#include <linux/delay.h> 20#include <linux/delay.h>
21#include <linux/hdreg.h>
22#include <linux/ide.h> 21#include <linux/ide.h>
23#include <linux/bitops.h> 22#include <linux/bitops.h>
24#include <linux/nmi.h> 23#include <linux/nmi.h>
@@ -400,97 +399,14 @@ const struct ide_tp_ops default_tp_ops = {
400 .output_data = ide_output_data, 399 .output_data = ide_output_data,
401}; 400};
402 401
403void ide_fix_driveid (struct hd_driveid *id) 402void ide_fix_driveid(u16 *id)
404{ 403{
405#ifndef __LITTLE_ENDIAN 404#ifndef __LITTLE_ENDIAN
406# ifdef __BIG_ENDIAN 405# ifdef __BIG_ENDIAN
407 int i; 406 int i;
408 u16 *stringcast; 407
409 408 for (i = 0; i < 256; i++)
410 id->config = __le16_to_cpu(id->config); 409 id[i] = __le16_to_cpu(id[i]);
411 id->cyls = __le16_to_cpu(id->cyls);
412 id->reserved2 = __le16_to_cpu(id->reserved2);
413 id->heads = __le16_to_cpu(id->heads);
414 id->track_bytes = __le16_to_cpu(id->track_bytes);
415 id->sector_bytes = __le16_to_cpu(id->sector_bytes);
416 id->sectors = __le16_to_cpu(id->sectors);
417 id->vendor0 = __le16_to_cpu(id->vendor0);
418 id->vendor1 = __le16_to_cpu(id->vendor1);
419 id->vendor2 = __le16_to_cpu(id->vendor2);
420 stringcast = (u16 *)&id->serial_no[0];
421 for (i = 0; i < (20/2); i++)
422 stringcast[i] = __le16_to_cpu(stringcast[i]);
423 id->buf_type = __le16_to_cpu(id->buf_type);
424 id->buf_size = __le16_to_cpu(id->buf_size);
425 id->ecc_bytes = __le16_to_cpu(id->ecc_bytes);
426 stringcast = (u16 *)&id->fw_rev[0];
427 for (i = 0; i < (8/2); i++)
428 stringcast[i] = __le16_to_cpu(stringcast[i]);
429 stringcast = (u16 *)&id->model[0];
430 for (i = 0; i < (40/2); i++)
431 stringcast[i] = __le16_to_cpu(stringcast[i]);
432 id->dword_io = __le16_to_cpu(id->dword_io);
433 id->reserved50 = __le16_to_cpu(id->reserved50);
434 id->field_valid = __le16_to_cpu(id->field_valid);
435 id->cur_cyls = __le16_to_cpu(id->cur_cyls);
436 id->cur_heads = __le16_to_cpu(id->cur_heads);
437 id->cur_sectors = __le16_to_cpu(id->cur_sectors);
438 id->cur_capacity0 = __le16_to_cpu(id->cur_capacity0);
439 id->cur_capacity1 = __le16_to_cpu(id->cur_capacity1);
440 id->lba_capacity = __le32_to_cpu(id->lba_capacity);
441 id->dma_1word = __le16_to_cpu(id->dma_1word);
442 id->dma_mword = __le16_to_cpu(id->dma_mword);
443 id->eide_pio_modes = __le16_to_cpu(id->eide_pio_modes);
444 id->eide_dma_min = __le16_to_cpu(id->eide_dma_min);
445 id->eide_dma_time = __le16_to_cpu(id->eide_dma_time);
446 id->eide_pio = __le16_to_cpu(id->eide_pio);
447 id->eide_pio_iordy = __le16_to_cpu(id->eide_pio_iordy);
448 for (i = 0; i < 2; ++i)
449 id->words69_70[i] = __le16_to_cpu(id->words69_70[i]);
450 for (i = 0; i < 4; ++i)
451 id->words71_74[i] = __le16_to_cpu(id->words71_74[i]);
452 id->queue_depth = __le16_to_cpu(id->queue_depth);
453 for (i = 0; i < 4; ++i)
454 id->words76_79[i] = __le16_to_cpu(id->words76_79[i]);
455 id->major_rev_num = __le16_to_cpu(id->major_rev_num);
456 id->minor_rev_num = __le16_to_cpu(id->minor_rev_num);
457 id->command_set_1 = __le16_to_cpu(id->command_set_1);
458 id->command_set_2 = __le16_to_cpu(id->command_set_2);
459 id->cfsse = __le16_to_cpu(id->cfsse);
460 id->cfs_enable_1 = __le16_to_cpu(id->cfs_enable_1);
461 id->cfs_enable_2 = __le16_to_cpu(id->cfs_enable_2);
462 id->csf_default = __le16_to_cpu(id->csf_default);
463 id->dma_ultra = __le16_to_cpu(id->dma_ultra);
464 id->trseuc = __le16_to_cpu(id->trseuc);
465 id->trsEuc = __le16_to_cpu(id->trsEuc);
466 id->CurAPMvalues = __le16_to_cpu(id->CurAPMvalues);
467 id->mprc = __le16_to_cpu(id->mprc);
468 id->hw_config = __le16_to_cpu(id->hw_config);
469 id->acoustic = __le16_to_cpu(id->acoustic);
470 id->msrqs = __le16_to_cpu(id->msrqs);
471 id->sxfert = __le16_to_cpu(id->sxfert);
472 id->sal = __le16_to_cpu(id->sal);
473 id->spg = __le32_to_cpu(id->spg);
474 id->lba_capacity_2 = __le64_to_cpu(id->lba_capacity_2);
475 for (i = 0; i < 22; i++)
476 id->words104_125[i] = __le16_to_cpu(id->words104_125[i]);
477 id->last_lun = __le16_to_cpu(id->last_lun);
478 id->word127 = __le16_to_cpu(id->word127);
479 id->dlf = __le16_to_cpu(id->dlf);
480 id->csfo = __le16_to_cpu(id->csfo);
481 for (i = 0; i < 26; i++)
482 id->words130_155[i] = __le16_to_cpu(id->words130_155[i]);
483 id->word156 = __le16_to_cpu(id->word156);
484 for (i = 0; i < 3; i++)
485 id->words157_159[i] = __le16_to_cpu(id->words157_159[i]);
486 id->cfa_power = __le16_to_cpu(id->cfa_power);
487 for (i = 0; i < 15; i++)
488 id->words161_175[i] = __le16_to_cpu(id->words161_175[i]);
489 for (i = 0; i < 30; i++)
490 id->words176_205[i] = __le16_to_cpu(id->words176_205[i]);
491 for (i = 0; i < 49; i++)
492 id->words206_254[i] = __le16_to_cpu(id->words206_254[i]);
493 id->integrity_word = __le16_to_cpu(id->integrity_word);
494# else 410# else
495# error "Please fix <asm/byteorder.h>" 411# error "Please fix <asm/byteorder.h>"
496# endif 412# endif
@@ -501,19 +417,21 @@ void ide_fix_driveid (struct hd_driveid *id)
501 * ide_fixstring() cleans up and (optionally) byte-swaps a text string, 417 * ide_fixstring() cleans up and (optionally) byte-swaps a text string,
502 * removing leading/trailing blanks and compressing internal blanks. 418 * removing leading/trailing blanks and compressing internal blanks.
503 * It is primarily used to tidy up the model name/number fields as 419 * It is primarily used to tidy up the model name/number fields as
504 * returned by the WIN_[P]IDENTIFY commands. 420 * returned by the ATA_CMD_ID_ATA[PI] commands.
505 */ 421 */
506 422
507void ide_fixstring (u8 *s, const int bytecount, const int byteswap) 423void ide_fixstring (u8 *s, const int bytecount, const int byteswap)
508{ 424{
509 u8 *p = s, *end = &s[bytecount & ~1]; /* bytecount must be even */ 425 u8 *p, *end = &s[bytecount & ~1]; /* bytecount must be even */
510 426
511 if (byteswap) { 427 if (byteswap) {
512 /* convert from big-endian to host byte order */ 428 /* convert from big-endian to host byte order */
513 for (p = end ; p != s;) 429 for (p = s ; p != end ; p += 2)
514 be16_to_cpus((u16 *)(p -= 2)); 430 be16_to_cpus((u16 *) p);
515 } 431 }
432
516 /* strip leading blanks */ 433 /* strip leading blanks */
434 p = s;
517 while (s != end && *s == ' ') 435 while (s != end && *s == ' ')
518 ++s; 436 ++s;
519 /* compress internal blanks and strip trailing blanks */ 437 /* compress internal blanks and strip trailing blanks */
@@ -556,7 +474,7 @@ int drive_is_ready (ide_drive_t *drive)
556 /* Note: this may clear a pending IRQ!! */ 474 /* Note: this may clear a pending IRQ!! */
557 stat = hwif->tp_ops->read_status(hwif); 475 stat = hwif->tp_ops->read_status(hwif);
558 476
559 if (stat & BUSY_STAT) 477 if (stat & ATA_BUSY)
560 /* drive busy: definitely not interrupting */ 478 /* drive busy: definitely not interrupting */
561 return 0; 479 return 0;
562 480
@@ -588,10 +506,10 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, unsigned long ti
588 udelay(1); /* spec allows drive 400ns to assert "BUSY" */ 506 udelay(1); /* spec allows drive 400ns to assert "BUSY" */
589 stat = tp_ops->read_status(hwif); 507 stat = tp_ops->read_status(hwif);
590 508
591 if (stat & BUSY_STAT) { 509 if (stat & ATA_BUSY) {
592 local_irq_set(flags); 510 local_irq_set(flags);
593 timeout += jiffies; 511 timeout += jiffies;
594 while ((stat = tp_ops->read_status(hwif)) & BUSY_STAT) { 512 while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) {
595 if (time_after(jiffies, timeout)) { 513 if (time_after(jiffies, timeout)) {
596 /* 514 /*
597 * One last read after the timeout in case 515 * One last read after the timeout in case
@@ -599,7 +517,7 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, unsigned long ti
599 * progress during the timeout.. 517 * progress during the timeout..
600 */ 518 */
601 stat = tp_ops->read_status(hwif); 519 stat = tp_ops->read_status(hwif);
602 if (!(stat & BUSY_STAT)) 520 if ((stat & ATA_BUSY) == 0)
603 break; 521 break;
604 522
605 local_irq_restore(flags); 523 local_irq_restore(flags);
@@ -660,18 +578,18 @@ EXPORT_SYMBOL(ide_wait_stat);
660/** 578/**
661 * ide_in_drive_list - look for drive in black/white list 579 * ide_in_drive_list - look for drive in black/white list
662 * @id: drive identifier 580 * @id: drive identifier
663 * @drive_table: list to inspect 581 * @table: list to inspect
664 * 582 *
665 * Look for a drive in the blacklist and the whitelist tables 583 * Look for a drive in the blacklist and the whitelist tables
666 * Returns 1 if the drive is found in the table. 584 * Returns 1 if the drive is found in the table.
667 */ 585 */
668 586
669int ide_in_drive_list(struct hd_driveid *id, const struct drive_list_entry *drive_table) 587int ide_in_drive_list(u16 *id, const struct drive_list_entry *table)
670{ 588{
671 for ( ; drive_table->id_model; drive_table++) 589 for ( ; table->id_model; table++)
672 if ((!strcmp(drive_table->id_model, id->model)) && 590 if ((!strcmp(table->id_model, (char *)&id[ATA_ID_PROD])) &&
673 (!drive_table->id_firmware || 591 (!table->id_firmware ||
674 strstr(id->fw_rev, drive_table->id_firmware))) 592 strstr((char *)&id[ATA_ID_FW_REV], table->id_firmware)))
675 return 1; 593 return 1;
676 return 0; 594 return 0;
677} 595}
@@ -702,7 +620,7 @@ static const struct drive_list_entry ivb_list[] = {
702u8 eighty_ninty_three (ide_drive_t *drive) 620u8 eighty_ninty_three (ide_drive_t *drive)
703{ 621{
704 ide_hwif_t *hwif = drive->hwif; 622 ide_hwif_t *hwif = drive->hwif;
705 struct hd_driveid *id = drive->id; 623 u16 *id = drive->id;
706 int ivb = ide_in_drive_list(id, ivb_list); 624 int ivb = ide_in_drive_list(id, ivb_list);
707 625
708 if (hwif->cbl == ATA_CBL_PATA40_SHORT) 626 if (hwif->cbl == ATA_CBL_PATA40_SHORT)
@@ -712,7 +630,7 @@ u8 eighty_ninty_three (ide_drive_t *drive)
712 printk(KERN_DEBUG "%s: skipping word 93 validity check\n", 630 printk(KERN_DEBUG "%s: skipping word 93 validity check\n",
713 drive->name); 631 drive->name);
714 632
715 if (ide_dev_is_sata(id) && !ivb) 633 if (ata_id_is_sata(id) && !ivb)
716 return 1; 634 return 1;
717 635
718 if (hwif->cbl != ATA_CBL_PATA80 && !ivb) 636 if (hwif->cbl != ATA_CBL_PATA80 && !ivb)
@@ -724,7 +642,8 @@ u8 eighty_ninty_three (ide_drive_t *drive)
724 * - force bit13 (80c cable present) check also for !ivb devices 642 * - force bit13 (80c cable present) check also for !ivb devices
725 * (unless the slave device is pre-ATA3) 643 * (unless the slave device is pre-ATA3)
726 */ 644 */
727 if ((id->hw_config & 0x4000) || (ivb && (id->hw_config & 0x2000))) 645 if ((id[ATA_ID_HW_CONFIG] & 0x4000) ||
646 (ivb && (id[ATA_ID_HW_CONFIG] & 0x2000)))
728 return 1; 647 return 1;
729 648
730no_80w: 649no_80w:
@@ -745,8 +664,8 @@ int ide_driveid_update(ide_drive_t *drive)
745{ 664{
746 ide_hwif_t *hwif = drive->hwif; 665 ide_hwif_t *hwif = drive->hwif;
747 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 666 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
748 struct hd_driveid *id; 667 u16 *id;
749 unsigned long timeout, flags; 668 unsigned long flags;
750 u8 stat; 669 u8 stat;
751 670
752 /* 671 /*
@@ -757,29 +676,24 @@ int ide_driveid_update(ide_drive_t *drive)
757 SELECT_MASK(drive, 1); 676 SELECT_MASK(drive, 1);
758 tp_ops->set_irq(hwif, 0); 677 tp_ops->set_irq(hwif, 0);
759 msleep(50); 678 msleep(50);
760 tp_ops->exec_command(hwif, WIN_IDENTIFY); 679 tp_ops->exec_command(hwif, ATA_CMD_ID_ATA);
761 timeout = jiffies + WAIT_WORSTCASE;
762 do {
763 if (time_after(jiffies, timeout)) {
764 SELECT_MASK(drive, 0);
765 return 0; /* drive timed-out */
766 }
767 680
768 msleep(50); /* give drive a breather */ 681 if (ide_busy_sleep(hwif, WAIT_WORSTCASE, 1)) {
769 stat = tp_ops->read_altstatus(hwif); 682 SELECT_MASK(drive, 0);
770 } while (stat & BUSY_STAT); 683 return 0;
684 }
771 685
772 msleep(50); /* wait for IRQ and DRQ_STAT */ 686 msleep(50); /* wait for IRQ and ATA_DRQ */
773 stat = tp_ops->read_status(hwif); 687 stat = tp_ops->read_status(hwif);
774 688
775 if (!OK_STAT(stat, DRQ_STAT, BAD_R_STAT)) { 689 if (!OK_STAT(stat, ATA_DRQ, BAD_R_STAT)) {
776 SELECT_MASK(drive, 0); 690 SELECT_MASK(drive, 0);
777 printk("%s: CHECK for good STATUS\n", drive->name); 691 printk("%s: CHECK for good STATUS\n", drive->name);
778 return 0; 692 return 0;
779 } 693 }
780 local_irq_save(flags); 694 local_irq_save(flags);
781 SELECT_MASK(drive, 0); 695 SELECT_MASK(drive, 0);
782 id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC); 696 id = kmalloc(SECTOR_SIZE, GFP_ATOMIC);
783 if (!id) { 697 if (!id) {
784 local_irq_restore(flags); 698 local_irq_restore(flags);
785 return 0; 699 return 0;
@@ -789,16 +703,16 @@ int ide_driveid_update(ide_drive_t *drive)
789 local_irq_enable(); 703 local_irq_enable();
790 local_irq_restore(flags); 704 local_irq_restore(flags);
791 ide_fix_driveid(id); 705 ide_fix_driveid(id);
792 if (id) { 706
793 drive->id->dma_ultra = id->dma_ultra; 707 drive->id[ATA_ID_UDMA_MODES] = id[ATA_ID_UDMA_MODES];
794 drive->id->dma_mword = id->dma_mword; 708 drive->id[ATA_ID_MWDMA_MODES] = id[ATA_ID_MWDMA_MODES];
795 drive->id->dma_1word = id->dma_1word; 709 drive->id[ATA_ID_SWDMA_MODES] = id[ATA_ID_SWDMA_MODES];
796 /* anything more ? */ 710 /* anything more ? */
797 kfree(id); 711
798 712 kfree(id);
799 if (drive->using_dma && ide_id_dma_bug(drive)) 713
800 ide_dma_off(drive); 714 if (drive->using_dma && ide_id_dma_bug(drive))
801 } 715 ide_dma_off(drive);
802 716
803 return 1; 717 return 1;
804} 718}
@@ -807,6 +721,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
807{ 721{
808 ide_hwif_t *hwif = drive->hwif; 722 ide_hwif_t *hwif = drive->hwif;
809 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 723 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
724 u16 *id = drive->id, i;
810 int error = 0; 725 int error = 0;
811 u8 stat; 726 u8 stat;
812 ide_task_t task; 727 ide_task_t task;
@@ -817,7 +732,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
817#endif 732#endif
818 733
819 /* Skip setting PIO flow-control modes on pre-EIDE drives */ 734 /* Skip setting PIO flow-control modes on pre-EIDE drives */
820 if ((speed & 0xf8) == XFER_PIO_0 && !(drive->id->capability & 0x08)) 735 if ((speed & 0xf8) == XFER_PIO_0 && ata_id_has_iordy(drive->id) == 0)
821 goto skip; 736 goto skip;
822 737
823 /* 738 /*
@@ -851,13 +766,13 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
851 766
852 tp_ops->tf_load(drive, &task); 767 tp_ops->tf_load(drive, &task);
853 768
854 tp_ops->exec_command(hwif, WIN_SETFEATURES); 769 tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES);
855 770
856 if (drive->quirk_list == 2) 771 if (drive->quirk_list == 2)
857 tp_ops->set_irq(hwif, 1); 772 tp_ops->set_irq(hwif, 1);
858 773
859 error = __ide_wait_stat(drive, drive->ready_stat, 774 error = __ide_wait_stat(drive, drive->ready_stat,
860 BUSY_STAT|DRQ_STAT|ERR_STAT, 775 ATA_BUSY | ATA_DRQ | ATA_ERR,
861 WAIT_CMD, &stat); 776 WAIT_CMD, &stat);
862 777
863 SELECT_MASK(drive, 0); 778 SELECT_MASK(drive, 0);
@@ -869,9 +784,9 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
869 return error; 784 return error;
870 } 785 }
871 786
872 drive->id->dma_ultra &= ~0xFF00; 787 id[ATA_ID_UDMA_MODES] &= ~0xFF00;
873 drive->id->dma_mword &= ~0x0F00; 788 id[ATA_ID_MWDMA_MODES] &= ~0x0F00;
874 drive->id->dma_1word &= ~0x0F00; 789 id[ATA_ID_SWDMA_MODES] &= ~0x0F00;
875 790
876 skip: 791 skip:
877#ifdef CONFIG_BLK_DEV_IDEDMA 792#ifdef CONFIG_BLK_DEV_IDEDMA
@@ -881,23 +796,17 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
881 ide_dma_off_quietly(drive); 796 ide_dma_off_quietly(drive);
882#endif 797#endif
883 798
884 switch(speed) { 799 if (speed >= XFER_UDMA_0) {
885 case XFER_UDMA_7: drive->id->dma_ultra |= 0x8080; break; 800 i = 1 << (speed - XFER_UDMA_0);
886 case XFER_UDMA_6: drive->id->dma_ultra |= 0x4040; break; 801 id[ATA_ID_UDMA_MODES] |= (i << 8 | i);
887 case XFER_UDMA_5: drive->id->dma_ultra |= 0x2020; break; 802 } else if (speed >= XFER_MW_DMA_0) {
888 case XFER_UDMA_4: drive->id->dma_ultra |= 0x1010; break; 803 i = 1 << (speed - XFER_MW_DMA_0);
889 case XFER_UDMA_3: drive->id->dma_ultra |= 0x0808; break; 804 id[ATA_ID_MWDMA_MODES] |= (i << 8 | i);
890 case XFER_UDMA_2: drive->id->dma_ultra |= 0x0404; break; 805 } else if (speed >= XFER_SW_DMA_0) {
891 case XFER_UDMA_1: drive->id->dma_ultra |= 0x0202; break; 806 i = 1 << (speed - XFER_SW_DMA_0);
892 case XFER_UDMA_0: drive->id->dma_ultra |= 0x0101; break; 807 id[ATA_ID_SWDMA_MODES] |= (i << 8 | i);
893 case XFER_MW_DMA_2: drive->id->dma_mword |= 0x0404; break;
894 case XFER_MW_DMA_1: drive->id->dma_mword |= 0x0202; break;
895 case XFER_MW_DMA_0: drive->id->dma_mword |= 0x0101; break;
896 case XFER_SW_DMA_2: drive->id->dma_1word |= 0x0404; break;
897 case XFER_SW_DMA_1: drive->id->dma_1word |= 0x0202; break;
898 case XFER_SW_DMA_0: drive->id->dma_1word |= 0x0101; break;
899 default: break;
900 } 808 }
809
901 if (!drive->init_speed) 810 if (!drive->init_speed)
902 drive->init_speed = speed; 811 drive->init_speed = speed;
903 drive->current_speed = speed; 812 drive->current_speed = speed;
@@ -977,7 +886,7 @@ void ide_execute_pkt_cmd(ide_drive_t *drive)
977 unsigned long flags; 886 unsigned long flags;
978 887
979 spin_lock_irqsave(&ide_lock, flags); 888 spin_lock_irqsave(&ide_lock, flags);
980 hwif->tp_ops->exec_command(hwif, WIN_PACKETCMD); 889 hwif->tp_ops->exec_command(hwif, ATA_CMD_PACKET);
981 ndelay(400); 890 ndelay(400);
982 spin_unlock_irqrestore(&ide_lock, flags); 891 spin_unlock_irqrestore(&ide_lock, flags);
983} 892}
@@ -1010,7 +919,7 @@ static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
1010 udelay (10); 919 udelay (10);
1011 stat = hwif->tp_ops->read_status(hwif); 920 stat = hwif->tp_ops->read_status(hwif);
1012 921
1013 if (OK_STAT(stat, 0, BUSY_STAT)) 922 if (OK_STAT(stat, 0, ATA_BUSY))
1014 printk("%s: ATAPI reset complete\n", drive->name); 923 printk("%s: ATAPI reset complete\n", drive->name);
1015 else { 924 else {
1016 if (time_before(jiffies, hwgroup->poll_timeout)) { 925 if (time_before(jiffies, hwgroup->poll_timeout)) {
@@ -1056,7 +965,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
1056 965
1057 tmp = hwif->tp_ops->read_status(hwif); 966 tmp = hwif->tp_ops->read_status(hwif);
1058 967
1059 if (!OK_STAT(tmp, 0, BUSY_STAT)) { 968 if (!OK_STAT(tmp, 0, ATA_BUSY)) {
1060 if (time_before(jiffies, hwgroup->poll_timeout)) { 969 if (time_before(jiffies, hwgroup->poll_timeout)) {
1061 ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL); 970 ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL);
1062 /* continue polling */ 971 /* continue polling */
@@ -1102,7 +1011,7 @@ out:
1102 1011
1103static void ide_disk_pre_reset(ide_drive_t *drive) 1012static void ide_disk_pre_reset(ide_drive_t *drive)
1104{ 1013{
1105 int legacy = (drive->id->cfs_enable_2 & 0x0400) ? 0 : 1; 1014 int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1;
1106 1015
1107 drive->special.all = 0; 1016 drive->special.all = 0;
1108 drive->special.b.set_geometry = legacy; 1017 drive->special.b.set_geometry = legacy;
@@ -1187,7 +1096,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
1187 pre_reset(drive); 1096 pre_reset(drive);
1188 SELECT_DRIVE(drive); 1097 SELECT_DRIVE(drive);
1189 udelay (20); 1098 udelay (20);
1190 tp_ops->exec_command(hwif, WIN_SRST); 1099 tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET);
1191 ndelay(400); 1100 ndelay(400);
1192 hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; 1101 hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
1193 hwgroup->polling = 1; 1102 hwgroup->polling = 1;
@@ -1270,7 +1179,7 @@ int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout)
1270 */ 1179 */
1271 mdelay(1); 1180 mdelay(1);
1272 stat = hwif->tp_ops->read_status(hwif); 1181 stat = hwif->tp_ops->read_status(hwif);
1273 if ((stat & BUSY_STAT) == 0) 1182 if ((stat & ATA_BUSY) == 0)
1274 return 0; 1183 return 0;
1275 /* 1184 /*
1276 * Assume a value of 0xff means nothing is connected to 1185 * Assume a value of 0xff means nothing is connected to
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 97fefabea8b8..ed426dd0fdd8 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -2,7 +2,6 @@
2#include <linux/string.h> 2#include <linux/string.h>
3#include <linux/kernel.h> 3#include <linux/kernel.h>
4#include <linux/interrupt.h> 4#include <linux/interrupt.h>
5#include <linux/hdreg.h>
6#include <linux/ide.h> 5#include <linux/ide.h>
7#include <linux/bitops.h> 6#include <linux/bitops.h>
8 7
@@ -90,29 +89,31 @@ static u8 ide_rate_filter(ide_drive_t *drive, u8 speed)
90 89
91u8 ide_get_best_pio_mode (ide_drive_t *drive, u8 mode_wanted, u8 max_mode) 90u8 ide_get_best_pio_mode (ide_drive_t *drive, u8 mode_wanted, u8 max_mode)
92{ 91{
93 int pio_mode; 92 u16 *id = drive->id;
94 struct hd_driveid* id = drive->id; 93 int pio_mode = -1, overridden = 0;
95 int overridden = 0;
96 94
97 if (mode_wanted != 255) 95 if (mode_wanted != 255)
98 return min_t(u8, mode_wanted, max_mode); 96 return min_t(u8, mode_wanted, max_mode);
99 97
100 if ((drive->hwif->host_flags & IDE_HFLAG_PIO_NO_BLACKLIST) == 0 && 98 if ((drive->hwif->host_flags & IDE_HFLAG_PIO_NO_BLACKLIST) == 0)
101 (pio_mode = ide_scan_pio_blacklist(id->model)) != -1) { 99 pio_mode = ide_scan_pio_blacklist((char *)&id[ATA_ID_PROD]);
100
101 if (pio_mode != -1) {
102 printk(KERN_INFO "%s: is on PIO blacklist\n", drive->name); 102 printk(KERN_INFO "%s: is on PIO blacklist\n", drive->name);
103 } else { 103 } else {
104 pio_mode = id->tPIO; 104 pio_mode = id[ATA_ID_OLD_PIO_MODES] >> 8;
105 if (pio_mode > 2) { /* 2 is maximum allowed tPIO value */ 105 if (pio_mode > 2) { /* 2 is maximum allowed tPIO value */
106 pio_mode = 2; 106 pio_mode = 2;
107 overridden = 1; 107 overridden = 1;
108 } 108 }
109 if (id->field_valid & 2) { /* drive implements ATA2? */ 109
110 if (id->capability & 8) { /* IORDY supported? */ 110 if (id[ATA_ID_FIELD_VALID] & 2) { /* ATA2? */
111 if (id->eide_pio_modes & 7) { 111 if (ata_id_has_iordy(id)) {
112 if (id[ATA_ID_PIO_MODES] & 7) {
112 overridden = 0; 113 overridden = 0;
113 if (id->eide_pio_modes & 4) 114 if (id[ATA_ID_PIO_MODES] & 4)
114 pio_mode = 5; 115 pio_mode = 5;
115 else if (id->eide_pio_modes & 2) 116 else if (id[ATA_ID_PIO_MODES] & 2)
116 pio_mode = 4; 117 pio_mode = 4;
117 else 118 else
118 pio_mode = 3; 119 pio_mode = 3;
@@ -338,16 +339,16 @@ static void ide_dump_sector(ide_drive_t *drive)
338static void ide_dump_ata_error(ide_drive_t *drive, u8 err) 339static void ide_dump_ata_error(ide_drive_t *drive, u8 err)
339{ 340{
340 printk("{ "); 341 printk("{ ");
341 if (err & ABRT_ERR) printk("DriveStatusError "); 342 if (err & ATA_ABORTED) printk("DriveStatusError ");
342 if (err & ICRC_ERR) 343 if (err & ATA_ICRC)
343 printk((err & ABRT_ERR) ? "BadCRC " : "BadSector "); 344 printk((err & ATA_ABORTED) ? "BadCRC " : "BadSector ");
344 if (err & ECC_ERR) printk("UncorrectableError "); 345 if (err & ATA_UNC) printk("UncorrectableError ");
345 if (err & ID_ERR) printk("SectorIdNotFound "); 346 if (err & ATA_IDNF) printk("SectorIdNotFound ");
346 if (err & TRK0_ERR) printk("TrackZeroNotFound "); 347 if (err & ATA_TRK0NF) printk("TrackZeroNotFound ");
347 if (err & MARK_ERR) printk("AddrMarkNotFound "); 348 if (err & ATA_AMNF) printk("AddrMarkNotFound ");
348 printk("}"); 349 printk("}");
349 if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || 350 if ((err & (ATA_BBK | ATA_ABORTED)) == ATA_BBK ||
350 (err & (ECC_ERR|ID_ERR|MARK_ERR))) { 351 (err & (ATA_UNC | ATA_IDNF | ATA_AMNF))) {
351 ide_dump_sector(drive); 352 ide_dump_sector(drive);
352 if (HWGROUP(drive) && HWGROUP(drive)->rq) 353 if (HWGROUP(drive) && HWGROUP(drive)->rq)
353 printk(", sector=%llu", 354 printk(", sector=%llu",
@@ -359,12 +360,12 @@ static void ide_dump_ata_error(ide_drive_t *drive, u8 err)
359static void ide_dump_atapi_error(ide_drive_t *drive, u8 err) 360static void ide_dump_atapi_error(ide_drive_t *drive, u8 err)
360{ 361{
361 printk("{ "); 362 printk("{ ");
362 if (err & ILI_ERR) printk("IllegalLengthIndication "); 363 if (err & ATAPI_ILI) printk("IllegalLengthIndication ");
363 if (err & EOM_ERR) printk("EndOfMedia "); 364 if (err & ATAPI_EOM) printk("EndOfMedia ");
364 if (err & ABRT_ERR) printk("AbortedCommand "); 365 if (err & ATA_ABORTED) printk("AbortedCommand ");
365 if (err & MCR_ERR) printk("MediaChangeRequested "); 366 if (err & ATA_MCR) printk("MediaChangeRequested ");
366 if (err & LFS_ERR) printk("LastFailedSense=0x%02x ", 367 if (err & ATAPI_LFS) printk("LastFailedSense=0x%02x ",
367 (err & LFS_ERR) >> 4); 368 (err & ATAPI_LFS) >> 4);
368 printk("}\n"); 369 printk("}\n");
369} 370}
370 371
@@ -386,19 +387,19 @@ u8 ide_dump_status(ide_drive_t *drive, const char *msg, u8 stat)
386 387
387 local_irq_save(flags); 388 local_irq_save(flags);
388 printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); 389 printk("%s: %s: status=0x%02x { ", drive->name, msg, stat);
389 if (stat & BUSY_STAT) 390 if (stat & ATA_BUSY)
390 printk("Busy "); 391 printk("Busy ");
391 else { 392 else {
392 if (stat & READY_STAT) printk("DriveReady "); 393 if (stat & ATA_DRDY) printk("DriveReady ");
393 if (stat & WRERR_STAT) printk("DeviceFault "); 394 if (stat & ATA_DF) printk("DeviceFault ");
394 if (stat & SEEK_STAT) printk("SeekComplete "); 395 if (stat & ATA_DSC) printk("SeekComplete ");
395 if (stat & DRQ_STAT) printk("DataRequest "); 396 if (stat & ATA_DRQ) printk("DataRequest ");
396 if (stat & ECC_STAT) printk("CorrectedError "); 397 if (stat & ATA_CORR) printk("CorrectedError ");
397 if (stat & INDEX_STAT) printk("Index "); 398 if (stat & ATA_IDX) printk("Index ");
398 if (stat & ERR_STAT) printk("Error "); 399 if (stat & ATA_ERR) printk("Error ");
399 } 400 }
400 printk("}\n"); 401 printk("}\n");
401 if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) { 402 if ((stat & (ATA_BUSY | ATA_ERR)) == ATA_ERR) {
402 err = ide_read_error(drive); 403 err = ide_read_error(drive);
403 printk("%s: %s: error=0x%02x ", drive->name, msg, err); 404 printk("%s: %s: error=0x%02x ", drive->name, msg, err);
404 if (drive->media == ide_disk) 405 if (drive->media == ide_disk)
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 70aa86c8807e..06575a12b635 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -50,59 +50,54 @@
50 50
51static void generic_id(ide_drive_t *drive) 51static void generic_id(ide_drive_t *drive)
52{ 52{
53 drive->id->cyls = drive->cyl; 53 u16 *id = drive->id;
54 drive->id->heads = drive->head; 54
55 drive->id->sectors = drive->sect; 55 id[ATA_ID_CUR_CYLS] = id[ATA_ID_CYLS] = drive->cyl;
56 drive->id->cur_cyls = drive->cyl; 56 id[ATA_ID_CUR_HEADS] = id[ATA_ID_HEADS] = drive->head;
57 drive->id->cur_heads = drive->head; 57 id[ATA_ID_CUR_SECTORS] = id[ATA_ID_SECTORS] = drive->sect;
58 drive->id->cur_sectors = drive->sect;
59} 58}
60 59
61static void ide_disk_init_chs(ide_drive_t *drive) 60static void ide_disk_init_chs(ide_drive_t *drive)
62{ 61{
63 struct hd_driveid *id = drive->id; 62 u16 *id = drive->id;
64 63
65 /* Extract geometry if we did not already have one for the drive */ 64 /* Extract geometry if we did not already have one for the drive */
66 if (!drive->cyl || !drive->head || !drive->sect) { 65 if (!drive->cyl || !drive->head || !drive->sect) {
67 drive->cyl = drive->bios_cyl = id->cyls; 66 drive->cyl = drive->bios_cyl = id[ATA_ID_CYLS];
68 drive->head = drive->bios_head = id->heads; 67 drive->head = drive->bios_head = id[ATA_ID_HEADS];
69 drive->sect = drive->bios_sect = id->sectors; 68 drive->sect = drive->bios_sect = id[ATA_ID_SECTORS];
70 } 69 }
71 70
72 /* Handle logical geometry translation by the drive */ 71 /* Handle logical geometry translation by the drive */
73 if ((id->field_valid & 1) && id->cur_cyls && 72 if (ata_id_current_chs_valid(id)) {
74 id->cur_heads && (id->cur_heads <= 16) && id->cur_sectors) { 73 drive->cyl = id[ATA_ID_CUR_CYLS];
75 drive->cyl = id->cur_cyls; 74 drive->head = id[ATA_ID_CUR_HEADS];
76 drive->head = id->cur_heads; 75 drive->sect = id[ATA_ID_CUR_SECTORS];
77 drive->sect = id->cur_sectors;
78 } 76 }
79 77
80 /* Use physical geometry if what we have still makes no sense */ 78 /* Use physical geometry if what we have still makes no sense */
81 if (drive->head > 16 && id->heads && id->heads <= 16) { 79 if (drive->head > 16 && id[ATA_ID_HEADS] && id[ATA_ID_HEADS] <= 16) {
82 drive->cyl = id->cyls; 80 drive->cyl = id[ATA_ID_CYLS];
83 drive->head = id->heads; 81 drive->head = id[ATA_ID_HEADS];
84 drive->sect = id->sectors; 82 drive->sect = id[ATA_ID_SECTORS];
85 } 83 }
86} 84}
87 85
88static void ide_disk_init_mult_count(ide_drive_t *drive) 86static void ide_disk_init_mult_count(ide_drive_t *drive)
89{ 87{
90 struct hd_driveid *id = drive->id; 88 u16 *id = drive->id;
91 89 u8 max_multsect = id[ATA_ID_MAX_MULTSECT] & 0xff;
92 drive->mult_count = 0; 90
93 if (id->max_multsect) { 91 if (max_multsect) {
94#ifdef CONFIG_IDEDISK_MULTI_MODE 92 if ((max_multsect / 2) > 1)
95 id->multsect = ((id->max_multsect/2) > 1) ? id->max_multsect : 0; 93 id[ATA_ID_MULTSECT] = max_multsect | 0x100;
96 id->multsect_valid = id->multsect ? 1 : 0; 94 else
97 drive->mult_req = id->multsect_valid ? id->max_multsect : 0; 95 id[ATA_ID_MULTSECT] &= ~0x1ff;
98 drive->special.b.set_multmode = drive->mult_req ? 1 : 0; 96
99#else /* original, pre IDE-NFG, per request of AC */ 97 drive->mult_req = id[ATA_ID_MULTSECT] & 0xff;
100 drive->mult_req = 0; 98
101 if (drive->mult_req > id->max_multsect) 99 if (drive->mult_req)
102 drive->mult_req = id->max_multsect;
103 if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect))
104 drive->special.b.set_multmode = 1; 100 drive->special.b.set_multmode = 1;
105#endif
106 } 101 }
107} 102}
108 103
@@ -119,10 +114,10 @@ static void ide_disk_init_mult_count(ide_drive_t *drive)
119static inline void do_identify (ide_drive_t *drive, u8 cmd) 114static inline void do_identify (ide_drive_t *drive, u8 cmd)
120{ 115{
121 ide_hwif_t *hwif = HWIF(drive); 116 ide_hwif_t *hwif = HWIF(drive);
122 int bswap = 1; 117 u16 *id = drive->id;
123 struct hd_driveid *id; 118 char *m = (char *)&id[ATA_ID_PROD];
119 int bswap = 1, is_cfa;
124 120
125 id = drive->id;
126 /* read 512 bytes of id info */ 121 /* read 512 bytes of id info */
127 hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); 122 hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
128 123
@@ -135,27 +130,28 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
135 ide_fix_driveid(id); 130 ide_fix_driveid(id);
136 131
137 /* 132 /*
138 * WIN_IDENTIFY returns little-endian info, 133 * ATA_CMD_ID_ATA returns little-endian info,
139 * WIN_PIDENTIFY *usually* returns little-endian info. 134 * ATA_CMD_ID_ATAPI *usually* returns little-endian info.
140 */ 135 */
141 if (cmd == WIN_PIDENTIFY) { 136 if (cmd == ATA_CMD_ID_ATAPI) {
142 if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */ 137 if ((m[0] == 'N' && m[1] == 'E') || /* NEC */
143 || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */ 138 (m[0] == 'F' && m[1] == 'X') || /* Mitsumi */
144 || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */ 139 (m[0] == 'P' && m[1] == 'i')) /* Pioneer */
145 /* Vertos drives may still be weird */ 140 /* Vertos drives may still be weird */
146 bswap ^= 1; 141 bswap ^= 1;
147 } 142 }
148 ide_fixstring(id->model, sizeof(id->model), bswap); 143
149 ide_fixstring(id->fw_rev, sizeof(id->fw_rev), bswap); 144 ide_fixstring(m, ATA_ID_PROD_LEN, bswap);
150 ide_fixstring(id->serial_no, sizeof(id->serial_no), bswap); 145 ide_fixstring((char *)&id[ATA_ID_FW_REV], ATA_ID_FW_REV_LEN, bswap);
146 ide_fixstring((char *)&id[ATA_ID_SERNO], ATA_ID_SERNO_LEN, bswap);
151 147
152 /* we depend on this a lot! */ 148 /* we depend on this a lot! */
153 id->model[sizeof(id->model)-1] = '\0'; 149 m[ATA_ID_PROD_LEN - 1] = '\0';
154 150
155 if (strstr(id->model, "E X A B Y T E N E S T")) 151 if (strstr(m, "E X A B Y T E N E S T"))
156 goto err_misc; 152 goto err_misc;
157 153
158 printk(KERN_INFO "%s: %s, ", drive->name, id->model); 154 printk(KERN_INFO "%s: %s, ", drive->name, m);
159 155
160 drive->present = 1; 156 drive->present = 1;
161 drive->dead = 0; 157 drive->dead = 0;
@@ -163,16 +159,16 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
163 /* 159 /*
164 * Check for an ATAPI device 160 * Check for an ATAPI device
165 */ 161 */
166 if (cmd == WIN_PIDENTIFY) { 162 if (cmd == ATA_CMD_ID_ATAPI) {
167 u8 type = (id->config >> 8) & 0x1f; 163 u8 type = (id[ATA_ID_CONFIG] >> 8) & 0x1f;
168 164
169 printk(KERN_CONT "ATAPI "); 165 printk(KERN_CONT "ATAPI ");
170 switch (type) { 166 switch (type) {
171 case ide_floppy: 167 case ide_floppy:
172 if (!strstr(id->model, "CD-ROM")) { 168 if (!strstr(m, "CD-ROM")) {
173 if (!strstr(id->model, "oppy") && 169 if (!strstr(m, "oppy") &&
174 !strstr(id->model, "poyp") && 170 !strstr(m, "poyp") &&
175 !strstr(id->model, "ZIP")) 171 !strstr(m, "ZIP"))
176 printk(KERN_CONT "cdrom or floppy?, assuming "); 172 printk(KERN_CONT "cdrom or floppy?, assuming ");
177 if (drive->media != ide_cdrom) { 173 if (drive->media != ide_cdrom) {
178 printk(KERN_CONT "FLOPPY"); 174 printk(KERN_CONT "FLOPPY");
@@ -186,8 +182,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
186 drive->removable = 1; 182 drive->removable = 1;
187#ifdef CONFIG_PPC 183#ifdef CONFIG_PPC
188 /* kludge for Apple PowerBook internal zip */ 184 /* kludge for Apple PowerBook internal zip */
189 if (!strstr(id->model, "CD-ROM") && 185 if (!strstr(m, "CD-ROM") && strstr(m, "ZIP")) {
190 strstr(id->model, "ZIP")) {
191 printk(KERN_CONT "FLOPPY"); 186 printk(KERN_CONT "FLOPPY");
192 type = ide_floppy; 187 type = ide_floppy;
193 break; 188 break;
@@ -217,18 +212,15 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
217 * Not an ATAPI device: looks like a "regular" hard disk 212 * Not an ATAPI device: looks like a "regular" hard disk
218 */ 213 */
219 214
220 /* 215 is_cfa = ata_id_is_cfa(id);
221 * 0x848a = CompactFlash device
222 * These are *not* removable in Linux definition of the term
223 */
224 216
225 if ((id->config != 0x848a) && (id->config & (1<<7))) 217 /* CF devices are *not* removable in Linux definition of the term */
218 if (is_cfa == 0 && (id[ATA_ID_CONFIG] & (1 << 7)))
226 drive->removable = 1; 219 drive->removable = 1;
227 220
228 drive->media = ide_disk; 221 drive->media = ide_disk;
229 222
230 printk(KERN_CONT "%s DISK drive\n", 223 printk(KERN_CONT "%s DISK drive\n", is_cfa ? "CFA" : "ATA");
231 (id->config == 0x848a) ? "CFA" : "ATA");
232 224
233 return; 225 return;
234 226
@@ -268,7 +260,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd)
268 if (io_ports->ctl_addr) { 260 if (io_ports->ctl_addr) {
269 a = tp_ops->read_altstatus(hwif); 261 a = tp_ops->read_altstatus(hwif);
270 s = tp_ops->read_status(hwif); 262 s = tp_ops->read_status(hwif);
271 if ((a ^ s) & ~INDEX_STAT) 263 if ((a ^ s) & ~ATA_IDX)
272 /* ancient Seagate drives, broken interfaces */ 264 /* ancient Seagate drives, broken interfaces */
273 printk(KERN_INFO "%s: probing with STATUS(0x%02x) " 265 printk(KERN_INFO "%s: probing with STATUS(0x%02x) "
274 "instead of ALTSTATUS(0x%02x)\n", 266 "instead of ALTSTATUS(0x%02x)\n",
@@ -281,7 +273,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd)
281 /* set features register for atapi 273 /* set features register for atapi
282 * identify command to be sure of reply 274 * identify command to be sure of reply
283 */ 275 */
284 if (cmd == WIN_PIDENTIFY) { 276 if (cmd == ATA_CMD_ID_ATAPI) {
285 ide_task_t task; 277 ide_task_t task;
286 278
287 memset(&task, 0, sizeof(task)); 279 memset(&task, 0, sizeof(task));
@@ -294,24 +286,16 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd)
294 /* ask drive for ID */ 286 /* ask drive for ID */
295 tp_ops->exec_command(hwif, cmd); 287 tp_ops->exec_command(hwif, cmd);
296 288
297 timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; 289 timeout = ((cmd == ATA_CMD_ID_ATA) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
298 timeout += jiffies;
299 do {
300 if (time_after(jiffies, timeout)) {
301 /* drive timed-out */
302 return 1;
303 }
304 /* give drive a breather */
305 msleep(50);
306 s = use_altstatus ? tp_ops->read_altstatus(hwif)
307 : tp_ops->read_status(hwif);
308 } while (s & BUSY_STAT);
309 290
310 /* wait for IRQ and DRQ_STAT */ 291 if (ide_busy_sleep(hwif, timeout, use_altstatus))
292 return 1;
293
294 /* wait for IRQ and ATA_DRQ */
311 msleep(50); 295 msleep(50);
312 s = tp_ops->read_status(hwif); 296 s = tp_ops->read_status(hwif);
313 297
314 if (OK_STAT(s, DRQ_STAT, BAD_R_STAT)) { 298 if (OK_STAT(s, ATA_DRQ, BAD_R_STAT)) {
315 unsigned long flags; 299 unsigned long flags;
316 300
317 /* local CPU only; some systems need this */ 301 /* local CPU only; some systems need this */
@@ -387,19 +371,21 @@ static int try_to_identify (ide_drive_t *drive, u8 cmd)
387 return retval; 371 return retval;
388} 372}
389 373
390static int ide_busy_sleep(ide_hwif_t *hwif) 374int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus)
391{ 375{
392 unsigned long timeout = jiffies + WAIT_WORSTCASE;
393 u8 stat; 376 u8 stat;
394 377
378 timeout += jiffies;
379
395 do { 380 do {
396 msleep(50); 381 msleep(50); /* give drive a breather */
397 stat = hwif->tp_ops->read_status(hwif); 382 stat = altstatus ? hwif->tp_ops->read_altstatus(hwif)
398 if ((stat & BUSY_STAT) == 0) 383 : hwif->tp_ops->read_status(hwif);
384 if ((stat & ATA_BUSY) == 0)
399 return 0; 385 return 0;
400 } while (time_before(jiffies, timeout)); 386 } while (time_before(jiffies, timeout));
401 387
402 return 1; 388 return 1; /* drive timed-out */
403} 389}
404 390
405static u8 ide_read_device(ide_drive_t *drive) 391static u8 ide_read_device(ide_drive_t *drive)
@@ -444,13 +430,13 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
444 430
445 if (drive->present) { 431 if (drive->present) {
446 /* avoid waiting for inappropriate probes */ 432 /* avoid waiting for inappropriate probes */
447 if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY)) 433 if (drive->media != ide_disk && cmd == ATA_CMD_ID_ATA)
448 return 4; 434 return 4;
449 } 435 }
450#ifdef DEBUG 436#ifdef DEBUG
451 printk(KERN_INFO "probing for %s: present=%d, media=%d, probetype=%s\n", 437 printk(KERN_INFO "probing for %s: present=%d, media=%d, probetype=%s\n",
452 drive->name, drive->present, drive->media, 438 drive->name, drive->present, drive->media,
453 (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI"); 439 (cmd == ATA_CMD_ID_ATA) ? "ATA" : "ATAPI");
454#endif 440#endif
455 441
456 /* needed for some systems 442 /* needed for some systems
@@ -464,7 +450,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
464 if (drive->select.b.unit != 0) { 450 if (drive->select.b.unit != 0) {
465 /* exit with drive0 selected */ 451 /* exit with drive0 selected */
466 SELECT_DRIVE(&hwif->drives[0]); 452 SELECT_DRIVE(&hwif->drives[0]);
467 /* allow BUSY_STAT to assert & clear */ 453 /* allow ATA_BUSY to assert & clear */
468 msleep(50); 454 msleep(50);
469 } 455 }
470 /* no i/f present: mmm.. this should be a 4 -ml */ 456 /* no i/f present: mmm.. this should be a 4 -ml */
@@ -473,8 +459,8 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
473 459
474 stat = tp_ops->read_status(hwif); 460 stat = tp_ops->read_status(hwif);
475 461
476 if (OK_STAT(stat, READY_STAT, BUSY_STAT) || 462 if (OK_STAT(stat, ATA_DRDY, ATA_BUSY) ||
477 drive->present || cmd == WIN_PIDENTIFY) { 463 drive->present || cmd == ATA_CMD_ID_ATAPI) {
478 /* send cmd and wait */ 464 /* send cmd and wait */
479 if ((rc = try_to_identify(drive, cmd))) { 465 if ((rc = try_to_identify(drive, cmd))) {
480 /* failed: try again */ 466 /* failed: try again */
@@ -483,17 +469,17 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
483 469
484 stat = tp_ops->read_status(hwif); 470 stat = tp_ops->read_status(hwif);
485 471
486 if (stat == (BUSY_STAT | READY_STAT)) 472 if (stat == (ATA_BUSY | ATA_DRDY))
487 return 4; 473 return 4;
488 474
489 if (rc == 1 && cmd == WIN_PIDENTIFY) { 475 if (rc == 1 && cmd == ATA_CMD_ID_ATAPI) {
490 printk(KERN_ERR "%s: no response (status = 0x%02x), " 476 printk(KERN_ERR "%s: no response (status = 0x%02x), "
491 "resetting drive\n", drive->name, stat); 477 "resetting drive\n", drive->name, stat);
492 msleep(50); 478 msleep(50);
493 SELECT_DRIVE(drive); 479 SELECT_DRIVE(drive);
494 msleep(50); 480 msleep(50);
495 tp_ops->exec_command(hwif, WIN_SRST); 481 tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET);
496 (void)ide_busy_sleep(hwif); 482 (void)ide_busy_sleep(hwif, WAIT_WORSTCASE, 0);
497 rc = try_to_identify(drive, cmd); 483 rc = try_to_identify(drive, cmd);
498 } 484 }
499 485
@@ -526,13 +512,14 @@ static void enable_nest (ide_drive_t *drive)
526 const struct ide_tp_ops *tp_ops = hwif->tp_ops; 512 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
527 u8 stat; 513 u8 stat;
528 514
529 printk(KERN_INFO "%s: enabling %s -- ", hwif->name, drive->id->model); 515 printk(KERN_INFO "%s: enabling %s -- ",
516 hwif->name, (char *)&drive->id[ATA_ID_PROD]);
530 517
531 SELECT_DRIVE(drive); 518 SELECT_DRIVE(drive);
532 msleep(50); 519 msleep(50);
533 tp_ops->exec_command(hwif, EXABYTE_ENABLE_NEST); 520 tp_ops->exec_command(hwif, ATA_EXABYTE_ENABLE_NEST);
534 521
535 if (ide_busy_sleep(hwif)) { 522 if (ide_busy_sleep(hwif, WAIT_WORSTCASE, 0)) {
536 printk(KERN_CONT "failed (timeout)\n"); 523 printk(KERN_CONT "failed (timeout)\n");
537 return; 524 return;
538 } 525 }
@@ -545,12 +532,6 @@ static void enable_nest (ide_drive_t *drive)
545 printk(KERN_CONT "failed (status = 0x%02x)\n", stat); 532 printk(KERN_CONT "failed (status = 0x%02x)\n", stat);
546 else 533 else
547 printk(KERN_CONT "success\n"); 534 printk(KERN_CONT "success\n");
548
549 /* if !(success||timed-out) */
550 if (do_probe(drive, WIN_IDENTIFY) >= 2) {
551 /* look for ATAPI device */
552 (void) do_probe(drive, WIN_PIDENTIFY);
553 }
554} 535}
555 536
556/** 537/**
@@ -567,6 +548,8 @@ static void enable_nest (ide_drive_t *drive)
567 548
568static inline u8 probe_for_drive (ide_drive_t *drive) 549static inline u8 probe_for_drive (ide_drive_t *drive)
569{ 550{
551 char *m;
552
570 /* 553 /*
571 * In order to keep things simple we have an id 554 * In order to keep things simple we have an id
572 * block for all drives at all times. If the device 555 * block for all drives at all times. If the device
@@ -576,29 +559,34 @@ static inline u8 probe_for_drive (ide_drive_t *drive)
576 * Also note that 0 everywhere means "can't do X" 559 * Also note that 0 everywhere means "can't do X"
577 */ 560 */
578 561
579 drive->id = kzalloc(SECTOR_WORDS *4, GFP_KERNEL); 562 drive->id = kzalloc(SECTOR_SIZE, GFP_KERNEL);
580 drive->id_read = 0; 563 drive->id_read = 0;
581 if(drive->id == NULL) 564 if(drive->id == NULL)
582 { 565 {
583 printk(KERN_ERR "ide: out of memory for id data.\n"); 566 printk(KERN_ERR "ide: out of memory for id data.\n");
584 return 0; 567 return 0;
585 } 568 }
586 strcpy(drive->id->model, "UNKNOWN"); 569
587 570 m = (char *)&drive->id[ATA_ID_PROD];
571 strcpy(m, "UNKNOWN");
572
588 /* skip probing? */ 573 /* skip probing? */
589 if (!drive->noprobe) 574 if (!drive->noprobe) {
590 { 575retry:
591 /* if !(success||timed-out) */ 576 /* if !(success||timed-out) */
592 if (do_probe(drive, WIN_IDENTIFY) >= 2) { 577 if (do_probe(drive, ATA_CMD_ID_ATA) >= 2)
593 /* look for ATAPI device */ 578 /* look for ATAPI device */
594 (void) do_probe(drive, WIN_PIDENTIFY); 579 (void)do_probe(drive, ATA_CMD_ID_ATAPI);
595 } 580
596 if (!drive->present) 581 if (!drive->present)
597 /* drive not found */ 582 /* drive not found */
598 return 0; 583 return 0;
599 if (strstr(drive->id->model, "E X A B Y T E N E S T")) 584
585 if (strstr(m, "E X A B Y T E N E S T")) {
600 enable_nest(drive); 586 enable_nest(drive);
601 587 goto retry;
588 }
589
602 /* identification failed? */ 590 /* identification failed? */
603 if (!drive->id_read) { 591 if (!drive->id_read) {
604 if (drive->media == ide_disk) { 592 if (drive->media == ide_disk) {
@@ -740,36 +728,38 @@ out:
740 728
741/** 729/**
742 * ide_undecoded_slave - look for bad CF adapters 730 * ide_undecoded_slave - look for bad CF adapters
743 * @drive1: drive 731 * @dev1: slave device
744 * 732 *
745 * Analyse the drives on the interface and attempt to decide if we 733 * Analyse the drives on the interface and attempt to decide if we
746 * have the same drive viewed twice. This occurs with crap CF adapters 734 * have the same drive viewed twice. This occurs with crap CF adapters
747 * and PCMCIA sometimes. 735 * and PCMCIA sometimes.
748 */ 736 */
749 737
750void ide_undecoded_slave(ide_drive_t *drive1) 738void ide_undecoded_slave(ide_drive_t *dev1)
751{ 739{
752 ide_drive_t *drive0 = &drive1->hwif->drives[0]; 740 ide_drive_t *dev0 = &dev1->hwif->drives[0];
753 741
754 if ((drive1->dn & 1) == 0 || drive0->present == 0) 742 if ((dev1->dn & 1) == 0 || dev0->present == 0)
755 return; 743 return;
756 744
757 /* If the models don't match they are not the same product */ 745 /* If the models don't match they are not the same product */
758 if (strcmp(drive0->id->model, drive1->id->model)) 746 if (strcmp((char *)&dev0->id[ATA_ID_PROD],
747 (char *)&dev1->id[ATA_ID_PROD]))
759 return; 748 return;
760 749
761 /* Serial numbers do not match */ 750 /* Serial numbers do not match */
762 if (strncmp(drive0->id->serial_no, drive1->id->serial_no, 20)) 751 if (strncmp((char *)&dev0->id[ATA_ID_SERNO],
752 (char *)&dev1->id[ATA_ID_SERNO], ATA_ID_SERNO_LEN))
763 return; 753 return;
764 754
765 /* No serial number, thankfully very rare for CF */ 755 /* No serial number, thankfully very rare for CF */
766 if (drive0->id->serial_no[0] == 0) 756 if (*(char *)&dev0->id[ATA_ID_SERNO] == 0)
767 return; 757 return;
768 758
769 /* Appears to be an IDE flash adapter with decode bugs */ 759 /* Appears to be an IDE flash adapter with decode bugs */
770 printk(KERN_WARNING "ide-probe: ignoring undecoded slave\n"); 760 printk(KERN_WARNING "ide-probe: ignoring undecoded slave\n");
771 761
772 drive1->present = 0; 762 dev1->present = 0;
773} 763}
774 764
775EXPORT_SYMBOL_GPL(ide_undecoded_slave); 765EXPORT_SYMBOL_GPL(ide_undecoded_slave);
@@ -853,7 +843,7 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
853 if (hwif->host_flags & IDE_HFLAG_NO_IO_32BIT) 843 if (hwif->host_flags & IDE_HFLAG_NO_IO_32BIT)
854 drive->no_io_32bit = 1; 844 drive->no_io_32bit = 1;
855 else 845 else
856 drive->no_io_32bit = drive->id->dword_io ? 1 : 0; 846 drive->no_io_32bit = drive->id[ATA_ID_DWORD_IO] ? 1 : 0;
857 } 847 }
858} 848}
859 849
@@ -1037,11 +1027,6 @@ static int init_irq (ide_hwif_t *hwif)
1037 ide_hwgroup_t *hwgroup; 1027 ide_hwgroup_t *hwgroup;
1038 ide_hwif_t *match = NULL; 1028 ide_hwif_t *match = NULL;
1039 1029
1040
1041 BUG_ON(in_interrupt());
1042 BUG_ON(irqs_disabled());
1043 BUG_ON(hwif == NULL);
1044
1045 mutex_lock(&ide_cfg_mtx); 1030 mutex_lock(&ide_cfg_mtx);
1046 hwif->hwgroup = NULL; 1031 hwif->hwgroup = NULL;
1047#if MAX_HWIFS > 1 1032#if MAX_HWIFS > 1
@@ -1116,7 +1101,8 @@ static int init_irq (ide_hwif_t *hwif)
1116 sa = IRQF_SHARED; 1101 sa = IRQF_SHARED;
1117#endif /* __mc68000__ */ 1102#endif /* __mc68000__ */
1118 1103
1119 if (IDE_CHIPSET_IS_PCI(hwif->chipset)) 1104 if (hwif->chipset == ide_pci || hwif->chipset == ide_cmd646 ||
1105 hwif->chipset == ide_ali14xx)
1120 sa = IRQF_SHARED; 1106 sa = IRQF_SHARED;
1121 1107
1122 if (io_ports->ctl_addr) 1108 if (io_ports->ctl_addr)
@@ -1344,8 +1330,6 @@ static void hwif_register_devices(ide_hwif_t *hwif)
1344 if (!drive->present) 1330 if (!drive->present)
1345 continue; 1331 continue;
1346 1332
1347 ide_add_generic_settings(drive);
1348
1349 snprintf(dev->bus_id, BUS_ID_SIZE, "%u.%u", hwif->index, i); 1333 snprintf(dev->bus_id, BUS_ID_SIZE, "%u.%u", hwif->index, i);
1350 dev->parent = &hwif->gendev; 1334 dev->parent = &hwif->gendev;
1351 dev->bus = &ide_bus_type; 1335 dev->bus = &ide_bus_type;
@@ -1602,8 +1586,10 @@ struct ide_host *ide_host_alloc_all(const struct ide_port_info *d,
1602 if (hws[0]) 1586 if (hws[0])
1603 host->dev[0] = hws[0]->dev; 1587 host->dev[0] = hws[0]->dev;
1604 1588
1605 if (d) 1589 if (d) {
1590 host->init_chipset = d->init_chipset;
1606 host->host_flags = d->host_flags; 1591 host->host_flags = d->host_flags;
1592 }
1607 1593
1608 return host; 1594 return host;
1609} 1595}
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index f66c9c3f6fc6..e7030a491463 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -12,14 +12,6 @@
12 * "settings" files. e.g. "cat /proc/ide0/hda/settings" 12 * "settings" files. e.g. "cat /proc/ide0/hda/settings"
13 * To write a new value "val" into a specific setting "name", use: 13 * To write a new value "val" into a specific setting "name", use:
14 * echo "name:val" >/proc/ide/ide0/hda/settings 14 * echo "name:val" >/proc/ide/ide0/hda/settings
15 *
16 * Also useful, "cat /proc/ide0/hda/[identify, smart_values,
17 * smart_thresholds, capabilities]" will issue an IDENTIFY /
18 * PACKET_IDENTIFY / SMART_READ_VALUES / SMART_READ_THRESHOLDS /
19 * SENSE CAPABILITIES command to /dev/hda, and then dump out the
20 * returned data as 256 16-bit words. The "hdparm" utility will
21 * be updated someday soon to use this mechanism.
22 *
23 */ 15 */
24 16
25#include <linux/module.h> 17#include <linux/module.h>
@@ -31,7 +23,6 @@
31#include <linux/mm.h> 23#include <linux/mm.h>
32#include <linux/pci.h> 24#include <linux/pci.h>
33#include <linux/ctype.h> 25#include <linux/ctype.h>
34#include <linux/hdreg.h>
35#include <linux/ide.h> 26#include <linux/ide.h>
36#include <linux/seq_file.h> 27#include <linux/seq_file.h>
37 28
@@ -109,13 +100,14 @@ static int proc_ide_read_identify
109 100
110 err = taskfile_lib_get_identify(drive, page); 101 err = taskfile_lib_get_identify(drive, page);
111 if (!err) { 102 if (!err) {
112 char *out = ((char *)page) + (SECTOR_WORDS * 4); 103 char *out = (char *)page + SECTOR_SIZE;
104
113 page = out; 105 page = out;
114 do { 106 do {
115 out += sprintf(out, "%04x%c", 107 out += sprintf(out, "%04x%c",
116 le16_to_cpup(val), (++i & 7) ? ' ' : '\n'); 108 le16_to_cpup(val), (++i & 7) ? ' ' : '\n');
117 val += 1; 109 val += 1;
118 } while (i < (SECTOR_WORDS * 2)); 110 } while (i < SECTOR_SIZE / 2);
119 len = out - page; 111 len = out - page;
120 } 112 }
121 } 113 }
@@ -123,140 +115,25 @@ static int proc_ide_read_identify
123} 115}
124 116
125/** 117/**
126 * __ide_add_setting - add an ide setting option 118 * ide_find_setting - find a specific setting
127 * @drive: drive to use 119 * @st: setting table pointer
128 * @name: setting name
129 * @rw: true if the function is read write
130 * @data_type: type of data
131 * @min: range minimum
132 * @max: range maximum
133 * @mul_factor: multiplication scale
134 * @div_factor: divison scale
135 * @data: private data field
136 * @set: setting
137 * @auto_remove: setting auto removal flag
138 *
139 * Removes the setting named from the device if it is present.
140 * The function takes the settings_lock to protect against
141 * parallel changes. This function must not be called from IRQ
142 * context. Returns 0 on success or -1 on failure.
143 *
144 * BUGS: This code is seriously over-engineered. There is also
145 * magic about how the driver specific features are setup. If
146 * a driver is attached we assume the driver settings are auto
147 * remove.
148 */
149
150static int __ide_add_setting(ide_drive_t *drive, const char *name, int rw, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set, int auto_remove)
151{
152 ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting = NULL;
153
154 mutex_lock(&ide_setting_mtx);
155 while ((*p) && strcmp((*p)->name, name) < 0)
156 p = &((*p)->next);
157 if ((setting = kzalloc(sizeof(*setting), GFP_KERNEL)) == NULL)
158 goto abort;
159 if ((setting->name = kmalloc(strlen(name) + 1, GFP_KERNEL)) == NULL)
160 goto abort;
161 strcpy(setting->name, name);
162 setting->rw = rw;
163 setting->data_type = data_type;
164 setting->min = min;
165 setting->max = max;
166 setting->mul_factor = mul_factor;
167 setting->div_factor = div_factor;
168 setting->data = data;
169 setting->set = set;
170
171 setting->next = *p;
172 if (auto_remove)
173 setting->auto_remove = 1;
174 *p = setting;
175 mutex_unlock(&ide_setting_mtx);
176 return 0;
177abort:
178 mutex_unlock(&ide_setting_mtx);
179 kfree(setting);
180 return -1;
181}
182
183int ide_add_setting(ide_drive_t *drive, const char *name, int rw, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set)
184{
185 return __ide_add_setting(drive, name, rw, data_type, min, max, mul_factor, div_factor, data, set, 1);
186}
187
188EXPORT_SYMBOL(ide_add_setting);
189
190/**
191 * __ide_remove_setting - remove an ide setting option
192 * @drive: drive to use
193 * @name: setting name
194 *
195 * Removes the setting named from the device if it is present.
196 * The caller must hold the setting semaphore.
197 */
198
199static void __ide_remove_setting(ide_drive_t *drive, char *name)
200{
201 ide_settings_t **p, *setting;
202
203 p = (ide_settings_t **) &drive->settings;
204
205 while ((*p) && strcmp((*p)->name, name))
206 p = &((*p)->next);
207 setting = (*p);
208 if (setting == NULL)
209 return;
210
211 (*p) = setting->next;
212
213 kfree(setting->name);
214 kfree(setting);
215}
216
217/**
218 * auto_remove_settings - remove driver specific settings
219 * @drive: drive
220 *
221 * Automatically remove all the driver specific settings for this
222 * drive. This function may not be called from IRQ context. The
223 * caller must hold ide_setting_mtx.
224 */
225
226static void auto_remove_settings(ide_drive_t *drive)
227{
228 ide_settings_t *setting;
229repeat:
230 setting = drive->settings;
231 while (setting) {
232 if (setting->auto_remove) {
233 __ide_remove_setting(drive, setting->name);
234 goto repeat;
235 }
236 setting = setting->next;
237 }
238}
239
240/**
241 * ide_find_setting_by_name - find a drive specific setting
242 * @drive: drive to scan
243 * @name: setting name 120 * @name: setting name
244 * 121 *
245 * Scan's the device setting table for a matching entry and returns 122 * Scan's the setting table for a matching entry and returns
246 * this or NULL if no entry is found. The caller must hold the 123 * this or NULL if no entry is found. The caller must hold the
247 * setting semaphore 124 * setting semaphore
248 */ 125 */
249 126
250static ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name) 127static
128const struct ide_proc_devset *ide_find_setting(const struct ide_proc_devset *st,
129 char *name)
251{ 130{
252 ide_settings_t *setting = drive->settings; 131 while (st->name) {
253 132 if (strcmp(st->name, name) == 0)
254 while (setting) {
255 if (strcmp(setting->name, name) == 0)
256 break; 133 break;
257 setting = setting->next; 134 st++;
258 } 135 }
259 return setting; 136 return st->name ? st : NULL;
260} 137}
261 138
262/** 139/**
@@ -272,26 +149,20 @@ static ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name)
272 * be told apart 149 * be told apart
273 */ 150 */
274 151
275static int ide_read_setting(ide_drive_t *drive, ide_settings_t *setting) 152static int ide_read_setting(ide_drive_t *drive,
153 const struct ide_proc_devset *setting)
276{ 154{
277 int val = -EINVAL; 155 const struct ide_devset *ds = setting->setting;
278 unsigned long flags; 156 int val = -EINVAL;
157
158 if (ds->get) {
159 unsigned long flags;
279 160
280 if ((setting->rw & SETTING_READ)) {
281 spin_lock_irqsave(&ide_lock, flags); 161 spin_lock_irqsave(&ide_lock, flags);
282 switch (setting->data_type) { 162 val = ds->get(drive);
283 case TYPE_BYTE:
284 val = *((u8 *) setting->data);
285 break;
286 case TYPE_SHORT:
287 val = *((u16 *) setting->data);
288 break;
289 case TYPE_INT:
290 val = *((u32 *) setting->data);
291 break;
292 }
293 spin_unlock_irqrestore(&ide_lock, flags); 163 spin_unlock_irqrestore(&ide_lock, flags);
294 } 164 }
165
295 return val; 166 return val;
296} 167}
297 168
@@ -313,33 +184,23 @@ static int ide_read_setting(ide_drive_t *drive, ide_settings_t *setting)
313 * The current scheme of polling is kludgy, though safe enough. 184 * The current scheme of polling is kludgy, though safe enough.
314 */ 185 */
315 186
316static int ide_write_setting(ide_drive_t *drive, ide_settings_t *setting, int val) 187static int ide_write_setting(ide_drive_t *drive,
188 const struct ide_proc_devset *setting, int val)
317{ 189{
190 const struct ide_devset *ds = setting->setting;
191
318 if (!capable(CAP_SYS_ADMIN)) 192 if (!capable(CAP_SYS_ADMIN))
319 return -EACCES; 193 return -EACCES;
320 if (setting->set) 194 if (!ds->set)
321 return setting->set(drive, val);
322 if (!(setting->rw & SETTING_WRITE))
323 return -EPERM; 195 return -EPERM;
324 if (val < setting->min || val > setting->max) 196 if ((ds->flags & DS_SYNC)
197 && (val < setting->min || val > setting->max))
325 return -EINVAL; 198 return -EINVAL;
326 if (ide_spin_wait_hwgroup(drive)) 199 return ide_devset_execute(drive, ds, val);
327 return -EBUSY;
328 switch (setting->data_type) {
329 case TYPE_BYTE:
330 *((u8 *) setting->data) = val;
331 break;
332 case TYPE_SHORT:
333 *((u16 *) setting->data) = val;
334 break;
335 case TYPE_INT:
336 *((u32 *) setting->data) = val;
337 break;
338 }
339 spin_unlock_irq(&ide_lock);
340 return 0;
341} 200}
342 201
202ide_devset_get(xfer_rate, current_speed);
203
343static int set_xfer_rate (ide_drive_t *drive, int arg) 204static int set_xfer_rate (ide_drive_t *drive, int arg)
344{ 205{
345 ide_task_t task; 206 ide_task_t task;
@@ -349,7 +210,7 @@ static int set_xfer_rate (ide_drive_t *drive, int arg)
349 return -EINVAL; 210 return -EINVAL;
350 211
351 memset(&task, 0, sizeof(task)); 212 memset(&task, 0, sizeof(task));
352 task.tf.command = WIN_SETFEATURES; 213 task.tf.command = ATA_CMD_SET_FEATURES;
353 task.tf.feature = SETFEATURES_XFER; 214 task.tf.feature = SETFEATURES_XFER;
354 task.tf.nsect = (u8)arg; 215 task.tf.nsect = (u8)arg;
355 task.tf_flags = IDE_TFLAG_OUT_FEATURE | IDE_TFLAG_OUT_NSECT | 216 task.tf_flags = IDE_TFLAG_OUT_FEATURE | IDE_TFLAG_OUT_NSECT |
@@ -364,29 +225,23 @@ static int set_xfer_rate (ide_drive_t *drive, int arg)
364 return err; 225 return err;
365} 226}
366 227
367/** 228ide_devset_rw(current_speed, xfer_rate);
368 * ide_add_generic_settings - generic ide settings 229ide_devset_rw_field(init_speed, init_speed);
369 * @drive: drive being configured 230ide_devset_rw_field(nice1, nice1);
370 * 231ide_devset_rw_field(number, dn);
371 * Add the generic parts of the system settings to the /proc files. 232
372 * The caller must not be holding the ide_setting_mtx. 233static const struct ide_proc_devset ide_generic_settings[] = {
373 */ 234 IDE_PROC_DEVSET(current_speed, 0, 70),
374 235 IDE_PROC_DEVSET(init_speed, 0, 70),
375void ide_add_generic_settings (ide_drive_t *drive) 236 IDE_PROC_DEVSET(io_32bit, 0, 1 + (SUPPORT_VLB_SYNC << 1)),
376{ 237 IDE_PROC_DEVSET(keepsettings, 0, 1),
377/* 238 IDE_PROC_DEVSET(nice1, 0, 1),
378 * drive setting name read/write access data type min max mul_factor div_factor data pointer set function 239 IDE_PROC_DEVSET(number, 0, 3),
379 */ 240 IDE_PROC_DEVSET(pio_mode, 0, 255),
380 __ide_add_setting(drive, "io_32bit", drive->no_io_32bit ? SETTING_READ : SETTING_RW, TYPE_BYTE, 0, 1 + (SUPPORT_VLB_SYNC << 1), 1, 1, &drive->io_32bit, set_io_32bit, 0); 241 IDE_PROC_DEVSET(unmaskirq, 0, 1),
381 __ide_add_setting(drive, "keepsettings", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->keep_settings, NULL, 0); 242 IDE_PROC_DEVSET(using_dma, 0, 1),
382 __ide_add_setting(drive, "nice1", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->nice1, NULL, 0); 243 { 0 },
383 __ide_add_setting(drive, "pio_mode", SETTING_WRITE, TYPE_BYTE, 0, 255, 1, 1, NULL, set_pio_mode, 0); 244};
384 __ide_add_setting(drive, "unmaskirq", drive->no_unmask ? SETTING_READ : SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->unmask, NULL, 0);
385 __ide_add_setting(drive, "using_dma", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->using_dma, set_using_dma, 0);
386 __ide_add_setting(drive, "init_speed", SETTING_RW, TYPE_BYTE, 0, 70, 1, 1, &drive->init_speed, NULL, 0);
387 __ide_add_setting(drive, "current_speed", SETTING_RW, TYPE_BYTE, 0, 70, 1, 1, &drive->current_speed, set_xfer_rate, 0);
388 __ide_add_setting(drive, "number", SETTING_RW, TYPE_BYTE, 0, 3, 1, 1, &drive->dn, NULL, 0);
389}
390 245
391static void proc_ide_settings_warn(void) 246static void proc_ide_settings_warn(void)
392{ 247{
@@ -403,19 +258,32 @@ static void proc_ide_settings_warn(void)
403static int proc_ide_read_settings 258static int proc_ide_read_settings
404 (char *page, char **start, off_t off, int count, int *eof, void *data) 259 (char *page, char **start, off_t off, int count, int *eof, void *data)
405{ 260{
261 const struct ide_proc_devset *setting, *g, *d;
262 const struct ide_devset *ds;
406 ide_drive_t *drive = (ide_drive_t *) data; 263 ide_drive_t *drive = (ide_drive_t *) data;
407 ide_settings_t *setting = (ide_settings_t *) drive->settings;
408 char *out = page; 264 char *out = page;
409 int len, rc, mul_factor, div_factor; 265 int len, rc, mul_factor, div_factor;
410 266
411 proc_ide_settings_warn(); 267 proc_ide_settings_warn();
412 268
413 mutex_lock(&ide_setting_mtx); 269 mutex_lock(&ide_setting_mtx);
270 g = ide_generic_settings;
271 d = drive->settings;
414 out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); 272 out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n");
415 out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); 273 out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n");
416 while (setting) { 274 while (g->name || (d && d->name)) {
417 mul_factor = setting->mul_factor; 275 /* read settings in the alphabetical order */
418 div_factor = setting->div_factor; 276 if (g->name && d && d->name) {
277 if (strcmp(d->name, g->name) < 0)
278 setting = d++;
279 else
280 setting = g++;
281 } else if (d && d->name) {
282 setting = d++;
283 } else
284 setting = g++;
285 mul_factor = setting->mulf ? setting->mulf(drive) : 1;
286 div_factor = setting->divf ? setting->divf(drive) : 1;
419 out += sprintf(out, "%-24s", setting->name); 287 out += sprintf(out, "%-24s", setting->name);
420 rc = ide_read_setting(drive, setting); 288 rc = ide_read_setting(drive, setting);
421 if (rc >= 0) 289 if (rc >= 0)
@@ -423,12 +291,12 @@ static int proc_ide_read_settings
423 else 291 else
424 out += sprintf(out, "%-16s", "write-only"); 292 out += sprintf(out, "%-16s", "write-only");
425 out += sprintf(out, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); 293 out += sprintf(out, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor);
426 if (setting->rw & SETTING_READ) 294 ds = setting->setting;
295 if (ds->get)
427 out += sprintf(out, "r"); 296 out += sprintf(out, "r");
428 if (setting->rw & SETTING_WRITE) 297 if (ds->set)
429 out += sprintf(out, "w"); 298 out += sprintf(out, "w");
430 out += sprintf(out, "\n"); 299 out += sprintf(out, "\n");
431 setting = setting->next;
432 } 300 }
433 len = out - page; 301 len = out - page;
434 mutex_unlock(&ide_setting_mtx); 302 mutex_unlock(&ide_setting_mtx);
@@ -442,9 +310,10 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer,
442{ 310{
443 ide_drive_t *drive = (ide_drive_t *) data; 311 ide_drive_t *drive = (ide_drive_t *) data;
444 char name[MAX_LEN + 1]; 312 char name[MAX_LEN + 1];
445 int for_real = 0; 313 int for_real = 0, mul_factor, div_factor;
446 unsigned long n; 314 unsigned long n;
447 ide_settings_t *setting; 315
316 const struct ide_proc_devset *setting;
448 char *buf, *s; 317 char *buf, *s;
449 318
450 if (!capable(CAP_SYS_ADMIN)) 319 if (!capable(CAP_SYS_ADMIN))
@@ -512,13 +381,21 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer,
512 } 381 }
513 382
514 mutex_lock(&ide_setting_mtx); 383 mutex_lock(&ide_setting_mtx);
515 setting = ide_find_setting_by_name(drive, name); 384 /* generic settings first, then driver specific ones */
385 setting = ide_find_setting(ide_generic_settings, name);
516 if (!setting) { 386 if (!setting) {
517 mutex_unlock(&ide_setting_mtx); 387 if (drive->settings)
518 goto parse_error; 388 setting = ide_find_setting(drive->settings, name);
389 if (!setting) {
390 mutex_unlock(&ide_setting_mtx);
391 goto parse_error;
392 }
393 }
394 if (for_real) {
395 mul_factor = setting->mulf ? setting->mulf(drive) : 1;
396 div_factor = setting->divf ? setting->divf(drive) : 1;
397 ide_write_setting(drive, setting, val * div_factor / mul_factor);
519 } 398 }
520 if (for_real)
521 ide_write_setting(drive, setting, val * setting->div_factor / setting->mul_factor);
522 mutex_unlock(&ide_setting_mtx); 399 mutex_unlock(&ide_setting_mtx);
523 } 400 }
524 } while (!for_real++); 401 } while (!for_real++);
@@ -561,11 +438,10 @@ static int proc_ide_read_dmodel
561 (char *page, char **start, off_t off, int count, int *eof, void *data) 438 (char *page, char **start, off_t off, int count, int *eof, void *data)
562{ 439{
563 ide_drive_t *drive = (ide_drive_t *) data; 440 ide_drive_t *drive = (ide_drive_t *) data;
564 struct hd_driveid *id = drive->id; 441 char *m = (char *)&drive->id[ATA_ID_PROD];
565 int len; 442 int len;
566 443
567 len = sprintf(page, "%.40s\n", 444 len = sprintf(page, "%.40s\n", m[0] ? m : "(none)");
568 (id && id->model[0]) ? (char *)id->model : "(none)");
569 PROC_IDE_READ_RETURN(page, start, off, count, eof, len); 445 PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
570} 446}
571 447
@@ -690,6 +566,10 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t
690 566
691void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) 567void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver)
692{ 568{
569 mutex_lock(&ide_setting_mtx);
570 drive->settings = driver->settings;
571 mutex_unlock(&ide_setting_mtx);
572
693 ide_add_proc_entries(drive->proc, driver->proc, drive); 573 ide_add_proc_entries(drive->proc, driver->proc, drive);
694} 574}
695 575
@@ -726,7 +606,7 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver)
726 * OTOH both ide_{read,write}_setting are only ever used under 606 * OTOH both ide_{read,write}_setting are only ever used under
727 * ide_setting_mtx. 607 * ide_setting_mtx.
728 */ 608 */
729 auto_remove_settings(drive); 609 drive->settings = NULL;
730 spin_unlock_irqrestore(&ide_lock, flags); 610 spin_unlock_irqrestore(&ide_lock, flags);
731 mutex_unlock(&ide_setting_mtx); 611 mutex_unlock(&ide_setting_mtx);
732} 612}
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 3833189144ed..f8c84df4a0bc 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -15,6 +15,8 @@
15 * Documentation/ide/ChangeLog.ide-tape.1995-2002 15 * Documentation/ide/ChangeLog.ide-tape.1995-2002
16 */ 16 */
17 17
18#define DRV_NAME "ide-tape"
19
18#define IDETAPE_VERSION "1.20" 20#define IDETAPE_VERSION "1.20"
19 21
20#include <linux/module.h> 22#include <linux/module.h>
@@ -54,8 +56,6 @@ enum {
54 DBG_CHRDEV = (1 << 2), 56 DBG_CHRDEV = (1 << 2),
55 /* all remaining procedures */ 57 /* all remaining procedures */
56 DBG_PROCS = (1 << 3), 58 DBG_PROCS = (1 << 3),
57 /* buffer alloc info (pc_stack & rq_stack) */
58 DBG_PCRQ_STACK = (1 << 4),
59}; 59};
60 60
61/* define to see debug info */ 61/* define to see debug info */
@@ -81,26 +81,6 @@ enum {
81#define IDETAPE_MAX_PC_RETRIES 3 81#define IDETAPE_MAX_PC_RETRIES 3
82 82
83/* 83/*
84 * With each packet command, we allocate a buffer of IDETAPE_PC_BUFFER_SIZE
85 * bytes. This is used for several packet commands (Not for READ/WRITE commands)
86 */
87#define IDETAPE_PC_BUFFER_SIZE 256
88
89/*
90 * In various places in the driver, we need to allocate storage
91 * for packet commands and requests, which will remain valid while
92 * we leave the driver to wait for an interrupt or a timeout event.
93 */
94#define IDETAPE_PC_STACK (10 + IDETAPE_MAX_PC_RETRIES)
95
96/*
97 * Some drives (for example, Seagate STT3401A Travan) require a very long
98 * timeout, because they don't return an interrupt or clear their busy bit
99 * until after the command completes (even retension commands).
100 */
101#define IDETAPE_WAIT_CMD (900*HZ)
102
103/*
104 * The following parameter is used to select the point in the internal tape fifo 84 * The following parameter is used to select the point in the internal tape fifo
105 * in which we will start to refill the buffer. Decreasing the following 85 * in which we will start to refill the buffer. Decreasing the following
106 * parameter will improve the system's latency and interactive response, while 86 * parameter will improve the system's latency and interactive response, while
@@ -172,20 +152,6 @@ struct idetape_bh {
172#define IDETAPE_LU_RETENSION_MASK 2 152#define IDETAPE_LU_RETENSION_MASK 2
173#define IDETAPE_LU_EOT_MASK 4 153#define IDETAPE_LU_EOT_MASK 4
174 154
175/*
176 * Special requests for our block device strategy routine.
177 *
178 * In order to service a character device command, we add special requests to
179 * the tail of our block device request queue and wait for their completion.
180 */
181
182enum {
183 REQ_IDETAPE_PC1 = (1 << 0), /* packet command (first stage) */
184 REQ_IDETAPE_PC2 = (1 << 1), /* packet command (second stage) */
185 REQ_IDETAPE_READ = (1 << 2),
186 REQ_IDETAPE_WRITE = (1 << 3),
187};
188
189/* Error codes returned in rq->errors to the higher part of the driver. */ 155/* Error codes returned in rq->errors to the higher part of the driver. */
190#define IDETAPE_ERROR_GENERAL 101 156#define IDETAPE_ERROR_GENERAL 101
191#define IDETAPE_ERROR_FILEMARK 102 157#define IDETAPE_ERROR_FILEMARK 102
@@ -206,13 +172,6 @@ typedef struct ide_tape_obj {
206 struct kref kref; 172 struct kref kref;
207 173
208 /* 174 /*
209 * Since a typical character device operation requires more
210 * than one packet command, we provide here enough memory
211 * for the maximum of interconnected packet commands.
212 * The packet commands are stored in the circular array pc_stack.
213 * pc_stack_index points to the last used entry, and warps around
214 * to the start when we get to the last array entry.
215 *
216 * pc points to the current processed packet command. 175 * pc points to the current processed packet command.
217 * 176 *
218 * failed_pc points to the last failed packet command, or contains 177 * failed_pc points to the last failed packet command, or contains
@@ -224,13 +183,11 @@ typedef struct ide_tape_obj {
224 struct ide_atapi_pc *pc; 183 struct ide_atapi_pc *pc;
225 /* Last failed packet command */ 184 /* Last failed packet command */
226 struct ide_atapi_pc *failed_pc; 185 struct ide_atapi_pc *failed_pc;
227 /* Packet command stack */ 186 /* used by REQ_IDETAPE_{READ,WRITE} requests */
228 struct ide_atapi_pc pc_stack[IDETAPE_PC_STACK]; 187 struct ide_atapi_pc queued_pc;
229 /* Next free packet command storage space */ 188
230 int pc_stack_index; 189 struct ide_atapi_pc request_sense_pc;
231 struct request rq_stack[IDETAPE_PC_STACK]; 190 struct request request_sense_rq;
232 /* We implement a circular array */
233 int rq_stack_index;
234 191
235 /* 192 /*
236 * DSC polling variables. 193 * DSC polling variables.
@@ -451,47 +408,6 @@ static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
451} 408}
452 409
453/* 410/*
454 * idetape_next_pc_storage returns a pointer to a place in which we can
455 * safely store a packet command, even though we intend to leave the
456 * driver. A storage space for a maximum of IDETAPE_PC_STACK packet
457 * commands is allocated at initialization time.
458 */
459static struct ide_atapi_pc *idetape_next_pc_storage(ide_drive_t *drive)
460{
461 idetape_tape_t *tape = drive->driver_data;
462
463 debug_log(DBG_PCRQ_STACK, "pc_stack_index=%d\n", tape->pc_stack_index);
464
465 if (tape->pc_stack_index == IDETAPE_PC_STACK)
466 tape->pc_stack_index = 0;
467 return (&tape->pc_stack[tape->pc_stack_index++]);
468}
469
470/*
471 * idetape_next_rq_storage is used along with idetape_next_pc_storage.
472 * Since we queue packet commands in the request queue, we need to
473 * allocate a request, along with the allocation of a packet command.
474 */
475
476/**************************************************************
477 * *
478 * This should get fixed to use kmalloc(.., GFP_ATOMIC) *
479 * followed later on by kfree(). -ml *
480 * *
481 **************************************************************/
482
483static struct request *idetape_next_rq_storage(ide_drive_t *drive)
484{
485 idetape_tape_t *tape = drive->driver_data;
486
487 debug_log(DBG_PCRQ_STACK, "rq_stack_index=%d\n", tape->rq_stack_index);
488
489 if (tape->rq_stack_index == IDETAPE_PC_STACK)
490 tape->rq_stack_index = 0;
491 return (&tape->rq_stack[tape->rq_stack_index++]);
492}
493
494/*
495 * called on each failed packet command retry to analyze the request sense. We 411 * called on each failed packet command retry to analyze the request sense. We
496 * currently do not utilize this information. 412 * currently do not utilize this information.
497 */ 413 */
@@ -667,61 +583,14 @@ static void ide_tape_callback(ide_drive_t *drive)
667 idetape_end_request(drive, uptodate, 0); 583 idetape_end_request(drive, uptodate, 0);
668} 584}
669 585
670static void idetape_init_pc(struct ide_atapi_pc *pc)
671{
672 memset(pc->c, 0, 12);
673 pc->retries = 0;
674 pc->flags = 0;
675 pc->req_xfer = 0;
676 pc->buf = pc->pc_buf;
677 pc->buf_size = IDETAPE_PC_BUFFER_SIZE;
678 pc->bh = NULL;
679 pc->b_data = NULL;
680}
681
682static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc) 586static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc)
683{ 587{
684 idetape_init_pc(pc); 588 ide_init_pc(pc);
685 pc->c[0] = REQUEST_SENSE; 589 pc->c[0] = REQUEST_SENSE;
686 pc->c[4] = 20; 590 pc->c[4] = 20;
687 pc->req_xfer = 20; 591 pc->req_xfer = 20;
688} 592}
689 593
690static void idetape_init_rq(struct request *rq, u8 cmd)
691{
692 blk_rq_init(NULL, rq);
693 rq->cmd_type = REQ_TYPE_SPECIAL;
694 rq->cmd[13] = cmd;
695}
696
697/*
698 * Generate a new packet command request in front of the request queue, before
699 * the current request, so that it will be processed immediately, on the next
700 * pass through the driver. The function below is called from the request
701 * handling part of the driver (the "bottom" part). Safe storage for the request
702 * should be allocated with ide_tape_next_{pc,rq}_storage() prior to that.
703 *
704 * Memory for those requests is pre-allocated at initialization time, and is
705 * limited to IDETAPE_PC_STACK requests. We assume that we have enough space for
706 * the maximum possible number of inter-dependent packet commands.
707 *
708 * The higher level of the driver - The ioctl handler and the character device
709 * handling functions should queue request to the lower level part and wait for
710 * their completion using idetape_queue_pc_tail or idetape_queue_rw_tail.
711 */
712static void idetape_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc,
713 struct request *rq)
714{
715 struct ide_tape_obj *tape = drive->driver_data;
716
717 idetape_init_rq(rq, REQ_IDETAPE_PC1);
718 rq->cmd_flags |= REQ_PREEMPT;
719 rq->buffer = (char *) pc;
720 rq->rq_disk = tape->disk;
721 memcpy(rq->cmd, pc->c, 12);
722 ide_do_drive_cmd(drive, rq);
723}
724
725/* 594/*
726 * idetape_retry_pc is called when an error was detected during the 595 * idetape_retry_pc is called when an error was detected during the
727 * last packet command. We queue a request sense packet command in 596 * last packet command. We queue a request sense packet command in
@@ -729,15 +598,14 @@ static void idetape_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc,
729 */ 598 */
730static void idetape_retry_pc(ide_drive_t *drive) 599static void idetape_retry_pc(ide_drive_t *drive)
731{ 600{
732 struct ide_atapi_pc *pc; 601 struct ide_tape_obj *tape = drive->driver_data;
733 struct request *rq; 602 struct request *rq = &tape->request_sense_rq;
603 struct ide_atapi_pc *pc = &tape->request_sense_pc;
734 604
735 (void)ide_read_error(drive); 605 (void)ide_read_error(drive);
736 pc = idetape_next_pc_storage(drive);
737 rq = idetape_next_rq_storage(drive);
738 idetape_create_request_sense_cmd(pc); 606 idetape_create_request_sense_cmd(pc);
739 set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); 607 set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
740 idetape_queue_pc_head(drive, pc, rq); 608 ide_queue_pc_head(drive, tape->disk, pc, rq);
741} 609}
742 610
743/* 611/*
@@ -766,13 +634,15 @@ static void ide_tape_handle_dsc(ide_drive_t *drive)
766 idetape_postpone_request(drive); 634 idetape_postpone_request(drive);
767} 635}
768 636
769static void ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, 637static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
770 unsigned int bcount, int write) 638 unsigned int bcount, int write)
771{ 639{
772 if (write) 640 if (write)
773 idetape_output_buffers(drive, pc, bcount); 641 idetape_output_buffers(drive, pc, bcount);
774 else 642 else
775 idetape_input_buffers(drive, pc, bcount); 643 idetape_input_buffers(drive, pc, bcount);
644
645 return bcount;
776} 646}
777 647
778/* 648/*
@@ -786,7 +656,7 @@ static ide_startstop_t idetape_pc_intr(ide_drive_t *drive)
786{ 656{
787 idetape_tape_t *tape = drive->driver_data; 657 idetape_tape_t *tape = drive->driver_data;
788 658
789 return ide_pc_intr(drive, tape->pc, idetape_pc_intr, IDETAPE_WAIT_CMD, 659 return ide_pc_intr(drive, tape->pc, idetape_pc_intr, WAIT_TAPE_CMD,
790 NULL, idetape_update_buffers, idetape_retry_pc, 660 NULL, idetape_update_buffers, idetape_retry_pc,
791 ide_tape_handle_dsc, ide_tape_io_buffers); 661 ide_tape_handle_dsc, ide_tape_io_buffers);
792} 662}
@@ -832,7 +702,7 @@ static ide_startstop_t idetape_transfer_pc(ide_drive_t *drive)
832 idetape_tape_t *tape = drive->driver_data; 702 idetape_tape_t *tape = drive->driver_data;
833 703
834 return ide_transfer_pc(drive, tape->pc, idetape_pc_intr, 704 return ide_transfer_pc(drive, tape->pc, idetape_pc_intr,
835 IDETAPE_WAIT_CMD, NULL); 705 WAIT_TAPE_CMD, NULL);
836} 706}
837 707
838static ide_startstop_t idetape_issue_pc(ide_drive_t *drive, 708static ide_startstop_t idetape_issue_pc(ide_drive_t *drive,
@@ -881,13 +751,13 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive,
881 pc->retries++; 751 pc->retries++;
882 752
883 return ide_issue_pc(drive, pc, idetape_transfer_pc, 753 return ide_issue_pc(drive, pc, idetape_transfer_pc,
884 IDETAPE_WAIT_CMD, NULL); 754 WAIT_TAPE_CMD, NULL);
885} 755}
886 756
887/* A mode sense command is used to "sense" tape parameters. */ 757/* A mode sense command is used to "sense" tape parameters. */
888static void idetape_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code) 758static void idetape_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code)
889{ 759{
890 idetape_init_pc(pc); 760 ide_init_pc(pc);
891 pc->c[0] = MODE_SENSE; 761 pc->c[0] = MODE_SENSE;
892 if (page_code != IDETAPE_BLOCK_DESCRIPTOR) 762 if (page_code != IDETAPE_BLOCK_DESCRIPTOR)
893 /* DBD = 1 - Don't return block descriptors */ 763 /* DBD = 1 - Don't return block descriptors */
@@ -920,8 +790,8 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
920 790
921 stat = hwif->tp_ops->read_status(hwif); 791 stat = hwif->tp_ops->read_status(hwif);
922 792
923 if (stat & SEEK_STAT) { 793 if (stat & ATA_DSC) {
924 if (stat & ERR_STAT) { 794 if (stat & ATA_ERR) {
925 /* Error detected */ 795 /* Error detected */
926 if (pc->c[0] != TEST_UNIT_READY) 796 if (pc->c[0] != TEST_UNIT_READY)
927 printk(KERN_ERR "ide-tape: %s: I/O error, ", 797 printk(KERN_ERR "ide-tape: %s: I/O error, ",
@@ -946,7 +816,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
946 struct idetape_bh *bh = (struct idetape_bh *)rq->special; 816 struct idetape_bh *bh = (struct idetape_bh *)rq->special;
947 unsigned int length = rq->current_nr_sectors; 817 unsigned int length = rq->current_nr_sectors;
948 818
949 idetape_init_pc(pc); 819 ide_init_pc(pc);
950 put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); 820 put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
951 pc->c[1] = 1; 821 pc->c[1] = 1;
952 pc->bh = bh; 822 pc->bh = bh;
@@ -978,9 +848,10 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
978 struct request *postponed_rq = tape->postponed_rq; 848 struct request *postponed_rq = tape->postponed_rq;
979 u8 stat; 849 u8 stat;
980 850
981 debug_log(DBG_SENSE, "sector: %ld, nr_sectors: %ld," 851 debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu,"
982 " current_nr_sectors: %d\n", 852 " current_nr_sectors: %u\n",
983 rq->sector, rq->nr_sectors, rq->current_nr_sectors); 853 (unsigned long long)rq->sector, rq->nr_sectors,
854 rq->current_nr_sectors);
984 855
985 if (!blk_special_request(rq)) { 856 if (!blk_special_request(rq)) {
986 /* We do not support buffer cache originated requests. */ 857 /* We do not support buffer cache originated requests. */
@@ -1021,7 +892,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
1021 } 892 }
1022 893
1023 if (!test_and_clear_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags) && 894 if (!test_and_clear_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags) &&
1024 (stat & SEEK_STAT) == 0) { 895 (stat & ATA_DSC) == 0) {
1025 if (postponed_rq == NULL) { 896 if (postponed_rq == NULL) {
1026 tape->dsc_polling_start = jiffies; 897 tape->dsc_polling_start = jiffies;
1027 tape->dsc_poll_freq = tape->best_dsc_rw_freq; 898 tape->dsc_poll_freq = tape->best_dsc_rw_freq;
@@ -1043,12 +914,12 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
1043 return ide_stopped; 914 return ide_stopped;
1044 } 915 }
1045 if (rq->cmd[13] & REQ_IDETAPE_READ) { 916 if (rq->cmd[13] & REQ_IDETAPE_READ) {
1046 pc = idetape_next_pc_storage(drive); 917 pc = &tape->queued_pc;
1047 ide_tape_create_rw_cmd(tape, pc, rq, READ_6); 918 ide_tape_create_rw_cmd(tape, pc, rq, READ_6);
1048 goto out; 919 goto out;
1049 } 920 }
1050 if (rq->cmd[13] & REQ_IDETAPE_WRITE) { 921 if (rq->cmd[13] & REQ_IDETAPE_WRITE) {
1051 pc = idetape_next_pc_storage(drive); 922 pc = &tape->queued_pc;
1052 ide_tape_create_rw_cmd(tape, pc, rq, WRITE_6); 923 ide_tape_create_rw_cmd(tape, pc, rq, WRITE_6);
1053 goto out; 924 goto out;
1054 } 925 }
@@ -1235,77 +1106,30 @@ static void idetape_init_merge_buffer(idetape_tape_t *tape)
1235static void idetape_create_write_filemark_cmd(ide_drive_t *drive, 1106static void idetape_create_write_filemark_cmd(ide_drive_t *drive,
1236 struct ide_atapi_pc *pc, int write_filemark) 1107 struct ide_atapi_pc *pc, int write_filemark)
1237{ 1108{
1238 idetape_init_pc(pc); 1109 ide_init_pc(pc);
1239 pc->c[0] = WRITE_FILEMARKS; 1110 pc->c[0] = WRITE_FILEMARKS;
1240 pc->c[4] = write_filemark; 1111 pc->c[4] = write_filemark;
1241 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 1112 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1242} 1113}
1243 1114
1244static void idetape_create_test_unit_ready_cmd(struct ide_atapi_pc *pc)
1245{
1246 idetape_init_pc(pc);
1247 pc->c[0] = TEST_UNIT_READY;
1248}
1249
1250/*
1251 * We add a special packet command request to the tail of the request queue, and
1252 * wait for it to be serviced. This is not to be called from within the request
1253 * handling part of the driver! We allocate here data on the stack and it is
1254 * valid until the request is finished. This is not the case for the bottom part
1255 * of the driver, where we are always leaving the functions to wait for an
1256 * interrupt or a timer event.
1257 *
1258 * From the bottom part of the driver, we should allocate safe memory using
1259 * idetape_next_pc_storage() and ide_tape_next_rq_storage(), and add the request
1260 * to the request list without waiting for it to be serviced! In that case, we
1261 * usually use idetape_queue_pc_head().
1262 */
1263static int idetape_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc)
1264{
1265 struct ide_tape_obj *tape = drive->driver_data;
1266 struct request *rq;
1267 int error;
1268
1269 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
1270 rq->cmd_type = REQ_TYPE_SPECIAL;
1271 rq->cmd[13] = REQ_IDETAPE_PC1;
1272 rq->buffer = (char *)pc;
1273 memcpy(rq->cmd, pc->c, 12);
1274 error = blk_execute_rq(drive->queue, tape->disk, rq, 0);
1275 blk_put_request(rq);
1276 return error;
1277}
1278
1279static void idetape_create_load_unload_cmd(ide_drive_t *drive,
1280 struct ide_atapi_pc *pc, int cmd)
1281{
1282 idetape_init_pc(pc);
1283 pc->c[0] = START_STOP;
1284 pc->c[4] = cmd;
1285 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1286}
1287
1288static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout) 1115static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout)
1289{ 1116{
1290 idetape_tape_t *tape = drive->driver_data; 1117 idetape_tape_t *tape = drive->driver_data;
1291 struct ide_atapi_pc pc; 1118 struct gendisk *disk = tape->disk;
1292 int load_attempted = 0; 1119 int load_attempted = 0;
1293 1120
1294 /* Wait for the tape to become ready */ 1121 /* Wait for the tape to become ready */
1295 set_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags); 1122 set_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags);
1296 timeout += jiffies; 1123 timeout += jiffies;
1297 while (time_before(jiffies, timeout)) { 1124 while (time_before(jiffies, timeout)) {
1298 idetape_create_test_unit_ready_cmd(&pc); 1125 if (ide_do_test_unit_ready(drive, disk) == 0)
1299 if (!idetape_queue_pc_tail(drive, &pc))
1300 return 0; 1126 return 0;
1301 if ((tape->sense_key == 2 && tape->asc == 4 && tape->ascq == 2) 1127 if ((tape->sense_key == 2 && tape->asc == 4 && tape->ascq == 2)
1302 || (tape->asc == 0x3A)) { 1128 || (tape->asc == 0x3A)) {
1303 /* no media */ 1129 /* no media */
1304 if (load_attempted) 1130 if (load_attempted)
1305 return -ENOMEDIUM; 1131 return -ENOMEDIUM;
1306 idetape_create_load_unload_cmd(drive, &pc, 1132 ide_do_start_stop(drive, disk, IDETAPE_LU_LOAD_MASK);
1307 IDETAPE_LU_LOAD_MASK);
1308 idetape_queue_pc_tail(drive, &pc);
1309 load_attempted = 1; 1133 load_attempted = 1;
1310 /* not about to be ready */ 1134 /* not about to be ready */
1311 } else if (!(tape->sense_key == 2 && tape->asc == 4 && 1135 } else if (!(tape->sense_key == 2 && tape->asc == 4 &&
@@ -1318,11 +1142,12 @@ static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout)
1318 1142
1319static int idetape_flush_tape_buffers(ide_drive_t *drive) 1143static int idetape_flush_tape_buffers(ide_drive_t *drive)
1320{ 1144{
1145 struct ide_tape_obj *tape = drive->driver_data;
1321 struct ide_atapi_pc pc; 1146 struct ide_atapi_pc pc;
1322 int rc; 1147 int rc;
1323 1148
1324 idetape_create_write_filemark_cmd(drive, &pc, 0); 1149 idetape_create_write_filemark_cmd(drive, &pc, 0);
1325 rc = idetape_queue_pc_tail(drive, &pc); 1150 rc = ide_queue_pc_tail(drive, tape->disk, &pc);
1326 if (rc) 1151 if (rc)
1327 return rc; 1152 return rc;
1328 idetape_wait_ready(drive, 60 * 5 * HZ); 1153 idetape_wait_ready(drive, 60 * 5 * HZ);
@@ -1331,7 +1156,7 @@ static int idetape_flush_tape_buffers(ide_drive_t *drive)
1331 1156
1332static void idetape_create_read_position_cmd(struct ide_atapi_pc *pc) 1157static void idetape_create_read_position_cmd(struct ide_atapi_pc *pc)
1333{ 1158{
1334 idetape_init_pc(pc); 1159 ide_init_pc(pc);
1335 pc->c[0] = READ_POSITION; 1160 pc->c[0] = READ_POSITION;
1336 pc->req_xfer = 20; 1161 pc->req_xfer = 20;
1337} 1162}
@@ -1345,7 +1170,7 @@ static int idetape_read_position(ide_drive_t *drive)
1345 debug_log(DBG_PROCS, "Enter %s\n", __func__); 1170 debug_log(DBG_PROCS, "Enter %s\n", __func__);
1346 1171
1347 idetape_create_read_position_cmd(&pc); 1172 idetape_create_read_position_cmd(&pc);
1348 if (idetape_queue_pc_tail(drive, &pc)) 1173 if (ide_queue_pc_tail(drive, tape->disk, &pc))
1349 return -1; 1174 return -1;
1350 position = tape->first_frame; 1175 position = tape->first_frame;
1351 return position; 1176 return position;
@@ -1355,7 +1180,7 @@ static void idetape_create_locate_cmd(ide_drive_t *drive,
1355 struct ide_atapi_pc *pc, 1180 struct ide_atapi_pc *pc,
1356 unsigned int block, u8 partition, int skip) 1181 unsigned int block, u8 partition, int skip)
1357{ 1182{
1358 idetape_init_pc(pc); 1183 ide_init_pc(pc);
1359 pc->c[0] = POSITION_TO_ELEMENT; 1184 pc->c[0] = POSITION_TO_ELEMENT;
1360 pc->c[1] = 2; 1185 pc->c[1] = 2;
1361 put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[3]); 1186 put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[3]);
@@ -1363,21 +1188,6 @@ static void idetape_create_locate_cmd(ide_drive_t *drive,
1363 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 1188 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1364} 1189}
1365 1190
1366static int idetape_create_prevent_cmd(ide_drive_t *drive,
1367 struct ide_atapi_pc *pc, int prevent)
1368{
1369 idetape_tape_t *tape = drive->driver_data;
1370
1371 /* device supports locking according to capabilities page */
1372 if (!(tape->caps[6] & 0x01))
1373 return 0;
1374
1375 idetape_init_pc(pc);
1376 pc->c[0] = ALLOW_MEDIUM_REMOVAL;
1377 pc->c[4] = prevent;
1378 return 1;
1379}
1380
1381static void __ide_tape_discard_merge_buffer(ide_drive_t *drive) 1191static void __ide_tape_discard_merge_buffer(ide_drive_t *drive)
1382{ 1192{
1383 idetape_tape_t *tape = drive->driver_data; 1193 idetape_tape_t *tape = drive->driver_data;
@@ -1405,6 +1215,7 @@ static int idetape_position_tape(ide_drive_t *drive, unsigned int block,
1405 u8 partition, int skip) 1215 u8 partition, int skip)
1406{ 1216{
1407 idetape_tape_t *tape = drive->driver_data; 1217 idetape_tape_t *tape = drive->driver_data;
1218 struct gendisk *disk = tape->disk;
1408 int retval; 1219 int retval;
1409 struct ide_atapi_pc pc; 1220 struct ide_atapi_pc pc;
1410 1221
@@ -1412,12 +1223,12 @@ static int idetape_position_tape(ide_drive_t *drive, unsigned int block,
1412 __ide_tape_discard_merge_buffer(drive); 1223 __ide_tape_discard_merge_buffer(drive);
1413 idetape_wait_ready(drive, 60 * 5 * HZ); 1224 idetape_wait_ready(drive, 60 * 5 * HZ);
1414 idetape_create_locate_cmd(drive, &pc, block, partition, skip); 1225 idetape_create_locate_cmd(drive, &pc, block, partition, skip);
1415 retval = idetape_queue_pc_tail(drive, &pc); 1226 retval = ide_queue_pc_tail(drive, disk, &pc);
1416 if (retval) 1227 if (retval)
1417 return (retval); 1228 return (retval);
1418 1229
1419 idetape_create_read_position_cmd(&pc); 1230 idetape_create_read_position_cmd(&pc);
1420 return (idetape_queue_pc_tail(drive, &pc)); 1231 return ide_queue_pc_tail(drive, disk, &pc);
1421} 1232}
1422 1233
1423static void ide_tape_discard_merge_buffer(ide_drive_t *drive, 1234static void ide_tape_discard_merge_buffer(ide_drive_t *drive,
@@ -1477,7 +1288,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
1477 1288
1478static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc) 1289static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc)
1479{ 1290{
1480 idetape_init_pc(pc); 1291 ide_init_pc(pc);
1481 pc->c[0] = INQUIRY; 1292 pc->c[0] = INQUIRY;
1482 pc->c[4] = 254; 1293 pc->c[4] = 254;
1483 pc->req_xfer = 254; 1294 pc->req_xfer = 254;
@@ -1486,14 +1297,14 @@ static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc)
1486static void idetape_create_rewind_cmd(ide_drive_t *drive, 1297static void idetape_create_rewind_cmd(ide_drive_t *drive,
1487 struct ide_atapi_pc *pc) 1298 struct ide_atapi_pc *pc)
1488{ 1299{
1489 idetape_init_pc(pc); 1300 ide_init_pc(pc);
1490 pc->c[0] = REZERO_UNIT; 1301 pc->c[0] = REZERO_UNIT;
1491 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 1302 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1492} 1303}
1493 1304
1494static void idetape_create_erase_cmd(struct ide_atapi_pc *pc) 1305static void idetape_create_erase_cmd(struct ide_atapi_pc *pc)
1495{ 1306{
1496 idetape_init_pc(pc); 1307 ide_init_pc(pc);
1497 pc->c[0] = ERASE; 1308 pc->c[0] = ERASE;
1498 pc->c[1] = 1; 1309 pc->c[1] = 1;
1499 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 1310 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
@@ -1501,7 +1312,7 @@ static void idetape_create_erase_cmd(struct ide_atapi_pc *pc)
1501 1312
1502static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) 1313static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
1503{ 1314{
1504 idetape_init_pc(pc); 1315 ide_init_pc(pc);
1505 pc->c[0] = SPACE; 1316 pc->c[0] = SPACE;
1506 put_unaligned(cpu_to_be32(count), (unsigned int *) &pc->c[1]); 1317 put_unaligned(cpu_to_be32(count), (unsigned int *) &pc->c[1]);
1507 pc->c[1] = cmd; 1318 pc->c[1] = cmd;
@@ -1664,20 +1475,20 @@ static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
1664 */ 1475 */
1665static int idetape_rewind_tape(ide_drive_t *drive) 1476static int idetape_rewind_tape(ide_drive_t *drive)
1666{ 1477{
1478 struct ide_tape_obj *tape = drive->driver_data;
1479 struct gendisk *disk = tape->disk;
1667 int retval; 1480 int retval;
1668 struct ide_atapi_pc pc; 1481 struct ide_atapi_pc pc;
1669 idetape_tape_t *tape;
1670 tape = drive->driver_data;
1671 1482
1672 debug_log(DBG_SENSE, "Enter %s\n", __func__); 1483 debug_log(DBG_SENSE, "Enter %s\n", __func__);
1673 1484
1674 idetape_create_rewind_cmd(drive, &pc); 1485 idetape_create_rewind_cmd(drive, &pc);
1675 retval = idetape_queue_pc_tail(drive, &pc); 1486 retval = ide_queue_pc_tail(drive, disk, &pc);
1676 if (retval) 1487 if (retval)
1677 return retval; 1488 return retval;
1678 1489
1679 idetape_create_read_position_cmd(&pc); 1490 idetape_create_read_position_cmd(&pc);
1680 retval = idetape_queue_pc_tail(drive, &pc); 1491 retval = ide_queue_pc_tail(drive, disk, &pc);
1681 if (retval) 1492 if (retval)
1682 return retval; 1493 return retval;
1683 return 0; 1494 return 0;
@@ -1720,6 +1531,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
1720 int mt_count) 1531 int mt_count)
1721{ 1532{
1722 idetape_tape_t *tape = drive->driver_data; 1533 idetape_tape_t *tape = drive->driver_data;
1534 struct gendisk *disk = tape->disk;
1723 struct ide_atapi_pc pc; 1535 struct ide_atapi_pc pc;
1724 int retval, count = 0; 1536 int retval, count = 0;
1725 int sprev = !!(tape->caps[4] & 0x20); 1537 int sprev = !!(tape->caps[4] & 0x20);
@@ -1744,7 +1556,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
1744 case MTBSF: 1556 case MTBSF:
1745 idetape_create_space_cmd(&pc, mt_count - count, 1557 idetape_create_space_cmd(&pc, mt_count - count,
1746 IDETAPE_SPACE_OVER_FILEMARK); 1558 IDETAPE_SPACE_OVER_FILEMARK);
1747 return idetape_queue_pc_tail(drive, &pc); 1559 return ide_queue_pc_tail(drive, disk, &pc);
1748 case MTFSFM: 1560 case MTFSFM:
1749 case MTBSFM: 1561 case MTBSFM:
1750 if (!sprev) 1562 if (!sprev)
@@ -1933,11 +1745,12 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
1933 1745
1934static int idetape_write_filemark(ide_drive_t *drive) 1746static int idetape_write_filemark(ide_drive_t *drive)
1935{ 1747{
1748 struct ide_tape_obj *tape = drive->driver_data;
1936 struct ide_atapi_pc pc; 1749 struct ide_atapi_pc pc;
1937 1750
1938 /* Write a filemark */ 1751 /* Write a filemark */
1939 idetape_create_write_filemark_cmd(drive, &pc, 1); 1752 idetape_create_write_filemark_cmd(drive, &pc, 1);
1940 if (idetape_queue_pc_tail(drive, &pc)) { 1753 if (ide_queue_pc_tail(drive, tape->disk, &pc)) {
1941 printk(KERN_ERR "ide-tape: Couldn't write a filemark\n"); 1754 printk(KERN_ERR "ide-tape: Couldn't write a filemark\n");
1942 return -EIO; 1755 return -EIO;
1943 } 1756 }
@@ -1960,6 +1773,7 @@ static int idetape_write_filemark(ide_drive_t *drive)
1960static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) 1773static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
1961{ 1774{
1962 idetape_tape_t *tape = drive->driver_data; 1775 idetape_tape_t *tape = drive->driver_data;
1776 struct gendisk *disk = tape->disk;
1963 struct ide_atapi_pc pc; 1777 struct ide_atapi_pc pc;
1964 int i, retval; 1778 int i, retval;
1965 1779
@@ -1996,9 +1810,7 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
1996 return 0; 1810 return 0;
1997 case MTLOAD: 1811 case MTLOAD:
1998 ide_tape_discard_merge_buffer(drive, 0); 1812 ide_tape_discard_merge_buffer(drive, 0);
1999 idetape_create_load_unload_cmd(drive, &pc, 1813 return ide_do_start_stop(drive, disk, IDETAPE_LU_LOAD_MASK);
2000 IDETAPE_LU_LOAD_MASK);
2001 return idetape_queue_pc_tail(drive, &pc);
2002 case MTUNLOAD: 1814 case MTUNLOAD:
2003 case MTOFFL: 1815 case MTOFFL:
2004 /* 1816 /*
@@ -2006,14 +1818,11 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
2006 * attempting to eject. 1818 * attempting to eject.
2007 */ 1819 */
2008 if (tape->door_locked) { 1820 if (tape->door_locked) {
2009 if (idetape_create_prevent_cmd(drive, &pc, 0)) 1821 if (!ide_set_media_lock(drive, disk, 0))
2010 if (!idetape_queue_pc_tail(drive, &pc)) 1822 tape->door_locked = DOOR_UNLOCKED;
2011 tape->door_locked = DOOR_UNLOCKED;
2012 } 1823 }
2013 ide_tape_discard_merge_buffer(drive, 0); 1824 ide_tape_discard_merge_buffer(drive, 0);
2014 idetape_create_load_unload_cmd(drive, &pc, 1825 retval = ide_do_start_stop(drive, disk, !IDETAPE_LU_LOAD_MASK);
2015 !IDETAPE_LU_LOAD_MASK);
2016 retval = idetape_queue_pc_tail(drive, &pc);
2017 if (!retval) 1826 if (!retval)
2018 clear_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags); 1827 clear_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags);
2019 return retval; 1828 return retval;
@@ -2022,16 +1831,15 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
2022 return idetape_flush_tape_buffers(drive); 1831 return idetape_flush_tape_buffers(drive);
2023 case MTRETEN: 1832 case MTRETEN:
2024 ide_tape_discard_merge_buffer(drive, 0); 1833 ide_tape_discard_merge_buffer(drive, 0);
2025 idetape_create_load_unload_cmd(drive, &pc, 1834 return ide_do_start_stop(drive, disk,
2026 IDETAPE_LU_RETENSION_MASK | IDETAPE_LU_LOAD_MASK); 1835 IDETAPE_LU_RETENSION_MASK | IDETAPE_LU_LOAD_MASK);
2027 return idetape_queue_pc_tail(drive, &pc);
2028 case MTEOM: 1836 case MTEOM:
2029 idetape_create_space_cmd(&pc, 0, IDETAPE_SPACE_TO_EOD); 1837 idetape_create_space_cmd(&pc, 0, IDETAPE_SPACE_TO_EOD);
2030 return idetape_queue_pc_tail(drive, &pc); 1838 return ide_queue_pc_tail(drive, disk, &pc);
2031 case MTERASE: 1839 case MTERASE:
2032 (void)idetape_rewind_tape(drive); 1840 (void)idetape_rewind_tape(drive);
2033 idetape_create_erase_cmd(&pc); 1841 idetape_create_erase_cmd(&pc);
2034 return idetape_queue_pc_tail(drive, &pc); 1842 return ide_queue_pc_tail(drive, disk, &pc);
2035 case MTSETBLK: 1843 case MTSETBLK:
2036 if (mt_count) { 1844 if (mt_count) {
2037 if (mt_count < tape->blk_size || 1845 if (mt_count < tape->blk_size ||
@@ -2052,17 +1860,13 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
2052 case MTFSR: 1860 case MTFSR:
2053 case MTBSR: 1861 case MTBSR:
2054 case MTLOCK: 1862 case MTLOCK:
2055 if (!idetape_create_prevent_cmd(drive, &pc, 1)) 1863 retval = ide_set_media_lock(drive, disk, 1);
2056 return 0;
2057 retval = idetape_queue_pc_tail(drive, &pc);
2058 if (retval) 1864 if (retval)
2059 return retval; 1865 return retval;
2060 tape->door_locked = DOOR_EXPLICITLY_LOCKED; 1866 tape->door_locked = DOOR_EXPLICITLY_LOCKED;
2061 return 0; 1867 return 0;
2062 case MTUNLOCK: 1868 case MTUNLOCK:
2063 if (!idetape_create_prevent_cmd(drive, &pc, 0)) 1869 retval = ide_set_media_lock(drive, disk, 0);
2064 return 0;
2065 retval = idetape_queue_pc_tail(drive, &pc);
2066 if (retval) 1870 if (retval)
2067 return retval; 1871 return retval;
2068 tape->door_locked = DOOR_UNLOCKED; 1872 tape->door_locked = DOOR_UNLOCKED;
@@ -2144,7 +1948,7 @@ static void ide_tape_get_bsize_from_bdesc(ide_drive_t *drive)
2144 struct ide_atapi_pc pc; 1948 struct ide_atapi_pc pc;
2145 1949
2146 idetape_create_mode_sense_cmd(&pc, IDETAPE_BLOCK_DESCRIPTOR); 1950 idetape_create_mode_sense_cmd(&pc, IDETAPE_BLOCK_DESCRIPTOR);
2147 if (idetape_queue_pc_tail(drive, &pc)) { 1951 if (ide_queue_pc_tail(drive, tape->disk, &pc)) {
2148 printk(KERN_ERR "ide-tape: Can't get block descriptor\n"); 1952 printk(KERN_ERR "ide-tape: Can't get block descriptor\n");
2149 if (tape->blk_size == 0) { 1953 if (tape->blk_size == 0) {
2150 printk(KERN_WARNING "ide-tape: Cannot deal with zero " 1954 printk(KERN_WARNING "ide-tape: Cannot deal with zero "
@@ -2164,7 +1968,6 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
2164 unsigned int minor = iminor(inode), i = minor & ~0xc0; 1968 unsigned int minor = iminor(inode), i = minor & ~0xc0;
2165 ide_drive_t *drive; 1969 ide_drive_t *drive;
2166 idetape_tape_t *tape; 1970 idetape_tape_t *tape;
2167 struct ide_atapi_pc pc;
2168 int retval; 1971 int retval;
2169 1972
2170 if (i >= MAX_HWIFS * MAX_DRIVES) 1973 if (i >= MAX_HWIFS * MAX_DRIVES)
@@ -2227,11 +2030,9 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
2227 2030
2228 /* Lock the tape drive door so user can't eject. */ 2031 /* Lock the tape drive door so user can't eject. */
2229 if (tape->chrdev_dir == IDETAPE_DIR_NONE) { 2032 if (tape->chrdev_dir == IDETAPE_DIR_NONE) {
2230 if (idetape_create_prevent_cmd(drive, &pc, 1)) { 2033 if (!ide_set_media_lock(drive, tape->disk, 1)) {
2231 if (!idetape_queue_pc_tail(drive, &pc)) { 2034 if (tape->door_locked != DOOR_EXPLICITLY_LOCKED)
2232 if (tape->door_locked != DOOR_EXPLICITLY_LOCKED) 2035 tape->door_locked = DOOR_LOCKED;
2233 tape->door_locked = DOOR_LOCKED;
2234 }
2235 } 2036 }
2236 } 2037 }
2237 unlock_kernel(); 2038 unlock_kernel();
@@ -2264,7 +2065,6 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp)
2264{ 2065{
2265 struct ide_tape_obj *tape = ide_tape_f(filp); 2066 struct ide_tape_obj *tape = ide_tape_f(filp);
2266 ide_drive_t *drive = tape->drive; 2067 ide_drive_t *drive = tape->drive;
2267 struct ide_atapi_pc pc;
2268 unsigned int minor = iminor(inode); 2068 unsigned int minor = iminor(inode);
2269 2069
2270 lock_kernel(); 2070 lock_kernel();
@@ -2283,10 +2083,8 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp)
2283 (void) idetape_rewind_tape(drive); 2083 (void) idetape_rewind_tape(drive);
2284 if (tape->chrdev_dir == IDETAPE_DIR_NONE) { 2084 if (tape->chrdev_dir == IDETAPE_DIR_NONE) {
2285 if (tape->door_locked == DOOR_LOCKED) { 2085 if (tape->door_locked == DOOR_LOCKED) {
2286 if (idetape_create_prevent_cmd(drive, &pc, 0)) { 2086 if (!ide_set_media_lock(drive, tape->disk, 0))
2287 if (!idetape_queue_pc_tail(drive, &pc)) 2087 tape->door_locked = DOOR_UNLOCKED;
2288 tape->door_locked = DOOR_UNLOCKED;
2289 }
2290 } 2088 }
2291 } 2089 }
2292 clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags); 2090 clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags);
@@ -2295,45 +2093,6 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp)
2295 return 0; 2093 return 0;
2296} 2094}
2297 2095
2298/*
2299 * check the contents of the ATAPI IDENTIFY command results. We return:
2300 *
2301 * 1 - If the tape can be supported by us, based on the information we have so
2302 * far.
2303 *
2304 * 0 - If this tape driver is not currently supported by us.
2305 */
2306static int idetape_identify_device(ide_drive_t *drive)
2307{
2308 u8 gcw[2], protocol, device_type, removable, packet_size;
2309
2310 if (drive->id_read == 0)
2311 return 1;
2312
2313 *((unsigned short *) &gcw) = drive->id->config;
2314
2315 protocol = (gcw[1] & 0xC0) >> 6;
2316 device_type = gcw[1] & 0x1F;
2317 removable = !!(gcw[0] & 0x80);
2318 packet_size = gcw[0] & 0x3;
2319
2320 /* Check that we can support this device */
2321 if (protocol != 2)
2322 printk(KERN_ERR "ide-tape: Protocol (0x%02x) is not ATAPI\n",
2323 protocol);
2324 else if (device_type != 1)
2325 printk(KERN_ERR "ide-tape: Device type (0x%02x) is not set "
2326 "to tape\n", device_type);
2327 else if (!removable)
2328 printk(KERN_ERR "ide-tape: The removable flag is not set\n");
2329 else if (packet_size != 0) {
2330 printk(KERN_ERR "ide-tape: Packet size (0x%02x) is not 12"
2331 " bytes\n", packet_size);
2332 } else
2333 return 1;
2334 return 0;
2335}
2336
2337static void idetape_get_inquiry_results(ide_drive_t *drive) 2096static void idetape_get_inquiry_results(ide_drive_t *drive)
2338{ 2097{
2339 idetape_tape_t *tape = drive->driver_data; 2098 idetape_tape_t *tape = drive->driver_data;
@@ -2341,7 +2100,7 @@ static void idetape_get_inquiry_results(ide_drive_t *drive)
2341 char fw_rev[4], vendor_id[8], product_id[16]; 2100 char fw_rev[4], vendor_id[8], product_id[16];
2342 2101
2343 idetape_create_inquiry_cmd(&pc); 2102 idetape_create_inquiry_cmd(&pc);
2344 if (idetape_queue_pc_tail(drive, &pc)) { 2103 if (ide_queue_pc_tail(drive, tape->disk, &pc)) {
2345 printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n", 2104 printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n",
2346 tape->name); 2105 tape->name);
2347 return; 2106 return;
@@ -2370,7 +2129,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
2370 u8 speed, max_speed; 2129 u8 speed, max_speed;
2371 2130
2372 idetape_create_mode_sense_cmd(&pc, IDETAPE_CAPABILITIES_PAGE); 2131 idetape_create_mode_sense_cmd(&pc, IDETAPE_CAPABILITIES_PAGE);
2373 if (idetape_queue_pc_tail(drive, &pc)) { 2132 if (ide_queue_pc_tail(drive, tape->disk, &pc)) {
2374 printk(KERN_ERR "ide-tape: Can't get tape parameters - assuming" 2133 printk(KERN_ERR "ide-tape: Can't get tape parameters - assuming"
2375 " some default values\n"); 2134 " some default values\n");
2376 tape->blk_size = 512; 2135 tape->blk_size = 512;
@@ -2402,6 +2161,11 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
2402 } 2161 }
2403 2162
2404 memcpy(&tape->caps, caps, 20); 2163 memcpy(&tape->caps, caps, 20);
2164
2165 /* device lacks locking support according to capabilities page */
2166 if ((caps[6] & 1) == 0)
2167 drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK;
2168
2405 if (caps[7] & 0x02) 2169 if (caps[7] & 0x02)
2406 tape->blk_size = 512; 2170 tape->blk_size = 512;
2407 else if (caps[7] & 0x04) 2171 else if (caps[7] & 0x04)
@@ -2409,28 +2173,56 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
2409} 2173}
2410 2174
2411#ifdef CONFIG_IDE_PROC_FS 2175#ifdef CONFIG_IDE_PROC_FS
2412static void idetape_add_settings(ide_drive_t *drive) 2176#define ide_tape_devset_get(name, field) \
2413{ 2177static int get_##name(ide_drive_t *drive) \
2414 idetape_tape_t *tape = drive->driver_data; 2178{ \
2415 2179 idetape_tape_t *tape = drive->driver_data; \
2416 ide_add_setting(drive, "buffer", SETTING_READ, TYPE_SHORT, 0, 0xffff, 2180 return tape->field; \
2417 1, 2, (u16 *)&tape->caps[16], NULL); 2181}
2418 ide_add_setting(drive, "speed", SETTING_READ, TYPE_SHORT, 0, 0xffff, 2182
2419 1, 1, (u16 *)&tape->caps[14], NULL); 2183#define ide_tape_devset_set(name, field) \
2420 ide_add_setting(drive, "buffer_size", SETTING_READ, TYPE_INT, 0, 0xffff, 2184static int set_##name(ide_drive_t *drive, int arg) \
2421 1, 1024, &tape->buffer_size, NULL); 2185{ \
2422 ide_add_setting(drive, "tdsc", SETTING_RW, TYPE_INT, IDETAPE_DSC_RW_MIN, 2186 idetape_tape_t *tape = drive->driver_data; \
2423 IDETAPE_DSC_RW_MAX, 1000, HZ, &tape->best_dsc_rw_freq, 2187 tape->field = arg; \
2424 NULL); 2188 return 0; \
2425 ide_add_setting(drive, "dsc_overlap", SETTING_RW, TYPE_BYTE, 0, 1, 1, 2189}
2426 1, &drive->dsc_overlap, NULL); 2190
2427 ide_add_setting(drive, "avg_speed", SETTING_READ, TYPE_INT, 0, 0xffff, 2191#define ide_tape_devset_rw_field(_name, _field) \
2428 1, 1, &tape->avg_speed, NULL); 2192ide_tape_devset_get(_name, _field) \
2429 ide_add_setting(drive, "debug_mask", SETTING_RW, TYPE_INT, 0, 0xffff, 1, 2193ide_tape_devset_set(_name, _field) \
2430 1, &tape->debug_mask, NULL); 2194IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name)
2431} 2195
2432#else 2196#define ide_tape_devset_r_field(_name, _field) \
2433static inline void idetape_add_settings(ide_drive_t *drive) { ; } 2197ide_tape_devset_get(_name, _field) \
2198IDE_DEVSET(_name, 0, get_##_name, NULL)
2199
2200static int mulf_tdsc(ide_drive_t *drive) { return 1000; }
2201static int divf_tdsc(ide_drive_t *drive) { return HZ; }
2202static int divf_buffer(ide_drive_t *drive) { return 2; }
2203static int divf_buffer_size(ide_drive_t *drive) { return 1024; }
2204
2205ide_devset_rw_field(dsc_overlap, dsc_overlap);
2206
2207ide_tape_devset_rw_field(debug_mask, debug_mask);
2208ide_tape_devset_rw_field(tdsc, best_dsc_rw_freq);
2209
2210ide_tape_devset_r_field(avg_speed, avg_speed);
2211ide_tape_devset_r_field(speed, caps[14]);
2212ide_tape_devset_r_field(buffer, caps[16]);
2213ide_tape_devset_r_field(buffer_size, buffer_size);
2214
2215static const struct ide_proc_devset idetape_settings[] = {
2216 __IDE_PROC_DEVSET(avg_speed, 0, 0xffff, NULL, NULL),
2217 __IDE_PROC_DEVSET(buffer, 0, 0xffff, NULL, divf_buffer),
2218 __IDE_PROC_DEVSET(buffer_size, 0, 0xffff, NULL, divf_buffer_size),
2219 __IDE_PROC_DEVSET(debug_mask, 0, 0xffff, NULL, NULL),
2220 __IDE_PROC_DEVSET(dsc_overlap, 0, 1, NULL, NULL),
2221 __IDE_PROC_DEVSET(speed, 0, 0xffff, NULL, NULL),
2222 __IDE_PROC_DEVSET(tdsc, IDETAPE_DSC_RW_MIN, IDETAPE_DSC_RW_MAX,
2223 mulf_tdsc, divf_tdsc),
2224 { 0 },
2225};
2434#endif 2226#endif
2435 2227
2436/* 2228/*
@@ -2462,15 +2254,15 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
2462 drive->dsc_overlap = 0; 2254 drive->dsc_overlap = 0;
2463 } 2255 }
2464 /* Seagate Travan drives do not support DSC overlap. */ 2256 /* Seagate Travan drives do not support DSC overlap. */
2465 if (strstr(drive->id->model, "Seagate STT3401")) 2257 if (strstr((char *)&drive->id[ATA_ID_PROD], "Seagate STT3401"))
2466 drive->dsc_overlap = 0; 2258 drive->dsc_overlap = 0;
2467 tape->minor = minor; 2259 tape->minor = minor;
2468 tape->name[0] = 'h'; 2260 tape->name[0] = 'h';
2469 tape->name[1] = 't'; 2261 tape->name[1] = 't';
2470 tape->name[2] = '0' + minor; 2262 tape->name[2] = '0' + minor;
2471 tape->chrdev_dir = IDETAPE_DIR_NONE; 2263 tape->chrdev_dir = IDETAPE_DIR_NONE;
2472 tape->pc = tape->pc_stack; 2264
2473 *((unsigned short *) &gcw) = drive->id->config; 2265 *((u16 *)&gcw) = drive->id[ATA_ID_CONFIG];
2474 2266
2475 /* Command packet DRQ type */ 2267 /* Command packet DRQ type */
2476 if (((gcw[0] & 0x60) >> 5) == 1) 2268 if (((gcw[0] & 0x60) >> 5) == 1)
@@ -2512,7 +2304,7 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
2512 tape->best_dsc_rw_freq * 1000 / HZ, 2304 tape->best_dsc_rw_freq * 1000 / HZ,
2513 drive->using_dma ? ", DMA":""); 2305 drive->using_dma ? ", DMA":"");
2514 2306
2515 idetape_add_settings(drive); 2307 ide_proc_register_driver(drive, tape->driver);
2516} 2308}
2517 2309
2518static void ide_tape_remove(ide_drive_t *drive) 2310static void ide_tape_remove(ide_drive_t *drive)
@@ -2577,12 +2369,12 @@ static ide_driver_t idetape_driver = {
2577 .remove = ide_tape_remove, 2369 .remove = ide_tape_remove,
2578 .version = IDETAPE_VERSION, 2370 .version = IDETAPE_VERSION,
2579 .media = ide_tape, 2371 .media = ide_tape,
2580 .supports_dsc_overlap = 1,
2581 .do_request = idetape_do_request, 2372 .do_request = idetape_do_request,
2582 .end_request = idetape_end_request, 2373 .end_request = idetape_end_request,
2583 .error = __ide_error, 2374 .error = __ide_error,
2584#ifdef CONFIG_IDE_PROC_FS 2375#ifdef CONFIG_IDE_PROC_FS
2585 .proc = idetape_proc, 2376 .proc = idetape_proc,
2377 .settings = idetape_settings,
2586#endif 2378#endif
2587}; 2379};
2588 2380
@@ -2645,11 +2437,11 @@ static int ide_tape_probe(ide_drive_t *drive)
2645 2437
2646 if (!strstr("ide-tape", drive->driver_req)) 2438 if (!strstr("ide-tape", drive->driver_req))
2647 goto failed; 2439 goto failed;
2648 if (!drive->present) 2440
2649 goto failed;
2650 if (drive->media != ide_tape) 2441 if (drive->media != ide_tape)
2651 goto failed; 2442 goto failed;
2652 if (!idetape_identify_device(drive)) { 2443
2444 if (drive->id_read == 1 && !ide_check_atapi_device(drive, DRV_NAME)) {
2653 printk(KERN_ERR "ide-tape: %s: not supported by this version of" 2445 printk(KERN_ERR "ide-tape: %s: not supported by this version of"
2654 " the driver\n", drive->name); 2446 " the driver\n", drive->name);
2655 goto failed; 2447 goto failed;
@@ -2667,8 +2459,6 @@ static int ide_tape_probe(ide_drive_t *drive)
2667 2459
2668 ide_init_disk(g, drive); 2460 ide_init_disk(g, drive);
2669 2461
2670 ide_proc_register_driver(drive, &idetape_driver);
2671
2672 kref_init(&tape->kref); 2462 kref_init(&tape->kref);
2673 2463
2674 tape->drive = drive; 2464 tape->drive = drive;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 7fb6f1c86272..487b18b3ebae 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -44,9 +44,9 @@ int taskfile_lib_get_identify (ide_drive_t *drive, u8 *buf)
44 memset(&args, 0, sizeof(ide_task_t)); 44 memset(&args, 0, sizeof(ide_task_t));
45 args.tf.nsect = 0x01; 45 args.tf.nsect = 0x01;
46 if (drive->media == ide_disk) 46 if (drive->media == ide_disk)
47 args.tf.command = WIN_IDENTIFY; 47 args.tf.command = ATA_CMD_ID_ATA;
48 else 48 else
49 args.tf.command = WIN_PIDENTIFY; 49 args.tf.command = ATA_CMD_ID_ATAPI;
50 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; 50 args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
51 args.data_phase = TASKFILE_IN; 51 args.data_phase = TASKFILE_IN;
52 return ide_raw_taskfile(drive, &args, buf, 1); 52 return ide_raw_taskfile(drive, &args, buf, 1);
@@ -99,12 +99,17 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
99 case TASKFILE_NO_DATA: 99 case TASKFILE_NO_DATA:
100 if (handler == NULL) 100 if (handler == NULL)
101 handler = task_no_data_intr; 101 handler = task_no_data_intr;
102 /* WIN_{SPECIFY,RESTORE,SETMULT} use custom handlers */
103 if (task->tf_flags & IDE_TFLAG_CUSTOM_HANDLER) { 102 if (task->tf_flags & IDE_TFLAG_CUSTOM_HANDLER) {
104 switch (tf->command) { 103 switch (tf->command) {
105 case WIN_SPECIFY: handler = set_geometry_intr; break; 104 case ATA_CMD_INIT_DEV_PARAMS:
106 case WIN_RESTORE: handler = recal_intr; break; 105 handler = set_geometry_intr;
107 case WIN_SETMULT: handler = set_multmode_intr; break; 106 break;
107 case ATA_CMD_RESTORE:
108 handler = recal_intr;
109 break;
110 case ATA_CMD_SET_MULTI:
111 handler = set_multmode_intr;
112 break;
108 } 113 }
109 } 114 }
110 ide_execute_command(drive, tf->command, handler, 115 ide_execute_command(drive, tf->command, handler,
@@ -121,7 +126,7 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
121EXPORT_SYMBOL_GPL(do_rw_taskfile); 126EXPORT_SYMBOL_GPL(do_rw_taskfile);
122 127
123/* 128/*
124 * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd. 129 * set_multmode_intr() is invoked on completion of a ATA_CMD_SET_MULTI cmd.
125 */ 130 */
126static ide_startstop_t set_multmode_intr(ide_drive_t *drive) 131static ide_startstop_t set_multmode_intr(ide_drive_t *drive)
127{ 132{
@@ -131,7 +136,7 @@ static ide_startstop_t set_multmode_intr(ide_drive_t *drive)
131 local_irq_enable_in_hardirq(); 136 local_irq_enable_in_hardirq();
132 stat = hwif->tp_ops->read_status(hwif); 137 stat = hwif->tp_ops->read_status(hwif);
133 138
134 if (OK_STAT(stat, READY_STAT, BAD_STAT)) 139 if (OK_STAT(stat, ATA_DRDY, BAD_STAT))
135 drive->mult_count = drive->mult_req; 140 drive->mult_count = drive->mult_req;
136 else { 141 else {
137 drive->mult_req = drive->mult_count = 0; 142 drive->mult_req = drive->mult_count = 0;
@@ -142,7 +147,7 @@ static ide_startstop_t set_multmode_intr(ide_drive_t *drive)
142} 147}
143 148
144/* 149/*
145 * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd. 150 * set_geometry_intr() is invoked on completion of a ATA_CMD_INIT_DEV_PARAMS cmd.
146 */ 151 */
147static ide_startstop_t set_geometry_intr(ide_drive_t *drive) 152static ide_startstop_t set_geometry_intr(ide_drive_t *drive)
148{ 153{
@@ -154,15 +159,15 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive)
154 159
155 while (1) { 160 while (1) {
156 stat = hwif->tp_ops->read_status(hwif); 161 stat = hwif->tp_ops->read_status(hwif);
157 if ((stat & BUSY_STAT) == 0 || retries-- == 0) 162 if ((stat & ATA_BUSY) == 0 || retries-- == 0)
158 break; 163 break;
159 udelay(10); 164 udelay(10);
160 }; 165 };
161 166
162 if (OK_STAT(stat, READY_STAT, BAD_STAT)) 167 if (OK_STAT(stat, ATA_DRDY, BAD_STAT))
163 return ide_stopped; 168 return ide_stopped;
164 169
165 if (stat & (ERR_STAT|DRQ_STAT)) 170 if (stat & (ATA_ERR | ATA_DRQ))
166 return ide_error(drive, "set_geometry_intr", stat); 171 return ide_error(drive, "set_geometry_intr", stat);
167 172
168 ide_set_handler(drive, &set_geometry_intr, WAIT_WORSTCASE, NULL); 173 ide_set_handler(drive, &set_geometry_intr, WAIT_WORSTCASE, NULL);
@@ -170,7 +175,7 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive)
170} 175}
171 176
172/* 177/*
173 * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd. 178 * recal_intr() is invoked on completion of a ATA_CMD_RESTORE (recalibrate) cmd.
174 */ 179 */
175static ide_startstop_t recal_intr(ide_drive_t *drive) 180static ide_startstop_t recal_intr(ide_drive_t *drive)
176{ 181{
@@ -180,7 +185,7 @@ static ide_startstop_t recal_intr(ide_drive_t *drive)
180 local_irq_enable_in_hardirq(); 185 local_irq_enable_in_hardirq();
181 stat = hwif->tp_ops->read_status(hwif); 186 stat = hwif->tp_ops->read_status(hwif);
182 187
183 if (!OK_STAT(stat, READY_STAT, BAD_STAT)) 188 if (!OK_STAT(stat, ATA_DRDY, BAD_STAT))
184 return ide_error(drive, "recal_intr", stat); 189 return ide_error(drive, "recal_intr", stat);
185 return ide_stopped; 190 return ide_stopped;
186} 191}
@@ -197,7 +202,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive)
197 local_irq_enable_in_hardirq(); 202 local_irq_enable_in_hardirq();
198 stat = hwif->tp_ops->read_status(hwif); 203 stat = hwif->tp_ops->read_status(hwif);
199 204
200 if (!OK_STAT(stat, READY_STAT, BAD_STAT)) 205 if (!OK_STAT(stat, ATA_DRDY, BAD_STAT))
201 return ide_error(drive, "task_no_data_intr", stat); 206 return ide_error(drive, "task_no_data_intr", stat);
202 /* calls ide_end_drive_cmd */ 207 /* calls ide_end_drive_cmd */
203 208
@@ -220,13 +225,13 @@ static u8 wait_drive_not_busy(ide_drive_t *drive)
220 for (retries = 0; retries < 1000; retries++) { 225 for (retries = 0; retries < 1000; retries++) {
221 stat = hwif->tp_ops->read_status(hwif); 226 stat = hwif->tp_ops->read_status(hwif);
222 227
223 if (stat & BUSY_STAT) 228 if (stat & ATA_BUSY)
224 udelay(10); 229 udelay(10);
225 else 230 else
226 break; 231 break;
227 } 232 }
228 233
229 if (stat & BUSY_STAT) 234 if (stat & ATA_BUSY)
230 printk(KERN_ERR "%s: drive still BUSY!\n", drive->name); 235 printk(KERN_ERR "%s: drive still BUSY!\n", drive->name);
231 236
232 return stat; 237 return stat;
@@ -385,7 +390,7 @@ void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
385static ide_startstop_t task_in_unexpected(ide_drive_t *drive, struct request *rq, u8 stat) 390static ide_startstop_t task_in_unexpected(ide_drive_t *drive, struct request *rq, u8 stat)
386{ 391{
387 /* Command all done? */ 392 /* Command all done? */
388 if (OK_STAT(stat, READY_STAT, BUSY_STAT)) { 393 if (OK_STAT(stat, ATA_DRDY, ATA_BUSY)) {
389 task_end_request(drive, rq, stat); 394 task_end_request(drive, rq, stat);
390 return ide_stopped; 395 return ide_stopped;
391 } 396 }
@@ -405,11 +410,11 @@ static ide_startstop_t task_in_intr(ide_drive_t *drive)
405 u8 stat = hwif->tp_ops->read_status(hwif); 410 u8 stat = hwif->tp_ops->read_status(hwif);
406 411
407 /* Error? */ 412 /* Error? */
408 if (stat & ERR_STAT) 413 if (stat & ATA_ERR)
409 return task_error(drive, rq, __func__, stat); 414 return task_error(drive, rq, __func__, stat);
410 415
411 /* Didn't want any data? Odd. */ 416 /* Didn't want any data? Odd. */
412 if (!(stat & DRQ_STAT)) 417 if ((stat & ATA_DRQ) == 0)
413 return task_in_unexpected(drive, rq, stat); 418 return task_in_unexpected(drive, rq, stat);
414 419
415 ide_pio_datablock(drive, rq, 0); 420 ide_pio_datablock(drive, rq, 0);
@@ -442,7 +447,7 @@ static ide_startstop_t task_out_intr (ide_drive_t *drive)
442 return task_error(drive, rq, __func__, stat); 447 return task_error(drive, rq, __func__, stat);
443 448
444 /* Deal with unexpected ATA data phase. */ 449 /* Deal with unexpected ATA data phase. */
445 if (((stat & DRQ_STAT) == 0) ^ !hwif->nleft) 450 if (((stat & ATA_DRQ) == 0) ^ !hwif->nleft)
446 return task_error(drive, rq, __func__, stat); 451 return task_error(drive, rq, __func__, stat);
447 452
448 if (!hwif->nleft) { 453 if (!hwif->nleft) {
@@ -461,7 +466,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, struct request *rq)
461{ 466{
462 ide_startstop_t startstop; 467 ide_startstop_t startstop;
463 468
464 if (ide_wait_stat(&startstop, drive, DRQ_STAT, 469 if (ide_wait_stat(&startstop, drive, ATA_DRQ,
465 drive->bad_wstat, WAIT_DRQ)) { 470 drive->bad_wstat, WAIT_DRQ)) {
466 printk(KERN_ERR "%s: no DRQ after issuing %sWRITE%s\n", 471 printk(KERN_ERR "%s: no DRQ after issuing %sWRITE%s\n",
467 drive->name, 472 drive->name,
@@ -721,110 +726,3 @@ abort:
721 return err; 726 return err;
722} 727}
723#endif 728#endif
724
725int ide_cmd_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg)
726{
727 u8 *buf = NULL;
728 int bufsize = 0, err = 0;
729 u8 args[4], xfer_rate = 0;
730 ide_task_t tfargs;
731 struct ide_taskfile *tf = &tfargs.tf;
732 struct hd_driveid *id = drive->id;
733
734 if (NULL == (void *) arg) {
735 struct request *rq;
736
737 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
738 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
739 err = blk_execute_rq(drive->queue, NULL, rq, 0);
740 blk_put_request(rq);
741
742 return err;
743 }
744
745 if (copy_from_user(args, (void __user *)arg, 4))
746 return -EFAULT;
747
748 memset(&tfargs, 0, sizeof(ide_task_t));
749 tf->feature = args[2];
750 if (args[0] == WIN_SMART) {
751 tf->nsect = args[3];
752 tf->lbal = args[1];
753 tf->lbam = 0x4f;
754 tf->lbah = 0xc2;
755 tfargs.tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_IN_NSECT;
756 } else {
757 tf->nsect = args[1];
758 tfargs.tf_flags = IDE_TFLAG_OUT_FEATURE |
759 IDE_TFLAG_OUT_NSECT | IDE_TFLAG_IN_NSECT;
760 }
761 tf->command = args[0];
762 tfargs.data_phase = args[3] ? TASKFILE_IN : TASKFILE_NO_DATA;
763
764 if (args[3]) {
765 tfargs.tf_flags |= IDE_TFLAG_IO_16BIT;
766 bufsize = SECTOR_WORDS * 4 * args[3];
767 buf = kzalloc(bufsize, GFP_KERNEL);
768 if (buf == NULL)
769 return -ENOMEM;
770 }
771
772 if (tf->command == WIN_SETFEATURES &&
773 tf->feature == SETFEATURES_XFER &&
774 tf->nsect >= XFER_SW_DMA_0 &&
775 (id->dma_ultra || id->dma_mword || id->dma_1word)) {
776 xfer_rate = args[1];
777 if (tf->nsect > XFER_UDMA_2 && !eighty_ninty_three(drive)) {
778 printk(KERN_WARNING "%s: UDMA speeds >UDMA33 cannot "
779 "be set\n", drive->name);
780 goto abort;
781 }
782 }
783
784 err = ide_raw_taskfile(drive, &tfargs, buf, args[3]);
785
786 args[0] = tf->status;
787 args[1] = tf->error;
788 args[2] = tf->nsect;
789
790 if (!err && xfer_rate) {
791 /* active-retuning-calls future */
792 ide_set_xfer_rate(drive, xfer_rate);
793 ide_driveid_update(drive);
794 }
795abort:
796 if (copy_to_user((void __user *)arg, &args, 4))
797 err = -EFAULT;
798 if (buf) {
799 if (copy_to_user((void __user *)(arg + 4), buf, bufsize))
800 err = -EFAULT;
801 kfree(buf);
802 }
803 return err;
804}
805
806int ide_task_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg)
807{
808 void __user *p = (void __user *)arg;
809 int err = 0;
810 u8 args[7];
811 ide_task_t task;
812
813 if (copy_from_user(args, p, 7))
814 return -EFAULT;
815
816 memset(&task, 0, sizeof(task));
817 memcpy(&task.tf_array[7], &args[1], 6);
818 task.tf.command = args[0];
819 task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
820
821 err = ide_no_data_taskfile(drive, &task);
822
823 args[0] = task.tf.command;
824 memcpy(&args[1], &task.tf_array[7], 6);
825
826 if (copy_to_user(p, args, 7))
827 err = -EFAULT;
828
829 return err;
830}
diff --git a/drivers/ide/ide-timings.c b/drivers/ide/ide-timings.c
index 8c2f8327f487..81f527af8fae 100644
--- a/drivers/ide/ide-timings.c
+++ b/drivers/ide/ide-timings.c
@@ -22,7 +22,6 @@
22 */ 22 */
23 23
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/hdreg.h>
26#include <linux/ide.h> 25#include <linux/ide.h>
27#include <linux/module.h> 26#include <linux/module.h>
28 27
@@ -78,15 +77,15 @@ EXPORT_SYMBOL_GPL(ide_timing_find_mode);
78 77
79u16 ide_pio_cycle_time(ide_drive_t *drive, u8 pio) 78u16 ide_pio_cycle_time(ide_drive_t *drive, u8 pio)
80{ 79{
81 struct hd_driveid *id = drive->id; 80 u16 *id = drive->id;
82 struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); 81 struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
83 u16 cycle = 0; 82 u16 cycle = 0;
84 83
85 if (id->field_valid & 2) { 84 if (id[ATA_ID_FIELD_VALID] & 2) {
86 if (id->capability & 8) 85 if (ata_id_has_iordy(drive->id))
87 cycle = id->eide_pio_iordy; 86 cycle = id[ATA_ID_EIDE_PIO_IORDY];
88 else 87 else
89 cycle = id->eide_pio; 88 cycle = id[ATA_ID_EIDE_PIO];
90 89
91 /* conservative "downgrade" for all pre-ATA2 drives */ 90 /* conservative "downgrade" for all pre-ATA2 drives */
92 if (pio < 3 && cycle < t->cycle) 91 if (pio < 3 && cycle < t->cycle)
@@ -138,7 +137,7 @@ EXPORT_SYMBOL_GPL(ide_timing_merge);
138int ide_timing_compute(ide_drive_t *drive, u8 speed, 137int ide_timing_compute(ide_drive_t *drive, u8 speed,
139 struct ide_timing *t, int T, int UT) 138 struct ide_timing *t, int T, int UT)
140{ 139{
141 struct hd_driveid *id = drive->id; 140 u16 *id = drive->id;
142 struct ide_timing *s, p; 141 struct ide_timing *s, p;
143 142
144 /* 143 /*
@@ -157,16 +156,15 @@ int ide_timing_compute(ide_drive_t *drive, u8 speed,
157 * If the drive is an EIDE drive, it can tell us it needs extended 156 * If the drive is an EIDE drive, it can tell us it needs extended
158 * PIO/MWDMA cycle timing. 157 * PIO/MWDMA cycle timing.
159 */ 158 */
160 if (id && id->field_valid & 2) { /* EIDE drive */ 159 if (id[ATA_ID_FIELD_VALID] & 2) { /* EIDE drive */
161
162 memset(&p, 0, sizeof(p)); 160 memset(&p, 0, sizeof(p));
163 161
164 if (speed <= XFER_PIO_2) 162 if (speed <= XFER_PIO_2)
165 p.cycle = p.cyc8b = id->eide_pio; 163 p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO];
166 else if (speed <= XFER_PIO_5) 164 else if (speed <= XFER_PIO_5)
167 p.cycle = p.cyc8b = id->eide_pio_iordy; 165 p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY];
168 else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2) 166 else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
169 p.cycle = id->eide_dma_min; 167 p.cycle = id[ATA_ID_EIDE_DMA_MIN];
170 168
171 ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B); 169 ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B);
172 } 170 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 772451600e4d..9dcf5aed92cb 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -44,8 +44,6 @@
44 * inspiration from lots of linux users, esp. hamish@zot.apana.org.au 44 * inspiration from lots of linux users, esp. hamish@zot.apana.org.au
45 */ 45 */
46 46
47#define _IDE_C /* Tell ide.h it's really us */
48
49#include <linux/module.h> 47#include <linux/module.h>
50#include <linux/types.h> 48#include <linux/types.h>
51#include <linux/string.h> 49#include <linux/string.h>
@@ -58,6 +56,7 @@
58#include <linux/init.h> 56#include <linux/init.h>
59#include <linux/pci.h> 57#include <linux/pci.h>
60#include <linux/ide.h> 58#include <linux/ide.h>
59#include <linux/hdreg.h>
61#include <linux/completion.h> 60#include <linux/completion.h>
62#include <linux/device.h> 61#include <linux/device.h>
63 62
@@ -97,8 +96,6 @@ void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
97 hwif->name[2] = 'e'; 96 hwif->name[2] = 'e';
98 hwif->name[3] = '0' + index; 97 hwif->name[3] = '0' + index;
99 98
100 hwif->bus_state = BUSSTATE_ON;
101
102 init_completion(&hwif->gendev_rel_comp); 99 init_completion(&hwif->gendev_rel_comp);
103 100
104 hwif->tp_ops = &default_tp_ops; 101 hwif->tp_ops = &default_tp_ops;
@@ -119,7 +116,7 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
119 drive->media = ide_disk; 116 drive->media = ide_disk;
120 drive->select.all = (unit<<4)|0xa0; 117 drive->select.all = (unit<<4)|0xa0;
121 drive->hwif = hwif; 118 drive->hwif = hwif;
122 drive->ready_stat = READY_STAT; 119 drive->ready_stat = ATA_DRDY;
123 drive->bad_wstat = BAD_W_STAT; 120 drive->bad_wstat = BAD_W_STAT;
124 drive->special.b.recalibrate = 1; 121 drive->special.b.recalibrate = 1;
125 drive->special.b.set_geometry = 1; 122 drive->special.b.set_geometry = 1;
@@ -253,42 +250,9 @@ void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
253 250
254DEFINE_MUTEX(ide_setting_mtx); 251DEFINE_MUTEX(ide_setting_mtx);
255 252
256EXPORT_SYMBOL_GPL(ide_setting_mtx); 253ide_devset_get(io_32bit, io_32bit);
257
258/**
259 * ide_spin_wait_hwgroup - wait for group
260 * @drive: drive in the group
261 *
262 * Wait for an IDE device group to go non busy and then return
263 * holding the ide_lock which guards the hwgroup->busy status
264 * and right to use it.
265 */
266 254
267int ide_spin_wait_hwgroup (ide_drive_t *drive) 255static int set_io_32bit(ide_drive_t *drive, int arg)
268{
269 ide_hwgroup_t *hwgroup = HWGROUP(drive);
270 unsigned long timeout = jiffies + (3 * HZ);
271
272 spin_lock_irq(&ide_lock);
273
274 while (hwgroup->busy) {
275 unsigned long lflags;
276 spin_unlock_irq(&ide_lock);
277 local_irq_set(lflags);
278 if (time_after(jiffies, timeout)) {
279 local_irq_restore(lflags);
280 printk(KERN_ERR "%s: channel busy\n", drive->name);
281 return -EBUSY;
282 }
283 local_irq_restore(lflags);
284 spin_lock_irq(&ide_lock);
285 }
286 return 0;
287}
288
289EXPORT_SYMBOL(ide_spin_wait_hwgroup);
290
291int set_io_32bit(ide_drive_t *drive, int arg)
292{ 256{
293 if (drive->no_io_32bit) 257 if (drive->no_io_32bit)
294 return -EPERM; 258 return -EPERM;
@@ -296,53 +260,39 @@ int set_io_32bit(ide_drive_t *drive, int arg)
296 if (arg < 0 || arg > 1 + (SUPPORT_VLB_SYNC << 1)) 260 if (arg < 0 || arg > 1 + (SUPPORT_VLB_SYNC << 1))
297 return -EINVAL; 261 return -EINVAL;
298 262
299 if (ide_spin_wait_hwgroup(drive))
300 return -EBUSY;
301
302 drive->io_32bit = arg; 263 drive->io_32bit = arg;
303 264
304 spin_unlock_irq(&ide_lock);
305
306 return 0; 265 return 0;
307} 266}
308 267
268ide_devset_get(ksettings, keep_settings);
269
309static int set_ksettings(ide_drive_t *drive, int arg) 270static int set_ksettings(ide_drive_t *drive, int arg)
310{ 271{
311 if (arg < 0 || arg > 1) 272 if (arg < 0 || arg > 1)
312 return -EINVAL; 273 return -EINVAL;
313 274
314 if (ide_spin_wait_hwgroup(drive))
315 return -EBUSY;
316 drive->keep_settings = arg; 275 drive->keep_settings = arg;
317 spin_unlock_irq(&ide_lock);
318 276
319 return 0; 277 return 0;
320} 278}
321 279
322int set_using_dma(ide_drive_t *drive, int arg) 280ide_devset_get(using_dma, using_dma);
281
282static int set_using_dma(ide_drive_t *drive, int arg)
323{ 283{
324#ifdef CONFIG_BLK_DEV_IDEDMA 284#ifdef CONFIG_BLK_DEV_IDEDMA
325 ide_hwif_t *hwif = drive->hwif;
326 int err = -EPERM; 285 int err = -EPERM;
327 286
328 if (arg < 0 || arg > 1) 287 if (arg < 0 || arg > 1)
329 return -EINVAL; 288 return -EINVAL;
330 289
331 if (!drive->id || !(drive->id->capability & 1)) 290 if (ata_id_has_dma(drive->id) == 0)
332 goto out; 291 goto out;
333 292
334 if (hwif->dma_ops == NULL) 293 if (drive->hwif->dma_ops == NULL)
335 goto out; 294 goto out;
336 295
337 err = -EBUSY;
338 if (ide_spin_wait_hwgroup(drive))
339 goto out;
340 /*
341 * set ->busy flag, unlock and let it ride
342 */
343 hwif->hwgroup->busy = 1;
344 spin_unlock_irq(&ide_lock);
345
346 err = 0; 296 err = 0;
347 297
348 if (arg) { 298 if (arg) {
@@ -351,12 +301,6 @@ int set_using_dma(ide_drive_t *drive, int arg)
351 } else 301 } else
352 ide_dma_off(drive); 302 ide_dma_off(drive);
353 303
354 /*
355 * lock, clear ->busy flag and unlock before leaving
356 */
357 spin_lock_irq(&ide_lock);
358 hwif->hwgroup->busy = 0;
359 spin_unlock_irq(&ide_lock);
360out: 304out:
361 return err; 305 return err;
362#else 306#else
@@ -367,7 +311,7 @@ out:
367#endif 311#endif
368} 312}
369 313
370int set_pio_mode(ide_drive_t *drive, int arg) 314static int set_pio_mode(ide_drive_t *drive, int arg)
371{ 315{
372 struct request *rq; 316 struct request *rq;
373 ide_hwif_t *hwif = drive->hwif; 317 ide_hwif_t *hwif = drive->hwif;
@@ -395,6 +339,8 @@ int set_pio_mode(ide_drive_t *drive, int arg)
395 return 0; 339 return 0;
396} 340}
397 341
342ide_devset_get(unmaskirq, unmask);
343
398static int set_unmaskirq(ide_drive_t *drive, int arg) 344static int set_unmaskirq(ide_drive_t *drive, int arg)
399{ 345{
400 if (drive->no_unmask) 346 if (drive->no_unmask)
@@ -403,14 +349,20 @@ static int set_unmaskirq(ide_drive_t *drive, int arg)
403 if (arg < 0 || arg > 1) 349 if (arg < 0 || arg > 1)
404 return -EINVAL; 350 return -EINVAL;
405 351
406 if (ide_spin_wait_hwgroup(drive))
407 return -EBUSY;
408 drive->unmask = arg; 352 drive->unmask = arg;
409 spin_unlock_irq(&ide_lock);
410 353
411 return 0; 354 return 0;
412} 355}
413 356
357#define ide_gen_devset_rw(_name, _func) \
358__IDE_DEVSET(_name, DS_SYNC, get_##_func, set_##_func)
359
360ide_gen_devset_rw(io_32bit, io_32bit);
361ide_gen_devset_rw(keepsettings, ksettings);
362ide_gen_devset_rw(unmaskirq, unmaskirq);
363ide_gen_devset_rw(using_dma, using_dma);
364__IDE_DEVSET(pio_mode, 0, NULL, set_pio_mode);
365
414static int generic_ide_suspend(struct device *dev, pm_message_t mesg) 366static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
415{ 367{
416 ide_drive_t *drive = dev->driver_data; 368 ide_drive_t *drive = dev->driver_data;
@@ -486,138 +438,6 @@ static int generic_ide_resume(struct device *dev)
486 return err; 438 return err;
487} 439}
488 440
489static int generic_drive_reset(ide_drive_t *drive)
490{
491 struct request *rq;
492 int ret = 0;
493
494 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
495 rq->cmd_type = REQ_TYPE_SPECIAL;
496 rq->cmd_len = 1;
497 rq->cmd[0] = REQ_DRIVE_RESET;
498 rq->cmd_flags |= REQ_SOFTBARRIER;
499 if (blk_execute_rq(drive->queue, NULL, rq, 1))
500 ret = rq->errors;
501 blk_put_request(rq);
502 return ret;
503}
504
505int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device *bdev,
506 unsigned int cmd, unsigned long arg)
507{
508 unsigned long flags;
509 ide_driver_t *drv;
510 void __user *p = (void __user *)arg;
511 int err = 0, (*setfunc)(ide_drive_t *, int);
512 u8 *val;
513
514 switch (cmd) {
515 case HDIO_GET_32BIT: val = &drive->io_32bit; goto read_val;
516 case HDIO_GET_KEEPSETTINGS: val = &drive->keep_settings; goto read_val;
517 case HDIO_GET_UNMASKINTR: val = &drive->unmask; goto read_val;
518 case HDIO_GET_DMA: val = &drive->using_dma; goto read_val;
519 case HDIO_SET_32BIT: setfunc = set_io_32bit; goto set_val;
520 case HDIO_SET_KEEPSETTINGS: setfunc = set_ksettings; goto set_val;
521 case HDIO_SET_PIO_MODE: setfunc = set_pio_mode; goto set_val;
522 case HDIO_SET_UNMASKINTR: setfunc = set_unmaskirq; goto set_val;
523 case HDIO_SET_DMA: setfunc = set_using_dma; goto set_val;
524 }
525
526 switch (cmd) {
527 case HDIO_OBSOLETE_IDENTITY:
528 case HDIO_GET_IDENTITY:
529 if (bdev != bdev->bd_contains)
530 return -EINVAL;
531 if (drive->id_read == 0)
532 return -ENOMSG;
533 if (copy_to_user(p, drive->id, (cmd == HDIO_GET_IDENTITY) ? sizeof(*drive->id) : 142))
534 return -EFAULT;
535 return 0;
536
537 case HDIO_GET_NICE:
538 return put_user(drive->dsc_overlap << IDE_NICE_DSC_OVERLAP |
539 drive->atapi_overlap << IDE_NICE_ATAPI_OVERLAP |
540 drive->nice1 << IDE_NICE_1,
541 (long __user *) arg);
542#ifdef CONFIG_IDE_TASK_IOCTL
543 case HDIO_DRIVE_TASKFILE:
544 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
545 return -EACCES;
546 switch(drive->media) {
547 case ide_disk:
548 return ide_taskfile_ioctl(drive, cmd, arg);
549 default:
550 return -ENOMSG;
551 }
552#endif /* CONFIG_IDE_TASK_IOCTL */
553
554 case HDIO_DRIVE_CMD:
555 if (!capable(CAP_SYS_RAWIO))
556 return -EACCES;
557 return ide_cmd_ioctl(drive, cmd, arg);
558
559 case HDIO_DRIVE_TASK:
560 if (!capable(CAP_SYS_RAWIO))
561 return -EACCES;
562 return ide_task_ioctl(drive, cmd, arg);
563 case HDIO_SET_NICE:
564 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
565 if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1))))
566 return -EPERM;
567 drive->dsc_overlap = (arg >> IDE_NICE_DSC_OVERLAP) & 1;
568 drv = *(ide_driver_t **)bdev->bd_disk->private_data;
569 if (drive->dsc_overlap && !drv->supports_dsc_overlap) {
570 drive->dsc_overlap = 0;
571 return -EPERM;
572 }
573 drive->nice1 = (arg >> IDE_NICE_1) & 1;
574 return 0;
575 case HDIO_DRIVE_RESET:
576 if (!capable(CAP_SYS_ADMIN))
577 return -EACCES;
578
579 return generic_drive_reset(drive);
580
581 case HDIO_GET_BUSSTATE:
582 if (!capable(CAP_SYS_ADMIN))
583 return -EACCES;
584 if (put_user(HWIF(drive)->bus_state, (long __user *)arg))
585 return -EFAULT;
586 return 0;
587
588 case HDIO_SET_BUSSTATE:
589 if (!capable(CAP_SYS_ADMIN))
590 return -EACCES;
591 return -EOPNOTSUPP;
592 default:
593 return -EINVAL;
594 }
595
596read_val:
597 mutex_lock(&ide_setting_mtx);
598 spin_lock_irqsave(&ide_lock, flags);
599 err = *val;
600 spin_unlock_irqrestore(&ide_lock, flags);
601 mutex_unlock(&ide_setting_mtx);
602 return err >= 0 ? put_user(err, (long __user *)arg) : err;
603
604set_val:
605 if (bdev != bdev->bd_contains)
606 err = -EINVAL;
607 else {
608 if (!capable(CAP_SYS_ADMIN))
609 err = -EACCES;
610 else {
611 mutex_lock(&ide_setting_mtx);
612 err = setfunc(drive, arg);
613 mutex_unlock(&ide_setting_mtx);
614 }
615 }
616 return err;
617}
618
619EXPORT_SYMBOL(generic_ide_ioctl);
620
621/** 441/**
622 * ide_device_get - get an additional reference to a ide_drive_t 442 * ide_device_get - get an additional reference to a ide_drive_t
623 * @drive: device to get a reference to 443 * @drive: device to get a reference to
@@ -710,21 +530,21 @@ static ssize_t model_show(struct device *dev, struct device_attribute *attr,
710 char *buf) 530 char *buf)
711{ 531{
712 ide_drive_t *drive = to_ide_device(dev); 532 ide_drive_t *drive = to_ide_device(dev);
713 return sprintf(buf, "%s\n", drive->id->model); 533 return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]);
714} 534}
715 535
716static ssize_t firmware_show(struct device *dev, struct device_attribute *attr, 536static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
717 char *buf) 537 char *buf)
718{ 538{
719 ide_drive_t *drive = to_ide_device(dev); 539 ide_drive_t *drive = to_ide_device(dev);
720 return sprintf(buf, "%s\n", drive->id->fw_rev); 540 return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]);
721} 541}
722 542
723static ssize_t serial_show(struct device *dev, struct device_attribute *attr, 543static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
724 char *buf) 544 char *buf)
725{ 545{
726 ide_drive_t *drive = to_ide_device(dev); 546 ide_drive_t *drive = to_ide_device(dev);
727 return sprintf(buf, "%s\n", drive->id->serial_no); 547 return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]);
728} 548}
729 549
730static struct device_attribute ide_dev_attrs[] = { 550static struct device_attribute ide_dev_attrs[] = {
@@ -841,7 +661,7 @@ MODULE_PARM_DESC(noprobe, "skip probing for a device");
841static unsigned int ide_nowerr; 661static unsigned int ide_nowerr;
842 662
843module_param_call(nowerr, ide_set_dev_param_mask, NULL, &ide_nowerr, 0); 663module_param_call(nowerr, ide_set_dev_param_mask, NULL, &ide_nowerr, 0);
844MODULE_PARM_DESC(nowerr, "ignore the WRERR_STAT bit for a device"); 664MODULE_PARM_DESC(nowerr, "ignore the ATA_DF bit for a device");
845 665
846static unsigned int ide_cdroms; 666static unsigned int ide_cdroms;
847 667
@@ -906,7 +726,7 @@ static void ide_dev_apply_params(ide_drive_t *drive)
906 drive->noprobe = 1; 726 drive->noprobe = 1;
907 } 727 }
908 if (ide_nowerr & (1 << i)) { 728 if (ide_nowerr & (1 << i)) {
909 printk(KERN_INFO "ide: ignoring the WRERR_STAT bit for %s\n", 729 printk(KERN_INFO "ide: ignoring the ATA_DF bit for %s\n",
910 drive->name); 730 drive->name);
911 drive->bad_wstat = BAD_R_STAT; 731 drive->bad_wstat = BAD_R_STAT;
912 } 732 }
@@ -927,7 +747,7 @@ static void ide_dev_apply_params(ide_drive_t *drive)
927 drive->cyl, drive->head, drive->sect); 747 drive->cyl, drive->head, drive->sect);
928 drive->present = 1; 748 drive->present = 1;
929 drive->media = ide_disk; 749 drive->media = ide_disk;
930 drive->ready_stat = READY_STAT; 750 drive->ready_stat = ATA_DRDY;
931 } 751 }
932} 752}
933 753
diff --git a/drivers/ide/legacy/ali14xx.c b/drivers/ide/legacy/ali14xx.c
index 4ec19737f3c5..7276c96aaa2a 100644
--- a/drivers/ide/legacy/ali14xx.c
+++ b/drivers/ide/legacy/ali14xx.c
@@ -43,7 +43,6 @@
43#include <linux/mm.h> 43#include <linux/mm.h>
44#include <linux/ioport.h> 44#include <linux/ioport.h>
45#include <linux/blkdev.h> 45#include <linux/blkdev.h>
46#include <linux/hdreg.h>
47#include <linux/ide.h> 46#include <linux/ide.h>
48#include <linux/init.h> 47#include <linux/init.h>
49 48
diff --git a/drivers/ide/legacy/buddha.c b/drivers/ide/legacy/buddha.c
index 7c2afa97f417..c5a3c9ef6a5d 100644
--- a/drivers/ide/legacy/buddha.c
+++ b/drivers/ide/legacy/buddha.c
@@ -20,7 +20,6 @@
20#include <linux/mm.h> 20#include <linux/mm.h>
21#include <linux/interrupt.h> 21#include <linux/interrupt.h>
22#include <linux/blkdev.h> 22#include <linux/blkdev.h>
23#include <linux/hdreg.h>
24#include <linux/zorro.h> 23#include <linux/zorro.h>
25#include <linux/ide.h> 24#include <linux/ide.h>
26#include <linux/init.h> 25#include <linux/init.h>
diff --git a/drivers/ide/legacy/dtc2278.c b/drivers/ide/legacy/dtc2278.c
index af791a02a120..689b2e493413 100644
--- a/drivers/ide/legacy/dtc2278.c
+++ b/drivers/ide/legacy/dtc2278.c
@@ -10,7 +10,6 @@
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/ioport.h> 11#include <linux/ioport.h>
12#include <linux/blkdev.h> 12#include <linux/blkdev.h>
13#include <linux/hdreg.h>
14#include <linux/ide.h> 13#include <linux/ide.h>
15#include <linux/init.h> 14#include <linux/init.h>
16 15
diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c
index 724f95073d80..39d500d84b07 100644
--- a/drivers/ide/legacy/falconide.c
+++ b/drivers/ide/legacy/falconide.c
@@ -13,7 +13,6 @@
13#include <linux/mm.h> 13#include <linux/mm.h>
14#include <linux/interrupt.h> 14#include <linux/interrupt.h>
15#include <linux/blkdev.h> 15#include <linux/blkdev.h>
16#include <linux/hdreg.h>
17#include <linux/ide.h> 16#include <linux/ide.h>
18#include <linux/init.h> 17#include <linux/init.h>
19 18
diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c
index 51ba085d7aa8..691506886561 100644
--- a/drivers/ide/legacy/gayle.c
+++ b/drivers/ide/legacy/gayle.c
@@ -12,7 +12,6 @@
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <linux/blkdev.h> 14#include <linux/blkdev.h>
15#include <linux/hdreg.h>
16#include <linux/ide.h> 15#include <linux/ide.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/zorro.h> 17#include <linux/zorro.h>
diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c
index 98f7c95e39ed..5123ea291d07 100644
--- a/drivers/ide/legacy/ht6560b.c
+++ b/drivers/ide/legacy/ht6560b.c
@@ -24,7 +24,6 @@
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/ioport.h> 25#include <linux/ioport.h>
26#include <linux/blkdev.h> 26#include <linux/blkdev.h>
27#include <linux/hdreg.h>
28#include <linux/ide.h> 27#include <linux/ide.h>
29#include <linux/init.h> 28#include <linux/init.h>
30 29
diff --git a/drivers/ide/legacy/ide-cs.c b/drivers/ide/legacy/ide-cs.c
index 21bfac137844..ee6fc30d5e2b 100644
--- a/drivers/ide/legacy/ide-cs.c
+++ b/drivers/ide/legacy/ide-cs.c
@@ -38,7 +38,6 @@
38#include <linux/timer.h> 38#include <linux/timer.h>
39#include <linux/ioport.h> 39#include <linux/ioport.h>
40#include <linux/ide.h> 40#include <linux/ide.h>
41#include <linux/hdreg.h>
42#include <linux/major.h> 41#include <linux/major.h>
43#include <linux/delay.h> 42#include <linux/delay.h>
44#include <asm/io.h> 43#include <asm/io.h>
diff --git a/drivers/ide/legacy/macide.c b/drivers/ide/legacy/macide.c
index a0bb167980e7..43f97cc1d30e 100644
--- a/drivers/ide/legacy/macide.c
+++ b/drivers/ide/legacy/macide.c
@@ -15,7 +15,6 @@
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/hdreg.h>
19#include <linux/delay.h> 18#include <linux/delay.h>
20#include <linux/ide.h> 19#include <linux/ide.h>
21 20
diff --git a/drivers/ide/legacy/q40ide.c b/drivers/ide/legacy/q40ide.c
index 4abd8fc78197..4af4a8ce4cdf 100644
--- a/drivers/ide/legacy/q40ide.c
+++ b/drivers/ide/legacy/q40ide.c
@@ -14,8 +14,6 @@
14#include <linux/mm.h> 14#include <linux/mm.h>
15#include <linux/interrupt.h> 15#include <linux/interrupt.h>
16#include <linux/blkdev.h> 16#include <linux/blkdev.h>
17#include <linux/hdreg.h>
18
19#include <linux/ide.h> 17#include <linux/ide.h>
20 18
21 /* 19 /*
diff --git a/drivers/ide/legacy/qd65xx.c b/drivers/ide/legacy/qd65xx.c
index 2338f344ea24..ec408b3a7100 100644
--- a/drivers/ide/legacy/qd65xx.c
+++ b/drivers/ide/legacy/qd65xx.c
@@ -27,7 +27,6 @@
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/ioport.h> 28#include <linux/ioport.h>
29#include <linux/blkdev.h> 29#include <linux/blkdev.h>
30#include <linux/hdreg.h>
31#include <linux/ide.h> 30#include <linux/ide.h>
32#include <linux/init.h> 31#include <linux/init.h>
33#include <asm/system.h> 32#include <asm/system.h>
@@ -151,12 +150,14 @@ static int qd_find_disk_type (ide_drive_t *drive,
151 int *active_time, int *recovery_time) 150 int *active_time, int *recovery_time)
152{ 151{
153 struct qd65xx_timing_s *p; 152 struct qd65xx_timing_s *p;
154 char model[40]; 153 char *m = (char *)&drive->id[ATA_ID_PROD];
154 char model[ATA_ID_PROD_LEN];
155 155
156 if (!*drive->id->model) return 0; 156 if (*m == 0)
157 return 0;
157 158
158 strncpy(model,drive->id->model,40); 159 strncpy(model, m, ATA_ID_PROD_LEN);
159 ide_fixstring(model,40,1); /* byte-swap */ 160 ide_fixstring(model, ATA_ID_PROD_LEN, 1); /* byte-swap */
160 161
161 for (p = qd65xx_timing ; p->offset != -1 ; p++) { 162 for (p = qd65xx_timing ; p->offset != -1 ; p++) {
162 if (!strncmp(p->model, model+p->offset, 4)) { 163 if (!strncmp(p->model, model+p->offset, 4)) {
@@ -185,20 +186,20 @@ static void qd_set_timing (ide_drive_t *drive, u8 timing)
185 186
186static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio) 187static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio)
187{ 188{
189 u16 *id = drive->id;
188 int active_time = 175; 190 int active_time = 175;
189 int recovery_time = 415; /* worst case values from the dos driver */ 191 int recovery_time = 415; /* worst case values from the dos driver */
190 192
191 /* 193 /*
192 * FIXME: use "pio" value 194 * FIXME: use "pio" value
193 */ 195 */
194 if (drive->id && !qd_find_disk_type(drive, &active_time, &recovery_time) 196 if (!qd_find_disk_type(drive, &active_time, &recovery_time) &&
195 && drive->id->tPIO && (drive->id->field_valid & 0x02) 197 (id[ATA_ID_OLD_PIO_MODES] & 0xff) && (id[ATA_ID_FIELD_VALID] & 2) &&
196 && drive->id->eide_pio >= 240) { 198 id[ATA_ID_EIDE_PIO] >= 240) {
197
198 printk(KERN_INFO "%s: PIO mode%d\n", drive->name, 199 printk(KERN_INFO "%s: PIO mode%d\n", drive->name,
199 drive->id->tPIO); 200 id[ATA_ID_OLD_PIO_MODES] & 0xff);
200 active_time = 110; 201 active_time = 110;
201 recovery_time = drive->id->eide_pio - 120; 202 recovery_time = drive->id[ATA_ID_EIDE_PIO] - 120;
202 } 203 }
203 204
204 qd_set_timing(drive, qd6500_compute_timing(HWIF(drive), active_time, recovery_time)); 205 qd_set_timing(drive, qd6500_compute_timing(HWIF(drive), active_time, recovery_time));
diff --git a/drivers/ide/legacy/umc8672.c b/drivers/ide/legacy/umc8672.c
index b54a14a57755..1da076e0c917 100644
--- a/drivers/ide/legacy/umc8672.c
+++ b/drivers/ide/legacy/umc8672.c
@@ -45,7 +45,6 @@
45#include <linux/mm.h> 45#include <linux/mm.h>
46#include <linux/ioport.h> 46#include <linux/ioport.h>
47#include <linux/blkdev.h> 47#include <linux/blkdev.h>
48#include <linux/hdreg.h>
49#include <linux/ide.h> 48#include <linux/ide.h>
50#include <linux/init.h> 49#include <linux/init.h>
51 50
diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index 3187215e8f89..e7475ba559c7 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -7,7 +7,6 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/types.h> 8#include <linux/types.h>
9#include <linux/pci.h> 9#include <linux/pci.h>
10#include <linux/hdreg.h>
11#include <linux/ide.h> 10#include <linux/ide.h>
12#include <linux/init.h> 11#include <linux/init.h>
13 12
@@ -140,7 +139,7 @@ static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio)
140 drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0); 139 drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0);
141} 140}
142 141
143static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev) 142static unsigned int init_chipset_aec62xx(struct pci_dev *dev)
144{ 143{
145 /* These are necessary to get AEC6280 Macintosh cards to work */ 144 /* These are necessary to get AEC6280 Macintosh cards to work */
146 if ((dev->device == PCI_DEVICE_ID_ARTOP_ATP865) || 145 if ((dev->device == PCI_DEVICE_ID_ARTOP_ATP865) ||
@@ -308,6 +307,8 @@ static struct pci_driver driver = {
308 .id_table = aec62xx_pci_tbl, 307 .id_table = aec62xx_pci_tbl,
309 .probe = aec62xx_init_one, 308 .probe = aec62xx_init_one,
310 .remove = __devexit_p(aec62xx_remove), 309 .remove = __devexit_p(aec62xx_remove),
310 .suspend = ide_pci_suspend,
311 .resume = ide_pci_resume,
311}; 312};
312 313
313static int __init aec62xx_ide_init(void) 314static int __init aec62xx_ide_init(void)
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index d647526af557..053c75263918 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -31,7 +31,6 @@
31#include <linux/types.h> 31#include <linux/types.h>
32#include <linux/kernel.h> 32#include <linux/kernel.h>
33#include <linux/pci.h> 33#include <linux/pci.h>
34#include <linux/hdreg.h>
35#include <linux/ide.h> 34#include <linux/ide.h>
36#include <linux/init.h> 35#include <linux/init.h>
37#include <linux/dmi.h> 36#include <linux/dmi.h>
@@ -134,8 +133,8 @@ static u8 ali_udma_filter(ide_drive_t *drive)
134 if (m5229_revision > 0x20 && m5229_revision < 0xC2) { 133 if (m5229_revision > 0x20 && m5229_revision < 0xC2) {
135 if (drive->media != ide_disk) 134 if (drive->media != ide_disk)
136 return 0; 135 return 0;
137 if (chip_is_1543c_e && strstr(drive->id->model, "WDC ") && 136 if (wdc_udma == 0 && chip_is_1543c_e &&
138 wdc_udma == 0) 137 strstr((char *)&drive->id[ATA_ID_PROD], "WDC "))
139 return 0; 138 return 0;
140 } 139 }
141 140
@@ -214,7 +213,7 @@ static int ali15x3_dma_setup(ide_drive_t *drive)
214 * appropriate also sets up the 1533 southbridge. 213 * appropriate also sets up the 1533 southbridge.
215 */ 214 */
216 215
217static unsigned int __devinit init_chipset_ali15x3(struct pci_dev *dev) 216static unsigned int init_chipset_ali15x3(struct pci_dev *dev)
218{ 217{
219 unsigned long flags; 218 unsigned long flags;
220 u8 tmpbyte; 219 u8 tmpbyte;
@@ -582,6 +581,8 @@ static struct pci_driver driver = {
582 .id_table = alim15x3_pci_tbl, 581 .id_table = alim15x3_pci_tbl,
583 .probe = alim15x3_init_one, 582 .probe = alim15x3_init_one,
584 .remove = ide_pci_remove, 583 .remove = ide_pci_remove,
584 .suspend = ide_pci_suspend,
585 .resume = ide_pci_resume,
585}; 586};
586 587
587static int __init ali15x3_ide_init(void) 588static int __init ali15x3_ide_init(void)
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index 1e66a960a96a..824471f91bf5 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -112,13 +112,13 @@ static void amd_set_pio_mode(ide_drive_t *drive, const u8 pio)
112 amd_set_drive(drive, XFER_PIO_0 + pio); 112 amd_set_drive(drive, XFER_PIO_0 + pio);
113} 113}
114 114
115static void __devinit amd7409_cable_detect(struct pci_dev *dev) 115static void amd7409_cable_detect(struct pci_dev *dev)
116{ 116{
117 /* no host side cable detection */ 117 /* no host side cable detection */
118 amd_80w = 0x03; 118 amd_80w = 0x03;
119} 119}
120 120
121static void __devinit amd7411_cable_detect(struct pci_dev *dev) 121static void amd7411_cable_detect(struct pci_dev *dev)
122{ 122{
123 int i; 123 int i;
124 u32 u = 0; 124 u32 u = 0;
@@ -140,7 +140,7 @@ static void __devinit amd7411_cable_detect(struct pci_dev *dev)
140 * The initialization callback. Initialize drive independent registers. 140 * The initialization callback. Initialize drive independent registers.
141 */ 141 */
142 142
143static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev) 143static unsigned int init_chipset_amd74xx(struct pci_dev *dev)
144{ 144{
145 u8 t = 0, offset = amd_offset(dev); 145 u8 t = 0, offset = amd_offset(dev);
146 146
@@ -324,6 +324,8 @@ static struct pci_driver driver = {
324 .id_table = amd74xx_pci_tbl, 324 .id_table = amd74xx_pci_tbl,
325 .probe = amd74xx_probe, 325 .probe = amd74xx_probe,
326 .remove = ide_pci_remove, 326 .remove = ide_pci_remove,
327 .suspend = ide_pci_suspend,
328 .resume = ide_pci_resume,
327}; 329};
328 330
329static int __init amd74xx_ide_init(void) 331static int __init amd74xx_ide_init(void)
diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c
index 41f6cb6c163a..e4437034dd08 100644
--- a/drivers/ide/pci/atiixp.c
+++ b/drivers/ide/pci/atiixp.c
@@ -7,7 +7,6 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/kernel.h> 8#include <linux/kernel.h>
9#include <linux/pci.h> 9#include <linux/pci.h>
10#include <linux/hdreg.h>
11#include <linux/ide.h> 10#include <linux/ide.h>
12#include <linux/init.h> 11#include <linux/init.h>
13 12
@@ -188,6 +187,8 @@ static struct pci_driver driver = {
188 .id_table = atiixp_pci_tbl, 187 .id_table = atiixp_pci_tbl,
189 .probe = atiixp_init_one, 188 .probe = atiixp_init_one,
190 .remove = ide_pci_remove, 189 .remove = ide_pci_remove,
190 .suspend = ide_pci_suspend,
191 .resume = ide_pci_resume,
191}; 192};
192 193
193static int __init atiixp_ide_init(void) 194static int __init atiixp_ide_init(void)
diff --git a/drivers/ide/pci/cmd640.c b/drivers/ide/pci/cmd640.c
index e6c62006ca1a..7f39cdb41410 100644
--- a/drivers/ide/pci/cmd640.c
+++ b/drivers/ide/pci/cmd640.c
@@ -103,7 +103,6 @@
103#include <linux/types.h> 103#include <linux/types.h>
104#include <linux/kernel.h> 104#include <linux/kernel.h>
105#include <linux/delay.h> 105#include <linux/delay.h>
106#include <linux/hdreg.h>
107#include <linux/ide.h> 106#include <linux/ide.h>
108#include <linux/init.h> 107#include <linux/init.h>
109 108
@@ -375,6 +374,21 @@ static void cmd640_dump_regs(void)
375} 374}
376#endif 375#endif
377 376
377static void __set_prefetch_mode(ide_drive_t *drive, int mode)
378{
379 if (mode) { /* want prefetch on? */
380#if CMD640_PREFETCH_MASKS
381 drive->no_unmask = 1;
382 drive->unmask = 0;
383#endif
384 drive->no_io_32bit = 0;
385 } else {
386 drive->no_unmask = 0;
387 drive->no_io_32bit = 1;
388 drive->io_32bit = 0;
389 }
390}
391
378#ifndef CONFIG_BLK_DEV_CMD640_ENHANCED 392#ifndef CONFIG_BLK_DEV_CMD640_ENHANCED
379/* 393/*
380 * Check whether prefetch is on for a drive, 394 * Check whether prefetch is on for a drive,
@@ -384,19 +398,10 @@ static void __init check_prefetch(ide_drive_t *drive, unsigned int index)
384{ 398{
385 u8 b = get_cmd640_reg(prefetch_regs[index]); 399 u8 b = get_cmd640_reg(prefetch_regs[index]);
386 400
387 if (b & prefetch_masks[index]) { /* is prefetch off? */ 401 __set_prefetch_mode(drive, (b & prefetch_masks[index]) ? 0 : 1);
388 drive->no_unmask = 0;
389 drive->no_io_32bit = 1;
390 drive->io_32bit = 0;
391 } else {
392#if CMD640_PREFETCH_MASKS
393 drive->no_unmask = 1;
394 drive->unmask = 0;
395#endif
396 drive->no_io_32bit = 0;
397 }
398} 402}
399#else 403#else
404
400/* 405/*
401 * Sets prefetch mode for a drive. 406 * Sets prefetch mode for a drive.
402 */ 407 */
@@ -408,19 +413,11 @@ static void set_prefetch_mode(ide_drive_t *drive, unsigned int index, int mode)
408 413
409 spin_lock_irqsave(&cmd640_lock, flags); 414 spin_lock_irqsave(&cmd640_lock, flags);
410 b = __get_cmd640_reg(reg); 415 b = __get_cmd640_reg(reg);
411 if (mode) { /* want prefetch on? */ 416 __set_prefetch_mode(drive, mode);
412#if CMD640_PREFETCH_MASKS 417 if (mode)
413 drive->no_unmask = 1;
414 drive->unmask = 0;
415#endif
416 drive->no_io_32bit = 0;
417 b &= ~prefetch_masks[index]; /* enable prefetch */ 418 b &= ~prefetch_masks[index]; /* enable prefetch */
418 } else { 419 else
419 drive->no_unmask = 0;
420 drive->no_io_32bit = 1;
421 drive->io_32bit = 0;
422 b |= prefetch_masks[index]; /* disable prefetch */ 420 b |= prefetch_masks[index]; /* disable prefetch */
423 }
424 __put_cmd640_reg(reg, b); 421 __put_cmd640_reg(reg, b);
425 spin_unlock_irqrestore(&cmd640_lock, flags); 422 spin_unlock_irqrestore(&cmd640_lock, flags);
426} 423}
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index e064398e03b4..456dee18b660 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -13,7 +13,6 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/types.h> 14#include <linux/types.h>
15#include <linux/pci.h> 15#include <linux/pci.h>
16#include <linux/hdreg.h>
17#include <linux/ide.h> 16#include <linux/ide.h>
18#include <linux/init.h> 17#include <linux/init.h>
19 18
@@ -332,7 +331,7 @@ static int cmd646_1_dma_end(ide_drive_t *drive)
332 return (dma_stat & 7) != 4; 331 return (dma_stat & 7) != 4;
333} 332}
334 333
335static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev) 334static unsigned int init_chipset_cmd64x(struct pci_dev *dev)
336{ 335{
337 u8 mrdmode = 0; 336 u8 mrdmode = 0;
338 337
@@ -511,6 +510,8 @@ static struct pci_driver driver = {
511 .id_table = cmd64x_pci_tbl, 510 .id_table = cmd64x_pci_tbl,
512 .probe = cmd64x_init_one, 511 .probe = cmd64x_init_one,
513 .remove = ide_pci_remove, 512 .remove = ide_pci_remove,
513 .suspend = ide_pci_suspend,
514 .resume = ide_pci_resume,
514}; 515};
515 516
516static int __init cmd64x_ide_init(void) 517static int __init cmd64x_ide_init(void)
diff --git a/drivers/ide/pci/cs5520.c b/drivers/ide/pci/cs5520.c
index 151844fcbb07..d6341f7c4144 100644
--- a/drivers/ide/pci/cs5520.c
+++ b/drivers/ide/pci/cs5520.c
@@ -35,7 +35,6 @@
35#include <linux/module.h> 35#include <linux/module.h>
36#include <linux/types.h> 36#include <linux/types.h>
37#include <linux/kernel.h> 37#include <linux/kernel.h>
38#include <linux/hdreg.h>
39#include <linux/init.h> 38#include <linux/init.h>
40#include <linux/pci.h> 39#include <linux/pci.h>
41#include <linux/ide.h> 40#include <linux/ide.h>
@@ -150,6 +149,8 @@ static struct pci_driver driver = {
150 .name = "Cyrix_IDE", 149 .name = "Cyrix_IDE",
151 .id_table = cs5520_pci_tbl, 150 .id_table = cs5520_pci_tbl,
152 .probe = cs5520_init_one, 151 .probe = cs5520_init_one,
152 .suspend = ide_pci_suspend,
153 .resume = ide_pci_resume,
153}; 154};
154 155
155static int __init cs5520_ide_init(void) 156static int __init cs5520_ide_init(void)
diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c
index f235db8c678b..da42fa7e9f97 100644
--- a/drivers/ide/pci/cs5530.c
+++ b/drivers/ide/pci/cs5530.c
@@ -15,7 +15,6 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/hdreg.h>
19#include <linux/pci.h> 18#include <linux/pci.h>
20#include <linux/init.h> 19#include <linux/init.h>
21#include <linux/ide.h> 20#include <linux/ide.h>
@@ -81,17 +80,19 @@ static void cs5530_set_pio_mode(ide_drive_t *drive, const u8 pio)
81static u8 cs5530_udma_filter(ide_drive_t *drive) 80static u8 cs5530_udma_filter(ide_drive_t *drive)
82{ 81{
83 ide_hwif_t *hwif = drive->hwif; 82 ide_hwif_t *hwif = drive->hwif;
84 ide_drive_t *mate = &hwif->drives[(drive->dn & 1) ^ 1]; 83 ide_drive_t *mate = ide_get_pair_dev(drive);
85 struct hd_driveid *mateid = mate->id; 84 u16 *mateid = mate->id;
86 u8 mask = hwif->ultra_mask; 85 u8 mask = hwif->ultra_mask;
87 86
88 if (mate->present == 0) 87 if (mate == NULL)
89 goto out; 88 goto out;
90 89
91 if ((mateid->capability & 1) && __ide_dma_bad_drive(mate) == 0) { 90 if (ata_id_has_dma(mateid) && __ide_dma_bad_drive(mate) == 0) {
92 if ((mateid->field_valid & 4) && (mateid->dma_ultra & 7)) 91 if ((mateid[ATA_ID_FIELD_VALID] & 4) &&
92 (mateid[ATA_ID_UDMA_MODES] & 7))
93 goto out; 93 goto out;
94 if ((mateid->field_valid & 2) && (mateid->dma_mword & 7)) 94 if ((mateid[ATA_ID_FIELD_VALID] & 2) &&
95 (mateid[ATA_ID_MWDMA_MODES] & 7))
95 mask = 0; 96 mask = 0;
96 } 97 }
97out: 98out:
@@ -133,7 +134,7 @@ static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode)
133 * Initialize the cs5530 bridge for reliable IDE DMA operation. 134 * Initialize the cs5530 bridge for reliable IDE DMA operation.
134 */ 135 */
135 136
136static unsigned int __devinit init_chipset_cs5530(struct pci_dev *dev) 137static unsigned int init_chipset_cs5530(struct pci_dev *dev)
137{ 138{
138 struct pci_dev *master_0 = NULL, *cs5530_0 = NULL; 139 struct pci_dev *master_0 = NULL, *cs5530_0 = NULL;
139 140
@@ -271,6 +272,8 @@ static struct pci_driver driver = {
271 .id_table = cs5530_pci_tbl, 272 .id_table = cs5530_pci_tbl,
272 .probe = cs5530_init_one, 273 .probe = cs5530_init_one,
273 .remove = ide_pci_remove, 274 .remove = ide_pci_remove,
275 .suspend = ide_pci_suspend,
276 .resume = ide_pci_resume,
274}; 277};
275 278
276static int __init cs5530_ide_init(void) 279static int __init cs5530_ide_init(void)
diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c
index dd3dc23af995..1e5bc59ea2fb 100644
--- a/drivers/ide/pci/cs5535.c
+++ b/drivers/ide/pci/cs5535.c
@@ -80,12 +80,12 @@ static void cs5535_set_speed(ide_drive_t *drive, const u8 speed)
80 80
81 /* Set the PIO timings */ 81 /* Set the PIO timings */
82 if (speed < XFER_SW_DMA_0) { 82 if (speed < XFER_SW_DMA_0) {
83 ide_drive_t *pair = ide_get_paired_drive(drive); 83 ide_drive_t *pair = ide_get_pair_dev(drive);
84 u8 cmd, pioa; 84 u8 cmd, pioa;
85 85
86 cmd = pioa = speed - XFER_PIO_0; 86 cmd = pioa = speed - XFER_PIO_0;
87 87
88 if (pair->present) { 88 if (pair) {
89 u8 piob = ide_get_best_pio_mode(pair, 255, 4); 89 u8 piob = ide_get_best_pio_mode(pair, 255, 4);
90 90
91 if (piob < cmd) 91 if (piob < cmd)
@@ -193,10 +193,12 @@ static const struct pci_device_id cs5535_pci_tbl[] = {
193MODULE_DEVICE_TABLE(pci, cs5535_pci_tbl); 193MODULE_DEVICE_TABLE(pci, cs5535_pci_tbl);
194 194
195static struct pci_driver driver = { 195static struct pci_driver driver = {
196 .name = "CS5535_IDE", 196 .name = "CS5535_IDE",
197 .id_table = cs5535_pci_tbl, 197 .id_table = cs5535_pci_tbl,
198 .probe = cs5535_init_one, 198 .probe = cs5535_init_one,
199 .remove = ide_pci_remove, 199 .remove = ide_pci_remove,
200 .suspend = ide_pci_suspend,
201 .resume = ide_pci_resume,
200}; 202};
201 203
202static int __init cs5535_ide_init(void) 204static int __init cs5535_ide_init(void)
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index e6d8ee88d56d..69820e9224d1 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -332,7 +332,7 @@ static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio)
332/* 332/*
333 * this function is called during init and is used to setup the cy82c693 chip 333 * this function is called during init and is used to setup the cy82c693 chip
334 */ 334 */
335static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev) 335static unsigned int init_chipset_cy82c693(struct pci_dev *dev)
336{ 336{
337 if (PCI_FUNC(dev->devfn) != 1) 337 if (PCI_FUNC(dev->devfn) != 1)
338 return 0; 338 return 0;
@@ -448,6 +448,8 @@ static struct pci_driver driver = {
448 .id_table = cy82c693_pci_tbl, 448 .id_table = cy82c693_pci_tbl,
449 .probe = cy82c693_init_one, 449 .probe = cy82c693_init_one,
450 .remove = __devexit_p(cy82c693_remove), 450 .remove = __devexit_p(cy82c693_remove),
451 .suspend = ide_pci_suspend,
452 .resume = ide_pci_resume,
451}; 453};
452 454
453static int __init cy82c693_ide_init(void) 455static int __init cy82c693_ide_init(void)
diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c
index f84bfb4f600f..83b63b365e51 100644
--- a/drivers/ide/pci/delkin_cb.c
+++ b/drivers/ide/pci/delkin_cb.c
@@ -19,7 +19,6 @@
19 19
20#include <linux/types.h> 20#include <linux/types.h>
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/hdreg.h>
23#include <linux/ide.h> 22#include <linux/ide.h>
24#include <linux/init.h> 23#include <linux/init.h>
25#include <linux/pci.h> 24#include <linux/pci.h>
diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c
index b07d4f4273b3..092b238cb250 100644
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -22,7 +22,6 @@
22#include <linux/types.h> 22#include <linux/types.h>
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/hdreg.h>
26#include <linux/pci.h> 25#include <linux/pci.h>
27#include <linux/ide.h> 26#include <linux/ide.h>
28#include <linux/init.h> 27#include <linux/init.h>
@@ -172,6 +171,8 @@ static struct pci_driver driver = {
172 .id_table = generic_pci_tbl, 171 .id_table = generic_pci_tbl,
173 .probe = generic_init_one, 172 .probe = generic_init_one,
174 .remove = ide_pci_remove, 173 .remove = ide_pci_remove,
174 .suspend = ide_pci_suspend,
175 .resume = ide_pci_resume,
175}; 176};
176 177
177static int __init generic_ide_init(void) 178static int __init generic_ide_init(void)
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
index 6009b0b9655d..644de29f8fe4 100644
--- a/drivers/ide/pci/hpt34x.c
+++ b/drivers/ide/pci/hpt34x.c
@@ -27,7 +27,6 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/kernel.h> 28#include <linux/kernel.h>
29#include <linux/ioport.h> 29#include <linux/ioport.h>
30#include <linux/hdreg.h>
31#include <linux/interrupt.h> 30#include <linux/interrupt.h>
32#include <linux/pci.h> 31#include <linux/pci.h>
33#include <linux/init.h> 32#include <linux/init.h>
@@ -79,7 +78,7 @@ static void hpt34x_set_pio_mode(ide_drive_t *drive, const u8 pio)
79 */ 78 */
80#define HPT34X_PCI_INIT_REG 0x80 79#define HPT34X_PCI_INIT_REG 0x80
81 80
82static unsigned int __devinit init_chipset_hpt34x(struct pci_dev *dev) 81static unsigned int init_chipset_hpt34x(struct pci_dev *dev)
83{ 82{
84 int i = 0; 83 int i = 0;
85 unsigned long hpt34xIoBase = pci_resource_start(dev, 4); 84 unsigned long hpt34xIoBase = pci_resource_start(dev, 4);
@@ -172,6 +171,8 @@ static struct pci_driver driver = {
172 .id_table = hpt34x_pci_tbl, 171 .id_table = hpt34x_pci_tbl,
173 .probe = hpt34x_init_one, 172 .probe = hpt34x_init_one,
174 .remove = ide_pci_remove, 173 .remove = ide_pci_remove,
174 .suspend = ide_pci_suspend,
175 .resume = ide_pci_resume,
175}; 176};
176 177
177static int __init hpt34x_ide_init(void) 178static int __init hpt34x_ide_init(void)
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index c37ab1743819..a194022b6a61 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -122,7 +122,6 @@
122#include <linux/kernel.h> 122#include <linux/kernel.h>
123#include <linux/delay.h> 123#include <linux/delay.h>
124#include <linux/blkdev.h> 124#include <linux/blkdev.h>
125#include <linux/hdreg.h>
126#include <linux/interrupt.h> 125#include <linux/interrupt.h>
127#include <linux/pci.h> 126#include <linux/pci.h>
128#include <linux/init.h> 127#include <linux/init.h>
@@ -605,10 +604,10 @@ static const struct hpt_info hpt371n __devinitdata = {
605 604
606static int check_in_drive_list(ide_drive_t *drive, const char **list) 605static int check_in_drive_list(ide_drive_t *drive, const char **list)
607{ 606{
608 struct hd_driveid *id = drive->id; 607 char *m = (char *)&drive->id[ATA_ID_PROD];
609 608
610 while (*list) 609 while (*list)
611 if (!strcmp(*list++,id->model)) 610 if (!strcmp(*list++, m))
612 return 1; 611 return 1;
613 return 0; 612 return 0;
614} 613}
@@ -655,7 +654,7 @@ static u8 hpt3xx_udma_filter(ide_drive_t *drive)
655 case HPT372A: 654 case HPT372A:
656 case HPT372N: 655 case HPT372N:
657 case HPT374 : 656 case HPT374 :
658 if (ide_dev_is_sata(drive->id)) 657 if (ata_id_is_sata(drive->id))
659 mask &= ~0x0e; 658 mask &= ~0x0e;
660 /* Fall thru */ 659 /* Fall thru */
661 default: 660 default:
@@ -675,7 +674,7 @@ static u8 hpt3xx_mdma_filter(ide_drive_t *drive)
675 case HPT372A: 674 case HPT372A:
676 case HPT372N: 675 case HPT372N:
677 case HPT374 : 676 case HPT374 :
678 if (ide_dev_is_sata(drive->id)) 677 if (ata_id_is_sata(drive->id))
679 return 0x00; 678 return 0x00;
680 /* Fall thru */ 679 /* Fall thru */
681 default: 680 default:
@@ -731,11 +730,11 @@ static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
731 730
732static void hpt3xx_quirkproc(ide_drive_t *drive) 731static void hpt3xx_quirkproc(ide_drive_t *drive)
733{ 732{
734 struct hd_driveid *id = drive->id; 733 char *m = (char *)&drive->id[ATA_ID_PROD];
735 const char **list = quirk_drives; 734 const char **list = quirk_drives;
736 735
737 while (*list) 736 while (*list)
738 if (strstr(id->model, *list++)) { 737 if (strstr(m, *list++)) {
739 drive->quirk_list = 1; 738 drive->quirk_list = 1;
740 return; 739 return;
741 } 740 }
@@ -944,7 +943,7 @@ static void hpt3xxn_rw_disk(ide_drive_t *drive, struct request *rq)
944 * Perform a calibration cycle on the DPLL. 943 * Perform a calibration cycle on the DPLL.
945 * Returns 1 if this succeeds 944 * Returns 1 if this succeeds
946 */ 945 */
947static int __devinit hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f_high) 946static int hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f_high)
948{ 947{
949 u32 dpll = (f_high << 16) | f_low | 0x100; 948 u32 dpll = (f_high << 16) | f_low | 0x100;
950 u8 scr2; 949 u8 scr2;
@@ -972,7 +971,37 @@ static int __devinit hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f
972 return 1; 971 return 1;
973} 972}
974 973
975static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev) 974static void hpt3xx_disable_fast_irq(struct pci_dev *dev, u8 mcr_addr)
975{
976 struct ide_host *host = pci_get_drvdata(dev);
977 struct hpt_info *info = host->host_priv + (&dev->dev == host->dev[1]);
978 u8 chip_type = info->chip_type;
979 u8 new_mcr, old_mcr = 0;
980
981 /*
982 * Disable the "fast interrupt" prediction. Don't hold off
983 * on interrupts. (== 0x01 despite what the docs say)
984 */
985 pci_read_config_byte(dev, mcr_addr + 1, &old_mcr);
986
987 if (chip_type >= HPT374)
988 new_mcr = old_mcr & ~0x07;
989 else if (chip_type >= HPT370) {
990 new_mcr = old_mcr;
991 new_mcr &= ~0x02;
992#ifdef HPT_DELAY_INTERRUPT
993 new_mcr &= ~0x01;
994#else
995 new_mcr |= 0x01;
996#endif
997 } else /* HPT366 and HPT368 */
998 new_mcr = old_mcr & ~0x80;
999
1000 if (new_mcr != old_mcr)
1001 pci_write_config_byte(dev, mcr_addr + 1, new_mcr);
1002}
1003
1004static unsigned int init_chipset_hpt366(struct pci_dev *dev)
976{ 1005{
977 unsigned long io_base = pci_resource_start(dev, 4); 1006 unsigned long io_base = pci_resource_start(dev, 4);
978 struct hpt_info *info = hpt3xx_get_info(&dev->dev); 1007 struct hpt_info *info = hpt3xx_get_info(&dev->dev);
@@ -1209,9 +1238,11 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev)
1209 * NOTE: This register is only writeable via I/O space. 1238 * NOTE: This register is only writeable via I/O space.
1210 */ 1239 */
1211 if (chip_type == HPT371N && clock == ATA_CLOCK_66MHZ) 1240 if (chip_type == HPT371N && clock == ATA_CLOCK_66MHZ)
1212
1213 outb(inb(io_base + 0x9c) | 0x04, io_base + 0x9c); 1241 outb(inb(io_base + 0x9c) | 0x04, io_base + 0x9c);
1214 1242
1243 hpt3xx_disable_fast_irq(dev, 0x50);
1244 hpt3xx_disable_fast_irq(dev, 0x54);
1245
1215 return dev->irq; 1246 return dev->irq;
1216} 1247}
1217 1248
@@ -1265,7 +1296,6 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
1265 struct hpt_info *info = hpt3xx_get_info(hwif->dev); 1296 struct hpt_info *info = hpt3xx_get_info(hwif->dev);
1266 int serialize = HPT_SERIALIZE_IO; 1297 int serialize = HPT_SERIALIZE_IO;
1267 u8 chip_type = info->chip_type; 1298 u8 chip_type = info->chip_type;
1268 u8 new_mcr, old_mcr = 0;
1269 1299
1270 /* Cache the channel's MISC. control registers' offset */ 1300 /* Cache the channel's MISC. control registers' offset */
1271 hwif->select_data = hwif->channel ? 0x54 : 0x50; 1301 hwif->select_data = hwif->channel ? 0x54 : 0x50;
@@ -1288,29 +1318,6 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
1288 /* Serialize access to this device if needed */ 1318 /* Serialize access to this device if needed */
1289 if (serialize && hwif->mate) 1319 if (serialize && hwif->mate)
1290 hwif->serialized = hwif->mate->serialized = 1; 1320 hwif->serialized = hwif->mate->serialized = 1;
1291
1292 /*
1293 * Disable the "fast interrupt" prediction. Don't hold off
1294 * on interrupts. (== 0x01 despite what the docs say)
1295 */
1296 pci_read_config_byte(dev, hwif->select_data + 1, &old_mcr);
1297
1298 if (info->chip_type >= HPT374)
1299 new_mcr = old_mcr & ~0x07;
1300 else if (info->chip_type >= HPT370) {
1301 new_mcr = old_mcr;
1302 new_mcr &= ~0x02;
1303
1304#ifdef HPT_DELAY_INTERRUPT
1305 new_mcr &= ~0x01;
1306#else
1307 new_mcr |= 0x01;
1308#endif
1309 } else /* HPT366 and HPT368 */
1310 new_mcr = old_mcr & ~0x80;
1311
1312 if (new_mcr != old_mcr)
1313 pci_write_config_byte(dev, hwif->select_data + 1, new_mcr);
1314} 1321}
1315 1322
1316static int __devinit init_dma_hpt366(ide_hwif_t *hwif, 1323static int __devinit init_dma_hpt366(ide_hwif_t *hwif,
@@ -1620,6 +1627,8 @@ static struct pci_driver driver = {
1620 .id_table = hpt366_pci_tbl, 1627 .id_table = hpt366_pci_tbl,
1621 .probe = hpt366_init_one, 1628 .probe = hpt366_init_one,
1622 .remove = __devexit_p(hpt366_remove), 1629 .remove = __devexit_p(hpt366_remove),
1630 .suspend = ide_pci_suspend,
1631 .resume = ide_pci_resume,
1623}; 1632};
1624 1633
1625static int __init hpt366_ide_init(void) 1634static int __init hpt366_ide_init(void)
diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index 652e47dd7e89..0954ccd08d6f 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -10,7 +10,6 @@
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/pci.h> 12#include <linux/pci.h>
13#include <linux/hdreg.h>
14#include <linux/ide.h> 13#include <linux/ide.h>
15#include <linux/init.h> 14#include <linux/init.h>
16 15
@@ -195,6 +194,8 @@ static struct pci_driver driver = {
195 .id_table = it8213_pci_tbl, 194 .id_table = it8213_pci_tbl,
196 .probe = it8213_init_one, 195 .probe = it8213_init_one,
197 .remove = ide_pci_remove, 196 .remove = ide_pci_remove,
197 .suspend = ide_pci_suspend,
198 .resume = ide_pci_resume,
198}; 199};
199 200
200static int __init it8213_ide_init(void) 201static int __init it8213_ide_init(void)
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index 4a1508a707cc..46edd083b348 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -63,7 +63,6 @@
63#include <linux/types.h> 63#include <linux/types.h>
64#include <linux/module.h> 64#include <linux/module.h>
65#include <linux/pci.h> 65#include <linux/pci.h>
66#include <linux/hdreg.h>
67#include <linux/ide.h> 66#include <linux/ide.h>
68#include <linux/init.h> 67#include <linux/init.h>
69 68
@@ -446,8 +445,7 @@ static u8 it821x_cable_detect(ide_hwif_t *hwif)
446static void it821x_quirkproc(ide_drive_t *drive) 445static void it821x_quirkproc(ide_drive_t *drive)
447{ 446{
448 struct it821x_dev *itdev = ide_get_hwifdata(drive->hwif); 447 struct it821x_dev *itdev = ide_get_hwifdata(drive->hwif);
449 struct hd_driveid *id = drive->id; 448 u16 *id = drive->id;
450 u16 *idbits = (u16 *)drive->id;
451 449
452 if (!itdev->smart) { 450 if (!itdev->smart) {
453 /* 451 /*
@@ -466,36 +464,36 @@ static void it821x_quirkproc(ide_drive_t *drive)
466 */ 464 */
467 465
468 /* Check for RAID v native */ 466 /* Check for RAID v native */
469 if(strstr(id->model, "Integrated Technology Express")) { 467 if (strstr((char *)&id[ATA_ID_PROD],
468 "Integrated Technology Express")) {
470 /* In raid mode the ident block is slightly buggy 469 /* In raid mode the ident block is slightly buggy
471 We need to set the bits so that the IDE layer knows 470 We need to set the bits so that the IDE layer knows
472 LBA28. LBA48 and DMA ar valid */ 471 LBA28. LBA48 and DMA ar valid */
473 id->capability |= 3; /* LBA28, DMA */ 472 id[ATA_ID_CAPABILITY] |= (3 << 8); /* LBA28, DMA */
474 id->command_set_2 |= 0x0400; /* LBA48 valid */ 473 id[ATA_ID_COMMAND_SET_2] |= 0x0400; /* LBA48 valid */
475 id->cfs_enable_2 |= 0x0400; /* LBA48 on */ 474 id[ATA_ID_CFS_ENABLE_2] |= 0x0400; /* LBA48 on */
476 /* Reporting logic */ 475 /* Reporting logic */
477 printk(KERN_INFO "%s: IT8212 %sRAID %d volume", 476 printk(KERN_INFO "%s: IT8212 %sRAID %d volume",
478 drive->name, 477 drive->name, id[147] ? "Bootable " : "",
479 idbits[147] ? "Bootable ":"", 478 id[ATA_ID_CSFO]);
480 idbits[129]); 479 if (id[ATA_ID_CSFO] != 1)
481 if(idbits[129] != 1) 480 printk(KERN_CONT "(%dK stripe)", id[146]);
482 printk("(%dK stripe)", idbits[146]); 481 printk(KERN_CONT ".\n");
483 printk(".\n");
484 } else { 482 } else {
485 /* Non RAID volume. Fixups to stop the core code 483 /* Non RAID volume. Fixups to stop the core code
486 doing unsupported things */ 484 doing unsupported things */
487 id->field_valid &= 3; 485 id[ATA_ID_FIELD_VALID] &= 3;
488 id->queue_depth = 0; 486 id[ATA_ID_QUEUE_DEPTH] = 0;
489 id->command_set_1 = 0; 487 id[ATA_ID_COMMAND_SET_1] = 0;
490 id->command_set_2 &= 0xC400; 488 id[ATA_ID_COMMAND_SET_2] &= 0xC400;
491 id->cfsse &= 0xC000; 489 id[ATA_ID_CFSSE] &= 0xC000;
492 id->cfs_enable_1 = 0; 490 id[ATA_ID_CFS_ENABLE_1] = 0;
493 id->cfs_enable_2 &= 0xC400; 491 id[ATA_ID_CFS_ENABLE_2] &= 0xC400;
494 id->csf_default &= 0xC000; 492 id[ATA_ID_CSF_DEFAULT] &= 0xC000;
495 id->word127 = 0; 493 id[127] = 0;
496 id->dlf = 0; 494 id[ATA_ID_DLF] = 0;
497 id->csfo = 0; 495 id[ATA_ID_CSFO] = 0;
498 id->cfa_power = 0; 496 id[ATA_ID_CFA_POWER] = 0;
499 printk(KERN_INFO "%s: Performing identify fixups.\n", 497 printk(KERN_INFO "%s: Performing identify fixups.\n",
500 drive->name); 498 drive->name);
501 } 499 }
@@ -505,8 +503,8 @@ static void it821x_quirkproc(ide_drive_t *drive)
505 * IDE core that DMA is supported (it821x hardware 503 * IDE core that DMA is supported (it821x hardware
506 * takes care of DMA mode programming). 504 * takes care of DMA mode programming).
507 */ 505 */
508 if (id->capability & 1) { 506 if (ata_id_has_dma(id)) {
509 id->dma_mword |= 0x0101; 507 id[ATA_ID_MWDMA_MODES] |= 0x0101;
510 drive->current_speed = XFER_MW_DMA_0; 508 drive->current_speed = XFER_MW_DMA_0;
511 } 509 }
512 } 510 }
@@ -588,7 +586,7 @@ static void __devinit init_hwif_it821x(ide_hwif_t *hwif)
588 hwif->mwdma_mask = ATA_MWDMA2; 586 hwif->mwdma_mask = ATA_MWDMA2;
589} 587}
590 588
591static void __devinit it8212_disable_raid(struct pci_dev *dev) 589static void it8212_disable_raid(struct pci_dev *dev)
592{ 590{
593 /* Reset local CPU, and set BIOS not ready */ 591 /* Reset local CPU, and set BIOS not ready */
594 pci_write_config_byte(dev, 0x5E, 0x01); 592 pci_write_config_byte(dev, 0x5E, 0x01);
@@ -605,7 +603,7 @@ static void __devinit it8212_disable_raid(struct pci_dev *dev)
605 pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20); 603 pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20);
606} 604}
607 605
608static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev) 606static unsigned int init_chipset_it821x(struct pci_dev *dev)
609{ 607{
610 u8 conf; 608 u8 conf;
611 static char *mode[2] = { "pass through", "smart" }; 609 static char *mode[2] = { "pass through", "smart" };
@@ -687,6 +685,8 @@ static struct pci_driver driver = {
687 .id_table = it821x_pci_tbl, 685 .id_table = it821x_pci_tbl,
688 .probe = it821x_init_one, 686 .probe = it821x_init_one,
689 .remove = __devexit_p(it821x_remove), 687 .remove = __devexit_p(it821x_remove),
688 .suspend = ide_pci_suspend,
689 .resume = ide_pci_resume,
690}; 690};
691 691
692static int __init it821x_ide_init(void) 692static int __init it821x_ide_init(void)
diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c
index bb9d09d8f196..acd647110648 100644
--- a/drivers/ide/pci/jmicron.c
+++ b/drivers/ide/pci/jmicron.c
@@ -8,7 +8,6 @@
8#include <linux/types.h> 8#include <linux/types.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/pci.h> 10#include <linux/pci.h>
11#include <linux/hdreg.h>
12#include <linux/ide.h> 11#include <linux/ide.h>
13#include <linux/init.h> 12#include <linux/init.h>
14 13
@@ -155,6 +154,8 @@ static struct pci_driver driver = {
155 .id_table = jmicron_pci_tbl, 154 .id_table = jmicron_pci_tbl,
156 .probe = jmicron_init_one, 155 .probe = jmicron_init_one,
157 .remove = ide_pci_remove, 156 .remove = ide_pci_remove,
157 .suspend = ide_pci_suspend,
158 .resume = ide_pci_resume,
158}; 159};
159 160
160static int __init jmicron_ide_init(void) 161static int __init jmicron_ide_init(void)
diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c
index ffefcd15196c..53bd645736d9 100644
--- a/drivers/ide/pci/ns87415.c
+++ b/drivers/ide/pci/ns87415.c
@@ -11,7 +11,6 @@
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <linux/hdreg.h>
15#include <linux/pci.h> 14#include <linux/pci.h>
16#include <linux/delay.h> 15#include <linux/delay.h>
17#include <linux/ide.h> 16#include <linux/ide.h>
@@ -274,9 +273,9 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif)
274 do { 273 do {
275 udelay(50); 274 udelay(50);
276 stat = hwif->tp_ops->read_status(hwif); 275 stat = hwif->tp_ops->read_status(hwif);
277 if (stat == 0xff) 276 if (stat == 0xff)
278 break; 277 break;
279 } while ((stat & BUSY_STAT) && --timeout); 278 } while ((stat & ATA_BUSY) && --timeout);
280#endif 279#endif
281 } 280 }
282 281
@@ -340,6 +339,8 @@ static struct pci_driver driver = {
340 .id_table = ns87415_pci_tbl, 339 .id_table = ns87415_pci_tbl,
341 .probe = ns87415_init_one, 340 .probe = ns87415_init_one,
342 .remove = ide_pci_remove, 341 .remove = ide_pci_remove,
342 .suspend = ide_pci_suspend,
343 .resume = ide_pci_resume,
343}; 344};
344 345
345static int __init ns87415_ide_init(void) 346static int __init ns87415_ide_init(void)
diff --git a/drivers/ide/pci/opti621.c b/drivers/ide/pci/opti621.c
index e28e672ddafc..3de11ddcf863 100644
--- a/drivers/ide/pci/opti621.c
+++ b/drivers/ide/pci/opti621.c
@@ -85,7 +85,6 @@
85#include <linux/module.h> 85#include <linux/module.h>
86#include <linux/kernel.h> 86#include <linux/kernel.h>
87#include <linux/pci.h> 87#include <linux/pci.h>
88#include <linux/hdreg.h>
89#include <linux/ide.h> 88#include <linux/ide.h>
90 89
91#include <asm/io.h> 90#include <asm/io.h>
@@ -137,7 +136,7 @@ static u8 read_reg(int reg)
137static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio) 136static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio)
138{ 137{
139 ide_hwif_t *hwif = drive->hwif; 138 ide_hwif_t *hwif = drive->hwif;
140 ide_drive_t *pair = ide_get_paired_drive(drive); 139 ide_drive_t *pair = ide_get_pair_dev(drive);
141 unsigned long flags; 140 unsigned long flags;
142 u8 tim, misc, addr_pio = pio, clk; 141 u8 tim, misc, addr_pio = pio, clk;
143 142
@@ -153,7 +152,7 @@ static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio)
153 152
154 drive->drive_data = XFER_PIO_0 + pio; 153 drive->drive_data = XFER_PIO_0 + pio;
155 154
156 if (pair->present) { 155 if (pair) {
157 if (pair->drive_data && pair->drive_data < drive->drive_data) 156 if (pair->drive_data && pair->drive_data < drive->drive_data)
158 addr_pio = pair->drive_data - XFER_PIO_0; 157 addr_pio = pair->drive_data - XFER_PIO_0;
159 } 158 }
@@ -226,6 +225,8 @@ static struct pci_driver driver = {
226 .id_table = opti621_pci_tbl, 225 .id_table = opti621_pci_tbl,
227 .probe = opti621_init_one, 226 .probe = opti621_init_one,
228 .remove = ide_pci_remove, 227 .remove = ide_pci_remove,
228 .suspend = ide_pci_suspend,
229 .resume = ide_pci_resume,
229}; 230};
230 231
231static int __init opti621_ide_init(void) 232static int __init opti621_ide_init(void)
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index d477da6b5858..9fc59962553b 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -19,7 +19,6 @@
19#include <linux/types.h> 19#include <linux/types.h>
20#include <linux/kernel.h> 20#include <linux/kernel.h>
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/hdreg.h>
23#include <linux/pci.h> 22#include <linux/pci.h>
24#include <linux/init.h> 23#include <linux/init.h>
25#include <linux/ide.h> 24#include <linux/ide.h>
@@ -203,10 +202,10 @@ static u8 pdcnew_cable_detect(ide_hwif_t *hwif)
203 202
204static void pdcnew_quirkproc(ide_drive_t *drive) 203static void pdcnew_quirkproc(ide_drive_t *drive)
205{ 204{
206 const char **list, *model = drive->id->model; 205 const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
207 206
208 for (list = pdc_quirk_drives; *list != NULL; list++) 207 for (list = pdc_quirk_drives; *list != NULL; list++)
209 if (strstr(model, *list) != NULL) { 208 if (strstr(m, *list) != NULL) {
210 drive->quirk_list = 2; 209 drive->quirk_list = 2;
211 return; 210 return;
212 } 211 }
@@ -227,7 +226,7 @@ static void pdcnew_reset(ide_drive_t *drive)
227 * read_counter - Read the byte count registers 226 * read_counter - Read the byte count registers
228 * @dma_base: for the port address 227 * @dma_base: for the port address
229 */ 228 */
230static long __devinit read_counter(u32 dma_base) 229static long read_counter(u32 dma_base)
231{ 230{
232 u32 pri_dma_base = dma_base, sec_dma_base = dma_base + 0x08; 231 u32 pri_dma_base = dma_base, sec_dma_base = dma_base + 0x08;
233 u8 cnt0, cnt1, cnt2, cnt3; 232 u8 cnt0, cnt1, cnt2, cnt3;
@@ -267,7 +266,7 @@ static long __devinit read_counter(u32 dma_base)
267 * @dma_base: for the port address 266 * @dma_base: for the port address
268 * E.g. 16949000 on 33 MHz PCI bus, i.e. half of the PCI clock. 267 * E.g. 16949000 on 33 MHz PCI bus, i.e. half of the PCI clock.
269 */ 268 */
270static long __devinit detect_pll_input_clock(unsigned long dma_base) 269static long detect_pll_input_clock(unsigned long dma_base)
271{ 270{
272 struct timeval start_time, end_time; 271 struct timeval start_time, end_time;
273 long start_count, end_count; 272 long start_count, end_count;
@@ -310,7 +309,7 @@ static long __devinit detect_pll_input_clock(unsigned long dma_base)
310} 309}
311 310
312#ifdef CONFIG_PPC_PMAC 311#ifdef CONFIG_PPC_PMAC
313static void __devinit apple_kiwi_init(struct pci_dev *pdev) 312static void apple_kiwi_init(struct pci_dev *pdev)
314{ 313{
315 struct device_node *np = pci_device_to_OF_node(pdev); 314 struct device_node *np = pci_device_to_OF_node(pdev);
316 u8 conf; 315 u8 conf;
@@ -326,7 +325,7 @@ static void __devinit apple_kiwi_init(struct pci_dev *pdev)
326} 325}
327#endif /* CONFIG_PPC_PMAC */ 326#endif /* CONFIG_PPC_PMAC */
328 327
329static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev) 328static unsigned int init_chipset_pdcnew(struct pci_dev *dev)
330{ 329{
331 const char *name = DRV_NAME; 330 const char *name = DRV_NAME;
332 unsigned long dma_base = pci_resource_start(dev, 4); 331 unsigned long dma_base = pci_resource_start(dev, 4);
@@ -567,6 +566,8 @@ static struct pci_driver driver = {
567 .id_table = pdc202new_pci_tbl, 566 .id_table = pdc202new_pci_tbl,
568 .probe = pdc202new_init_one, 567 .probe = pdc202new_init_one,
569 .remove = __devexit_p(pdc202new_remove), 568 .remove = __devexit_p(pdc202new_remove),
569 .suspend = ide_pci_suspend,
570 .resume = ide_pci_resume,
570}; 571};
571 572
572static int __init pdc202new_ide_init(void) 573static int __init pdc202new_ide_init(void)
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index de9a27400462..cb6d2a00c514 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -13,7 +13,6 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/delay.h> 14#include <linux/delay.h>
15#include <linux/blkdev.h> 15#include <linux/blkdev.h>
16#include <linux/hdreg.h>
17#include <linux/pci.h> 16#include <linux/pci.h>
18#include <linux/init.h> 17#include <linux/init.h>
19#include <linux/ide.h> 18#include <linux/ide.h>
@@ -86,7 +85,7 @@ static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
86 * Prefetch_EN / IORDY_EN / PA[3:0] bits of register A 85 * Prefetch_EN / IORDY_EN / PA[3:0] bits of register A
87 */ 86 */
88 AP &= ~0x3f; 87 AP &= ~0x3f;
89 if (drive->id->capability & 4) 88 if (ata_id_iordy_disable(drive->id))
90 AP |= 0x20; /* set IORDY_EN bit */ 89 AP |= 0x20; /* set IORDY_EN bit */
91 if (drive->media == ide_disk) 90 if (drive->media == ide_disk)
92 AP |= 0x10; /* set Prefetch_EN bit */ 91 AP |= 0x10; /* set Prefetch_EN bit */
@@ -154,10 +153,10 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *hwif)
154 153
155static void pdc202xx_quirkproc(ide_drive_t *drive) 154static void pdc202xx_quirkproc(ide_drive_t *drive)
156{ 155{
157 const char **list, *model = drive->id->model; 156 const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
158 157
159 for (list = pdc_quirk_drives; *list != NULL; list++) 158 for (list = pdc_quirk_drives; *list != NULL; list++)
160 if (strstr(model, *list) != NULL) { 159 if (strstr(m, *list) != NULL) {
161 drive->quirk_list = 2; 160 drive->quirk_list = 2;
162 return; 161 return;
163 } 162 }
@@ -265,7 +264,7 @@ static void pdc202xx_dma_timeout(ide_drive_t *drive)
265 ide_dma_timeout(drive); 264 ide_dma_timeout(drive);
266} 265}
267 266
268static unsigned int __devinit init_chipset_pdc202xx(struct pci_dev *dev) 267static unsigned int init_chipset_pdc202xx(struct pci_dev *dev)
269{ 268{
270 unsigned long dmabase = pci_resource_start(dev, 4); 269 unsigned long dmabase = pci_resource_start(dev, 4);
271 u8 udma_speed_flag = 0, primary_mode = 0, secondary_mode = 0; 270 u8 udma_speed_flag = 0, primary_mode = 0, secondary_mode = 0;
@@ -432,6 +431,8 @@ static struct pci_driver driver = {
432 .id_table = pdc202xx_pci_tbl, 431 .id_table = pdc202xx_pci_tbl,
433 .probe = pdc202xx_init_one, 432 .probe = pdc202xx_init_one,
434 .remove = ide_pci_remove, 433 .remove = ide_pci_remove,
434 .suspend = ide_pci_suspend,
435 .resume = ide_pci_resume,
435}; 436};
436 437
437static int __init pdc202xx_ide_init(void) 438static int __init pdc202xx_ide_init(void)
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index 30cfc815fe31..a06c03f8e295 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -48,7 +48,6 @@
48#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/kernel.h> 49#include <linux/kernel.h>
50#include <linux/pci.h> 50#include <linux/pci.h>
51#include <linux/hdreg.h>
52#include <linux/ide.h> 51#include <linux/ide.h>
53#include <linux/init.h> 52#include <linux/init.h>
54 53
@@ -205,7 +204,7 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
205 * out to be nice and simple. 204 * out to be nice and simple.
206 */ 205 */
207 206
208static unsigned int __devinit init_chipset_ich(struct pci_dev *dev) 207static unsigned int init_chipset_ich(struct pci_dev *dev)
209{ 208{
210 u32 extra = 0; 209 u32 extra = 0;
211 210
@@ -450,6 +449,8 @@ static struct pci_driver driver = {
450 .id_table = piix_pci_tbl, 449 .id_table = piix_pci_tbl,
451 .probe = piix_init_one, 450 .probe = piix_init_one,
452 .remove = ide_pci_remove, 451 .remove = ide_pci_remove,
452 .suspend = ide_pci_suspend,
453 .resume = ide_pci_resume,
453}; 454};
454 455
455static int __init piix_ide_init(void) 456static int __init piix_ide_init(void)
diff --git a/drivers/ide/pci/rz1000.c b/drivers/ide/pci/rz1000.c
index 8d11ee838a2a..c117a068761b 100644
--- a/drivers/ide/pci/rz1000.c
+++ b/drivers/ide/pci/rz1000.c
@@ -16,7 +16,6 @@
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/kernel.h> 18#include <linux/kernel.h>
19#include <linux/hdreg.h>
20#include <linux/pci.h> 19#include <linux/pci.h>
21#include <linux/ide.h> 20#include <linux/ide.h>
22#include <linux/init.h> 21#include <linux/init.h>
diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c
index 8efaed16fea3..bdc1fed41260 100644
--- a/drivers/ide/pci/sc1200.c
+++ b/drivers/ide/pci/sc1200.c
@@ -14,7 +14,6 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/hdreg.h>
18#include <linux/pci.h> 17#include <linux/pci.h>
19#include <linux/init.h> 18#include <linux/init.h>
20#include <linux/ide.h> 19#include <linux/ide.h>
@@ -104,17 +103,19 @@ static void sc1200_tunepio(ide_drive_t *drive, u8 pio)
104static u8 sc1200_udma_filter(ide_drive_t *drive) 103static u8 sc1200_udma_filter(ide_drive_t *drive)
105{ 104{
106 ide_hwif_t *hwif = drive->hwif; 105 ide_hwif_t *hwif = drive->hwif;
107 ide_drive_t *mate = &hwif->drives[(drive->dn & 1) ^ 1]; 106 ide_drive_t *mate = ide_get_pair_dev(drive);
108 struct hd_driveid *mateid = mate->id; 107 u16 *mateid = mate->id;
109 u8 mask = hwif->ultra_mask; 108 u8 mask = hwif->ultra_mask;
110 109
111 if (mate->present == 0) 110 if (mate == NULL)
112 goto out; 111 goto out;
113 112
114 if ((mateid->capability & 1) && __ide_dma_bad_drive(mate) == 0) { 113 if (ata_id_has_dma(mateid) && __ide_dma_bad_drive(mate) == 0) {
115 if ((mateid->field_valid & 4) && (mateid->dma_ultra & 7)) 114 if ((mateid[ATA_ID_FIELD_VALID] & 4) &&
115 (mateid[ATA_ID_UDMA_MODES] & 7))
116 goto out; 116 goto out;
117 if ((mateid->field_valid & 2) && (mateid->dma_mword & 7)) 117 if ((mateid[ATA_ID_FIELD_VALID] & 2) &&
118 (mateid[ATA_ID_MWDMA_MODES] & 7))
118 mask = 0; 119 mask = 0;
119 } 120 }
120out: 121out:
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index 44cccd1e086a..e92a874b31df 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -26,7 +26,6 @@
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/pci.h> 27#include <linux/pci.h>
28#include <linux/delay.h> 28#include <linux/delay.h>
29#include <linux/hdreg.h>
30#include <linux/ide.h> 29#include <linux/ide.h>
31#include <linux/init.h> 30#include <linux/init.h>
32 31
@@ -400,7 +399,7 @@ static int scc_dma_end(ide_drive_t *drive)
400 /* errata A308 workaround: Step5 (check data loss) */ 399 /* errata A308 workaround: Step5 (check data loss) */
401 /* We don't check non ide_disk because it is limited to UDMA4 */ 400 /* We don't check non ide_disk because it is limited to UDMA4 */
402 if (!(in_be32((void __iomem *)hwif->io_ports.ctl_addr) 401 if (!(in_be32((void __iomem *)hwif->io_ports.ctl_addr)
403 & ERR_STAT) && 402 & ATA_ERR) &&
404 drive->media == ide_disk && drive->current_speed > XFER_UDMA_4) { 403 drive->media == ide_disk && drive->current_speed > XFER_UDMA_4) {
405 reg = in_be32((void __iomem *)intsts_port); 404 reg = in_be32((void __iomem *)intsts_port);
406 if (!(reg & INTSTS_ACTEINT)) { 405 if (!(reg & INTSTS_ACTEINT)) {
@@ -504,7 +503,7 @@ static int scc_dma_test_irq(ide_drive_t *drive)
504 503
505 /* SCC errata A252,A308 workaround: Step4 */ 504 /* SCC errata A252,A308 workaround: Step4 */
506 if ((in_be32((void __iomem *)hwif->io_ports.ctl_addr) 505 if ((in_be32((void __iomem *)hwif->io_ports.ctl_addr)
507 & ERR_STAT) && 506 & ATA_ERR) &&
508 (int_stat & INTSTS_INTRQ)) 507 (int_stat & INTSTS_INTRQ))
509 return 1; 508 return 1;
510 509
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index c3bdc6e51a48..3dff2aea317e 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -32,7 +32,6 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/pci.h> 34#include <linux/pci.h>
35#include <linux/hdreg.h>
36#include <linux/ide.h> 35#include <linux/ide.h>
37#include <linux/init.h> 36#include <linux/init.h>
38 37
@@ -57,8 +56,10 @@ static struct pci_dev *isa_dev;
57 56
58static int check_in_drive_lists (ide_drive_t *drive, const char **list) 57static int check_in_drive_lists (ide_drive_t *drive, const char **list)
59{ 58{
59 char *m = (char *)&drive->id[ATA_ID_PROD];
60
60 while (*list) 61 while (*list)
61 if (!strcmp(*list++, drive->id->model)) 62 if (!strcmp(*list++, m))
62 return 1; 63 return 1;
63 return 0; 64 return 0;
64} 65}
@@ -174,7 +175,7 @@ static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed)
174 pci_write_config_byte(dev, 0x54, ultra_enable); 175 pci_write_config_byte(dev, 0x54, ultra_enable);
175} 176}
176 177
177static unsigned int __devinit init_chipset_svwks(struct pci_dev *dev) 178static unsigned int init_chipset_svwks(struct pci_dev *dev)
178{ 179{
179 unsigned int reg; 180 unsigned int reg;
180 u8 btr; 181 u8 btr;
@@ -447,6 +448,8 @@ static struct pci_driver driver = {
447 .id_table = svwks_pci_tbl, 448 .id_table = svwks_pci_tbl,
448 .probe = svwks_init_one, 449 .probe = svwks_init_one,
449 .remove = ide_pci_remove, 450 .remove = ide_pci_remove,
451 .suspend = ide_pci_suspend,
452 .resume = ide_pci_resume,
450}; 453};
451 454
452static int __init svwks_ide_init(void) 455static int __init svwks_ide_init(void)
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index 681306c9d79b..1017fb4f6317 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -22,7 +22,6 @@
22#include <linux/types.h> 22#include <linux/types.h>
23#include <linux/pci.h> 23#include <linux/pci.h>
24#include <linux/delay.h> 24#include <linux/delay.h>
25#include <linux/hdreg.h>
26#include <linux/init.h> 25#include <linux/init.h>
27#include <linux/kernel.h> 26#include <linux/kernel.h>
28#include <linux/ioport.h> 27#include <linux/ioport.h>
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index db2b88a369ab..174a873b4c64 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -39,7 +39,6 @@
39#include <linux/types.h> 39#include <linux/types.h>
40#include <linux/module.h> 40#include <linux/module.h>
41#include <linux/pci.h> 41#include <linux/pci.h>
42#include <linux/hdreg.h>
43#include <linux/ide.h> 42#include <linux/ide.h>
44#include <linux/init.h> 43#include <linux/init.h>
45#include <linux/io.h> 44#include <linux/io.h>
@@ -223,7 +222,9 @@ static u8 sil_pata_udma_filter(ide_drive_t *drive)
223 222
224static u8 sil_sata_udma_filter(ide_drive_t *drive) 223static u8 sil_sata_udma_filter(ide_drive_t *drive)
225{ 224{
226 return strstr(drive->id->model, "Maxtor") ? ATA_UDMA5 : ATA_UDMA6; 225 char *m = (char *)&drive->id[ATA_ID_PROD];
226
227 return strstr(m, "Maxtor") ? ATA_UDMA5 : ATA_UDMA6;
227} 228}
228 229
229/** 230/**
@@ -243,7 +244,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
243 244
244 ide_hwif_t *hwif = HWIF(drive); 245 ide_hwif_t *hwif = HWIF(drive);
245 struct pci_dev *dev = to_pci_dev(hwif->dev); 246 struct pci_dev *dev = to_pci_dev(hwif->dev);
246 ide_drive_t *pair = ide_get_paired_drive(drive); 247 ide_drive_t *pair = ide_get_pair_dev(drive);
247 u32 speedt = 0; 248 u32 speedt = 0;
248 u16 speedp = 0; 249 u16 speedp = 0;
249 unsigned long addr = siimage_seldev(drive, 0x04); 250 unsigned long addr = siimage_seldev(drive, 0x04);
@@ -257,7 +258,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
257 u8 unit = drive->select.b.unit; 258 u8 unit = drive->select.b.unit;
258 259
259 /* trim *taskfile* PIO to the slowest of the master/slave */ 260 /* trim *taskfile* PIO to the slowest of the master/slave */
260 if (pair->present) { 261 if (pair) {
261 u8 pair_pio = ide_get_best_pio_mode(pair, 255, 4); 262 u8 pair_pio = ide_get_best_pio_mode(pair, 255, 4);
262 263
263 if (pair_pio < tf_pio) 264 if (pair_pio < tf_pio)
@@ -462,7 +463,7 @@ static void sil_sata_pre_reset(ide_drive_t *drive)
462 * to 133 MHz clocking if the system isn't already set up to do it. 463 * to 133 MHz clocking if the system isn't already set up to do it.
463 */ 464 */
464 465
465static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev) 466static unsigned int init_chipset_siimage(struct pci_dev *dev)
466{ 467{
467 struct ide_host *host = pci_get_drvdata(dev); 468 struct ide_host *host = pci_get_drvdata(dev);
468 void __iomem *ioaddr = host->host_priv; 469 void __iomem *ioaddr = host->host_priv;
@@ -616,8 +617,8 @@ static void __devinit init_mmio_iops_siimage(ide_hwif_t *hwif)
616 617
617static int is_dev_seagate_sata(ide_drive_t *drive) 618static int is_dev_seagate_sata(ide_drive_t *drive)
618{ 619{
619 const char *s = &drive->id->model[0]; 620 const char *s = (const char *)&drive->id[ATA_ID_PROD];
620 unsigned len = strnlen(s, sizeof(drive->id->model)); 621 unsigned len = strnlen(s, ATA_ID_PROD_LEN);
621 622
622 if ((len > 4) && (!memcmp(s, "ST", 2))) 623 if ((len > 4) && (!memcmp(s, "ST", 2)))
623 if ((!memcmp(s + len - 2, "AS", 2)) || 624 if ((!memcmp(s + len - 2, "AS", 2)) ||
@@ -833,6 +834,8 @@ static struct pci_driver driver = {
833 .id_table = siimage_pci_tbl, 834 .id_table = siimage_pci_tbl,
834 .probe = siimage_init_one, 835 .probe = siimage_init_one,
835 .remove = __devexit_p(siimage_remove), 836 .remove = __devexit_p(siimage_remove),
837 .suspend = ide_pci_suspend,
838 .resume = ide_pci_resume,
836}; 839};
837 840
838static int __init siimage_ide_init(void) 841static int __init siimage_ide_init(void)
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index 5efe21d6ef97..734dd41f1f67 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -47,7 +47,6 @@
47#include <linux/types.h> 47#include <linux/types.h>
48#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/kernel.h> 49#include <linux/kernel.h>
50#include <linux/hdreg.h>
51#include <linux/pci.h> 50#include <linux/pci.h>
52#include <linux/init.h> 51#include <linux/init.h>
53#include <linux/ide.h> 52#include <linux/ide.h>
@@ -448,7 +447,7 @@ static int __devinit sis_find_family(struct pci_dev *dev)
448 return chipset_family; 447 return chipset_family;
449} 448}
450 449
451static unsigned int __devinit init_chipset_sis5513(struct pci_dev *dev) 450static unsigned int init_chipset_sis5513(struct pci_dev *dev)
452{ 451{
453 /* Make general config ops here 452 /* Make general config ops here
454 1/ tell IDE channels to operate in Compatibility mode only 453 1/ tell IDE channels to operate in Compatibility mode only
@@ -611,6 +610,8 @@ static struct pci_driver driver = {
611 .id_table = sis5513_pci_tbl, 610 .id_table = sis5513_pci_tbl,
612 .probe = sis5513_init_one, 611 .probe = sis5513_init_one,
613 .remove = __devexit_p(sis5513_remove), 612 .remove = __devexit_p(sis5513_remove),
613 .suspend = ide_pci_suspend,
614 .resume = ide_pci_resume,
614}; 615};
615 616
616static int __init sis5513_ide_init(void) 617static int __init sis5513_ide_init(void)
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c
index 73905bcc08fb..37a6b7bdc040 100644
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -17,7 +17,6 @@
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/hdreg.h>
21#include <linux/pci.h> 20#include <linux/pci.h>
22#include <linux/ide.h> 21#include <linux/ide.h>
23 22
@@ -62,7 +61,7 @@ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio)
62 if (cmd_off == 0) 61 if (cmd_off == 0)
63 cmd_off = 1; 62 cmd_off = 1;
64 63
65 if (pio > 2 || ide_dev_has_iordy(drive->id)) 64 if (pio > 2 || ata_id_has_iordy(drive->id))
66 iordy = 0x40; 65 iordy = 0x40;
67 66
68 return (cmd_on - 1) << 8 | (cmd_off - 1) | iordy; 67 return (cmd_on - 1) << 8 | (cmd_off - 1) | iordy;
@@ -272,7 +271,7 @@ static u8 sl82c105_bridge_revision(struct pci_dev *dev)
272 * channel 0 here at least, but channel 1 has to be enabled by 271 * channel 0 here at least, but channel 1 has to be enabled by
273 * firmware or arch code. We still set both to 16 bits mode. 272 * firmware or arch code. We still set both to 16 bits mode.
274 */ 273 */
275static unsigned int __devinit init_chipset_sl82c105(struct pci_dev *dev) 274static unsigned int init_chipset_sl82c105(struct pci_dev *dev)
276{ 275{
277 u32 val; 276 u32 val;
278 277
@@ -351,6 +350,8 @@ static struct pci_driver driver = {
351 .id_table = sl82c105_pci_tbl, 350 .id_table = sl82c105_pci_tbl,
352 .probe = sl82c105_init_one, 351 .probe = sl82c105_init_one,
353 .remove = ide_pci_remove, 352 .remove = ide_pci_remove,
353 .suspend = ide_pci_suspend,
354 .resume = ide_pci_resume,
354}; 355};
355 356
356static int __init sl82c105_ide_init(void) 357static int __init sl82c105_ide_init(void)
diff --git a/drivers/ide/pci/slc90e66.c b/drivers/ide/pci/slc90e66.c
index 866d6c65e3a0..a9551a13ac57 100644
--- a/drivers/ide/pci/slc90e66.c
+++ b/drivers/ide/pci/slc90e66.c
@@ -11,7 +11,6 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/pci.h> 13#include <linux/pci.h>
14#include <linux/hdreg.h>
15#include <linux/ide.h> 14#include <linux/ide.h>
16#include <linux/init.h> 15#include <linux/init.h>
17 16
@@ -160,6 +159,8 @@ static struct pci_driver driver = {
160 .id_table = slc90e66_pci_tbl, 159 .id_table = slc90e66_pci_tbl,
161 .probe = slc90e66_init_one, 160 .probe = slc90e66_init_one,
162 .remove = ide_pci_remove, 161 .remove = ide_pci_remove,
162 .suspend = ide_pci_suspend,
163 .resume = ide_pci_resume,
163}; 164};
164 165
165static int __init slc90e66_ide_init(void) 166static int __init slc90e66_ide_init(void)
diff --git a/drivers/ide/pci/triflex.c b/drivers/ide/pci/triflex.c
index b77ec35151b3..be8715dcee05 100644
--- a/drivers/ide/pci/triflex.c
+++ b/drivers/ide/pci/triflex.c
@@ -28,7 +28,6 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/kernel.h> 30#include <linux/kernel.h>
31#include <linux/hdreg.h>
32#include <linux/pci.h> 31#include <linux/pci.h>
33#include <linux/ide.h> 32#include <linux/ide.h>
34#include <linux/init.h> 33#include <linux/init.h>
@@ -120,6 +119,8 @@ static struct pci_driver driver = {
120 .id_table = triflex_pci_tbl, 119 .id_table = triflex_pci_tbl,
121 .probe = triflex_init_one, 120 .probe = triflex_init_one,
122 .remove = ide_pci_remove, 121 .remove = ide_pci_remove,
122 .suspend = ide_pci_suspend,
123 .resume = ide_pci_resume,
123}; 124};
124 125
125static int __init triflex_ide_init(void) 126static int __init triflex_ide_init(void)
diff --git a/drivers/ide/pci/trm290.c b/drivers/ide/pci/trm290.c
index fd28b49977fd..4dfbc6a68b5b 100644
--- a/drivers/ide/pci/trm290.c
+++ b/drivers/ide/pci/trm290.c
@@ -135,7 +135,6 @@
135#include <linux/interrupt.h> 135#include <linux/interrupt.h>
136#include <linux/blkdev.h> 136#include <linux/blkdev.h>
137#include <linux/init.h> 137#include <linux/init.h>
138#include <linux/hdreg.h>
139#include <linux/pci.h> 138#include <linux/pci.h>
140#include <linux/ide.h> 139#include <linux/ide.h>
141 140
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 94fb9ab3223f..acacdaab69c2 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -154,7 +154,7 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
154static void via_set_drive(ide_drive_t *drive, const u8 speed) 154static void via_set_drive(ide_drive_t *drive, const u8 speed)
155{ 155{
156 ide_hwif_t *hwif = drive->hwif; 156 ide_hwif_t *hwif = drive->hwif;
157 ide_drive_t *peer = hwif->drives + (~drive->dn & 1); 157 ide_drive_t *peer = ide_get_pair_dev(drive);
158 struct pci_dev *dev = to_pci_dev(hwif->dev); 158 struct pci_dev *dev = to_pci_dev(hwif->dev);
159 struct ide_host *host = pci_get_drvdata(dev); 159 struct ide_host *host = pci_get_drvdata(dev);
160 struct via82cxxx_dev *vdev = host->host_priv; 160 struct via82cxxx_dev *vdev = host->host_priv;
@@ -173,7 +173,7 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed)
173 173
174 ide_timing_compute(drive, speed, &t, T, UT); 174 ide_timing_compute(drive, speed, &t, T, UT);
175 175
176 if (peer->present) { 176 if (peer) {
177 ide_timing_compute(peer, peer->current_speed, &p, T, UT); 177 ide_timing_compute(peer, peer->current_speed, &p, T, UT);
178 ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT); 178 ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT);
179 } 179 }
@@ -215,7 +215,7 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
215/* 215/*
216 * Check and handle 80-wire cable presence 216 * Check and handle 80-wire cable presence
217 */ 217 */
218static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u) 218static void via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
219{ 219{
220 int i; 220 int i;
221 221
@@ -267,7 +267,7 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
267 * and initialize its drive independent registers. 267 * and initialize its drive independent registers.
268 */ 268 */
269 269
270static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev) 270static unsigned int init_chipset_via82cxxx(struct pci_dev *dev)
271{ 271{
272 struct ide_host *host = pci_get_drvdata(dev); 272 struct ide_host *host = pci_get_drvdata(dev);
273 struct via82cxxx_dev *vdev = host->host_priv; 273 struct via82cxxx_dev *vdev = host->host_priv;
@@ -492,6 +492,8 @@ static struct pci_driver driver = {
492 .id_table = via_pci_tbl, 492 .id_table = via_pci_tbl,
493 .probe = via_init_one, 493 .probe = via_init_one,
494 .remove = __devexit_p(via_remove), 494 .remove = __devexit_p(via_remove),
495 .suspend = ide_pci_suspend,
496 .resume = ide_pci_resume,
495}; 497};
496 498
497static int __init via_ide_init(void) 499static int __init via_ide_init(void)
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index fa2be26272d5..c3432da78d52 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -669,9 +669,9 @@ static void
669set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2, 669set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2,
670 u8 speed) 670 u8 speed)
671{ 671{
672 u16 *id = drive->id;
672 int cycleTime, accessTime = 0, recTime = 0; 673 int cycleTime, accessTime = 0, recTime = 0;
673 unsigned accessTicks, recTicks; 674 unsigned accessTicks, recTicks;
674 struct hd_driveid *id = drive->id;
675 struct mdma_timings_t* tm = NULL; 675 struct mdma_timings_t* tm = NULL;
676 int i; 676 int i;
677 677
@@ -686,8 +686,8 @@ set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2,
686 } 686 }
687 687
688 /* Check if drive provides explicit DMA cycle time */ 688 /* Check if drive provides explicit DMA cycle time */
689 if ((id->field_valid & 2) && id->eide_dma_time) 689 if ((id[ATA_ID_FIELD_VALID] & 2) && id[ATA_ID_EIDE_DMA_TIME])
690 cycleTime = max_t(int, id->eide_dma_time, cycleTime); 690 cycleTime = max_t(int, id[ATA_ID_EIDE_DMA_TIME], cycleTime);
691 691
692 /* OHare limits according to some old Apple sources */ 692 /* OHare limits according to some old Apple sources */
693 if ((intf_type == controller_ohare) && (cycleTime < 150)) 693 if ((intf_type == controller_ohare) && (cycleTime < 150))
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index a8e9e8a69a52..9f1f9163a136 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -659,3 +659,36 @@ void ide_pci_remove(struct pci_dev *dev)
659 pci_disable_device(dev); 659 pci_disable_device(dev);
660} 660}
661EXPORT_SYMBOL_GPL(ide_pci_remove); 661EXPORT_SYMBOL_GPL(ide_pci_remove);
662
663#ifdef CONFIG_PM
664int ide_pci_suspend(struct pci_dev *dev, pm_message_t state)
665{
666 pci_save_state(dev);
667 pci_disable_device(dev);
668 pci_set_power_state(dev, pci_choose_state(dev, state));
669
670 return 0;
671}
672EXPORT_SYMBOL_GPL(ide_pci_suspend);
673
674int ide_pci_resume(struct pci_dev *dev)
675{
676 struct ide_host *host = pci_get_drvdata(dev);
677 int rc;
678
679 pci_set_power_state(dev, PCI_D0);
680
681 rc = pci_enable_device(dev);
682 if (rc)
683 return rc;
684
685 pci_restore_state(dev);
686 pci_set_master(dev);
687
688 if (host->init_chipset)
689 host->init_chipset(dev);
690
691 return 0;
692}
693EXPORT_SYMBOL_GPL(ide_pci_resume);
694#endif
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 7d63f8ced24b..4b47f4ece5b7 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
26# Build Intel IOMMU support 26# Build Intel IOMMU support
27obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o 27obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
28 28
29obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
30
29# 31#
30# Some architectures use the generic PCI setup functions 32# Some architectures use the generic PCI setup functions
31# 33#
diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h
new file mode 100644
index 000000000000..bff5c65f81dc
--- /dev/null
+++ b/drivers/pci/dma_remapping.h
@@ -0,0 +1,157 @@
1#ifndef _DMA_REMAPPING_H
2#define _DMA_REMAPPING_H
3
4/*
5 * We need a fixed PAGE_SIZE of 4K irrespective of
6 * arch PAGE_SIZE for IOMMU page tables.
7 */
8#define PAGE_SHIFT_4K (12)
9#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
10#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
11#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
12
13#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
14#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
15#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
16
17
18/*
19 * 0: Present
20 * 1-11: Reserved
21 * 12-63: Context Ptr (12 - (haw-1))
22 * 64-127: Reserved
23 */
24struct root_entry {
25 u64 val;
26 u64 rsvd1;
27};
28#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
29static inline bool root_present(struct root_entry *root)
30{
31 return (root->val & 1);
32}
33static inline void set_root_present(struct root_entry *root)
34{
35 root->val |= 1;
36}
37static inline void set_root_value(struct root_entry *root, unsigned long value)
38{
39 root->val |= value & PAGE_MASK_4K;
40}
41
42struct context_entry;
43static inline struct context_entry *
44get_context_addr_from_root(struct root_entry *root)
45{
46 return (struct context_entry *)
47 (root_present(root)?phys_to_virt(
48 root->val & PAGE_MASK_4K):
49 NULL);
50}
51
52/*
53 * low 64 bits:
54 * 0: present
55 * 1: fault processing disable
56 * 2-3: translation type
57 * 12-63: address space root
58 * high 64 bits:
59 * 0-2: address width
60 * 3-6: aval
61 * 8-23: domain id
62 */
63struct context_entry {
64 u64 lo;
65 u64 hi;
66};
67#define context_present(c) ((c).lo & 1)
68#define context_fault_disable(c) (((c).lo >> 1) & 1)
69#define context_translation_type(c) (((c).lo >> 2) & 3)
70#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
71#define context_address_width(c) ((c).hi & 7)
72#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
73
74#define context_set_present(c) do {(c).lo |= 1;} while (0)
75#define context_set_fault_enable(c) \
76 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
77#define context_set_translation_type(c, val) \
78 do { \
79 (c).lo &= (((u64)-1) << 4) | 3; \
80 (c).lo |= ((val) & 3) << 2; \
81 } while (0)
82#define CONTEXT_TT_MULTI_LEVEL 0
83#define context_set_address_root(c, val) \
84 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
85#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
86#define context_set_domain_id(c, val) \
87 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
88#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
89
90/*
91 * 0: readable
92 * 1: writable
93 * 2-6: reserved
94 * 7: super page
95 * 8-11: available
96 * 12-63: Host physcial address
97 */
98struct dma_pte {
99 u64 val;
100};
101#define dma_clear_pte(p) do {(p).val = 0;} while (0)
102
103#define DMA_PTE_READ (1)
104#define DMA_PTE_WRITE (2)
105
106#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
107#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
108#define dma_set_pte_prot(p, prot) \
109 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
110#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
111#define dma_set_pte_addr(p, addr) do {\
112 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
113#define dma_pte_present(p) (((p).val & 3) != 0)
114
115struct intel_iommu;
116
117struct dmar_domain {
118 int id; /* domain id */
119 struct intel_iommu *iommu; /* back pointer to owning iommu */
120
121 struct list_head devices; /* all devices' list */
122 struct iova_domain iovad; /* iova's that belong to this domain */
123
124 struct dma_pte *pgd; /* virtual address */
125 spinlock_t mapping_lock; /* page table lock */
126 int gaw; /* max guest address width */
127
128 /* adjusted guest address width, 0 is level 2 30-bit */
129 int agaw;
130
131#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
132 int flags;
133};
134
135/* PCI domain-device relationship */
136struct device_domain_info {
137 struct list_head link; /* link to domain siblings */
138 struct list_head global; /* link to global list */
139 u8 bus; /* PCI bus numer */
140 u8 devfn; /* PCI devfn number */
141 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
142 struct dmar_domain *domain; /* pointer to domain */
143};
144
145extern int init_dmars(void);
146extern void free_dmar_iommu(struct intel_iommu *iommu);
147
148extern int dmar_disabled;
149
150#ifndef CONFIG_DMAR_GFX_WA
151static inline void iommu_prepare_gfx_mapping(void)
152{
153 return;
154}
155#endif /* !CONFIG_DMAR_GFX_WA */
156
157#endif
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 8bf86ae2333f..bd2c01674f5e 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -19,13 +19,16 @@
19 * Author: Shaohua Li <shaohua.li@intel.com> 19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * 21 *
22 * This file implements early detection/parsing of DMA Remapping Devices 22 * This file implements early detection/parsing of Remapping Devices
23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI 23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24 * tables. 24 * tables.
25 *
26 * These routines are used by both DMA-remapping and Interrupt-remapping
25 */ 27 */
26 28
27#include <linux/pci.h> 29#include <linux/pci.h>
28#include <linux/dmar.h> 30#include <linux/dmar.h>
31#include <linux/timer.h>
29#include "iova.h" 32#include "iova.h"
30#include "intel-iommu.h" 33#include "intel-iommu.h"
31 34
@@ -37,7 +40,6 @@
37 * these units are not supported by the architecture. 40 * these units are not supported by the architecture.
38 */ 41 */
39LIST_HEAD(dmar_drhd_units); 42LIST_HEAD(dmar_drhd_units);
40LIST_HEAD(dmar_rmrr_units);
41 43
42static struct acpi_table_header * __initdata dmar_tbl; 44static struct acpi_table_header * __initdata dmar_tbl;
43 45
@@ -53,11 +55,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
53 list_add(&drhd->list, &dmar_drhd_units); 55 list_add(&drhd->list, &dmar_drhd_units);
54} 56}
55 57
56static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
57{
58 list_add(&rmrr->list, &dmar_rmrr_units);
59}
60
61static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, 58static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62 struct pci_dev **dev, u16 segment) 59 struct pci_dev **dev, u16 segment)
63{ 60{
@@ -172,19 +169,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 struct acpi_dmar_hardware_unit *drhd; 169 struct acpi_dmar_hardware_unit *drhd;
173 struct dmar_drhd_unit *dmaru; 170 struct dmar_drhd_unit *dmaru;
174 int ret = 0; 171 int ret = 0;
175 static int include_all;
176 172
177 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); 173 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
178 if (!dmaru) 174 if (!dmaru)
179 return -ENOMEM; 175 return -ENOMEM;
180 176
177 dmaru->hdr = header;
181 drhd = (struct acpi_dmar_hardware_unit *)header; 178 drhd = (struct acpi_dmar_hardware_unit *)header;
182 dmaru->reg_base_addr = drhd->address; 179 dmaru->reg_base_addr = drhd->address;
183 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ 180 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
184 181
182 ret = alloc_iommu(dmaru);
183 if (ret) {
184 kfree(dmaru);
185 return ret;
186 }
187 dmar_register_drhd_unit(dmaru);
188 return 0;
189}
190
191static int __init
192dmar_parse_dev(struct dmar_drhd_unit *dmaru)
193{
194 struct acpi_dmar_hardware_unit *drhd;
195 static int include_all;
196 int ret;
197
198 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
199
185 if (!dmaru->include_all) 200 if (!dmaru->include_all)
186 ret = dmar_parse_dev_scope((void *)(drhd + 1), 201 ret = dmar_parse_dev_scope((void *)(drhd + 1),
187 ((void *)drhd) + header->length, 202 ((void *)drhd) + drhd->header.length,
188 &dmaru->devices_cnt, &dmaru->devices, 203 &dmaru->devices_cnt, &dmaru->devices,
189 drhd->segment); 204 drhd->segment);
190 else { 205 else {
@@ -197,37 +212,59 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
197 include_all = 1; 212 include_all = 1;
198 } 213 }
199 214
200 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) 215 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
216 list_del(&dmaru->list);
201 kfree(dmaru); 217 kfree(dmaru);
202 else 218 }
203 dmar_register_drhd_unit(dmaru);
204 return ret; 219 return ret;
205} 220}
206 221
222#ifdef CONFIG_DMAR
223LIST_HEAD(dmar_rmrr_units);
224
225static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
226{
227 list_add(&rmrr->list, &dmar_rmrr_units);
228}
229
230
207static int __init 231static int __init
208dmar_parse_one_rmrr(struct acpi_dmar_header *header) 232dmar_parse_one_rmrr(struct acpi_dmar_header *header)
209{ 233{
210 struct acpi_dmar_reserved_memory *rmrr; 234 struct acpi_dmar_reserved_memory *rmrr;
211 struct dmar_rmrr_unit *rmrru; 235 struct dmar_rmrr_unit *rmrru;
212 int ret = 0;
213 236
214 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); 237 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
215 if (!rmrru) 238 if (!rmrru)
216 return -ENOMEM; 239 return -ENOMEM;
217 240
241 rmrru->hdr = header;
218 rmrr = (struct acpi_dmar_reserved_memory *)header; 242 rmrr = (struct acpi_dmar_reserved_memory *)header;
219 rmrru->base_address = rmrr->base_address; 243 rmrru->base_address = rmrr->base_address;
220 rmrru->end_address = rmrr->end_address; 244 rmrru->end_address = rmrr->end_address;
245
246 dmar_register_rmrr_unit(rmrru);
247 return 0;
248}
249
250static int __init
251rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
252{
253 struct acpi_dmar_reserved_memory *rmrr;
254 int ret;
255
256 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
221 ret = dmar_parse_dev_scope((void *)(rmrr + 1), 257 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
222 ((void *)rmrr) + header->length, 258 ((void *)rmrr) + rmrr->header.length,
223 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); 259 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
224 260
225 if (ret || (rmrru->devices_cnt == 0)) 261 if (ret || (rmrru->devices_cnt == 0)) {
262 list_del(&rmrru->list);
226 kfree(rmrru); 263 kfree(rmrru);
227 else 264 }
228 dmar_register_rmrr_unit(rmrru);
229 return ret; 265 return ret;
230} 266}
267#endif
231 268
232static void __init 269static void __init
233dmar_table_print_dmar_entry(struct acpi_dmar_header *header) 270dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
@@ -252,6 +289,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
252 } 289 }
253} 290}
254 291
292
255/** 293/**
256 * parse_dmar_table - parses the DMA reporting table 294 * parse_dmar_table - parses the DMA reporting table
257 */ 295 */
@@ -284,7 +322,9 @@ parse_dmar_table(void)
284 ret = dmar_parse_one_drhd(entry_header); 322 ret = dmar_parse_one_drhd(entry_header);
285 break; 323 break;
286 case ACPI_DMAR_TYPE_RESERVED_MEMORY: 324 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
325#ifdef CONFIG_DMAR
287 ret = dmar_parse_one_rmrr(entry_header); 326 ret = dmar_parse_one_rmrr(entry_header);
327#endif
288 break; 328 break;
289 default: 329 default:
290 printk(KERN_WARNING PREFIX 330 printk(KERN_WARNING PREFIX
@@ -300,15 +340,77 @@ parse_dmar_table(void)
300 return ret; 340 return ret;
301} 341}
302 342
343int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
344 struct pci_dev *dev)
345{
346 int index;
347
348 while (dev) {
349 for (index = 0; index < cnt; index++)
350 if (dev == devices[index])
351 return 1;
303 352
304int __init dmar_table_init(void) 353 /* Check our parent */
354 dev = dev->bus->self;
355 }
356
357 return 0;
358}
359
360struct dmar_drhd_unit *
361dmar_find_matched_drhd_unit(struct pci_dev *dev)
305{ 362{
363 struct dmar_drhd_unit *drhd = NULL;
364
365 list_for_each_entry(drhd, &dmar_drhd_units, list) {
366 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
367 drhd->devices_cnt, dev))
368 return drhd;
369 }
370
371 return NULL;
372}
373
374int __init dmar_dev_scope_init(void)
375{
376 struct dmar_drhd_unit *drhd;
377 int ret = -ENODEV;
378
379 for_each_drhd_unit(drhd) {
380 ret = dmar_parse_dev(drhd);
381 if (ret)
382 return ret;
383 }
384
385#ifdef CONFIG_DMAR
386 {
387 struct dmar_rmrr_unit *rmrr;
388 for_each_rmrr_units(rmrr) {
389 ret = rmrr_parse_dev(rmrr);
390 if (ret)
391 return ret;
392 }
393 }
394#endif
395
396 return ret;
397}
306 398
399
400int __init dmar_table_init(void)
401{
402 static int dmar_table_initialized;
307 int ret; 403 int ret;
308 404
405 if (dmar_table_initialized)
406 return 0;
407
408 dmar_table_initialized = 1;
409
309 ret = parse_dmar_table(); 410 ret = parse_dmar_table();
310 if (ret) { 411 if (ret) {
311 printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); 412 if (ret != -ENODEV)
413 printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
312 return ret; 414 return ret;
313 } 415 }
314 416
@@ -317,9 +419,14 @@ int __init dmar_table_init(void)
317 return -ENODEV; 419 return -ENODEV;
318 } 420 }
319 421
422#ifdef CONFIG_DMAR
320 if (list_empty(&dmar_rmrr_units)) 423 if (list_empty(&dmar_rmrr_units))
321 printk(KERN_INFO PREFIX "No RMRR found\n"); 424 printk(KERN_INFO PREFIX "No RMRR found\n");
425#endif
322 426
427#ifdef CONFIG_INTR_REMAP
428 parse_ioapics_under_ir();
429#endif
323 return 0; 430 return 0;
324} 431}
325 432
@@ -341,3 +448,255 @@ int __init early_dmar_detect(void)
341 448
342 return (ACPI_SUCCESS(status) ? 1 : 0); 449 return (ACPI_SUCCESS(status) ? 1 : 0);
343} 450}
451
452void __init detect_intel_iommu(void)
453{
454 int ret;
455
456 ret = early_dmar_detect();
457
458#ifdef CONFIG_DMAR
459 {
460 struct acpi_table_dmar *dmar;
461 /*
462 * for now we will disable dma-remapping when interrupt
463 * remapping is enabled.
464 * When support for queued invalidation for IOTLB invalidation
465 * is added, we will not need this any more.
466 */
467 dmar = (struct acpi_table_dmar *) dmar_tbl;
468 if (ret && cpu_has_x2apic && dmar->flags & 0x1) {
469 printk(KERN_INFO
470 "Queued invalidation will be enabled to support "
471 "x2apic and Intr-remapping.\n");
472 printk(KERN_INFO
473 "Disabling IOMMU detection, because of missing "
474 "queued invalidation support for IOTLB "
475 "invalidation\n");
476 printk(KERN_INFO
477 "Use \"nox2apic\", if you want to use Intel "
478 " IOMMU for DMA-remapping and don't care about "
479 " x2apic support\n");
480
481 dmar_disabled = 1;
482 return;
483 }
484
485 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
486 !dmar_disabled)
487 iommu_detected = 1;
488 }
489#endif
490}
491
492
493int alloc_iommu(struct dmar_drhd_unit *drhd)
494{
495 struct intel_iommu *iommu;
496 int map_size;
497 u32 ver;
498 static int iommu_allocated = 0;
499
500 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
501 if (!iommu)
502 return -ENOMEM;
503
504 iommu->seq_id = iommu_allocated++;
505
506 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
507 if (!iommu->reg) {
508 printk(KERN_ERR "IOMMU: can't map the region\n");
509 goto error;
510 }
511 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
512 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
513
514 /* the registers might be more than one page */
515 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
516 cap_max_fault_reg_offset(iommu->cap));
517 map_size = PAGE_ALIGN_4K(map_size);
518 if (map_size > PAGE_SIZE_4K) {
519 iounmap(iommu->reg);
520 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
521 if (!iommu->reg) {
522 printk(KERN_ERR "IOMMU: can't map the region\n");
523 goto error;
524 }
525 }
526
527 ver = readl(iommu->reg + DMAR_VER_REG);
528 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
529 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
530 iommu->cap, iommu->ecap);
531
532 spin_lock_init(&iommu->register_lock);
533
534 drhd->iommu = iommu;
535 return 0;
536error:
537 kfree(iommu);
538 return -1;
539}
540
541void free_iommu(struct intel_iommu *iommu)
542{
543 if (!iommu)
544 return;
545
546#ifdef CONFIG_DMAR
547 free_dmar_iommu(iommu);
548#endif
549
550 if (iommu->reg)
551 iounmap(iommu->reg);
552 kfree(iommu);
553}
554
555/*
556 * Reclaim all the submitted descriptors which have completed its work.
557 */
558static inline void reclaim_free_desc(struct q_inval *qi)
559{
560 while (qi->desc_status[qi->free_tail] == QI_DONE) {
561 qi->desc_status[qi->free_tail] = QI_FREE;
562 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
563 qi->free_cnt++;
564 }
565}
566
567/*
568 * Submit the queued invalidation descriptor to the remapping
569 * hardware unit and wait for its completion.
570 */
571void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
572{
573 struct q_inval *qi = iommu->qi;
574 struct qi_desc *hw, wait_desc;
575 int wait_index, index;
576 unsigned long flags;
577
578 if (!qi)
579 return;
580
581 hw = qi->desc;
582
583 spin_lock(&qi->q_lock);
584 while (qi->free_cnt < 3) {
585 spin_unlock(&qi->q_lock);
586 cpu_relax();
587 spin_lock(&qi->q_lock);
588 }
589
590 index = qi->free_head;
591 wait_index = (index + 1) % QI_LENGTH;
592
593 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
594
595 hw[index] = *desc;
596
597 wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
598 wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
599
600 hw[wait_index] = wait_desc;
601
602 __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
603 __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
604
605 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
606 qi->free_cnt -= 2;
607
608 spin_lock_irqsave(&iommu->register_lock, flags);
609 /*
610 * update the HW tail register indicating the presence of
611 * new descriptors.
612 */
613 writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
614 spin_unlock_irqrestore(&iommu->register_lock, flags);
615
616 while (qi->desc_status[wait_index] != QI_DONE) {
617 spin_unlock(&qi->q_lock);
618 cpu_relax();
619 spin_lock(&qi->q_lock);
620 }
621
622 qi->desc_status[index] = QI_DONE;
623
624 reclaim_free_desc(qi);
625 spin_unlock(&qi->q_lock);
626}
627
628/*
629 * Flush the global interrupt entry cache.
630 */
631void qi_global_iec(struct intel_iommu *iommu)
632{
633 struct qi_desc desc;
634
635 desc.low = QI_IEC_TYPE;
636 desc.high = 0;
637
638 qi_submit_sync(&desc, iommu);
639}
640
641/*
642 * Enable Queued Invalidation interface. This is a must to support
643 * interrupt-remapping. Also used by DMA-remapping, which replaces
644 * register based IOTLB invalidation.
645 */
646int dmar_enable_qi(struct intel_iommu *iommu)
647{
648 u32 cmd, sts;
649 unsigned long flags;
650 struct q_inval *qi;
651
652 if (!ecap_qis(iommu->ecap))
653 return -ENOENT;
654
655 /*
656 * queued invalidation is already setup and enabled.
657 */
658 if (iommu->qi)
659 return 0;
660
661 iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
662 if (!iommu->qi)
663 return -ENOMEM;
664
665 qi = iommu->qi;
666
667 qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
668 if (!qi->desc) {
669 kfree(qi);
670 iommu->qi = 0;
671 return -ENOMEM;
672 }
673
674 qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
675 if (!qi->desc_status) {
676 free_page((unsigned long) qi->desc);
677 kfree(qi);
678 iommu->qi = 0;
679 return -ENOMEM;
680 }
681
682 qi->free_head = qi->free_tail = 0;
683 qi->free_cnt = QI_LENGTH;
684
685 spin_lock_init(&qi->q_lock);
686
687 spin_lock_irqsave(&iommu->register_lock, flags);
688 /* write zero to the tail reg */
689 writel(0, iommu->reg + DMAR_IQT_REG);
690
691 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
692
693 cmd = iommu->gcmd | DMA_GCMD_QIE;
694 iommu->gcmd |= DMA_GCMD_QIE;
695 writel(cmd, iommu->reg + DMAR_GCMD_REG);
696
697 /* Make sure hardware complete it */
698 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
699 spin_unlock_irqrestore(&iommu->register_lock, flags);
700
701 return 0;
702}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6c4c1c3c50ee..389fdd6f4a9f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -49,8 +49,6 @@
49 49
50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
51 51
52#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 52#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55 53
56 54
@@ -58,8 +56,6 @@ static void flush_unmaps_timeout(unsigned long data);
58 56
59DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); 57DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
60 58
61static struct intel_iommu *g_iommus;
62
63#define HIGH_WATER_MARK 250 59#define HIGH_WATER_MARK 250
64struct deferred_flush_tables { 60struct deferred_flush_tables {
65 int next; 61 int next;
@@ -185,13 +181,6 @@ void free_iova_mem(struct iova *iova)
185 kmem_cache_free(iommu_iova_cache, iova); 181 kmem_cache_free(iommu_iova_cache, iova);
186} 182}
187 183
188static inline void __iommu_flush_cache(
189 struct intel_iommu *iommu, void *addr, int size)
190{
191 if (!ecap_coherent(iommu->ecap))
192 clflush_cache_range(addr, size);
193}
194
195/* Gets context entry for a given bus and devfn */ 184/* Gets context entry for a given bus and devfn */
196static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 185static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
197 u8 bus, u8 devfn) 186 u8 bus, u8 devfn)
@@ -488,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
488 return 0; 477 return 0;
489} 478}
490 479
491#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
492{\
493 cycles_t start_time = get_cycles();\
494 while (1) {\
495 sts = op (iommu->reg + offset);\
496 if (cond)\
497 break;\
498 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
499 panic("DMAR hardware is malfunctioning\n");\
500 cpu_relax();\
501 }\
502}
503
504static void iommu_set_root_entry(struct intel_iommu *iommu) 480static void iommu_set_root_entry(struct intel_iommu *iommu)
505{ 481{
506 void *addr; 482 void *addr;
@@ -990,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
990 return -ENOMEM; 966 return -ENOMEM;
991 } 967 }
992 968
969 spin_lock_init(&iommu->lock);
970
993 /* 971 /*
994 * if Caching mode is set, then invalid translations are tagged 972 * if Caching mode is set, then invalid translations are tagged
995 * with domainid 0. Hence we need to pre-allocate it. 973 * with domainid 0. Hence we need to pre-allocate it.
@@ -998,62 +976,15 @@ static int iommu_init_domains(struct intel_iommu *iommu)
998 set_bit(0, iommu->domain_ids); 976 set_bit(0, iommu->domain_ids);
999 return 0; 977 return 0;
1000} 978}
1001static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
1002 struct dmar_drhd_unit *drhd)
1003{
1004 int ret;
1005 int map_size;
1006 u32 ver;
1007
1008 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
1009 if (!iommu->reg) {
1010 printk(KERN_ERR "IOMMU: can't map the region\n");
1011 goto error;
1012 }
1013 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
1014 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
1015
1016 /* the registers might be more than one page */
1017 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
1018 cap_max_fault_reg_offset(iommu->cap));
1019 map_size = PAGE_ALIGN_4K(map_size);
1020 if (map_size > PAGE_SIZE_4K) {
1021 iounmap(iommu->reg);
1022 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
1023 if (!iommu->reg) {
1024 printk(KERN_ERR "IOMMU: can't map the region\n");
1025 goto error;
1026 }
1027 }
1028
1029 ver = readl(iommu->reg + DMAR_VER_REG);
1030 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1031 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1032 iommu->cap, iommu->ecap);
1033 ret = iommu_init_domains(iommu);
1034 if (ret)
1035 goto error_unmap;
1036 spin_lock_init(&iommu->lock);
1037 spin_lock_init(&iommu->register_lock);
1038 979
1039 drhd->iommu = iommu;
1040 return iommu;
1041error_unmap:
1042 iounmap(iommu->reg);
1043error:
1044 kfree(iommu);
1045 return NULL;
1046}
1047 980
1048static void domain_exit(struct dmar_domain *domain); 981static void domain_exit(struct dmar_domain *domain);
1049static void free_iommu(struct intel_iommu *iommu) 982
983void free_dmar_iommu(struct intel_iommu *iommu)
1050{ 984{
1051 struct dmar_domain *domain; 985 struct dmar_domain *domain;
1052 int i; 986 int i;
1053 987
1054 if (!iommu)
1055 return;
1056
1057 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); 988 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1058 for (; i < cap_ndoms(iommu->cap); ) { 989 for (; i < cap_ndoms(iommu->cap); ) {
1059 domain = iommu->domains[i]; 990 domain = iommu->domains[i];
@@ -1078,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu)
1078 1009
1079 /* free context mapping */ 1010 /* free context mapping */
1080 free_context_table(iommu); 1011 free_context_table(iommu);
1081
1082 if (iommu->reg)
1083 iounmap(iommu->reg);
1084 kfree(iommu);
1085} 1012}
1086 1013
1087static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) 1014static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@ -1426,37 +1353,6 @@ find_domain(struct pci_dev *pdev)
1426 return NULL; 1353 return NULL;
1427} 1354}
1428 1355
1429static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1430 struct pci_dev *dev)
1431{
1432 int index;
1433
1434 while (dev) {
1435 for (index = 0; index < cnt; index++)
1436 if (dev == devices[index])
1437 return 1;
1438
1439 /* Check our parent */
1440 dev = dev->bus->self;
1441 }
1442
1443 return 0;
1444}
1445
1446static struct dmar_drhd_unit *
1447dmar_find_matched_drhd_unit(struct pci_dev *dev)
1448{
1449 struct dmar_drhd_unit *drhd = NULL;
1450
1451 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1452 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1453 drhd->devices_cnt, dev))
1454 return drhd;
1455 }
1456
1457 return NULL;
1458}
1459
1460/* domain is initialized */ 1356/* domain is initialized */
1461static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) 1357static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1462{ 1358{
@@ -1729,8 +1625,6 @@ int __init init_dmars(void)
1729 * endfor 1625 * endfor
1730 */ 1626 */
1731 for_each_drhd_unit(drhd) { 1627 for_each_drhd_unit(drhd) {
1732 if (drhd->ignored)
1733 continue;
1734 g_num_of_iommus++; 1628 g_num_of_iommus++;
1735 /* 1629 /*
1736 * lock not needed as this is only incremented in the single 1630 * lock not needed as this is only incremented in the single
@@ -1739,12 +1633,6 @@ int __init init_dmars(void)
1739 */ 1633 */
1740 } 1634 }
1741 1635
1742 g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
1743 if (!g_iommus) {
1744 ret = -ENOMEM;
1745 goto error;
1746 }
1747
1748 deferred_flush = kzalloc(g_num_of_iommus * 1636 deferred_flush = kzalloc(g_num_of_iommus *
1749 sizeof(struct deferred_flush_tables), GFP_KERNEL); 1637 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1750 if (!deferred_flush) { 1638 if (!deferred_flush) {
@@ -1752,16 +1640,15 @@ int __init init_dmars(void)
1752 goto error; 1640 goto error;
1753 } 1641 }
1754 1642
1755 i = 0;
1756 for_each_drhd_unit(drhd) { 1643 for_each_drhd_unit(drhd) {
1757 if (drhd->ignored) 1644 if (drhd->ignored)
1758 continue; 1645 continue;
1759 iommu = alloc_iommu(&g_iommus[i], drhd); 1646
1760 i++; 1647 iommu = drhd->iommu;
1761 if (!iommu) { 1648
1762 ret = -ENOMEM; 1649 ret = iommu_init_domains(iommu);
1650 if (ret)
1763 goto error; 1651 goto error;
1764 }
1765 1652
1766 /* 1653 /*
1767 * TBD: 1654 * TBD:
@@ -1845,7 +1732,6 @@ error:
1845 iommu = drhd->iommu; 1732 iommu = drhd->iommu;
1846 free_iommu(iommu); 1733 free_iommu(iommu);
1847 } 1734 }
1848 kfree(g_iommus);
1849 return ret; 1735 return ret;
1850} 1736}
1851 1737
@@ -2002,7 +1888,10 @@ static void flush_unmaps(void)
2002 /* just flush them all */ 1888 /* just flush them all */
2003 for (i = 0; i < g_num_of_iommus; i++) { 1889 for (i = 0; i < g_num_of_iommus; i++) {
2004 if (deferred_flush[i].next) { 1890 if (deferred_flush[i].next) {
2005 iommu_flush_iotlb_global(&g_iommus[i], 0); 1891 struct intel_iommu *iommu =
1892 deferred_flush[i].domain[0]->iommu;
1893
1894 iommu_flush_iotlb_global(iommu, 0);
2006 for (j = 0; j < deferred_flush[i].next; j++) { 1895 for (j = 0; j < deferred_flush[i].next; j++) {
2007 __free_iova(&deferred_flush[i].domain[j]->iovad, 1896 __free_iova(&deferred_flush[i].domain[j]->iovad,
2008 deferred_flush[i].iova[j]); 1897 deferred_flush[i].iova[j]);
@@ -2032,7 +1921,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2032 if (list_size == HIGH_WATER_MARK) 1921 if (list_size == HIGH_WATER_MARK)
2033 flush_unmaps(); 1922 flush_unmaps();
2034 1923
2035 iommu_id = dom->iommu - g_iommus; 1924 iommu_id = dom->iommu->seq_id;
1925
2036 next = deferred_flush[iommu_id].next; 1926 next = deferred_flush[iommu_id].next;
2037 deferred_flush[iommu_id].domain[next] = dom; 1927 deferred_flush[iommu_id].domain[next] = dom;
2038 deferred_flush[iommu_id].iova[next] = iova; 1928 deferred_flush[iommu_id].iova[next] = iova;
@@ -2348,38 +2238,6 @@ static void __init iommu_exit_mempool(void)
2348 2238
2349} 2239}
2350 2240
2351static int blacklist_iommu(const struct dmi_system_id *id)
2352{
2353 printk(KERN_INFO "%s detected; disabling IOMMU\n",
2354 id->ident);
2355 dmar_disabled = 1;
2356 return 0;
2357}
2358
2359static struct dmi_system_id __initdata intel_iommu_dmi_table[] = {
2360 { /* Some DG33BU BIOS revisions advertised non-existent VT-d */
2361 .callback = blacklist_iommu,
2362 .ident = "Intel DG33BU",
2363 { DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
2364 DMI_MATCH(DMI_BOARD_NAME, "DG33BU"),
2365 }
2366 },
2367 { }
2368};
2369
2370
2371void __init detect_intel_iommu(void)
2372{
2373 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2374 return;
2375 if (early_dmar_detect()) {
2376 dmi_check_system(intel_iommu_dmi_table);
2377 if (dmar_disabled)
2378 return;
2379 iommu_detected = 1;
2380 }
2381}
2382
2383static void __init init_no_remapping_devices(void) 2241static void __init init_no_remapping_devices(void)
2384{ 2242{
2385 struct dmar_drhd_unit *drhd; 2243 struct dmar_drhd_unit *drhd;
@@ -2426,12 +2284,19 @@ int __init intel_iommu_init(void)
2426{ 2284{
2427 int ret = 0; 2285 int ret = 0;
2428 2286
2429 if (no_iommu || swiotlb || dmar_disabled)
2430 return -ENODEV;
2431
2432 if (dmar_table_init()) 2287 if (dmar_table_init())
2433 return -ENODEV; 2288 return -ENODEV;
2434 2289
2290 if (dmar_dev_scope_init())
2291 return -ENODEV;
2292
2293 /*
2294 * Check the need for DMA-remapping initialization now.
2295 * Above initialization will also be used by Interrupt-remapping.
2296 */
2297 if (no_iommu || swiotlb || dmar_disabled)
2298 return -ENODEV;
2299
2435 iommu_init_mempool(); 2300 iommu_init_mempool();
2436 dmar_init_reserved_ranges(); 2301 dmar_init_reserved_ranges();
2437 2302
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
index afc0ad96122e..2142c01e0143 100644
--- a/drivers/pci/intel-iommu.h
+++ b/drivers/pci/intel-iommu.h
@@ -27,19 +27,8 @@
27#include <linux/sysdev.h> 27#include <linux/sysdev.h>
28#include "iova.h" 28#include "iova.h"
29#include <linux/io.h> 29#include <linux/io.h>
30 30#include <asm/cacheflush.h>
31/* 31#include "dma_remapping.h"
32 * We need a fixed PAGE_SIZE of 4K irrespective of
33 * arch PAGE_SIZE for IOMMU page tables.
34 */
35#define PAGE_SHIFT_4K (12)
36#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
37#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
38#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
39
40#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
41#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
42#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
43 32
44/* 33/*
45 * Intel IOMMU register specification per version 1.0 public spec. 34 * Intel IOMMU register specification per version 1.0 public spec.
@@ -63,6 +52,11 @@
63#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ 52#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
64#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ 53#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
65#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ 54#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
55#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
56#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
57#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
58#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */
59#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
66 60
67#define OFFSET_STRIDE (9) 61#define OFFSET_STRIDE (9)
68/* 62/*
@@ -126,6 +120,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
126#define ecap_max_iotlb_offset(e) \ 120#define ecap_max_iotlb_offset(e) \
127 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) 121 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
128#define ecap_coherent(e) ((e) & 0x1) 122#define ecap_coherent(e) ((e) & 0x1)
123#define ecap_qis(e) ((e) & 0x2)
124#define ecap_eim_support(e) ((e >> 4) & 0x1)
125#define ecap_ir_support(e) ((e >> 3) & 0x1)
126#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
129 127
130 128
131/* IOTLB_REG */ 129/* IOTLB_REG */
@@ -141,6 +139,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
141#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) 139#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
142#define DMA_TLB_MAX_SIZE (0x3f) 140#define DMA_TLB_MAX_SIZE (0x3f)
143 141
142/* INVALID_DESC */
143#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3)
144#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3)
145#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3)
146#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
147#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
148#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
149#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
150#define DMA_ID_TLB_ADDR(addr) (addr)
151#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
152
144/* PMEN_REG */ 153/* PMEN_REG */
145#define DMA_PMEN_EPM (((u32)1)<<31) 154#define DMA_PMEN_EPM (((u32)1)<<31)
146#define DMA_PMEN_PRS (((u32)1)<<0) 155#define DMA_PMEN_PRS (((u32)1)<<0)
@@ -151,6 +160,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
151#define DMA_GCMD_SFL (((u32)1) << 29) 160#define DMA_GCMD_SFL (((u32)1) << 29)
152#define DMA_GCMD_EAFL (((u32)1) << 28) 161#define DMA_GCMD_EAFL (((u32)1) << 28)
153#define DMA_GCMD_WBF (((u32)1) << 27) 162#define DMA_GCMD_WBF (((u32)1) << 27)
163#define DMA_GCMD_QIE (((u32)1) << 26)
164#define DMA_GCMD_SIRTP (((u32)1) << 24)
165#define DMA_GCMD_IRE (((u32) 1) << 25)
154 166
155/* GSTS_REG */ 167/* GSTS_REG */
156#define DMA_GSTS_TES (((u32)1) << 31) 168#define DMA_GSTS_TES (((u32)1) << 31)
@@ -158,6 +170,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
158#define DMA_GSTS_FLS (((u32)1) << 29) 170#define DMA_GSTS_FLS (((u32)1) << 29)
159#define DMA_GSTS_AFLS (((u32)1) << 28) 171#define DMA_GSTS_AFLS (((u32)1) << 28)
160#define DMA_GSTS_WBFS (((u32)1) << 27) 172#define DMA_GSTS_WBFS (((u32)1) << 27)
173#define DMA_GSTS_QIES (((u32)1) << 26)
174#define DMA_GSTS_IRTPS (((u32)1) << 24)
175#define DMA_GSTS_IRES (((u32)1) << 25)
161 176
162/* CCMD_REG */ 177/* CCMD_REG */
163#define DMA_CCMD_ICC (((u64)1) << 63) 178#define DMA_CCMD_ICC (((u64)1) << 63)
@@ -187,158 +202,106 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
187#define dma_frcd_source_id(c) (c & 0xffff) 202#define dma_frcd_source_id(c) (c & 0xffff)
188#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ 203#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
189 204
190/* 205#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
191 * 0: Present 206
192 * 1-11: Reserved 207#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
193 * 12-63: Context Ptr (12 - (haw-1)) 208{\
194 * 64-127: Reserved 209 cycles_t start_time = get_cycles();\
195 */ 210 while (1) {\
196struct root_entry { 211 sts = op (iommu->reg + offset);\
197 u64 val; 212 if (cond)\
198 u64 rsvd1; 213 break;\
199}; 214 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
200#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) 215 panic("DMAR hardware is malfunctioning\n");\
201static inline bool root_present(struct root_entry *root) 216 cpu_relax();\
202{ 217 }\
203 return (root->val & 1);
204}
205static inline void set_root_present(struct root_entry *root)
206{
207 root->val |= 1;
208}
209static inline void set_root_value(struct root_entry *root, unsigned long value)
210{
211 root->val |= value & PAGE_MASK_4K;
212} 218}
213 219
214struct context_entry; 220#define QI_LENGTH 256 /* queue length */
215static inline struct context_entry *
216get_context_addr_from_root(struct root_entry *root)
217{
218 return (struct context_entry *)
219 (root_present(root)?phys_to_virt(
220 root->val & PAGE_MASK_4K):
221 NULL);
222}
223
224/*
225 * low 64 bits:
226 * 0: present
227 * 1: fault processing disable
228 * 2-3: translation type
229 * 12-63: address space root
230 * high 64 bits:
231 * 0-2: address width
232 * 3-6: aval
233 * 8-23: domain id
234 */
235struct context_entry {
236 u64 lo;
237 u64 hi;
238};
239#define context_present(c) ((c).lo & 1)
240#define context_fault_disable(c) (((c).lo >> 1) & 1)
241#define context_translation_type(c) (((c).lo >> 2) & 3)
242#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
243#define context_address_width(c) ((c).hi & 7)
244#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
245
246#define context_set_present(c) do {(c).lo |= 1;} while (0)
247#define context_set_fault_enable(c) \
248 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
249#define context_set_translation_type(c, val) \
250 do { \
251 (c).lo &= (((u64)-1) << 4) | 3; \
252 (c).lo |= ((val) & 3) << 2; \
253 } while (0)
254#define CONTEXT_TT_MULTI_LEVEL 0
255#define context_set_address_root(c, val) \
256 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
257#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
258#define context_set_domain_id(c, val) \
259 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
260#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
261 221
262/* 222enum {
263 * 0: readable 223 QI_FREE,
264 * 1: writable 224 QI_IN_USE,
265 * 2-6: reserved 225 QI_DONE
266 * 7: super page
267 * 8-11: available
268 * 12-63: Host physcial address
269 */
270struct dma_pte {
271 u64 val;
272}; 226};
273#define dma_clear_pte(p) do {(p).val = 0;} while (0)
274
275#define DMA_PTE_READ (1)
276#define DMA_PTE_WRITE (2)
277 227
278#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) 228#define QI_CC_TYPE 0x1
279#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) 229#define QI_IOTLB_TYPE 0x2
280#define dma_set_pte_prot(p, prot) \ 230#define QI_DIOTLB_TYPE 0x3
281 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) 231#define QI_IEC_TYPE 0x4
282#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) 232#define QI_IWD_TYPE 0x5
283#define dma_set_pte_addr(p, addr) do {\
284 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
285#define dma_pte_present(p) (((p).val & 3) != 0)
286 233
287struct intel_iommu; 234#define QI_IEC_SELECTIVE (((u64)1) << 4)
235#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
236#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
288 237
289struct dmar_domain { 238#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
290 int id; /* domain id */ 239#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
291 struct intel_iommu *iommu; /* back pointer to owning iommu */
292 240
293 struct list_head devices; /* all devices' list */ 241struct qi_desc {
294 struct iova_domain iovad; /* iova's that belong to this domain */ 242 u64 low, high;
243};
295 244
296 struct dma_pte *pgd; /* virtual address */ 245struct q_inval {
297 spinlock_t mapping_lock; /* page table lock */ 246 spinlock_t q_lock;
298 int gaw; /* max guest address width */ 247 struct qi_desc *desc; /* invalidation queue */
248 int *desc_status; /* desc status */
249 int free_head; /* first free entry */
250 int free_tail; /* last free entry */
251 int free_cnt;
252};
299 253
300 /* adjusted guest address width, 0 is level 2 30-bit */ 254#ifdef CONFIG_INTR_REMAP
301 int agaw; 255/* 1MB - maximum possible interrupt remapping table size */
256#define INTR_REMAP_PAGE_ORDER 8
257#define INTR_REMAP_TABLE_REG_SIZE 0xf
302 258
303#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 259#define INTR_REMAP_TABLE_ENTRIES 65536
304 int flags;
305};
306 260
307/* PCI domain-device relationship */ 261struct ir_table {
308struct device_domain_info { 262 struct irte *base;
309 struct list_head link; /* link to domain siblings */
310 struct list_head global; /* link to global list */
311 u8 bus; /* PCI bus numer */
312 u8 devfn; /* PCI devfn number */
313 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
314 struct dmar_domain *domain; /* pointer to domain */
315}; 263};
316 264#endif
317extern int init_dmars(void);
318 265
319struct intel_iommu { 266struct intel_iommu {
320 void __iomem *reg; /* Pointer to hardware regs, virtual addr */ 267 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
321 u64 cap; 268 u64 cap;
322 u64 ecap; 269 u64 ecap;
323 unsigned long *domain_ids; /* bitmap of domains */
324 struct dmar_domain **domains; /* ptr to domains */
325 int seg; 270 int seg;
326 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ 271 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
327 spinlock_t lock; /* protect context, domain ids */
328 spinlock_t register_lock; /* protect register handling */ 272 spinlock_t register_lock; /* protect register handling */
273 int seq_id; /* sequence id of the iommu */
274
275#ifdef CONFIG_DMAR
276 unsigned long *domain_ids; /* bitmap of domains */
277 struct dmar_domain **domains; /* ptr to domains */
278 spinlock_t lock; /* protect context, domain ids */
329 struct root_entry *root_entry; /* virtual address */ 279 struct root_entry *root_entry; /* virtual address */
330 280
331 unsigned int irq; 281 unsigned int irq;
332 unsigned char name[7]; /* Device Name */ 282 unsigned char name[7]; /* Device Name */
333 struct msi_msg saved_msg; 283 struct msi_msg saved_msg;
334 struct sys_device sysdev; 284 struct sys_device sysdev;
285#endif
286 struct q_inval *qi; /* Queued invalidation info */
287#ifdef CONFIG_INTR_REMAP
288 struct ir_table *ir_table; /* Interrupt remapping info */
289#endif
335}; 290};
336 291
337#ifndef CONFIG_DMAR_GFX_WA 292static inline void __iommu_flush_cache(
338static inline void iommu_prepare_gfx_mapping(void) 293 struct intel_iommu *iommu, void *addr, int size)
339{ 294{
340 return; 295 if (!ecap_coherent(iommu->ecap))
296 clflush_cache_range(addr, size);
341} 297}
342#endif /* !CONFIG_DMAR_GFX_WA */
343 298
299extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
300
301extern int alloc_iommu(struct dmar_drhd_unit *drhd);
302extern void free_iommu(struct intel_iommu *iommu);
303extern int dmar_enable_qi(struct intel_iommu *iommu);
304extern void qi_global_iec(struct intel_iommu *iommu);
305
306extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
344#endif 307#endif
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
new file mode 100644
index 000000000000..bb642cc5e18c
--- /dev/null
+++ b/drivers/pci/intr_remapping.c
@@ -0,0 +1,471 @@
1#include <linux/dmar.h>
2#include <linux/spinlock.h>
3#include <linux/jiffies.h>
4#include <linux/pci.h>
5#include <linux/irq.h>
6#include <asm/io_apic.h>
7#include "intel-iommu.h"
8#include "intr_remapping.h"
9
10static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; /* IOAPIC id -> IOMMU pairs, filled by ir_parse_ioapic_scope() */
11static int ir_ioapic_num; /* number of entries filled in ir_ioapic[] so far */
12int intr_remapping_enabled; /* set to 1 at the end of a successful enable_intr_remapping() */
13
14static struct { /* per-irq remapping state, indexed by irq number */
15 struct intel_iommu *iommu; /* IOMMU recorded for this irq; NULL means no mapping */
16 u16 irte_index; /* base index into iommu->ir_table->base[] */
17 u16 sub_handle; /* offset added to irte_index to reach this irq's entry */
18 u8 irte_mask; /* ilog2 of the block size set by alloc_irte(); passed as the flush mask */
19} irq_2_iommu[NR_IRQS];
20
21static DEFINE_SPINLOCK(irq_2_ir_lock); /* held by the functions in this file that update irq_2_iommu[] */
22
23int irq_remapped(int irq)
24{
25 if (irq > NR_IRQS)
26 return 0;
27
28 if (!irq_2_iommu[irq].iommu)
29 return 0;
30
31 return 1;
32}
33
34int get_irte(int irq, struct irte *entry)
35{
36 int index;
37
38 if (!entry || irq > NR_IRQS)
39 return -1;
40
41 spin_lock(&irq_2_ir_lock);
42 if (!irq_2_iommu[irq].iommu) {
43 spin_unlock(&irq_2_ir_lock);
44 return -1;
45 }
46
47 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
48 *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
49
50 spin_unlock(&irq_2_ir_lock);
51 return 0;
52}
53
54int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
55{
56 struct ir_table *table = iommu->ir_table;
57 u16 index, start_index;
58 unsigned int mask = 0;
59 int i;
60
61 if (!count)
62 return -1;
63
64 /*
65 * start the IRTE search from index 0.
66 */
67 index = start_index = 0;
68
69 if (count > 1) {
70 count = __roundup_pow_of_two(count);
71 mask = ilog2(count);
72 }
73
74 if (mask > ecap_max_handle_mask(iommu->ecap)) {
75 printk(KERN_ERR
76 "Requested mask %x exceeds the max invalidation handle"
77 " mask value %Lx\n", mask,
78 ecap_max_handle_mask(iommu->ecap));
79 return -1;
80 }
81
82 spin_lock(&irq_2_ir_lock);
83 do {
84 for (i = index; i < index + count; i++)
85 if (table->base[i].present)
86 break;
87 /* empty index found */
88 if (i == index + count)
89 break;
90
91 index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
92
93 if (index == start_index) {
94 spin_unlock(&irq_2_ir_lock);
95 printk(KERN_ERR "can't allocate an IRTE\n");
96 return -1;
97 }
98 } while (1);
99
100 for (i = index; i < index + count; i++)
101 table->base[i].present = 1;
102
103 irq_2_iommu[irq].iommu = iommu;
104 irq_2_iommu[irq].irte_index = index;
105 irq_2_iommu[irq].sub_handle = 0;
106 irq_2_iommu[irq].irte_mask = mask;
107
108 spin_unlock(&irq_2_ir_lock);
109
110 return index;
111}
112
113static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
114{
115 struct qi_desc desc;
116
117 desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
118 | QI_IEC_SELECTIVE;
119 desc.high = 0;
120
121 qi_submit_sync(&desc, iommu);
122}
123
124int map_irq_to_irte_handle(int irq, u16 *sub_handle)
125{
126 int index;
127
128 spin_lock(&irq_2_ir_lock);
129 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
130 spin_unlock(&irq_2_ir_lock);
131 return -1;
132 }
133
134 *sub_handle = irq_2_iommu[irq].sub_handle;
135 index = irq_2_iommu[irq].irte_index;
136 spin_unlock(&irq_2_ir_lock);
137 return index;
138}
139
140int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
141{
142 spin_lock(&irq_2_ir_lock);
143 if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
144 spin_unlock(&irq_2_ir_lock);
145 return -1;
146 }
147
148 irq_2_iommu[irq].iommu = iommu;
149 irq_2_iommu[irq].irte_index = index;
150 irq_2_iommu[irq].sub_handle = subhandle;
151 irq_2_iommu[irq].irte_mask = 0;
152
153 spin_unlock(&irq_2_ir_lock);
154
155 return 0;
156}
157
158int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
159{
160 spin_lock(&irq_2_ir_lock);
161 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
162 spin_unlock(&irq_2_ir_lock);
163 return -1;
164 }
165
166 irq_2_iommu[irq].iommu = NULL;
167 irq_2_iommu[irq].irte_index = 0;
168 irq_2_iommu[irq].sub_handle = 0;
169 irq_2_iommu[irq].irte_mask = 0;
170
171 spin_unlock(&irq_2_ir_lock);
172
173 return 0;
174}
175
176int modify_irte(int irq, struct irte *irte_modified)
177{
178 int index;
179 struct irte *irte;
180 struct intel_iommu *iommu;
181
182 spin_lock(&irq_2_ir_lock);
183 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
184 spin_unlock(&irq_2_ir_lock);
185 return -1;
186 }
187
188 iommu = irq_2_iommu[irq].iommu;
189
190 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
191 irte = &iommu->ir_table->base[index];
192
193 set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
194 __iommu_flush_cache(iommu, irte, sizeof(*irte));
195
196 qi_flush_iec(iommu, index, 0);
197
198 spin_unlock(&irq_2_ir_lock);
199 return 0;
200}
201
202int flush_irte(int irq)
203{
204 int index;
205 struct intel_iommu *iommu;
206
207 spin_lock(&irq_2_ir_lock);
208 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
209 spin_unlock(&irq_2_ir_lock);
210 return -1;
211 }
212
213 iommu = irq_2_iommu[irq].iommu;
214
215 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
216
217 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
218 spin_unlock(&irq_2_ir_lock);
219
220 return 0;
221}
222
223struct intel_iommu *map_ioapic_to_ir(int apic)
224{
225 int i;
226
227 for (i = 0; i < MAX_IO_APICS; i++)
228 if (ir_ioapic[i].id == apic)
229 return ir_ioapic[i].iommu;
230 return NULL;
231}
232
233struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
234{
235 struct dmar_drhd_unit *drhd;
236
237 drhd = dmar_find_matched_drhd_unit(dev);
238 if (!drhd)
239 return NULL;
240
241 return drhd->iommu;
242}
243
244int free_irte(int irq)
245{
246 int index, i;
247 struct irte *irte;
248 struct intel_iommu *iommu;
249
250 spin_lock(&irq_2_ir_lock);
251 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
252 spin_unlock(&irq_2_ir_lock);
253 return -1;
254 }
255
256 iommu = irq_2_iommu[irq].iommu;
257
258 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
259 irte = &iommu->ir_table->base[index];
260
261 if (!irq_2_iommu[irq].sub_handle) {
262 for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
263 set_64bit((unsigned long *)irte, 0);
264 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
265 }
266
267 irq_2_iommu[irq].iommu = NULL;
268 irq_2_iommu[irq].irte_index = 0;
269 irq_2_iommu[irq].sub_handle = 0;
270 irq_2_iommu[irq].irte_mask = 0;
271
272 spin_unlock(&irq_2_ir_lock);
273
274 return 0;
275}
276
277static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
278{
279 u64 addr;
280 u32 cmd, sts;
281 unsigned long flags;
282
283 addr = virt_to_phys((void *)iommu->ir_table->base);
284
285 spin_lock_irqsave(&iommu->register_lock, flags);
286
287 dmar_writeq(iommu->reg + DMAR_IRTA_REG,
288 (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
289
290 /* Set interrupt-remapping table pointer */
291 cmd = iommu->gcmd | DMA_GCMD_SIRTP;
292 writel(cmd, iommu->reg + DMAR_GCMD_REG);
293
294 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
295 readl, (sts & DMA_GSTS_IRTPS), sts);
296 spin_unlock_irqrestore(&iommu->register_lock, flags);
297
298 /*
299 * global invalidation of interrupt entry cache before enabling
300 * interrupt-remapping.
301 */
302 qi_global_iec(iommu);
303
304 spin_lock_irqsave(&iommu->register_lock, flags);
305
306 /* Enable interrupt-remapping */
307 cmd = iommu->gcmd | DMA_GCMD_IRE;
308 iommu->gcmd |= DMA_GCMD_IRE;
309 writel(cmd, iommu->reg + DMAR_GCMD_REG);
310
311 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
312 readl, (sts & DMA_GSTS_IRES), sts);
313
314 spin_unlock_irqrestore(&iommu->register_lock, flags);
315}
316
317
318static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
319{
320 struct ir_table *ir_table;
321 struct page *pages;
322
323 ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
324 GFP_KERNEL);
325
326 if (!iommu->ir_table)
327 return -ENOMEM;
328
329 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
330
331 if (!pages) {
332 printk(KERN_ERR "failed to allocate pages of order %d\n",
333 INTR_REMAP_PAGE_ORDER);
334 kfree(iommu->ir_table);
335 return -ENOMEM;
336 }
337
338 ir_table->base = page_address(pages);
339
340 iommu_set_intr_remapping(iommu, mode);
341 return 0;
342}
343
344int __init enable_intr_remapping(int eim)
345{
346 struct dmar_drhd_unit *drhd;
347 int setup = 0;
348
349 /*
350 * check for the Interrupt-remapping support
351 */
352 for_each_drhd_unit(drhd) {
353 struct intel_iommu *iommu = drhd->iommu;
354
355 if (!ecap_ir_support(iommu->ecap))
356 continue;
357
358 if (eim && !ecap_eim_support(iommu->ecap)) {
359 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
360 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
361 return -1;
362 }
363 }
364
365 /*
366 * Enable queued invalidation for all the DRHD's.
367 */
368 for_each_drhd_unit(drhd) {
369 int ret;
370 struct intel_iommu *iommu = drhd->iommu;
371 ret = dmar_enable_qi(iommu);
372
373 if (ret) {
374 printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
375 " invalidation, ecap %Lx, ret %d\n",
376 drhd->reg_base_addr, iommu->ecap, ret);
377 return -1;
378 }
379 }
380
381 /*
382 * Setup Interrupt-remapping for all the DRHD's now.
383 */
384 for_each_drhd_unit(drhd) {
385 struct intel_iommu *iommu = drhd->iommu;
386
387 if (!ecap_ir_support(iommu->ecap))
388 continue;
389
390 if (setup_intr_remapping(iommu, eim))
391 goto error;
392
393 setup = 1;
394 }
395
396 if (!setup)
397 goto error;
398
399 intr_remapping_enabled = 1;
400
401 return 0;
402
403error:
404 /*
405 * handle error condition gracefully here!
406 */
407 return -1;
408}
409
410static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
411 struct intel_iommu *iommu)
412{
413 struct acpi_dmar_hardware_unit *drhd;
414 struct acpi_dmar_device_scope *scope;
415 void *start, *end;
416
417 drhd = (struct acpi_dmar_hardware_unit *)header;
418
419 start = (void *)(drhd + 1);
420 end = ((void *)drhd) + header->length;
421
422 while (start < end) {
423 scope = start;
424 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
425 if (ir_ioapic_num == MAX_IO_APICS) {
426 printk(KERN_WARNING "Exceeded Max IO APICS\n");
427 return -1;
428 }
429
430 printk(KERN_INFO "IOAPIC id %d under DRHD base"
431 " 0x%Lx\n", scope->enumeration_id,
432 drhd->address);
433
434 ir_ioapic[ir_ioapic_num].iommu = iommu;
435 ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
436 ir_ioapic_num++;
437 }
438 start += scope->length;
439 }
440
441 return 0;
442}
443
444/*
445 * Finds the assocaition between IOAPIC's and its Interrupt-remapping
446 * hardware unit.
447 */
448int __init parse_ioapics_under_ir(void)
449{
450 struct dmar_drhd_unit *drhd;
451 int ir_supported = 0;
452
453 for_each_drhd_unit(drhd) {
454 struct intel_iommu *iommu = drhd->iommu;
455
456 if (ecap_ir_support(iommu->ecap)) {
457 if (ir_parse_ioapic_scope(drhd->hdr, iommu))
458 return -1;
459
460 ir_supported = 1;
461 }
462 }
463
464 if (ir_supported && ir_ioapic_num != nr_ioapics) {
465 printk(KERN_WARNING
466 "Not all IO-APIC's listed under remapping hardware\n");
467 return -1;
468 }
469
470 return ir_supported;
471}
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
new file mode 100644
index 000000000000..05f2635bbe4e
--- /dev/null
+++ b/drivers/pci/intr_remapping.h
@@ -0,0 +1,8 @@
1#include "intel-iommu.h"
2
3struct ioapic_scope {
4 struct intel_iommu *iommu;
5 unsigned int id;
6};
7
8#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 81c16cba5417..90212ac33be3 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -40,7 +40,6 @@
40#include <linux/ioport.h> 40#include <linux/ioport.h>
41#include <linux/blkdev.h> 41#include <linux/blkdev.h>
42#include <linux/errno.h> 42#include <linux/errno.h>
43#include <linux/hdreg.h>
44#include <linux/slab.h> 43#include <linux/slab.h>
45#include <linux/ide.h> 44#include <linux/ide.h>
46#include <linux/scatterlist.h> 45#include <linux/scatterlist.h>
@@ -131,50 +130,6 @@ static inline idescsi_scsi_t *drive_to_idescsi(ide_drive_t *ide_drive)
131 return scsihost_to_idescsi(ide_drive->driver_data); 130 return scsihost_to_idescsi(ide_drive->driver_data);
132} 131}
133 132
134/*
135 * PIO data transfer routine using the scatter gather table.
136 */
137static void ide_scsi_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
138 unsigned int bcount, int write)
139{
140 ide_hwif_t *hwif = drive->hwif;
141 const struct ide_tp_ops *tp_ops = hwif->tp_ops;
142 xfer_func_t *xf = write ? tp_ops->output_data : tp_ops->input_data;
143 char *buf;
144 int count;
145
146 while (bcount) {
147 count = min(pc->sg->length - pc->b_count, bcount);
148 if (PageHighMem(sg_page(pc->sg))) {
149 unsigned long flags;
150
151 local_irq_save(flags);
152 buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) +
153 pc->sg->offset;
154 xf(drive, NULL, buf + pc->b_count, count);
155 kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
156 local_irq_restore(flags);
157 } else {
158 buf = sg_virt(pc->sg);
159 xf(drive, NULL, buf + pc->b_count, count);
160 }
161 bcount -= count; pc->b_count += count;
162 if (pc->b_count == pc->sg->length) {
163 if (!--pc->sg_cnt)
164 break;
165 pc->sg = sg_next(pc->sg);
166 pc->b_count = 0;
167 }
168 }
169
170 if (bcount) {
171 printk(KERN_ERR "%s: scatter gather table too small, %s\n",
172 drive->name, write ? "padding with zeros"
173 : "discarding data");
174 ide_pad_transfer(drive, write, bcount);
175 }
176}
177
178static void ide_scsi_hex_dump(u8 *data, int len) 133static void ide_scsi_hex_dump(u8 *data, int len)
179{ 134{
180 print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0); 135 print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0);
@@ -244,9 +199,9 @@ idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
244{ 199{
245 ide_hwif_t *hwif = drive->hwif; 200 ide_hwif_t *hwif = drive->hwif;
246 201
247 if (hwif->tp_ops->read_status(hwif) & (BUSY_STAT | DRQ_STAT)) 202 if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ))
248 /* force an abort */ 203 /* force an abort */
249 hwif->tp_ops->exec_command(hwif, WIN_IDLEIMMEDIATE); 204 hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE);
250 205
251 rq->errors++; 206 rq->errors++;
252 207
@@ -344,7 +299,7 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
344 299
345 return ide_pc_intr(drive, pc, idescsi_pc_intr, get_timeout(pc), 300 return ide_pc_intr(drive, pc, idescsi_pc_intr, get_timeout(pc),
346 idescsi_expiry, NULL, NULL, NULL, 301 idescsi_expiry, NULL, NULL, NULL,
347 ide_scsi_io_buffers); 302 ide_io_buffers);
348} 303}
349 304
350static ide_startstop_t idescsi_transfer_pc(ide_drive_t *drive) 305static ide_startstop_t idescsi_transfer_pc(ide_drive_t *drive)
@@ -430,21 +385,41 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r
430} 385}
431 386
432#ifdef CONFIG_IDE_PROC_FS 387#ifdef CONFIG_IDE_PROC_FS
433static void idescsi_add_settings(ide_drive_t *drive) 388#define ide_scsi_devset_get(name, field) \
434{ 389static int get_##name(ide_drive_t *drive) \
435 idescsi_scsi_t *scsi = drive_to_idescsi(drive); 390{ \
436 391 idescsi_scsi_t *scsi = drive_to_idescsi(drive); \
437/* 392 return scsi->field; \
438 * drive setting name read/write data type min max mul_factor div_factor data pointer set function 393}
439 */ 394
440 ide_add_setting(drive, "bios_cyl", SETTING_RW, TYPE_INT, 0, 1023, 1, 1, &drive->bios_cyl, NULL); 395#define ide_scsi_devset_set(name, field) \
441 ide_add_setting(drive, "bios_head", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, &drive->bios_head, NULL); 396static int set_##name(ide_drive_t *drive, int arg) \
442 ide_add_setting(drive, "bios_sect", SETTING_RW, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL); 397{ \
443 ide_add_setting(drive, "transform", SETTING_RW, TYPE_INT, 0, 3, 1, 1, &scsi->transform, NULL); 398 idescsi_scsi_t *scsi = drive_to_idescsi(drive); \
444 ide_add_setting(drive, "log", SETTING_RW, TYPE_INT, 0, 1, 1, 1, &scsi->log, NULL); 399 scsi->field = arg; \
445} 400 return 0; \
446#else 401}
447static inline void idescsi_add_settings(ide_drive_t *drive) { ; } 402
403#define ide_scsi_devset_rw_field(_name, _field) \
404ide_scsi_devset_get(_name, _field); \
405ide_scsi_devset_set(_name, _field); \
406IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name);
407
408ide_devset_rw_field(bios_cyl, bios_cyl);
409ide_devset_rw_field(bios_head, bios_head);
410ide_devset_rw_field(bios_sect, bios_sect);
411
412ide_scsi_devset_rw_field(transform, transform);
413ide_scsi_devset_rw_field(log, log);
414
415static const struct ide_proc_devset idescsi_settings[] = {
416 IDE_PROC_DEVSET(bios_cyl, 0, 1023),
417 IDE_PROC_DEVSET(bios_head, 0, 255),
418 IDE_PROC_DEVSET(bios_sect, 0, 63),
419 IDE_PROC_DEVSET(log, 0, 1),
420 IDE_PROC_DEVSET(transform, 0, 3),
421 { 0 },
422};
448#endif 423#endif
449 424
450/* 425/*
@@ -452,7 +427,7 @@ static inline void idescsi_add_settings(ide_drive_t *drive) { ; }
452 */ 427 */
453static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi) 428static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi)
454{ 429{
455 if (drive->id && (drive->id->config & 0x0060) == 0x20) 430 if ((drive->id[ATA_ID_CONFIG] & 0x0060) == 0x20)
456 set_bit(IDE_AFLAG_DRQ_INTERRUPT, &drive->atapi_flags); 431 set_bit(IDE_AFLAG_DRQ_INTERRUPT, &drive->atapi_flags);
457 clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform); 432 clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform);
458#if IDESCSI_DEBUG_LOG 433#if IDESCSI_DEBUG_LOG
@@ -461,7 +436,7 @@ static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi)
461 436
462 drive->pc_callback = ide_scsi_callback; 437 drive->pc_callback = ide_scsi_callback;
463 438
464 idescsi_add_settings(drive); 439 ide_proc_register_driver(drive, scsi->driver);
465} 440}
466 441
467static void ide_scsi_remove(ide_drive_t *drive) 442static void ide_scsi_remove(ide_drive_t *drive)
@@ -503,12 +478,12 @@ static ide_driver_t idescsi_driver = {
503 .remove = ide_scsi_remove, 478 .remove = ide_scsi_remove,
504 .version = IDESCSI_VERSION, 479 .version = IDESCSI_VERSION,
505 .media = ide_scsi, 480 .media = ide_scsi,
506 .supports_dsc_overlap = 0,
507 .do_request = idescsi_do_request, 481 .do_request = idescsi_do_request,
508 .end_request = idescsi_end_request, 482 .end_request = idescsi_end_request,
509 .error = idescsi_atapi_error, 483 .error = idescsi_atapi_error,
510#ifdef CONFIG_IDE_PROC_FS 484#ifdef CONFIG_IDE_PROC_FS
511 .proc = idescsi_proc, 485 .proc = idescsi_proc,
486 .settings = idescsi_settings,
512#endif 487#endif
513}; 488};
514 489
@@ -811,6 +786,7 @@ static int ide_scsi_probe(ide_drive_t *drive)
811 struct gendisk *g; 786 struct gendisk *g;
812 static int warned; 787 static int warned;
813 int err = -ENOMEM; 788 int err = -ENOMEM;
789 u16 last_lun;
814 790
815 if (!warned && drive->media == ide_cdrom) { 791 if (!warned && drive->media == ide_cdrom) {
816 printk(KERN_WARNING "ide-scsi is deprecated for cd burning! Use ide-cd and give dev=/dev/hdX as device\n"); 792 printk(KERN_WARNING "ide-scsi is deprecated for cd burning! Use ide-cd and give dev=/dev/hdX as device\n");
@@ -821,7 +797,6 @@ static int ide_scsi_probe(ide_drive_t *drive)
821 return -ENODEV; 797 return -ENODEV;
822 798
823 if (!strstr("ide-scsi", drive->driver_req) || 799 if (!strstr("ide-scsi", drive->driver_req) ||
824 !drive->present ||
825 drive->media == ide_disk || 800 drive->media == ide_disk ||
826 !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t)))) 801 !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t))))
827 return -ENODEV; 802 return -ENODEV;
@@ -836,12 +811,12 @@ static int ide_scsi_probe(ide_drive_t *drive)
836 811
837 host->max_id = 1; 812 host->max_id = 1;
838 813
839 if (drive->id->last_lun) 814 last_lun = drive->id[ATA_ID_LAST_LUN];
840 debug_log("%s: id->last_lun=%u\n", drive->name, 815 if (last_lun)
841 drive->id->last_lun); 816 debug_log("%s: last_lun=%u\n", drive->name, last_lun);
842 817
843 if ((drive->id->last_lun & 0x7) != 7) 818 if ((last_lun & 7) != 7)
844 host->max_lun = (drive->id->last_lun & 0x7) + 1; 819 host->max_lun = (last_lun & 7) + 1;
845 else 820 else
846 host->max_lun = 1; 821 host->max_lun = 1;
847 822
@@ -852,7 +827,6 @@ static int ide_scsi_probe(ide_drive_t *drive)
852 idescsi->host = host; 827 idescsi->host = host;
853 idescsi->disk = g; 828 idescsi->disk = g;
854 g->private_data = &idescsi->driver; 829 g->private_data = &idescsi->driver;
855 ide_proc_register_driver(drive, &idescsi_driver);
856 err = 0; 830 err = 0;
857 idescsi_setup(drive, idescsi); 831 idescsi_setup(drive, idescsi);
858 g->fops = &idescsi_ops; 832 g->fops = &idescsi_ops;
diff --git a/fs/Kconfig b/fs/Kconfig
index abccb5dab9a8..40183d94b683 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -136,37 +136,51 @@ config EXT3_FS_SECURITY
136 If you are not using a security module that requires using 136 If you are not using a security module that requires using
137 extended attributes for file security labels, say N. 137 extended attributes for file security labels, say N.
138 138
139config EXT4DEV_FS 139config EXT4_FS
140 tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" 140 tristate "The Extended 4 (ext4) filesystem"
141 depends on EXPERIMENTAL
142 select JBD2 141 select JBD2
143 select CRC16 142 select CRC16
144 help 143 help
145 Ext4dev is a predecessor filesystem of the next generation 144 This is the next generation of the ext3 filesystem.
146 extended fs ext4, based on ext3 filesystem code. It will be
147 renamed ext4 fs later, once ext4dev is mature and stabilized.
148 145
149 Unlike the change from ext2 filesystem to ext3 filesystem, 146 Unlike the change from ext2 filesystem to ext3 filesystem,
150 the on-disk format of ext4dev is not the same as ext3 any more: 147 the on-disk format of ext4 is not forwards compatible with
151 it is based on extent maps and it supports 48-bit physical block 148 ext3; it is based on extent maps and it supports 48-bit
152 numbers. These combined on-disk format changes will allow 149 physical block numbers. The ext4 filesystem also supports delayed
153 ext4dev/ext4 to handle more than 16 TB filesystem volumes -- 150 allocation, persistent preallocation, high resolution time stamps,
154 a hard limit that ext3 cannot overcome without changing the 151 and a number of other features to improve performance and speed
155 on-disk format. 152 up fsck time. For more information, please see the web pages at
156 153 http://ext4.wiki.kernel.org.
157 Other than extent maps and 48-bit block numbers, ext4dev also is 154
158 likely to have other new features such as persistent preallocation, 155 The ext4 filesystem will support mounting an ext3
159 high resolution time stamps, and larger file support etc. These 156 filesystem; while there will be some performance gains from
160 features will be added to ext4dev gradually. 157 the delayed allocation and inode table readahead, the best
158 performance gains will require enabling ext4 features in the
159 filesystem, or formatting a new filesystem as an ext4
160 filesystem initially.
161 161
162 To compile this file system support as a module, choose M here. The 162 To compile this file system support as a module, choose M here. The
163 module will be called ext4dev. 163 module will be called ext4dev.
164 164
165 If unsure, say N. 165 If unsure, say N.
166 166
167config EXT4DEV_FS_XATTR 167config EXT4DEV_COMPAT
168 bool "Ext4dev extended attributes" 168 bool "Enable ext4dev compatibility"
169 depends on EXT4DEV_FS 169 depends on EXT4_FS
170 help
171 Starting with 2.6.28, the name of the ext4 filesystem was
172 renamed from ext4dev to ext4. Unfortunately there are some
173 legacy userspace programs (such as klibc's fstype) that have
174 "ext4dev" hardcoded.
175
176 To enable backwards compatibility so that systems that are
177 still expecting to mount ext4 filesystems using ext4dev,
178 choose Y here. This feature will go away by 2.6.31, so
179 please arrange to get your userspace programs fixed!
180
181config EXT4_FS_XATTR
182 bool "Ext4 extended attributes"
183 depends on EXT4_FS
170 default y 184 default y
171 help 185 help
172 Extended attributes are name:value pairs associated with inodes by 186 Extended attributes are name:value pairs associated with inodes by
@@ -175,11 +189,11 @@ config EXT4DEV_FS_XATTR
175 189
176 If unsure, say N. 190 If unsure, say N.
177 191
178 You need this for POSIX ACL support on ext4dev/ext4. 192 You need this for POSIX ACL support on ext4.
179 193
180config EXT4DEV_FS_POSIX_ACL 194config EXT4_FS_POSIX_ACL
181 bool "Ext4dev POSIX Access Control Lists" 195 bool "Ext4 POSIX Access Control Lists"
182 depends on EXT4DEV_FS_XATTR 196 depends on EXT4_FS_XATTR
183 select FS_POSIX_ACL 197 select FS_POSIX_ACL
184 help 198 help
185 POSIX Access Control Lists (ACLs) support permissions for users and 199 POSIX Access Control Lists (ACLs) support permissions for users and
@@ -190,14 +204,14 @@ config EXT4DEV_FS_POSIX_ACL
190 204
191 If you don't know what Access Control Lists are, say N 205 If you don't know what Access Control Lists are, say N
192 206
193config EXT4DEV_FS_SECURITY 207config EXT4_FS_SECURITY
194 bool "Ext4dev Security Labels" 208 bool "Ext4 Security Labels"
195 depends on EXT4DEV_FS_XATTR 209 depends on EXT4_FS_XATTR
196 help 210 help
197 Security labels support alternative access control models 211 Security labels support alternative access control models
198 implemented by security modules like SELinux. This option 212 implemented by security modules like SELinux. This option
199 enables an extended attribute handler for file security 213 enables an extended attribute handler for file security
200 labels in the ext4dev/ext4 filesystem. 214 labels in the ext4 filesystem.
201 215
202 If you are not using a security module that requires using 216 If you are not using a security module that requires using
203 extended attributes for file security labels, say N. 217 extended attributes for file security labels, say N.
@@ -240,22 +254,22 @@ config JBD2
240 help 254 help
241 This is a generic journaling layer for block devices that support 255 This is a generic journaling layer for block devices that support
242 both 32-bit and 64-bit block numbers. It is currently used by 256 both 32-bit and 64-bit block numbers. It is currently used by
243 the ext4dev/ext4 filesystem, but it could also be used to add 257 the ext4 filesystem, but it could also be used to add
244 journal support to other file systems or block devices such 258 journal support to other file systems or block devices such
245 as RAID or LVM. 259 as RAID or LVM.
246 260
247 If you are using ext4dev/ext4, you need to say Y here. If you are not 261 If you are using ext4, you need to say Y here. If you are not
248 using ext4dev/ext4 then you will probably want to say N. 262 using ext4 then you will probably want to say N.
249 263
250 To compile this device as a module, choose M here. The module will be 264 To compile this device as a module, choose M here. The module will be
251 called jbd2. If you are compiling ext4dev/ext4 into the kernel, 265 called jbd2. If you are compiling ext4 into the kernel,
252 you cannot compile this code as a module. 266 you cannot compile this code as a module.
253 267
254config JBD2_DEBUG 268config JBD2_DEBUG
255 bool "JBD2 (ext4dev/ext4) debugging support" 269 bool "JBD2 (ext4) debugging support"
256 depends on JBD2 && DEBUG_FS 270 depends on JBD2 && DEBUG_FS
257 help 271 help
258 If you are using the ext4dev/ext4 journaled file system (or 272 If you are using the ext4 journaled file system (or
259 potentially any other filesystem/device using JBD2), this option 273 potentially any other filesystem/device using JBD2), this option
260 allows you to enable debugging output while the system is running, 274 allows you to enable debugging output while the system is running,
261 in order to help track down any problems you are having. 275 in order to help track down any problems you are having.
@@ -270,9 +284,9 @@ config JBD2_DEBUG
270config FS_MBCACHE 284config FS_MBCACHE
271# Meta block cache for Extended Attributes (ext2/ext3/ext4) 285# Meta block cache for Extended Attributes (ext2/ext3/ext4)
272 tristate 286 tristate
273 depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR 287 depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR
274 default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y 288 default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y
275 default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m 289 default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m
276 290
277config REISERFS_FS 291config REISERFS_FS
278 tristate "Reiserfs support" 292 tristate "Reiserfs support"
diff --git a/fs/Makefile b/fs/Makefile
index a1482a5eff15..de404b00eb0c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -69,7 +69,7 @@ obj-$(CONFIG_DLM) += dlm/
69# Do not add any filesystems before this line 69# Do not add any filesystems before this line
70obj-$(CONFIG_REISERFS_FS) += reiserfs/ 70obj-$(CONFIG_REISERFS_FS) += reiserfs/
71obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 71obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
72obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev 72obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4dev
73obj-$(CONFIG_JBD) += jbd/ 73obj-$(CONFIG_JBD) += jbd/
74obj-$(CONFIG_JBD2) += jbd2/ 74obj-$(CONFIG_JBD2) += jbd2/
75obj-$(CONFIG_EXT2_FS) += ext2/ 75obj-$(CONFIG_EXT2_FS) += ext2/
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 47d88da2d33b..bae998c1e44e 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -133,6 +133,8 @@ extern void ext2_truncate (struct inode *);
133extern int ext2_setattr (struct dentry *, struct iattr *); 133extern int ext2_setattr (struct dentry *, struct iattr *);
134extern void ext2_set_inode_flags(struct inode *inode); 134extern void ext2_set_inode_flags(struct inode *inode);
135extern void ext2_get_inode_flags(struct ext2_inode_info *); 135extern void ext2_get_inode_flags(struct ext2_inode_info *);
136extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
137 u64 start, u64 len);
136int __ext2_write_begin(struct file *file, struct address_space *mapping, 138int __ext2_write_begin(struct file *file, struct address_space *mapping,
137 loff_t pos, unsigned len, unsigned flags, 139 loff_t pos, unsigned len, unsigned flags,
138 struct page **pagep, void **fsdata); 140 struct page **pagep, void **fsdata);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 5f2fa9c36293..45ed07122182 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -86,4 +86,5 @@ const struct inode_operations ext2_file_inode_operations = {
86#endif 86#endif
87 .setattr = ext2_setattr, 87 .setattr = ext2_setattr,
88 .permission = ext2_permission, 88 .permission = ext2_permission,
89 .fiemap = ext2_fiemap,
89}; 90};
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 991d6dfeb51f..7658b33e2653 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,6 +31,7 @@
31#include <linux/writeback.h> 31#include <linux/writeback.h>
32#include <linux/buffer_head.h> 32#include <linux/buffer_head.h>
33#include <linux/mpage.h> 33#include <linux/mpage.h>
34#include <linux/fiemap.h>
34#include "ext2.h" 35#include "ext2.h"
35#include "acl.h" 36#include "acl.h"
36#include "xip.h" 37#include "xip.h"
@@ -704,6 +705,13 @@ int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_
704 705
705} 706}
706 707
708int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
709 u64 start, u64 len)
710{
711 return generic_block_fiemap(inode, fieinfo, start, len,
712 ext2_get_block);
713}
714
707static int ext2_writepage(struct page *page, struct writeback_control *wbc) 715static int ext2_writepage(struct page *page, struct writeback_control *wbc)
708{ 716{
709 return block_write_full_page(page, ext2_get_block, wbc); 717 return block_write_full_page(page, ext2_get_block, wbc);
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index acc4913d3019..3be1e0689c9a 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -134,5 +134,6 @@ const struct inode_operations ext3_file_inode_operations = {
134 .removexattr = generic_removexattr, 134 .removexattr = generic_removexattr,
135#endif 135#endif
136 .permission = ext3_permission, 136 .permission = ext3_permission,
137 .fiemap = ext3_fiemap,
137}; 138};
138 139
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 507d8689b111..ebfec4d0148e 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -36,6 +36,7 @@
36#include <linux/mpage.h> 36#include <linux/mpage.h>
37#include <linux/uio.h> 37#include <linux/uio.h>
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <linux/fiemap.h>
39#include "xattr.h" 40#include "xattr.h"
40#include "acl.h" 41#include "acl.h"
41 42
@@ -981,6 +982,13 @@ out:
981 return ret; 982 return ret;
982} 983}
983 984
985int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
986 u64 start, u64 len)
987{
988 return generic_block_fiemap(inode, fieinfo, start, len,
989 ext3_get_block);
990}
991
984/* 992/*
985 * `handle' can be NULL if create is zero 993 * `handle' can be NULL if create is zero
986 */ 994 */
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index ac6fa8ca0a2f..a8ff003a00f7 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -2,12 +2,12 @@
2# Makefile for the linux ext4-filesystem routines. 2# Makefile for the linux ext4-filesystem routines.
3# 3#
4 4
5obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o 5obj-$(CONFIG_EXT4_FS) += ext4.o
6 6
7ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o 9 ext4_jbd2.o migrate.o mballoc.o
10 10
11ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 11ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
12ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o 12ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
13ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o 13ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index cd2b855a07d6..cb45257a246e 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size)
51 } 51 }
52} 52}
53 53
54#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl 56/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
57 if the ACL has not been cached */ 57 if the ACL has not been cached */
58#define EXT4_ACL_NOT_CACHED ((void *)-1) 58#define EXT4_ACL_NOT_CACHED ((void *)-1)
59 59
60/* acl.c */ 60/* acl.c */
61extern int ext4_permission (struct inode *, int); 61extern int ext4_permission(struct inode *, int);
62extern int ext4_acl_chmod (struct inode *); 62extern int ext4_acl_chmod(struct inode *);
63extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); 63extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
64 64
65#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ 65#else /* CONFIG_EXT4_FS_POSIX_ACL */
66#include <linux/sched.h> 66#include <linux/sched.h>
67#define ext4_permission NULL 67#define ext4_permission NULL
68 68
@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
77{ 77{
78 return 0; 78 return 0;
79} 79}
80#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ 80#endif /* CONFIG_EXT4_FS_POSIX_ACL */
81 81
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e9fa960ba6da..bd2ece228827 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -83,6 +83,7 @@ static int ext4_group_used_meta_blocks(struct super_block *sb,
83 } 83 }
84 return used_blocks; 84 return used_blocks;
85} 85}
86
86/* Initializes an uninitialized block bitmap if given, and returns the 87/* Initializes an uninitialized block bitmap if given, and returns the
87 * number of blocks free in the group. */ 88 * number of blocks free in the group. */
88unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 89unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
@@ -132,7 +133,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
132 */ 133 */
133 group_blocks = ext4_blocks_count(sbi->s_es) - 134 group_blocks = ext4_blocks_count(sbi->s_es) -
134 le32_to_cpu(sbi->s_es->s_first_data_block) - 135 le32_to_cpu(sbi->s_es->s_first_data_block) -
135 (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); 136 (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
136 } else { 137 } else {
137 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 138 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
138 } 139 }
@@ -200,20 +201,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
200 * @bh: pointer to the buffer head to store the block 201 * @bh: pointer to the buffer head to store the block
201 * group descriptor 202 * group descriptor
202 */ 203 */
203struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 204struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
204 ext4_group_t block_group, 205 ext4_group_t block_group,
205 struct buffer_head ** bh) 206 struct buffer_head **bh)
206{ 207{
207 unsigned long group_desc; 208 unsigned long group_desc;
208 unsigned long offset; 209 unsigned long offset;
209 struct ext4_group_desc * desc; 210 struct ext4_group_desc *desc;
210 struct ext4_sb_info *sbi = EXT4_SB(sb); 211 struct ext4_sb_info *sbi = EXT4_SB(sb);
211 212
212 if (block_group >= sbi->s_groups_count) { 213 if (block_group >= sbi->s_groups_count) {
213 ext4_error (sb, "ext4_get_group_desc", 214 ext4_error(sb, "ext4_get_group_desc",
214 "block_group >= groups_count - " 215 "block_group >= groups_count - "
215 "block_group = %lu, groups_count = %lu", 216 "block_group = %lu, groups_count = %lu",
216 block_group, sbi->s_groups_count); 217 block_group, sbi->s_groups_count);
217 218
218 return NULL; 219 return NULL;
219 } 220 }
@@ -222,10 +223,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
222 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); 223 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
223 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); 224 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
224 if (!sbi->s_group_desc[group_desc]) { 225 if (!sbi->s_group_desc[group_desc]) {
225 ext4_error (sb, "ext4_get_group_desc", 226 ext4_error(sb, "ext4_get_group_desc",
226 "Group descriptor not loaded - " 227 "Group descriptor not loaded - "
227 "block_group = %lu, group_desc = %lu, desc = %lu", 228 "block_group = %lu, group_desc = %lu, desc = %lu",
228 block_group, group_desc, offset); 229 block_group, group_desc, offset);
229 return NULL; 230 return NULL;
230 } 231 }
231 232
@@ -302,8 +303,8 @@ err_out:
302struct buffer_head * 303struct buffer_head *
303ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) 304ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
304{ 305{
305 struct ext4_group_desc * desc; 306 struct ext4_group_desc *desc;
306 struct buffer_head * bh = NULL; 307 struct buffer_head *bh = NULL;
307 ext4_fsblk_t bitmap_blk; 308 ext4_fsblk_t bitmap_blk;
308 309
309 desc = ext4_get_group_desc(sb, block_group, NULL); 310 desc = ext4_get_group_desc(sb, block_group, NULL);
@@ -318,9 +319,11 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
318 block_group, bitmap_blk); 319 block_group, bitmap_blk);
319 return NULL; 320 return NULL;
320 } 321 }
321 if (bh_uptodate_or_lock(bh)) 322 if (buffer_uptodate(bh) &&
323 !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
322 return bh; 324 return bh;
323 325
326 lock_buffer(bh);
324 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 327 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
325 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 328 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
326 ext4_init_block_bitmap(sb, bh, block_group, desc); 329 ext4_init_block_bitmap(sb, bh, block_group, desc);
@@ -345,301 +348,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
345 */ 348 */
346 return bh; 349 return bh;
347} 350}
348/*
349 * The reservation window structure operations
350 * --------------------------------------------
351 * Operations include:
352 * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
353 *
354 * We use a red-black tree to represent per-filesystem reservation
355 * windows.
356 *
357 */
358
359/**
360 * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
361 * @rb_root: root of per-filesystem reservation rb tree
362 * @verbose: verbose mode
363 * @fn: function which wishes to dump the reservation map
364 *
365 * If verbose is turned on, it will print the whole block reservation
366 * windows(start, end). Otherwise, it will only print out the "bad" windows,
367 * those windows that overlap with their immediate neighbors.
368 */
369#if 1
370static void __rsv_window_dump(struct rb_root *root, int verbose,
371 const char *fn)
372{
373 struct rb_node *n;
374 struct ext4_reserve_window_node *rsv, *prev;
375 int bad;
376
377restart:
378 n = rb_first(root);
379 bad = 0;
380 prev = NULL;
381
382 printk("Block Allocation Reservation Windows Map (%s):\n", fn);
383 while (n) {
384 rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
385 if (verbose)
386 printk("reservation window 0x%p "
387 "start: %llu, end: %llu\n",
388 rsv, rsv->rsv_start, rsv->rsv_end);
389 if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
390 printk("Bad reservation %p (start >= end)\n",
391 rsv);
392 bad = 1;
393 }
394 if (prev && prev->rsv_end >= rsv->rsv_start) {
395 printk("Bad reservation %p (prev->end >= start)\n",
396 rsv);
397 bad = 1;
398 }
399 if (bad) {
400 if (!verbose) {
401 printk("Restarting reservation walk in verbose mode\n");
402 verbose = 1;
403 goto restart;
404 }
405 }
406 n = rb_next(n);
407 prev = rsv;
408 }
409 printk("Window map complete.\n");
410 BUG_ON(bad);
411}
412#define rsv_window_dump(root, verbose) \
413 __rsv_window_dump((root), (verbose), __func__)
414#else
415#define rsv_window_dump(root, verbose) do {} while (0)
416#endif
417
418/**
419 * goal_in_my_reservation()
420 * @rsv: inode's reservation window
421 * @grp_goal: given goal block relative to the allocation block group
422 * @group: the current allocation block group
423 * @sb: filesystem super block
424 *
425 * Test if the given goal block (group relative) is within the file's
426 * own block reservation window range.
427 *
428 * If the reservation window is outside the goal allocation group, return 0;
429 * grp_goal (given goal block) could be -1, which means no specific
430 * goal block. In this case, always return 1.
431 * If the goal block is within the reservation window, return 1;
432 * otherwise, return 0;
433 */
434static int
435goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
436 ext4_group_t group, struct super_block *sb)
437{
438 ext4_fsblk_t group_first_block, group_last_block;
439
440 group_first_block = ext4_group_first_block_no(sb, group);
441 group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
442
443 if ((rsv->_rsv_start > group_last_block) ||
444 (rsv->_rsv_end < group_first_block))
445 return 0;
446 if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
447 || (grp_goal + group_first_block > rsv->_rsv_end)))
448 return 0;
449 return 1;
450}
451
452/**
453 * search_reserve_window()
454 * @rb_root: root of reservation tree
455 * @goal: target allocation block
456 *
457 * Find the reserved window which includes the goal, or the previous one
458 * if the goal is not in any window.
459 * Returns NULL if there are no windows or if all windows start after the goal.
460 */
461static struct ext4_reserve_window_node *
462search_reserve_window(struct rb_root *root, ext4_fsblk_t goal)
463{
464 struct rb_node *n = root->rb_node;
465 struct ext4_reserve_window_node *rsv;
466
467 if (!n)
468 return NULL;
469
470 do {
471 rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
472
473 if (goal < rsv->rsv_start)
474 n = n->rb_left;
475 else if (goal > rsv->rsv_end)
476 n = n->rb_right;
477 else
478 return rsv;
479 } while (n);
480 /*
481 * We've fallen off the end of the tree: the goal wasn't inside
482 * any particular node. OK, the previous node must be to one
483 * side of the interval containing the goal. If it's the RHS,
484 * we need to back up one.
485 */
486 if (rsv->rsv_start > goal) {
487 n = rb_prev(&rsv->rsv_node);
488 rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
489 }
490 return rsv;
491}
492
493/**
494 * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree.
495 * @sb: super block
496 * @rsv: reservation window to add
497 *
498 * Must be called with rsv_lock hold.
499 */
500void ext4_rsv_window_add(struct super_block *sb,
501 struct ext4_reserve_window_node *rsv)
502{
503 struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root;
504 struct rb_node *node = &rsv->rsv_node;
505 ext4_fsblk_t start = rsv->rsv_start;
506
507 struct rb_node ** p = &root->rb_node;
508 struct rb_node * parent = NULL;
509 struct ext4_reserve_window_node *this;
510
511 while (*p)
512 {
513 parent = *p;
514 this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node);
515
516 if (start < this->rsv_start)
517 p = &(*p)->rb_left;
518 else if (start > this->rsv_end)
519 p = &(*p)->rb_right;
520 else {
521 rsv_window_dump(root, 1);
522 BUG();
523 }
524 }
525
526 rb_link_node(node, parent, p);
527 rb_insert_color(node, root);
528}
529
530/**
531 * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree
532 * @sb: super block
533 * @rsv: reservation window to remove
534 *
535 * Mark the block reservation window as not allocated, and unlink it
536 * from the filesystem reservation window rb tree. Must be called with
537 * rsv_lock hold.
538 */
539static void rsv_window_remove(struct super_block *sb,
540 struct ext4_reserve_window_node *rsv)
541{
542 rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
543 rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
544 rsv->rsv_alloc_hit = 0;
545 rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root);
546}
547
548/*
549 * rsv_is_empty() -- Check if the reservation window is allocated.
550 * @rsv: given reservation window to check
551 *
552 * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
553 */
554static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
555{
556 /* a valid reservation end block could not be 0 */
557 return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
558}
559
560/**
561 * ext4_init_block_alloc_info()
562 * @inode: file inode structure
563 *
564 * Allocate and initialize the reservation window structure, and
565 * link the window to the ext4 inode structure at last
566 *
567 * The reservation window structure is only dynamically allocated
568 * and linked to ext4 inode the first time the open file
569 * needs a new block. So, before every ext4_new_block(s) call, for
570 * regular files, we should check whether the reservation window
571 * structure exists or not. In the latter case, this function is called.
572 * Fail to do so will result in block reservation being turned off for that
573 * open file.
574 *
575 * This function is called from ext4_get_blocks_handle(), also called
576 * when setting the reservation window size through ioctl before the file
577 * is open for write (needs block allocation).
578 *
579 * Needs down_write(i_data_sem) protection prior to call this function.
580 */
581void ext4_init_block_alloc_info(struct inode *inode)
582{
583 struct ext4_inode_info *ei = EXT4_I(inode);
584 struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
585 struct super_block *sb = inode->i_sb;
586
587 block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
588 if (block_i) {
589 struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node;
590
591 rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
592 rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
593
594 /*
595 * if filesystem is mounted with NORESERVATION, the goal
596 * reservation window size is set to zero to indicate
597 * block reservation is off
598 */
599 if (!test_opt(sb, RESERVATION))
600 rsv->rsv_goal_size = 0;
601 else
602 rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS;
603 rsv->rsv_alloc_hit = 0;
604 block_i->last_alloc_logical_block = 0;
605 block_i->last_alloc_physical_block = 0;
606 }
607 ei->i_block_alloc_info = block_i;
608}
609
610/**
611 * ext4_discard_reservation()
612 * @inode: inode
613 *
614 * Discard(free) block reservation window on last file close, or truncate
615 * or at last iput().
616 *
617 * It is being called in three cases:
618 * ext4_release_file(): last writer close the file
619 * ext4_clear_inode(): last iput(), when nobody link to this file.
620 * ext4_truncate(): when the block indirect map is about to change.
621 *
622 */
623void ext4_discard_reservation(struct inode *inode)
624{
625 struct ext4_inode_info *ei = EXT4_I(inode);
626 struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
627 struct ext4_reserve_window_node *rsv;
628 spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
629
630 ext4_mb_discard_inode_preallocations(inode);
631
632 if (!block_i)
633 return;
634
635 rsv = &block_i->rsv_window_node;
636 if (!rsv_is_empty(&rsv->rsv_window)) {
637 spin_lock(rsv_lock);
638 if (!rsv_is_empty(&rsv->rsv_window))
639 rsv_window_remove(inode->i_sb, rsv);
640 spin_unlock(rsv_lock);
641 }
642}
643 351
644/** 352/**
645 * ext4_free_blocks_sb() -- Free given blocks and update quota 353 * ext4_free_blocks_sb() -- Free given blocks and update quota
@@ -648,6 +356,13 @@ void ext4_discard_reservation(struct inode *inode)
648 * @block: start physcial block to free 356 * @block: start physcial block to free
649 * @count: number of blocks to free 357 * @count: number of blocks to free
650 * @pdquot_freed_blocks: pointer to quota 358 * @pdquot_freed_blocks: pointer to quota
359 *
360 * XXX This function is only used by the on-line resizing code, which
361 * should probably be fixed up to call the mballoc variant. There
362 * this needs to be cleaned up later; in fact, I'm not convinced this
363 * is 100% correct in the face of the mballoc code. The online resizing
364 * code needs to be fixed up to more tightly (and correctly) interlock
365 * with the mballoc code.
651 */ 366 */
652void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, 367void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
653 ext4_fsblk_t block, unsigned long count, 368 ext4_fsblk_t block, unsigned long count,
@@ -659,8 +374,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
659 ext4_grpblk_t bit; 374 ext4_grpblk_t bit;
660 unsigned long i; 375 unsigned long i;
661 unsigned long overflow; 376 unsigned long overflow;
662 struct ext4_group_desc * desc; 377 struct ext4_group_desc *desc;
663 struct ext4_super_block * es; 378 struct ext4_super_block *es;
664 struct ext4_sb_info *sbi; 379 struct ext4_sb_info *sbi;
665 int err = 0, ret; 380 int err = 0, ret;
666 ext4_grpblk_t group_freed; 381 ext4_grpblk_t group_freed;
@@ -671,13 +386,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
671 if (block < le32_to_cpu(es->s_first_data_block) || 386 if (block < le32_to_cpu(es->s_first_data_block) ||
672 block + count < block || 387 block + count < block ||
673 block + count > ext4_blocks_count(es)) { 388 block + count > ext4_blocks_count(es)) {
674 ext4_error (sb, "ext4_free_blocks", 389 ext4_error(sb, "ext4_free_blocks",
675 "Freeing blocks not in datazone - " 390 "Freeing blocks not in datazone - "
676 "block = %llu, count = %lu", block, count); 391 "block = %llu, count = %lu", block, count);
677 goto error_return; 392 goto error_return;
678 } 393 }
679 394
680 ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); 395 ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1);
681 396
682do_more: 397do_more:
683 overflow = 0; 398 overflow = 0;
@@ -694,7 +409,7 @@ do_more:
694 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 409 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
695 if (!bitmap_bh) 410 if (!bitmap_bh)
696 goto error_return; 411 goto error_return;
697 desc = ext4_get_group_desc (sb, block_group, &gd_bh); 412 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
698 if (!desc) 413 if (!desc)
699 goto error_return; 414 goto error_return;
700 415
@@ -703,10 +418,10 @@ do_more:
703 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || 418 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
704 in_range(block + count - 1, ext4_inode_table(sb, desc), 419 in_range(block + count - 1, ext4_inode_table(sb, desc),
705 sbi->s_itb_per_group)) { 420 sbi->s_itb_per_group)) {
706 ext4_error (sb, "ext4_free_blocks", 421 ext4_error(sb, "ext4_free_blocks",
707 "Freeing blocks in system zones - " 422 "Freeing blocks in system zones - "
708 "Block = %llu, count = %lu", 423 "Block = %llu, count = %lu",
709 block, count); 424 block, count);
710 goto error_return; 425 goto error_return;
711 } 426 }
712 427
@@ -848,7 +563,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
848 ext4_fsblk_t block, unsigned long count, 563 ext4_fsblk_t block, unsigned long count,
849 int metadata) 564 int metadata)
850{ 565{
851 struct super_block * sb; 566 struct super_block *sb;
852 unsigned long dquot_freed_blocks; 567 unsigned long dquot_freed_blocks;
853 568
854 /* this isn't the right place to decide whether block is metadata 569 /* this isn't the right place to decide whether block is metadata
@@ -859,748 +574,52 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
859 574
860 sb = inode->i_sb; 575 sb = inode->i_sb;
861 576
862 if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) 577 ext4_mb_free_blocks(handle, inode, block, count,
863 ext4_free_blocks_sb(handle, sb, block, count, 578 metadata, &dquot_freed_blocks);
864 &dquot_freed_blocks);
865 else
866 ext4_mb_free_blocks(handle, inode, block, count,
867 metadata, &dquot_freed_blocks);
868 if (dquot_freed_blocks) 579 if (dquot_freed_blocks)
869 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); 580 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
870 return; 581 return;
871} 582}
872 583
873/** 584int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
874 * ext4_test_allocatable() 585 s64 nblocks)
875 * @nr: given allocation block group
876 * @bh: bufferhead contains the bitmap of the given block group
877 *
878 * For ext4 allocations, we must not reuse any blocks which are
879 * allocated in the bitmap buffer's "last committed data" copy. This
880 * prevents deletes from freeing up the page for reuse until we have
881 * committed the delete transaction.
882 *
883 * If we didn't do this, then deleting something and reallocating it as
884 * data would allow the old block to be overwritten before the
885 * transaction committed (because we force data to disk before commit).
886 * This would lead to corruption if we crashed between overwriting the
887 * data and committing the delete.
888 *
889 * @@@ We may want to make this allocation behaviour conditional on
890 * data-writes at some point, and disable it for metadata allocations or
891 * sync-data inodes.
892 */
893static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
894{
895 int ret;
896 struct journal_head *jh = bh2jh(bh);
897
898 if (ext4_test_bit(nr, bh->b_data))
899 return 0;
900
901 jbd_lock_bh_state(bh);
902 if (!jh->b_committed_data)
903 ret = 1;
904 else
905 ret = !ext4_test_bit(nr, jh->b_committed_data);
906 jbd_unlock_bh_state(bh);
907 return ret;
908}
909
910/**
911 * bitmap_search_next_usable_block()
912 * @start: the starting block (group relative) of the search
913 * @bh: bufferhead contains the block group bitmap
914 * @maxblocks: the ending block (group relative) of the reservation
915 *
916 * The bitmap search --- search forward alternately through the actual
917 * bitmap on disk and the last-committed copy in journal, until we find a
918 * bit free in both bitmaps.
919 */
920static ext4_grpblk_t
921bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
922 ext4_grpblk_t maxblocks)
923{ 586{
924 ext4_grpblk_t next; 587 s64 free_blocks, dirty_blocks;
925 struct journal_head *jh = bh2jh(bh); 588 s64 root_blocks = 0;
926 589 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
927 while (start < maxblocks) { 590 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
928 next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start);
929 if (next >= maxblocks)
930 return -1;
931 if (ext4_test_allocatable(next, bh))
932 return next;
933 jbd_lock_bh_state(bh);
934 if (jh->b_committed_data)
935 start = ext4_find_next_zero_bit(jh->b_committed_data,
936 maxblocks, next);
937 jbd_unlock_bh_state(bh);
938 }
939 return -1;
940}
941 591
942/** 592 free_blocks = percpu_counter_read_positive(fbc);
943 * find_next_usable_block() 593 dirty_blocks = percpu_counter_read_positive(dbc);
944 * @start: the starting block (group relative) to find next
945 * allocatable block in bitmap.
946 * @bh: bufferhead contains the block group bitmap
947 * @maxblocks: the ending block (group relative) for the search
948 *
949 * Find an allocatable block in a bitmap. We honor both the bitmap and
950 * its last-committed copy (if that exists), and perform the "most
951 * appropriate allocation" algorithm of looking for a free block near
952 * the initial goal; then for a free byte somewhere in the bitmap; then
953 * for any free bit in the bitmap.
954 */
955static ext4_grpblk_t
956find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
957 ext4_grpblk_t maxblocks)
958{
959 ext4_grpblk_t here, next;
960 char *p, *r;
961
962 if (start > 0) {
963 /*
964 * The goal was occupied; search forward for a free
965 * block within the next XX blocks.
966 *
967 * end_goal is more or less random, but it has to be
968 * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the
969 * next 64-bit boundary is simple..
970 */
971 ext4_grpblk_t end_goal = (start + 63) & ~63;
972 if (end_goal > maxblocks)
973 end_goal = maxblocks;
974 here = ext4_find_next_zero_bit(bh->b_data, end_goal, start);
975 if (here < end_goal && ext4_test_allocatable(here, bh))
976 return here;
977 ext4_debug("Bit not found near goal\n");
978 }
979
980 here = start;
981 if (here < 0)
982 here = 0;
983
984 p = ((char *)bh->b_data) + (here >> 3);
985 r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
986 next = (r - ((char *)bh->b_data)) << 3;
987
988 if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh))
989 return next;
990
991 /*
992 * The bitmap search --- search forward alternately through the actual
993 * bitmap and the last-committed copy until we find a bit free in
994 * both
995 */
996 here = bitmap_search_next_usable_block(here, bh, maxblocks);
997 return here;
998}
999
1000/**
1001 * claim_block()
1002 * @block: the free block (group relative) to allocate
1003 * @bh: the bufferhead containts the block group bitmap
1004 *
1005 * We think we can allocate this block in this bitmap. Try to set the bit.
1006 * If that succeeds then check that nobody has allocated and then freed the
1007 * block since we saw that is was not marked in b_committed_data. If it _was_
1008 * allocated and freed then clear the bit in the bitmap again and return
1009 * zero (failure).
1010 */
1011static inline int
1012claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
1013{
1014 struct journal_head *jh = bh2jh(bh);
1015 int ret;
1016
1017 if (ext4_set_bit_atomic(lock, block, bh->b_data))
1018 return 0;
1019 jbd_lock_bh_state(bh);
1020 if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) {
1021 ext4_clear_bit_atomic(lock, block, bh->b_data);
1022 ret = 0;
1023 } else {
1024 ret = 1;
1025 }
1026 jbd_unlock_bh_state(bh);
1027 return ret;
1028}
1029 594
1030/** 595 if (!capable(CAP_SYS_RESOURCE) &&
1031 * ext4_try_to_allocate() 596 sbi->s_resuid != current->fsuid &&
1032 * @sb: superblock 597 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
1033 * @handle: handle to this transaction 598 root_blocks = ext4_r_blocks_count(sbi->s_es);
1034 * @group: given allocation block group
1035 * @bitmap_bh: bufferhead holds the block bitmap
1036 * @grp_goal: given target block within the group
1037 * @count: target number of blocks to allocate
1038 * @my_rsv: reservation window
1039 *
1040 * Attempt to allocate blocks within a give range. Set the range of allocation
1041 * first, then find the first free bit(s) from the bitmap (within the range),
1042 * and at last, allocate the blocks by claiming the found free bit as allocated.
1043 *
1044 * To set the range of this allocation:
1045 * if there is a reservation window, only try to allocate block(s) from the
1046 * file's own reservation window;
1047 * Otherwise, the allocation range starts from the give goal block, ends at
1048 * the block group's last block.
1049 *
1050 * If we failed to allocate the desired block then we may end up crossing to a
1051 * new bitmap. In that case we must release write access to the old one via
1052 * ext4_journal_release_buffer(), else we'll run out of credits.
1053 */
1054static ext4_grpblk_t
1055ext4_try_to_allocate(struct super_block *sb, handle_t *handle,
1056 ext4_group_t group, struct buffer_head *bitmap_bh,
1057 ext4_grpblk_t grp_goal, unsigned long *count,
1058 struct ext4_reserve_window *my_rsv)
1059{
1060 ext4_fsblk_t group_first_block;
1061 ext4_grpblk_t start, end;
1062 unsigned long num = 0;
1063
1064 /* we do allocation within the reservation window if we have a window */
1065 if (my_rsv) {
1066 group_first_block = ext4_group_first_block_no(sb, group);
1067 if (my_rsv->_rsv_start >= group_first_block)
1068 start = my_rsv->_rsv_start - group_first_block;
1069 else
1070 /* reservation window cross group boundary */
1071 start = 0;
1072 end = my_rsv->_rsv_end - group_first_block + 1;
1073 if (end > EXT4_BLOCKS_PER_GROUP(sb))
1074 /* reservation window crosses group boundary */
1075 end = EXT4_BLOCKS_PER_GROUP(sb);
1076 if ((start <= grp_goal) && (grp_goal < end))
1077 start = grp_goal;
1078 else
1079 grp_goal = -1;
1080 } else {
1081 if (grp_goal > 0)
1082 start = grp_goal;
1083 else
1084 start = 0;
1085 end = EXT4_BLOCKS_PER_GROUP(sb);
1086 }
1087
1088 BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb));
1089
1090repeat:
1091 if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) {
1092 grp_goal = find_next_usable_block(start, bitmap_bh, end);
1093 if (grp_goal < 0)
1094 goto fail_access;
1095 if (!my_rsv) {
1096 int i;
1097
1098 for (i = 0; i < 7 && grp_goal > start &&
1099 ext4_test_allocatable(grp_goal - 1,
1100 bitmap_bh);
1101 i++, grp_goal--)
1102 ;
1103 }
1104 }
1105 start = grp_goal;
1106
1107 if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group),
1108 grp_goal, bitmap_bh)) {
1109 /*
1110 * The block was allocated by another thread, or it was
1111 * allocated and then freed by another thread
1112 */
1113 start++;
1114 grp_goal++;
1115 if (start >= end)
1116 goto fail_access;
1117 goto repeat;
1118 }
1119 num++;
1120 grp_goal++;
1121 while (num < *count && grp_goal < end
1122 && ext4_test_allocatable(grp_goal, bitmap_bh)
1123 && claim_block(sb_bgl_lock(EXT4_SB(sb), group),
1124 grp_goal, bitmap_bh)) {
1125 num++;
1126 grp_goal++;
1127 }
1128 *count = num;
1129 return grp_goal - num;
1130fail_access:
1131 *count = num;
1132 return -1;
1133}
1134
1135/**
1136 * find_next_reservable_window():
1137 * find a reservable space within the given range.
1138 * It does not allocate the reservation window for now:
1139 * alloc_new_reservation() will do the work later.
1140 *
1141 * @search_head: the head of the searching list;
1142 * This is not necessarily the list head of the whole filesystem
1143 *
1144 * We have both head and start_block to assist the search
1145 * for the reservable space. The list starts from head,
1146 * but we will shift to the place where start_block is,
1147 * then start from there, when looking for a reservable space.
1148 *
1149 * @size: the target new reservation window size
1150 *
1151 * @group_first_block: the first block we consider to start
1152 * the real search from
1153 *
1154 * @last_block:
1155 * the maximum block number that our goal reservable space
1156 * could start from. This is normally the last block in this
1157 * group. The search will end when we found the start of next
1158 * possible reservable space is out of this boundary.
1159 * This could handle the cross boundary reservation window
1160 * request.
1161 *
1162 * basically we search from the given range, rather than the whole
1163 * reservation double linked list, (start_block, last_block)
1164 * to find a free region that is of my size and has not
1165 * been reserved.
1166 *
1167 */
1168static int find_next_reservable_window(
1169 struct ext4_reserve_window_node *search_head,
1170 struct ext4_reserve_window_node *my_rsv,
1171 struct super_block * sb,
1172 ext4_fsblk_t start_block,
1173 ext4_fsblk_t last_block)
1174{
1175 struct rb_node *next;
1176 struct ext4_reserve_window_node *rsv, *prev;
1177 ext4_fsblk_t cur;
1178 int size = my_rsv->rsv_goal_size;
1179
1180 /* TODO: make the start of the reservation window byte-aligned */
1181 /* cur = *start_block & ~7;*/
1182 cur = start_block;
1183 rsv = search_head;
1184 if (!rsv)
1185 return -1;
1186
1187 while (1) {
1188 if (cur <= rsv->rsv_end)
1189 cur = rsv->rsv_end + 1;
1190
1191 /* TODO?
1192 * in the case we could not find a reservable space
1193 * that is what is expected, during the re-search, we could
1194 * remember what's the largest reservable space we could have
1195 * and return that one.
1196 *
1197 * For now it will fail if we could not find the reservable
1198 * space with expected-size (or more)...
1199 */
1200 if (cur > last_block)
1201 return -1; /* fail */
1202
1203 prev = rsv;
1204 next = rb_next(&rsv->rsv_node);
1205 rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node);
1206 599
1207 /* 600 if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
1208 * Reached the last reservation, we can just append to the 601 EXT4_FREEBLOCKS_WATERMARK) {
1209 * previous one. 602 free_blocks = percpu_counter_sum(fbc);
1210 */ 603 dirty_blocks = percpu_counter_sum(dbc);
1211 if (!next) 604 if (dirty_blocks < 0) {
1212 break; 605 printk(KERN_CRIT "Dirty block accounting "
1213 606 "went wrong %lld\n",
1214 if (cur + size <= rsv->rsv_start) { 607 dirty_blocks);
1215 /*
1216 * Found a reserveable space big enough. We could
1217 * have a reservation across the group boundary here
1218 */
1219 break;
1220 } 608 }
1221 } 609 }
1222 /* 610 /* Check whether we have space after
1223 * we come here either : 611 * accounting for current dirty blocks
1224 * when we reach the end of the whole list,
1225 * and there is empty reservable space after last entry in the list.
1226 * append it to the end of the list.
1227 *
1228 * or we found one reservable space in the middle of the list,
1229 * return the reservation window that we could append to.
1230 * succeed.
1231 */ 612 */
613 if (free_blocks < ((root_blocks + nblocks) + dirty_blocks))
614 /* we don't have free space */
615 return -ENOSPC;
1232 616
1233 if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) 617 /* Add the blocks to nblocks */
1234 rsv_window_remove(sb, my_rsv); 618 percpu_counter_add(dbc, nblocks);
1235
1236 /*
1237 * Let's book the whole avaliable window for now. We will check the
1238 * disk bitmap later and then, if there are free blocks then we adjust
1239 * the window size if it's larger than requested.
1240 * Otherwise, we will remove this node from the tree next time
1241 * call find_next_reservable_window.
1242 */
1243 my_rsv->rsv_start = cur;
1244 my_rsv->rsv_end = cur + size - 1;
1245 my_rsv->rsv_alloc_hit = 0;
1246
1247 if (prev != my_rsv)
1248 ext4_rsv_window_add(sb, my_rsv);
1249
1250 return 0; 619 return 0;
1251} 620}
1252 621
1253/** 622/**
1254 * alloc_new_reservation()--allocate a new reservation window
1255 *
1256 * To make a new reservation, we search part of the filesystem
1257 * reservation list (the list that inside the group). We try to
1258 * allocate a new reservation window near the allocation goal,
1259 * or the beginning of the group, if there is no goal.
1260 *
1261 * We first find a reservable space after the goal, then from
1262 * there, we check the bitmap for the first free block after
1263 * it. If there is no free block until the end of group, then the
1264 * whole group is full, we failed. Otherwise, check if the free
1265 * block is inside the expected reservable space, if so, we
1266 * succeed.
1267 * If the first free block is outside the reservable space, then
1268 * start from the first free block, we search for next available
1269 * space, and go on.
1270 *
1271 * on succeed, a new reservation will be found and inserted into the list
1272 * It contains at least one free block, and it does not overlap with other
1273 * reservation windows.
1274 *
1275 * failed: we failed to find a reservation window in this group
1276 *
1277 * @rsv: the reservation
1278 *
1279 * @grp_goal: The goal (group-relative). It is where the search for a
1280 * free reservable space should start from.
1281 * if we have a grp_goal(grp_goal >0 ), then start from there,
1282 * no grp_goal(grp_goal = -1), we start from the first block
1283 * of the group.
1284 *
1285 * @sb: the super block
1286 * @group: the group we are trying to allocate in
1287 * @bitmap_bh: the block group block bitmap
1288 *
1289 */
1290static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
1291 ext4_grpblk_t grp_goal, struct super_block *sb,
1292 ext4_group_t group, struct buffer_head *bitmap_bh)
1293{
1294 struct ext4_reserve_window_node *search_head;
1295 ext4_fsblk_t group_first_block, group_end_block, start_block;
1296 ext4_grpblk_t first_free_block;
1297 struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root;
1298 unsigned long size;
1299 int ret;
1300 spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
1301
1302 group_first_block = ext4_group_first_block_no(sb, group);
1303 group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1304
1305 if (grp_goal < 0)
1306 start_block = group_first_block;
1307 else
1308 start_block = grp_goal + group_first_block;
1309
1310 size = my_rsv->rsv_goal_size;
1311
1312 if (!rsv_is_empty(&my_rsv->rsv_window)) {
1313 /*
1314 * if the old reservation is cross group boundary
1315 * and if the goal is inside the old reservation window,
1316 * we will come here when we just failed to allocate from
1317 * the first part of the window. We still have another part
1318 * that belongs to the next group. In this case, there is no
1319 * point to discard our window and try to allocate a new one
1320 * in this group(which will fail). we should
1321 * keep the reservation window, just simply move on.
1322 *
1323 * Maybe we could shift the start block of the reservation
1324 * window to the first block of next group.
1325 */
1326
1327 if ((my_rsv->rsv_start <= group_end_block) &&
1328 (my_rsv->rsv_end > group_end_block) &&
1329 (start_block >= my_rsv->rsv_start))
1330 return -1;
1331
1332 if ((my_rsv->rsv_alloc_hit >
1333 (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
1334 /*
1335 * if the previously allocation hit ratio is
1336 * greater than 1/2, then we double the size of
1337 * the reservation window the next time,
1338 * otherwise we keep the same size window
1339 */
1340 size = size * 2;
1341 if (size > EXT4_MAX_RESERVE_BLOCKS)
1342 size = EXT4_MAX_RESERVE_BLOCKS;
1343 my_rsv->rsv_goal_size= size;
1344 }
1345 }
1346
1347 spin_lock(rsv_lock);
1348 /*
1349 * shift the search start to the window near the goal block
1350 */
1351 search_head = search_reserve_window(fs_rsv_root, start_block);
1352
1353 /*
1354 * find_next_reservable_window() simply finds a reservable window
1355 * inside the given range(start_block, group_end_block).
1356 *
1357 * To make sure the reservation window has a free bit inside it, we
1358 * need to check the bitmap after we found a reservable window.
1359 */
1360retry:
1361 ret = find_next_reservable_window(search_head, my_rsv, sb,
1362 start_block, group_end_block);
1363
1364 if (ret == -1) {
1365 if (!rsv_is_empty(&my_rsv->rsv_window))
1366 rsv_window_remove(sb, my_rsv);
1367 spin_unlock(rsv_lock);
1368 return -1;
1369 }
1370
1371 /*
1372 * On success, find_next_reservable_window() returns the
1373 * reservation window where there is a reservable space after it.
1374 * Before we reserve this reservable space, we need
1375 * to make sure there is at least a free block inside this region.
1376 *
1377 * searching the first free bit on the block bitmap and copy of
1378 * last committed bitmap alternatively, until we found a allocatable
1379 * block. Search start from the start block of the reservable space
1380 * we just found.
1381 */
1382 spin_unlock(rsv_lock);
1383 first_free_block = bitmap_search_next_usable_block(
1384 my_rsv->rsv_start - group_first_block,
1385 bitmap_bh, group_end_block - group_first_block + 1);
1386
1387 if (first_free_block < 0) {
1388 /*
1389 * no free block left on the bitmap, no point
1390 * to reserve the space. return failed.
1391 */
1392 spin_lock(rsv_lock);
1393 if (!rsv_is_empty(&my_rsv->rsv_window))
1394 rsv_window_remove(sb, my_rsv);
1395 spin_unlock(rsv_lock);
1396 return -1; /* failed */
1397 }
1398
1399 start_block = first_free_block + group_first_block;
1400 /*
1401 * check if the first free block is within the
1402 * free space we just reserved
1403 */
1404 if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
1405 return 0; /* success */
1406 /*
1407 * if the first free bit we found is out of the reservable space
1408 * continue search for next reservable space,
1409 * start from where the free block is,
1410 * we also shift the list head to where we stopped last time
1411 */
1412 search_head = my_rsv;
1413 spin_lock(rsv_lock);
1414 goto retry;
1415}
1416
1417/**
1418 * try_to_extend_reservation()
1419 * @my_rsv: given reservation window
1420 * @sb: super block
1421 * @size: the delta to extend
1422 *
1423 * Attempt to expand the reservation window large enough to have
1424 * required number of free blocks
1425 *
1426 * Since ext4_try_to_allocate() will always allocate blocks within
1427 * the reservation window range, if the window size is too small,
1428 * multiple blocks allocation has to stop at the end of the reservation
1429 * window. To make this more efficient, given the total number of
1430 * blocks needed and the current size of the window, we try to
1431 * expand the reservation window size if necessary on a best-effort
1432 * basis before ext4_new_blocks() tries to allocate blocks,
1433 */
1434static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
1435 struct super_block *sb, int size)
1436{
1437 struct ext4_reserve_window_node *next_rsv;
1438 struct rb_node *next;
1439 spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
1440
1441 if (!spin_trylock(rsv_lock))
1442 return;
1443
1444 next = rb_next(&my_rsv->rsv_node);
1445
1446 if (!next)
1447 my_rsv->rsv_end += size;
1448 else {
1449 next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);
1450
1451 if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
1452 my_rsv->rsv_end += size;
1453 else
1454 my_rsv->rsv_end = next_rsv->rsv_start - 1;
1455 }
1456 spin_unlock(rsv_lock);
1457}
1458
1459/**
1460 * ext4_try_to_allocate_with_rsv()
1461 * @sb: superblock
1462 * @handle: handle to this transaction
1463 * @group: given allocation block group
1464 * @bitmap_bh: bufferhead holds the block bitmap
1465 * @grp_goal: given target block within the group
1466 * @count: target number of blocks to allocate
1467 * @my_rsv: reservation window
1468 * @errp: pointer to store the error code
1469 *
1470 * This is the main function used to allocate a new block and its reservation
1471 * window.
1472 *
1473 * Each time when a new block allocation is need, first try to allocate from
1474 * its own reservation. If it does not have a reservation window, instead of
1475 * looking for a free bit on bitmap first, then look up the reservation list to
1476 * see if it is inside somebody else's reservation window, we try to allocate a
1477 * reservation window for it starting from the goal first. Then do the block
1478 * allocation within the reservation window.
1479 *
1480 * This will avoid keeping on searching the reservation list again and
1481 * again when somebody is looking for a free block (without
1482 * reservation), and there are lots of free blocks, but they are all
1483 * being reserved.
1484 *
1485 * We use a red-black tree for the per-filesystem reservation list.
1486 *
1487 */
1488static ext4_grpblk_t
1489ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1490 ext4_group_t group, struct buffer_head *bitmap_bh,
1491 ext4_grpblk_t grp_goal,
1492 struct ext4_reserve_window_node * my_rsv,
1493 unsigned long *count, int *errp)
1494{
1495 ext4_fsblk_t group_first_block, group_last_block;
1496 ext4_grpblk_t ret = 0;
1497 int fatal;
1498 unsigned long num = *count;
1499
1500 *errp = 0;
1501
1502 /*
1503 * Make sure we use undo access for the bitmap, because it is critical
1504 * that we do the frozen_data COW on bitmap buffers in all cases even
1505 * if the buffer is in BJ_Forget state in the committing transaction.
1506 */
1507 BUFFER_TRACE(bitmap_bh, "get undo access for new block");
1508 fatal = ext4_journal_get_undo_access(handle, bitmap_bh);
1509 if (fatal) {
1510 *errp = fatal;
1511 return -1;
1512 }
1513
1514 /*
1515 * we don't deal with reservation when
1516 * filesystem is mounted without reservation
1517 * or the file is not a regular file
1518 * or last attempt to allocate a block with reservation turned on failed
1519 */
1520 if (my_rsv == NULL ) {
1521 ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
1522 grp_goal, count, NULL);
1523 goto out;
1524 }
1525 /*
1526 * grp_goal is a group relative block number (if there is a goal)
1527 * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
1528 * first block is a filesystem wide block number
1529 * first block is the block number of the first block in this group
1530 */
1531 group_first_block = ext4_group_first_block_no(sb, group);
1532 group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1533
1534 /*
1535 * Basically we will allocate a new block from inode's reservation
1536 * window.
1537 *
1538 * We need to allocate a new reservation window, if:
1539 * a) inode does not have a reservation window; or
1540 * b) last attempt to allocate a block from existing reservation
1541 * failed; or
1542 * c) we come here with a goal and with a reservation window
1543 *
1544 * We do not need to allocate a new reservation window if we come here
1545 * at the beginning with a goal and the goal is inside the window, or
1546 * we don't have a goal but already have a reservation window.
1547 * then we could go to allocate from the reservation window directly.
1548 */
1549 while (1) {
1550 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
1551 !goal_in_my_reservation(&my_rsv->rsv_window,
1552 grp_goal, group, sb)) {
1553 if (my_rsv->rsv_goal_size < *count)
1554 my_rsv->rsv_goal_size = *count;
1555 ret = alloc_new_reservation(my_rsv, grp_goal, sb,
1556 group, bitmap_bh);
1557 if (ret < 0)
1558 break; /* failed */
1559
1560 if (!goal_in_my_reservation(&my_rsv->rsv_window,
1561 grp_goal, group, sb))
1562 grp_goal = -1;
1563 } else if (grp_goal >= 0) {
1564 int curr = my_rsv->rsv_end -
1565 (grp_goal + group_first_block) + 1;
1566
1567 if (curr < *count)
1568 try_to_extend_reservation(my_rsv, sb,
1569 *count - curr);
1570 }
1571
1572 if ((my_rsv->rsv_start > group_last_block) ||
1573 (my_rsv->rsv_end < group_first_block)) {
1574 rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1);
1575 BUG();
1576 }
1577 ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
1578 grp_goal, &num, &my_rsv->rsv_window);
1579 if (ret >= 0) {
1580 my_rsv->rsv_alloc_hit += num;
1581 *count = num;
1582 break; /* succeed */
1583 }
1584 num = *count;
1585 }
1586out:
1587 if (ret >= 0) {
1588 BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
1589 "bitmap block");
1590 fatal = ext4_journal_dirty_metadata(handle, bitmap_bh);
1591 if (fatal) {
1592 *errp = fatal;
1593 return -1;
1594 }
1595 return ret;
1596 }
1597
1598 BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
1599 ext4_journal_release_buffer(handle, bitmap_bh);
1600 return ret;
1601}
1602
1603/**
1604 * ext4_has_free_blocks() 623 * ext4_has_free_blocks()
1605 * @sbi: in-core super block structure. 624 * @sbi: in-core super block structure.
1606 * @nblocks: number of neeed blocks 625 * @nblocks: number of neeed blocks
@@ -1610,29 +629,34 @@ out:
1610 * On success, return nblocks 629 * On success, return nblocks
1611 */ 630 */
1612ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, 631ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
1613 ext4_fsblk_t nblocks) 632 s64 nblocks)
1614{ 633{
1615 ext4_fsblk_t free_blocks; 634 s64 free_blocks, dirty_blocks;
1616 ext4_fsblk_t root_blocks = 0; 635 s64 root_blocks = 0;
636 struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
637 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
1617 638
1618 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 639 free_blocks = percpu_counter_read_positive(fbc);
640 dirty_blocks = percpu_counter_read_positive(dbc);
1619 641
1620 if (!capable(CAP_SYS_RESOURCE) && 642 if (!capable(CAP_SYS_RESOURCE) &&
1621 sbi->s_resuid != current->fsuid && 643 sbi->s_resuid != current->fsuid &&
1622 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) 644 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
1623 root_blocks = ext4_r_blocks_count(sbi->s_es); 645 root_blocks = ext4_r_blocks_count(sbi->s_es);
1624#ifdef CONFIG_SMP 646
1625 if (free_blocks - root_blocks < FBC_BATCH) 647 if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
1626 free_blocks = 648 EXT4_FREEBLOCKS_WATERMARK) {
1627 percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); 649 free_blocks = percpu_counter_sum(fbc);
1628#endif 650 dirty_blocks = percpu_counter_sum(dbc);
1629 if (free_blocks <= root_blocks) 651 }
652 if (free_blocks <= (root_blocks + dirty_blocks))
1630 /* we don't have free space */ 653 /* we don't have free space */
1631 return 0; 654 return 0;
1632 if (free_blocks - root_blocks < nblocks) 655
1633 return free_blocks - root_blocks; 656 if (free_blocks - (root_blocks + dirty_blocks) < nblocks)
657 return free_blocks - (root_blocks + dirty_blocks);
1634 return nblocks; 658 return nblocks;
1635 } 659}
1636 660
1637 661
1638/** 662/**
@@ -1657,303 +681,6 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
1657 return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); 681 return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
1658} 682}
1659 683
1660/**
1661 * ext4_old_new_blocks() -- core block bitmap based block allocation function
1662 *
1663 * @handle: handle to this transaction
1664 * @inode: file inode
1665 * @goal: given target block(filesystem wide)
1666 * @count: target number of blocks to allocate
1667 * @errp: error code
1668 *
1669 * ext4_old_new_blocks uses a goal block to assist allocation and look up
1670 * the block bitmap directly to do block allocation. It tries to
1671 * allocate block(s) from the block group contains the goal block first. If
1672 * that fails, it will try to allocate block(s) from other block groups
1673 * without any specific goal block.
1674 *
1675 * This function is called when -o nomballoc mount option is enabled
1676 *
1677 */
1678ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
1679 ext4_fsblk_t goal, unsigned long *count, int *errp)
1680{
1681 struct buffer_head *bitmap_bh = NULL;
1682 struct buffer_head *gdp_bh;
1683 ext4_group_t group_no;
1684 ext4_group_t goal_group;
1685 ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */
1686 ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/
1687 ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */
1688 ext4_group_t bgi; /* blockgroup iteration index */
1689 int fatal = 0, err;
1690 int performed_allocation = 0;
1691 ext4_grpblk_t free_blocks; /* number of free blocks in a group */
1692 struct super_block *sb;
1693 struct ext4_group_desc *gdp;
1694 struct ext4_super_block *es;
1695 struct ext4_sb_info *sbi;
1696 struct ext4_reserve_window_node *my_rsv = NULL;
1697 struct ext4_block_alloc_info *block_i;
1698 unsigned short windowsz = 0;
1699 ext4_group_t ngroups;
1700 unsigned long num = *count;
1701
1702 sb = inode->i_sb;
1703 if (!sb) {
1704 *errp = -ENODEV;
1705 printk("ext4_new_block: nonexistent device");
1706 return 0;
1707 }
1708
1709 sbi = EXT4_SB(sb);
1710 if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
1711 /*
1712 * With delalloc we already reserved the blocks
1713 */
1714 *count = ext4_has_free_blocks(sbi, *count);
1715 }
1716 if (*count == 0) {
1717 *errp = -ENOSPC;
1718 return 0; /*return with ENOSPC error */
1719 }
1720 num = *count;
1721
1722 /*
1723 * Check quota for allocation of this block.
1724 */
1725 if (DQUOT_ALLOC_BLOCK(inode, num)) {
1726 *errp = -EDQUOT;
1727 return 0;
1728 }
1729
1730 sbi = EXT4_SB(sb);
1731 es = EXT4_SB(sb)->s_es;
1732 ext4_debug("goal=%llu.\n", goal);
1733 /*
1734 * Allocate a block from reservation only when
1735 * filesystem is mounted with reservation(default,-o reservation), and
1736 * it's a regular file, and
1737 * the desired window size is greater than 0 (One could use ioctl
1738 * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off
1739 * reservation on that particular file)
1740 */
1741 block_i = EXT4_I(inode)->i_block_alloc_info;
1742 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1743 my_rsv = &block_i->rsv_window_node;
1744
1745 /*
1746 * First, test whether the goal block is free.
1747 */
1748 if (goal < le32_to_cpu(es->s_first_data_block) ||
1749 goal >= ext4_blocks_count(es))
1750 goal = le32_to_cpu(es->s_first_data_block);
1751 ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
1752 goal_group = group_no;
1753retry_alloc:
1754 gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
1755 if (!gdp)
1756 goto io_error;
1757
1758 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1759 /*
1760 * if there is not enough free blocks to make a new resevation
1761 * turn off reservation for this allocation
1762 */
1763 if (my_rsv && (free_blocks < windowsz)
1764 && (rsv_is_empty(&my_rsv->rsv_window)))
1765 my_rsv = NULL;
1766
1767 if (free_blocks > 0) {
1768 bitmap_bh = ext4_read_block_bitmap(sb, group_no);
1769 if (!bitmap_bh)
1770 goto io_error;
1771 grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
1772 group_no, bitmap_bh, grp_target_blk,
1773 my_rsv, &num, &fatal);
1774 if (fatal)
1775 goto out;
1776 if (grp_alloc_blk >= 0)
1777 goto allocated;
1778 }
1779
1780 ngroups = EXT4_SB(sb)->s_groups_count;
1781 smp_rmb();
1782
1783 /*
1784 * Now search the rest of the groups. We assume that
1785 * group_no and gdp correctly point to the last group visited.
1786 */
1787 for (bgi = 0; bgi < ngroups; bgi++) {
1788 group_no++;
1789 if (group_no >= ngroups)
1790 group_no = 0;
1791 gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
1792 if (!gdp)
1793 goto io_error;
1794 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1795 /*
1796 * skip this group if the number of
1797 * free blocks is less than half of the reservation
1798 * window size.
1799 */
1800 if (free_blocks <= (windowsz/2))
1801 continue;
1802
1803 brelse(bitmap_bh);
1804 bitmap_bh = ext4_read_block_bitmap(sb, group_no);
1805 if (!bitmap_bh)
1806 goto io_error;
1807 /*
1808 * try to allocate block(s) from this group, without a goal(-1).
1809 */
1810 grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
1811 group_no, bitmap_bh, -1, my_rsv,
1812 &num, &fatal);
1813 if (fatal)
1814 goto out;
1815 if (grp_alloc_blk >= 0)
1816 goto allocated;
1817 }
1818 /*
1819 * We may end up a bogus ealier ENOSPC error due to
1820 * filesystem is "full" of reservations, but
1821 * there maybe indeed free blocks avaliable on disk
1822 * In this case, we just forget about the reservations
1823 * just do block allocation as without reservations.
1824 */
1825 if (my_rsv) {
1826 my_rsv = NULL;
1827 windowsz = 0;
1828 group_no = goal_group;
1829 goto retry_alloc;
1830 }
1831 /* No space left on the device */
1832 *errp = -ENOSPC;
1833 goto out;
1834
1835allocated:
1836
1837 ext4_debug("using block group %lu(%d)\n",
1838 group_no, gdp->bg_free_blocks_count);
1839
1840 BUFFER_TRACE(gdp_bh, "get_write_access");
1841 fatal = ext4_journal_get_write_access(handle, gdp_bh);
1842 if (fatal)
1843 goto out;
1844
1845 ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
1846
1847 if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
1848 in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) ||
1849 in_range(ret_block, ext4_inode_table(sb, gdp),
1850 EXT4_SB(sb)->s_itb_per_group) ||
1851 in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
1852 EXT4_SB(sb)->s_itb_per_group)) {
1853 ext4_error(sb, "ext4_new_block",
1854 "Allocating block in system zone - "
1855 "blocks from %llu, length %lu",
1856 ret_block, num);
1857 /*
1858 * claim_block marked the blocks we allocated
1859 * as in use. So we may want to selectively
1860 * mark some of the blocks as free
1861 */
1862 goto retry_alloc;
1863 }
1864
1865 performed_allocation = 1;
1866
1867#ifdef CONFIG_JBD2_DEBUG
1868 {
1869 struct buffer_head *debug_bh;
1870
1871 /* Record bitmap buffer state in the newly allocated block */
1872 debug_bh = sb_find_get_block(sb, ret_block);
1873 if (debug_bh) {
1874 BUFFER_TRACE(debug_bh, "state when allocated");
1875 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
1876 brelse(debug_bh);
1877 }
1878 }
1879 jbd_lock_bh_state(bitmap_bh);
1880 spin_lock(sb_bgl_lock(sbi, group_no));
1881 if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
1882 int i;
1883
1884 for (i = 0; i < num; i++) {
1885 if (ext4_test_bit(grp_alloc_blk+i,
1886 bh2jh(bitmap_bh)->b_committed_data)) {
1887 printk("%s: block was unexpectedly set in "
1888 "b_committed_data\n", __func__);
1889 }
1890 }
1891 }
1892 ext4_debug("found bit %d\n", grp_alloc_blk);
1893 spin_unlock(sb_bgl_lock(sbi, group_no));
1894 jbd_unlock_bh_state(bitmap_bh);
1895#endif
1896
1897 if (ret_block + num - 1 >= ext4_blocks_count(es)) {
1898 ext4_error(sb, "ext4_new_block",
1899 "block(%llu) >= blocks count(%llu) - "
1900 "block_group = %lu, es == %p ", ret_block,
1901 ext4_blocks_count(es), group_no, es);
1902 goto out;
1903 }
1904
1905 /*
1906 * It is up to the caller to add the new buffer to a journal
1907 * list of some description. We don't know in advance whether
1908 * the caller wants to use it as metadata or data.
1909 */
1910 spin_lock(sb_bgl_lock(sbi, group_no));
1911 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1912 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1913 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1914 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1915 spin_unlock(sb_bgl_lock(sbi, group_no));
1916 if (!EXT4_I(inode)->i_delalloc_reserved_flag)
1917 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
1918
1919 if (sbi->s_log_groups_per_flex) {
1920 ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
1921 spin_lock(sb_bgl_lock(sbi, flex_group));
1922 sbi->s_flex_groups[flex_group].free_blocks -= num;
1923 spin_unlock(sb_bgl_lock(sbi, flex_group));
1924 }
1925
1926 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
1927 err = ext4_journal_dirty_metadata(handle, gdp_bh);
1928 if (!fatal)
1929 fatal = err;
1930
1931 sb->s_dirt = 1;
1932 if (fatal)
1933 goto out;
1934
1935 *errp = 0;
1936 brelse(bitmap_bh);
1937 DQUOT_FREE_BLOCK(inode, *count-num);
1938 *count = num;
1939 return ret_block;
1940
1941io_error:
1942 *errp = -EIO;
1943out:
1944 if (fatal) {
1945 *errp = fatal;
1946 ext4_std_error(sb, fatal);
1947 }
1948 /*
1949 * Undo the block allocation
1950 */
1951 if (!performed_allocation)
1952 DQUOT_FREE_BLOCK(inode, *count);
1953 brelse(bitmap_bh);
1954 return 0;
1955}
1956
1957#define EXT4_META_BLOCK 0x1 684#define EXT4_META_BLOCK 0x1
1958 685
1959static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, 686static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
@@ -1963,10 +690,6 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
1963 struct ext4_allocation_request ar; 690 struct ext4_allocation_request ar;
1964 ext4_fsblk_t ret; 691 ext4_fsblk_t ret;
1965 692
1966 if (!test_opt(inode->i_sb, MBALLOC)) {
1967 return ext4_old_new_blocks(handle, inode, goal, count, errp);
1968 }
1969
1970 memset(&ar, 0, sizeof(ar)); 693 memset(&ar, 0, sizeof(ar));
1971 /* Fill with neighbour allocated blocks */ 694 /* Fill with neighbour allocated blocks */
1972 695
@@ -2008,7 +731,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
2008 /* 731 /*
2009 * Account for the allocated meta blocks 732 * Account for the allocated meta blocks
2010 */ 733 */
2011 if (!(*errp)) { 734 if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
2012 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 735 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2013 EXT4_I(inode)->i_allocated_meta_blocks += *count; 736 EXT4_I(inode)->i_allocated_meta_blocks += *count;
2014 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 737 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2093,10 +816,9 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
2093 bitmap_count += x; 816 bitmap_count += x;
2094 } 817 }
2095 brelse(bitmap_bh); 818 brelse(bitmap_bh);
2096 printk("ext4_count_free_blocks: stored = %llu" 819 printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
2097 ", computed = %llu, %llu\n", 820 ", computed = %llu, %llu\n", ext4_free_blocks_count(es),
2098 ext4_free_blocks_count(es), 821 desc_count, bitmap_count);
2099 desc_count, bitmap_count);
2100 return bitmap_count; 822 return bitmap_count;
2101#else 823#else
2102 desc_count = 0; 824 desc_count = 0;
@@ -2183,8 +905,9 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
2183 905
2184 if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || 906 if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) ||
2185 metagroup < first_meta_bg) 907 metagroup < first_meta_bg)
2186 return ext4_bg_num_gdb_nometa(sb,group); 908 return ext4_bg_num_gdb_nometa(sb, group);
2187 909
2188 return ext4_bg_num_gdb_meta(sb,group); 910 return ext4_bg_num_gdb_meta(sb,group);
2189 911
2190} 912}
913
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index d37ea6750454..0a7a6663c190 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -15,17 +15,17 @@
15 15
16static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; 16static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
17 17
18unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) 18unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars)
19{ 19{
20 unsigned int i; 20 unsigned int i;
21 unsigned long sum = 0; 21 unsigned long sum = 0;
22 22
23 if (!map) 23 if (!map)
24 return (0); 24 return 0;
25 for (i = 0; i < numchars; i++) 25 for (i = 0; i < numchars; i++)
26 sum += nibblemap[map->b_data[i] & 0xf] + 26 sum += nibblemap[map->b_data[i] & 0xf] +
27 nibblemap[(map->b_data[i] >> 4) & 0xf]; 27 nibblemap[(map->b_data[i] >> 4) & 0xf];
28 return (sum); 28 return sum;
29} 29}
30 30
31#endif /* EXT4FS_DEBUG */ 31#endif /* EXT4FS_DEBUG */
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ec8e33b45219..3ca6a2b7632d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = {
33}; 33};
34 34
35static int ext4_readdir(struct file *, void *, filldir_t); 35static int ext4_readdir(struct file *, void *, filldir_t);
36static int ext4_dx_readdir(struct file * filp, 36static int ext4_dx_readdir(struct file *filp,
37 void * dirent, filldir_t filldir); 37 void *dirent, filldir_t filldir);
38static int ext4_release_dir (struct inode * inode, 38static int ext4_release_dir(struct inode *inode,
39 struct file * filp); 39 struct file *filp);
40 40
41const struct file_operations ext4_dir_operations = { 41const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = generic_file_llseek,
@@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
61} 61}
62 62
63 63
64int ext4_check_dir_entry (const char * function, struct inode * dir, 64int ext4_check_dir_entry(const char *function, struct inode *dir,
65 struct ext4_dir_entry_2 * de, 65 struct ext4_dir_entry_2 *de,
66 struct buffer_head * bh, 66 struct buffer_head *bh,
67 unsigned long offset) 67 unsigned long offset)
68{ 68{
69 const char * error_msg = NULL; 69 const char *error_msg = NULL;
70 const int rlen = ext4_rec_len_from_disk(de->rec_len); 70 const int rlen = ext4_rec_len_from_disk(de->rec_len);
71 71
72 if (rlen < EXT4_DIR_REC_LEN(1)) 72 if (rlen < EXT4_DIR_REC_LEN(1))
@@ -82,7 +82,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
82 error_msg = "inode out of bounds"; 82 error_msg = "inode out of bounds";
83 83
84 if (error_msg != NULL) 84 if (error_msg != NULL)
85 ext4_error (dir->i_sb, function, 85 ext4_error(dir->i_sb, function,
86 "bad entry in directory #%lu: %s - " 86 "bad entry in directory #%lu: %s - "
87 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", 87 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
88 dir->i_ino, error_msg, offset, 88 dir->i_ino, error_msg, offset,
@@ -91,8 +91,8 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
91 return error_msg == NULL ? 1 : 0; 91 return error_msg == NULL ? 1 : 0;
92} 92}
93 93
94static int ext4_readdir(struct file * filp, 94static int ext4_readdir(struct file *filp,
95 void * dirent, filldir_t filldir) 95 void *dirent, filldir_t filldir)
96{ 96{
97 int error = 0; 97 int error = 0;
98 unsigned long offset; 98 unsigned long offset;
@@ -102,6 +102,7 @@ static int ext4_readdir(struct file * filp,
102 int err; 102 int err;
103 struct inode *inode = filp->f_path.dentry->d_inode; 103 struct inode *inode = filp->f_path.dentry->d_inode;
104 int ret = 0; 104 int ret = 0;
105 int dir_has_error = 0;
105 106
106 sb = inode->i_sb; 107 sb = inode->i_sb;
107 108
@@ -148,9 +149,13 @@ static int ext4_readdir(struct file * filp,
148 * of recovering data when there's a bad sector 149 * of recovering data when there's a bad sector
149 */ 150 */
150 if (!bh) { 151 if (!bh) {
151 ext4_error (sb, "ext4_readdir", 152 if (!dir_has_error) {
152 "directory #%lu contains a hole at offset %lu", 153 ext4_error(sb, __func__, "directory #%lu "
153 inode->i_ino, (unsigned long)filp->f_pos); 154 "contains a hole at offset %Lu",
155 inode->i_ino,
156 (unsigned long long) filp->f_pos);
157 dir_has_error = 1;
158 }
154 /* corrupt size? Maybe no more blocks to read */ 159 /* corrupt size? Maybe no more blocks to read */
155 if (filp->f_pos > inode->i_blocks << 9) 160 if (filp->f_pos > inode->i_blocks << 9)
156 break; 161 break;
@@ -187,14 +192,14 @@ revalidate:
187 while (!error && filp->f_pos < inode->i_size 192 while (!error && filp->f_pos < inode->i_size
188 && offset < sb->s_blocksize) { 193 && offset < sb->s_blocksize) {
189 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); 194 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
190 if (!ext4_check_dir_entry ("ext4_readdir", inode, de, 195 if (!ext4_check_dir_entry("ext4_readdir", inode, de,
191 bh, offset)) { 196 bh, offset)) {
192 /* 197 /*
193 * On error, skip the f_pos to the next block 198 * On error, skip the f_pos to the next block
194 */ 199 */
195 filp->f_pos = (filp->f_pos | 200 filp->f_pos = (filp->f_pos |
196 (sb->s_blocksize - 1)) + 1; 201 (sb->s_blocksize - 1)) + 1;
197 brelse (bh); 202 brelse(bh);
198 ret = stored; 203 ret = stored;
199 goto out; 204 goto out;
200 } 205 }
@@ -218,12 +223,12 @@ revalidate:
218 break; 223 break;
219 if (version != filp->f_version) 224 if (version != filp->f_version)
220 goto revalidate; 225 goto revalidate;
221 stored ++; 226 stored++;
222 } 227 }
223 filp->f_pos += ext4_rec_len_from_disk(de->rec_len); 228 filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
224 } 229 }
225 offset = 0; 230 offset = 0;
226 brelse (bh); 231 brelse(bh);
227 } 232 }
228out: 233out:
229 return ret; 234 return ret;
@@ -290,9 +295,9 @@ static void free_rb_tree_fname(struct rb_root *root)
290 parent = rb_parent(n); 295 parent = rb_parent(n);
291 fname = rb_entry(n, struct fname, rb_hash); 296 fname = rb_entry(n, struct fname, rb_hash);
292 while (fname) { 297 while (fname) {
293 struct fname * old = fname; 298 struct fname *old = fname;
294 fname = fname->next; 299 fname = fname->next;
295 kfree (old); 300 kfree(old);
296 } 301 }
297 if (!parent) 302 if (!parent)
298 root->rb_node = NULL; 303 root->rb_node = NULL;
@@ -331,7 +336,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
331 struct ext4_dir_entry_2 *dirent) 336 struct ext4_dir_entry_2 *dirent)
332{ 337{
333 struct rb_node **p, *parent = NULL; 338 struct rb_node **p, *parent = NULL;
334 struct fname * fname, *new_fn; 339 struct fname *fname, *new_fn;
335 struct dir_private_info *info; 340 struct dir_private_info *info;
336 int len; 341 int len;
337 342
@@ -388,19 +393,20 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
388 * for all entres on the fname linked list. (Normally there is only 393 * for all entres on the fname linked list. (Normally there is only
389 * one entry on the linked list, unless there are 62 bit hash collisions.) 394 * one entry on the linked list, unless there are 62 bit hash collisions.)
390 */ 395 */
391static int call_filldir(struct file * filp, void * dirent, 396static int call_filldir(struct file *filp, void *dirent,
392 filldir_t filldir, struct fname *fname) 397 filldir_t filldir, struct fname *fname)
393{ 398{
394 struct dir_private_info *info = filp->private_data; 399 struct dir_private_info *info = filp->private_data;
395 loff_t curr_pos; 400 loff_t curr_pos;
396 struct inode *inode = filp->f_path.dentry->d_inode; 401 struct inode *inode = filp->f_path.dentry->d_inode;
397 struct super_block * sb; 402 struct super_block *sb;
398 int error; 403 int error;
399 404
400 sb = inode->i_sb; 405 sb = inode->i_sb;
401 406
402 if (!fname) { 407 if (!fname) {
403 printk("call_filldir: called with null fname?!?\n"); 408 printk(KERN_ERR "ext4: call_filldir: called with "
409 "null fname?!?\n");
404 return 0; 410 return 0;
405 } 411 }
406 curr_pos = hash2pos(fname->hash, fname->minor_hash); 412 curr_pos = hash2pos(fname->hash, fname->minor_hash);
@@ -419,8 +425,8 @@ static int call_filldir(struct file * filp, void * dirent,
419 return 0; 425 return 0;
420} 426}
421 427
422static int ext4_dx_readdir(struct file * filp, 428static int ext4_dx_readdir(struct file *filp,
423 void * dirent, filldir_t filldir) 429 void *dirent, filldir_t filldir)
424{ 430{
425 struct dir_private_info *info = filp->private_data; 431 struct dir_private_info *info = filp->private_data;
426 struct inode *inode = filp->f_path.dentry->d_inode; 432 struct inode *inode = filp->f_path.dentry->d_inode;
@@ -511,7 +517,7 @@ finished:
511 return 0; 517 return 0;
512} 518}
513 519
514static int ext4_release_dir (struct inode * inode, struct file * filp) 520static int ext4_release_dir(struct inode *inode, struct file *filp)
515{ 521{
516 if (filp->private_data) 522 if (filp->private_data)
517 ext4_htree_free_dir_info(filp->private_data); 523 ext4_htree_free_dir_info(filp->private_data);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 295003241d3d..f46a513a5157 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -44,9 +44,9 @@
44#ifdef EXT4FS_DEBUG 44#ifdef EXT4FS_DEBUG
45#define ext4_debug(f, a...) \ 45#define ext4_debug(f, a...) \
46 do { \ 46 do { \
47 printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ 47 printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
48 __FILE__, __LINE__, __func__); \ 48 __FILE__, __LINE__, __func__); \
49 printk (KERN_DEBUG f, ## a); \ 49 printk(KERN_DEBUG f, ## a); \
50 } while (0) 50 } while (0)
51#else 51#else
52#define ext4_debug(f, a...) do {} while (0) 52#define ext4_debug(f, a...) do {} while (0)
@@ -128,7 +128,7 @@ struct ext4_allocation_request {
128#else 128#else
129# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) 129# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
130#endif 130#endif
131#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) 131#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
132#ifdef __KERNEL__ 132#ifdef __KERNEL__
133# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) 133# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
134#else 134#else
@@ -245,7 +245,7 @@ struct flex_groups {
245#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ 245#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
246 246
247#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ 247#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
248#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ 248#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */
249 249
250/* 250/*
251 * Inode dynamic state flags 251 * Inode dynamic state flags
@@ -291,8 +291,6 @@ struct ext4_new_group_data {
291#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS 291#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
292#define EXT4_IOC_GETVERSION _IOR('f', 3, long) 292#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
293#define EXT4_IOC_SETVERSION _IOW('f', 4, long) 293#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
294#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
295#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
296#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION 294#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
297#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION 295#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
298#ifdef CONFIG_JBD2_DEBUG 296#ifdef CONFIG_JBD2_DEBUG
@@ -300,7 +298,10 @@ struct ext4_new_group_data {
300#endif 298#endif
301#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) 299#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
302#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) 300#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
303#define EXT4_IOC_MIGRATE _IO('f', 7) 301#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
302#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
303#define EXT4_IOC_MIGRATE _IO('f', 9)
304 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
304 305
305/* 306/*
306 * ioctl commands in 32 bit emulation 307 * ioctl commands in 32 bit emulation
@@ -538,7 +539,6 @@ do { \
538#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 539#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
539#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 540#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
540#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 541#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
541#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
542#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 542#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
543/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ 543/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
544#ifndef _LINUX_EXT2_FS_H 544#ifndef _LINUX_EXT2_FS_H
@@ -667,7 +667,7 @@ struct ext4_super_block {
667}; 667};
668 668
669#ifdef __KERNEL__ 669#ifdef __KERNEL__
670static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) 670static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
671{ 671{
672 return sb->s_fs_info; 672 return sb->s_fs_info;
673} 673}
@@ -725,11 +725,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
725 */ 725 */
726 726
727#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ 727#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
728 ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) 728 (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask))
729#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ 729#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
730 ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) 730 (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask))
731#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ 731#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
732 ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) 732 (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask))
733#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ 733#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
734 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) 734 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
735#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ 735#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
@@ -789,6 +789,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
789#define EXT4_DEF_RESUID 0 789#define EXT4_DEF_RESUID 0
790#define EXT4_DEF_RESGID 0 790#define EXT4_DEF_RESGID 0
791 791
792#define EXT4_DEF_INODE_READAHEAD_BLKS 32
793
792/* 794/*
793 * Default mount options 795 * Default mount options
794 */ 796 */
@@ -954,6 +956,24 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
954void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 956void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
955 unsigned long *blockgrpp, ext4_grpblk_t *offsetp); 957 unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
956 958
959extern struct proc_dir_entry *ext4_proc_root;
960
961#ifdef CONFIG_PROC_FS
962extern const struct file_operations ext4_ui_proc_fops;
963
964#define EXT4_PROC_HANDLER(name, var) \
965do { \
966 proc = proc_create_data(name, mode, sbi->s_proc, \
967 &ext4_ui_proc_fops, &sbi->s_##var); \
968 if (proc == NULL) { \
969 printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \
970 goto err_out; \
971 } \
972} while (0)
973#else
974#define EXT4_PROC_HANDLER(name, var)
975#endif
976
957/* 977/*
958 * Function prototypes 978 * Function prototypes
959 */ 979 */
@@ -981,23 +1001,20 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
981extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, 1001extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
982 ext4_lblk_t iblock, ext4_fsblk_t goal, 1002 ext4_lblk_t iblock, ext4_fsblk_t goal,
983 unsigned long *count, int *errp); 1003 unsigned long *count, int *errp);
984extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, 1004extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
985 ext4_fsblk_t goal, unsigned long *count, int *errp);
986extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, 1005extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
987 ext4_fsblk_t nblocks); 1006 s64 nblocks);
988extern void ext4_free_blocks (handle_t *handle, struct inode *inode, 1007extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
989 ext4_fsblk_t block, unsigned long count, int metadata); 1008 ext4_fsblk_t block, unsigned long count, int metadata);
990extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, 1009extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
991 ext4_fsblk_t block, unsigned long count, 1010 ext4_fsblk_t block, unsigned long count,
992 unsigned long *pdquot_freed_blocks); 1011 unsigned long *pdquot_freed_blocks);
993extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); 1012extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
994extern void ext4_check_blocks_bitmap (struct super_block *); 1013extern void ext4_check_blocks_bitmap(struct super_block *);
995extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 1014extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
996 ext4_group_t block_group, 1015 ext4_group_t block_group,
997 struct buffer_head ** bh); 1016 struct buffer_head ** bh);
998extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1017extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
999extern void ext4_init_block_alloc_info(struct inode *);
1000extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
1001 1018
1002/* dir.c */ 1019/* dir.c */
1003extern int ext4_check_dir_entry(const char *, struct inode *, 1020extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1009,20 +1026,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
1009extern void ext4_htree_free_dir_info(struct dir_private_info *p); 1026extern void ext4_htree_free_dir_info(struct dir_private_info *p);
1010 1027
1011/* fsync.c */ 1028/* fsync.c */
1012extern int ext4_sync_file (struct file *, struct dentry *, int); 1029extern int ext4_sync_file(struct file *, struct dentry *, int);
1013 1030
1014/* hash.c */ 1031/* hash.c */
1015extern int ext4fs_dirhash(const char *name, int len, struct 1032extern int ext4fs_dirhash(const char *name, int len, struct
1016 dx_hash_info *hinfo); 1033 dx_hash_info *hinfo);
1017 1034
1018/* ialloc.c */ 1035/* ialloc.c */
1019extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); 1036extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
1020extern void ext4_free_inode (handle_t *, struct inode *); 1037extern void ext4_free_inode(handle_t *, struct inode *);
1021extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); 1038extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1022extern unsigned long ext4_count_free_inodes (struct super_block *); 1039extern unsigned long ext4_count_free_inodes(struct super_block *);
1023extern unsigned long ext4_count_dirs (struct super_block *); 1040extern unsigned long ext4_count_dirs(struct super_block *);
1024extern void ext4_check_inodes_bitmap (struct super_block *); 1041extern void ext4_check_inodes_bitmap(struct super_block *);
1025extern unsigned long ext4_count_free (struct buffer_head *, unsigned); 1042extern unsigned long ext4_count_free(struct buffer_head *, unsigned);
1026 1043
1027/* mballoc.c */ 1044/* mballoc.c */
1028extern long ext4_mb_stats; 1045extern long ext4_mb_stats;
@@ -1032,7 +1049,7 @@ extern int ext4_mb_release(struct super_block *);
1032extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, 1049extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
1033 struct ext4_allocation_request *, int *); 1050 struct ext4_allocation_request *, int *);
1034extern int ext4_mb_reserve_blocks(struct super_block *, int); 1051extern int ext4_mb_reserve_blocks(struct super_block *, int);
1035extern void ext4_mb_discard_inode_preallocations(struct inode *); 1052extern void ext4_discard_preallocations(struct inode *);
1036extern int __init init_ext4_mballoc(void); 1053extern int __init init_ext4_mballoc(void);
1037extern void exit_ext4_mballoc(void); 1054extern void exit_ext4_mballoc(void);
1038extern void ext4_mb_free_blocks(handle_t *, struct inode *, 1055extern void ext4_mb_free_blocks(handle_t *, struct inode *,
@@ -1050,24 +1067,25 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1050 ext4_lblk_t, int, int *); 1067 ext4_lblk_t, int, int *);
1051struct buffer_head *ext4_bread(handle_t *, struct inode *, 1068struct buffer_head *ext4_bread(handle_t *, struct inode *,
1052 ext4_lblk_t, int, int *); 1069 ext4_lblk_t, int, int *);
1070int ext4_get_block(struct inode *inode, sector_t iblock,
1071 struct buffer_head *bh_result, int create);
1053int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, 1072int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
1054 ext4_lblk_t iblock, unsigned long maxblocks, 1073 ext4_lblk_t iblock, unsigned long maxblocks,
1055 struct buffer_head *bh_result, 1074 struct buffer_head *bh_result,
1056 int create, int extend_disksize); 1075 int create, int extend_disksize);
1057 1076
1058extern struct inode *ext4_iget(struct super_block *, unsigned long); 1077extern struct inode *ext4_iget(struct super_block *, unsigned long);
1059extern int ext4_write_inode (struct inode *, int); 1078extern int ext4_write_inode(struct inode *, int);
1060extern int ext4_setattr (struct dentry *, struct iattr *); 1079extern int ext4_setattr(struct dentry *, struct iattr *);
1061extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 1080extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
1062 struct kstat *stat); 1081 struct kstat *stat);
1063extern void ext4_delete_inode (struct inode *); 1082extern void ext4_delete_inode(struct inode *);
1064extern int ext4_sync_inode (handle_t *, struct inode *); 1083extern int ext4_sync_inode(handle_t *, struct inode *);
1065extern void ext4_discard_reservation (struct inode *);
1066extern void ext4_dirty_inode(struct inode *); 1084extern void ext4_dirty_inode(struct inode *);
1067extern int ext4_change_inode_journal_flag(struct inode *, int); 1085extern int ext4_change_inode_journal_flag(struct inode *, int);
1068extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); 1086extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
1069extern int ext4_can_truncate(struct inode *inode); 1087extern int ext4_can_truncate(struct inode *inode);
1070extern void ext4_truncate (struct inode *); 1088extern void ext4_truncate(struct inode *);
1071extern void ext4_set_inode_flags(struct inode *); 1089extern void ext4_set_inode_flags(struct inode *);
1072extern void ext4_get_inode_flags(struct ext4_inode_info *); 1090extern void ext4_get_inode_flags(struct ext4_inode_info *);
1073extern void ext4_set_aops(struct inode *inode); 1091extern void ext4_set_aops(struct inode *inode);
@@ -1080,11 +1098,10 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
1080 1098
1081/* ioctl.c */ 1099/* ioctl.c */
1082extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1100extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1083extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); 1101extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
1084 1102
1085/* migrate.c */ 1103/* migrate.c */
1086extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, 1104extern int ext4_ext_migrate(struct inode *);
1087 unsigned long);
1088/* namei.c */ 1105/* namei.c */
1089extern int ext4_orphan_add(handle_t *, struct inode *); 1106extern int ext4_orphan_add(handle_t *, struct inode *);
1090extern int ext4_orphan_del(handle_t *, struct inode *); 1107extern int ext4_orphan_del(handle_t *, struct inode *);
@@ -1099,14 +1116,14 @@ extern int ext4_group_extend(struct super_block *sb,
1099 ext4_fsblk_t n_blocks_count); 1116 ext4_fsblk_t n_blocks_count);
1100 1117
1101/* super.c */ 1118/* super.c */
1102extern void ext4_error (struct super_block *, const char *, const char *, ...) 1119extern void ext4_error(struct super_block *, const char *, const char *, ...)
1103 __attribute__ ((format (printf, 3, 4))); 1120 __attribute__ ((format (printf, 3, 4)));
1104extern void __ext4_std_error (struct super_block *, const char *, int); 1121extern void __ext4_std_error(struct super_block *, const char *, int);
1105extern void ext4_abort (struct super_block *, const char *, const char *, ...) 1122extern void ext4_abort(struct super_block *, const char *, const char *, ...)
1106 __attribute__ ((format (printf, 3, 4))); 1123 __attribute__ ((format (printf, 3, 4)));
1107extern void ext4_warning (struct super_block *, const char *, const char *, ...) 1124extern void ext4_warning(struct super_block *, const char *, const char *, ...)
1108 __attribute__ ((format (printf, 3, 4))); 1125 __attribute__ ((format (printf, 3, 4)));
1109extern void ext4_update_dynamic_rev (struct super_block *sb); 1126extern void ext4_update_dynamic_rev(struct super_block *sb);
1110extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, 1127extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
1111 __u32 compat); 1128 __u32 compat);
1112extern int ext4_update_rocompat_feature(handle_t *handle, 1129extern int ext4_update_rocompat_feature(handle_t *handle,
@@ -1179,7 +1196,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
1179 1196
1180static inline 1197static inline
1181struct ext4_group_info *ext4_get_group_info(struct super_block *sb, 1198struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1182 ext4_group_t group) 1199 ext4_group_t group)
1183{ 1200{
1184 struct ext4_group_info ***grp_info; 1201 struct ext4_group_info ***grp_info;
1185 long indexv, indexh; 1202 long indexv, indexh;
@@ -1207,6 +1224,28 @@ do { \
1207 __ext4_std_error((sb), __func__, (errno)); \ 1224 __ext4_std_error((sb), __func__, (errno)); \
1208} while (0) 1225} while (0)
1209 1226
1227#ifdef CONFIG_SMP
1228/* Each CPU can accumulate FBC_BATCH blocks in their local
1229 * counters. So we need to make sure we have free blocks more
1230 * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
1231 */
1232#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
1233#else
1234#define EXT4_FREEBLOCKS_WATERMARK 0
1235#endif
1236
1237static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
1238{
1239 /*
1240 * XXX: replace with spinlock if seen contended -bzzz
1241 */
1242 down_write(&EXT4_I(inode)->i_data_sem);
1243 if (newsize > EXT4_I(inode)->i_disksize)
1244 EXT4_I(inode)->i_disksize = newsize;
1245 up_write(&EXT4_I(inode)->i_data_sem);
1246 return ;
1247}
1248
1210/* 1249/*
1211 * Inodes and files operations 1250 * Inodes and files operations
1212 */ 1251 */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index d33dc56d6986..bec7ce59fc0d 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -124,6 +124,19 @@ struct ext4_ext_path {
124#define EXT4_EXT_CACHE_GAP 1 124#define EXT4_EXT_CACHE_GAP 1
125#define EXT4_EXT_CACHE_EXTENT 2 125#define EXT4_EXT_CACHE_EXTENT 2
126 126
127/*
128 * to be called by ext4_ext_walk_space()
129 * negative retcode - error
130 * positive retcode - signal for ext4_ext_walk_space(), see below
131 * callback must return valid extent (passed or newly created)
132 */
133typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
134 struct ext4_ext_cache *,
135 struct ext4_extent *, void *);
136
137#define EXT_CONTINUE 0
138#define EXT_BREAK 1
139#define EXT_REPEAT 2
127 140
128#define EXT_MAX_BLOCK 0xffffffff 141#define EXT_MAX_BLOCK 0xffffffff
129 142
@@ -224,6 +237,8 @@ extern int ext4_ext_try_to_merge(struct inode *inode,
224 struct ext4_extent *); 237 struct ext4_extent *);
225extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); 238extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
226extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); 239extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
240extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
241 ext_prepare_callback, void *);
227extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, 242extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
228 struct ext4_ext_path *); 243 struct ext4_ext_path *);
229extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, 244extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index ef7409f0e7e4..5c124c0ac6d3 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -33,38 +33,6 @@ typedef __u32 ext4_lblk_t;
33/* data type for block group number */ 33/* data type for block group number */
34typedef unsigned long ext4_group_t; 34typedef unsigned long ext4_group_t;
35 35
36struct ext4_reserve_window {
37 ext4_fsblk_t _rsv_start; /* First byte reserved */
38 ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
39};
40
41struct ext4_reserve_window_node {
42 struct rb_node rsv_node;
43 __u32 rsv_goal_size;
44 __u32 rsv_alloc_hit;
45 struct ext4_reserve_window rsv_window;
46};
47
48struct ext4_block_alloc_info {
49 /* information about reservation window */
50 struct ext4_reserve_window_node rsv_window_node;
51 /*
52 * was i_next_alloc_block in ext4_inode_info
53 * is the logical (file-relative) number of the
54 * most-recently-allocated block in this file.
55 * We use this for detecting linearly ascending allocation requests.
56 */
57 ext4_lblk_t last_alloc_logical_block;
58 /*
59 * Was i_next_alloc_goal in ext4_inode_info
60 * is the *physical* companion to i_next_alloc_block.
61 * it the physical block number of the block which was most-recentl
62 * allocated to this file. This give us the goal (target) for the next
63 * allocation when we detect linearly ascending requests.
64 */
65 ext4_fsblk_t last_alloc_physical_block;
66};
67
68#define rsv_start rsv_window._rsv_start 36#define rsv_start rsv_window._rsv_start
69#define rsv_end rsv_window._rsv_end 37#define rsv_end rsv_window._rsv_end
70 38
@@ -97,11 +65,8 @@ struct ext4_inode_info {
97 ext4_group_t i_block_group; 65 ext4_group_t i_block_group;
98 __u32 i_state; /* Dynamic state flags for ext4 */ 66 __u32 i_state; /* Dynamic state flags for ext4 */
99 67
100 /* block reservation info */
101 struct ext4_block_alloc_info *i_block_alloc_info;
102
103 ext4_lblk_t i_dir_start_lookup; 68 ext4_lblk_t i_dir_start_lookup;
104#ifdef CONFIG_EXT4DEV_FS_XATTR 69#ifdef CONFIG_EXT4_FS_XATTR
105 /* 70 /*
106 * Extended attributes can be read independently of the main file 71 * Extended attributes can be read independently of the main file
107 * data. Taking i_mutex even when reading would cause contention 72 * data. Taking i_mutex even when reading would cause contention
@@ -111,7 +76,7 @@ struct ext4_inode_info {
111 */ 76 */
112 struct rw_semaphore xattr_sem; 77 struct rw_semaphore xattr_sem;
113#endif 78#endif
114#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 79#ifdef CONFIG_EXT4_FS_POSIX_ACL
115 struct posix_acl *i_acl; 80 struct posix_acl *i_acl;
116 struct posix_acl *i_default_acl; 81 struct posix_acl *i_default_acl;
117#endif 82#endif
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 6300226d5531..6a0b40d43264 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -40,8 +40,8 @@ struct ext4_sb_info {
40 unsigned long s_blocks_last; /* Last seen block count */ 40 unsigned long s_blocks_last; /* Last seen block count */
41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ 41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
42 struct buffer_head * s_sbh; /* Buffer containing the super block */ 42 struct buffer_head * s_sbh; /* Buffer containing the super block */
43 struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ 43 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
44 struct buffer_head ** s_group_desc; 44 struct buffer_head **s_group_desc;
45 unsigned long s_mount_opt; 45 unsigned long s_mount_opt;
46 ext4_fsblk_t s_sb_block; 46 ext4_fsblk_t s_sb_block;
47 uid_t s_resuid; 47 uid_t s_resuid;
@@ -52,6 +52,7 @@ struct ext4_sb_info {
52 int s_desc_per_block_bits; 52 int s_desc_per_block_bits;
53 int s_inode_size; 53 int s_inode_size;
54 int s_first_ino; 54 int s_first_ino;
55 unsigned int s_inode_readahead_blks;
55 spinlock_t s_next_gen_lock; 56 spinlock_t s_next_gen_lock;
56 u32 s_next_generation; 57 u32 s_next_generation;
57 u32 s_hash_seed[4]; 58 u32 s_hash_seed[4];
@@ -59,16 +60,17 @@ struct ext4_sb_info {
59 struct percpu_counter s_freeblocks_counter; 60 struct percpu_counter s_freeblocks_counter;
60 struct percpu_counter s_freeinodes_counter; 61 struct percpu_counter s_freeinodes_counter;
61 struct percpu_counter s_dirs_counter; 62 struct percpu_counter s_dirs_counter;
63 struct percpu_counter s_dirtyblocks_counter;
62 struct blockgroup_lock s_blockgroup_lock; 64 struct blockgroup_lock s_blockgroup_lock;
65 struct proc_dir_entry *s_proc;
63 66
64 /* root of the per fs reservation window tree */ 67 /* root of the per fs reservation window tree */
65 spinlock_t s_rsv_window_lock; 68 spinlock_t s_rsv_window_lock;
66 struct rb_root s_rsv_window_root; 69 struct rb_root s_rsv_window_root;
67 struct ext4_reserve_window_node s_rsv_window_head;
68 70
69 /* Journaling */ 71 /* Journaling */
70 struct inode * s_journal_inode; 72 struct inode *s_journal_inode;
71 struct journal_s * s_journal; 73 struct journal_s *s_journal;
72 struct list_head s_orphan; 74 struct list_head s_orphan;
73 unsigned long s_commit_interval; 75 unsigned long s_commit_interval;
74 struct block_device *journal_bdev; 76 struct block_device *journal_bdev;
@@ -106,12 +108,12 @@ struct ext4_sb_info {
106 108
107 /* tunables */ 109 /* tunables */
108 unsigned long s_stripe; 110 unsigned long s_stripe;
109 unsigned long s_mb_stream_request; 111 unsigned int s_mb_stream_request;
110 unsigned long s_mb_max_to_scan; 112 unsigned int s_mb_max_to_scan;
111 unsigned long s_mb_min_to_scan; 113 unsigned int s_mb_min_to_scan;
112 unsigned long s_mb_stats; 114 unsigned int s_mb_stats;
113 unsigned long s_mb_order2_reqs; 115 unsigned int s_mb_order2_reqs;
114 unsigned long s_mb_group_prealloc; 116 unsigned int s_mb_group_prealloc;
115 /* where last allocation was done - for stream allocation */ 117 /* where last allocation was done - for stream allocation */
116 unsigned long s_mb_last_group; 118 unsigned long s_mb_last_group;
117 unsigned long s_mb_last_start; 119 unsigned long s_mb_last_start;
@@ -121,7 +123,6 @@ struct ext4_sb_info {
121 int s_mb_history_cur; 123 int s_mb_history_cur;
122 int s_mb_history_max; 124 int s_mb_history_max;
123 int s_mb_history_num; 125 int s_mb_history_num;
124 struct proc_dir_entry *s_mb_proc;
125 spinlock_t s_mb_history_lock; 126 spinlock_t s_mb_history_lock;
126 int s_mb_history_filter; 127 int s_mb_history_filter;
127 128
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b24d3c53f20c..ea2ce3c0ae66 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -40,6 +40,7 @@
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/falloc.h> 41#include <linux/falloc.h>
42#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include <linux/fiemap.h>
43#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
44#include "ext4_extents.h" 45#include "ext4_extents.h"
45 46
@@ -383,8 +384,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
383 ext_debug("\n"); 384 ext_debug("\n");
384} 385}
385#else 386#else
386#define ext4_ext_show_path(inode,path) 387#define ext4_ext_show_path(inode, path)
387#define ext4_ext_show_leaf(inode,path) 388#define ext4_ext_show_leaf(inode, path)
388#endif 389#endif
389 390
390void ext4_ext_drop_refs(struct ext4_ext_path *path) 391void ext4_ext_drop_refs(struct ext4_ext_path *path)
@@ -440,9 +441,10 @@ ext4_ext_binsearch_idx(struct inode *inode,
440 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { 441 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
441 if (k != 0 && 442 if (k != 0 &&
442 le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { 443 le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
443 printk("k=%d, ix=0x%p, first=0x%p\n", k, 444 printk(KERN_DEBUG "k=%d, ix=0x%p, "
444 ix, EXT_FIRST_INDEX(eh)); 445 "first=0x%p\n", k,
445 printk("%u <= %u\n", 446 ix, EXT_FIRST_INDEX(eh));
447 printk(KERN_DEBUG "%u <= %u\n",
446 le32_to_cpu(ix->ei_block), 448 le32_to_cpu(ix->ei_block),
447 le32_to_cpu(ix[-1].ei_block)); 449 le32_to_cpu(ix[-1].ei_block));
448 } 450 }
@@ -1475,7 +1477,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1475 struct ext4_ext_path *path, 1477 struct ext4_ext_path *path,
1476 struct ext4_extent *newext) 1478 struct ext4_extent *newext)
1477{ 1479{
1478 struct ext4_extent_header * eh; 1480 struct ext4_extent_header *eh;
1479 struct ext4_extent *ex, *fex; 1481 struct ext4_extent *ex, *fex;
1480 struct ext4_extent *nearex; /* nearest extent */ 1482 struct ext4_extent *nearex; /* nearest extent */
1481 struct ext4_ext_path *npath = NULL; 1483 struct ext4_ext_path *npath = NULL;
@@ -1625,6 +1627,113 @@ cleanup:
1625 return err; 1627 return err;
1626} 1628}
1627 1629
1630int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1631 ext4_lblk_t num, ext_prepare_callback func,
1632 void *cbdata)
1633{
1634 struct ext4_ext_path *path = NULL;
1635 struct ext4_ext_cache cbex;
1636 struct ext4_extent *ex;
1637 ext4_lblk_t next, start = 0, end = 0;
1638 ext4_lblk_t last = block + num;
1639 int depth, exists, err = 0;
1640
1641 BUG_ON(func == NULL);
1642 BUG_ON(inode == NULL);
1643
1644 while (block < last && block != EXT_MAX_BLOCK) {
1645 num = last - block;
1646 /* find extent for this block */
1647 path = ext4_ext_find_extent(inode, block, path);
1648 if (IS_ERR(path)) {
1649 err = PTR_ERR(path);
1650 path = NULL;
1651 break;
1652 }
1653
1654 depth = ext_depth(inode);
1655 BUG_ON(path[depth].p_hdr == NULL);
1656 ex = path[depth].p_ext;
1657 next = ext4_ext_next_allocated_block(path);
1658
1659 exists = 0;
1660 if (!ex) {
1661 /* there is no extent yet, so try to allocate
1662 * all requested space */
1663 start = block;
1664 end = block + num;
1665 } else if (le32_to_cpu(ex->ee_block) > block) {
1666 /* need to allocate space before found extent */
1667 start = block;
1668 end = le32_to_cpu(ex->ee_block);
1669 if (block + num < end)
1670 end = block + num;
1671 } else if (block >= le32_to_cpu(ex->ee_block)
1672 + ext4_ext_get_actual_len(ex)) {
1673 /* need to allocate space after found extent */
1674 start = block;
1675 end = block + num;
1676 if (end >= next)
1677 end = next;
1678 } else if (block >= le32_to_cpu(ex->ee_block)) {
1679 /*
1680 * some part of requested space is covered
1681 * by found extent
1682 */
1683 start = block;
1684 end = le32_to_cpu(ex->ee_block)
1685 + ext4_ext_get_actual_len(ex);
1686 if (block + num < end)
1687 end = block + num;
1688 exists = 1;
1689 } else {
1690 BUG();
1691 }
1692 BUG_ON(end <= start);
1693
1694 if (!exists) {
1695 cbex.ec_block = start;
1696 cbex.ec_len = end - start;
1697 cbex.ec_start = 0;
1698 cbex.ec_type = EXT4_EXT_CACHE_GAP;
1699 } else {
1700 cbex.ec_block = le32_to_cpu(ex->ee_block);
1701 cbex.ec_len = ext4_ext_get_actual_len(ex);
1702 cbex.ec_start = ext_pblock(ex);
1703 cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1704 }
1705
1706 BUG_ON(cbex.ec_len == 0);
1707 err = func(inode, path, &cbex, ex, cbdata);
1708 ext4_ext_drop_refs(path);
1709
1710 if (err < 0)
1711 break;
1712
1713 if (err == EXT_REPEAT)
1714 continue;
1715 else if (err == EXT_BREAK) {
1716 err = 0;
1717 break;
1718 }
1719
1720 if (ext_depth(inode) != depth) {
1721 /* depth was changed. we have to realloc path */
1722 kfree(path);
1723 path = NULL;
1724 }
1725
1726 block = cbex.ec_block + cbex.ec_len;
1727 }
1728
1729 if (path) {
1730 ext4_ext_drop_refs(path);
1731 kfree(path);
1732 }
1733
1734 return err;
1735}
1736
1628static void 1737static void
1629ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, 1738ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
1630 __u32 len, ext4_fsblk_t start, int type) 1739 __u32 len, ext4_fsblk_t start, int type)
@@ -2142,7 +2251,7 @@ void ext4_ext_init(struct super_block *sb)
2142 */ 2251 */
2143 2252
2144 if (test_opt(sb, EXTENTS)) { 2253 if (test_opt(sb, EXTENTS)) {
2145 printk("EXT4-fs: file extents enabled"); 2254 printk(KERN_INFO "EXT4-fs: file extents enabled");
2146#ifdef AGGRESSIVE_TEST 2255#ifdef AGGRESSIVE_TEST
2147 printk(", aggressive tests"); 2256 printk(", aggressive tests");
2148#endif 2257#endif
@@ -2696,11 +2805,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2696 goto out2; 2805 goto out2;
2697 } 2806 }
2698 /* 2807 /*
2699 * Okay, we need to do block allocation. Lazily initialize the block 2808 * Okay, we need to do block allocation.
2700 * allocation info here if necessary.
2701 */ 2809 */
2702 if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
2703 ext4_init_block_alloc_info(inode);
2704 2810
2705 /* find neighbour allocated blocks */ 2811 /* find neighbour allocated blocks */
2706 ar.lleft = iblock; 2812 ar.lleft = iblock;
@@ -2760,7 +2866,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2760 /* free data blocks we just allocated */ 2866 /* free data blocks we just allocated */
2761 /* not a good idea to call discard here directly, 2867 /* not a good idea to call discard here directly,
2762 * but otherwise we'd need to call it every free() */ 2868 * but otherwise we'd need to call it every free() */
2763 ext4_mb_discard_inode_preallocations(inode); 2869 ext4_discard_preallocations(inode);
2764 ext4_free_blocks(handle, inode, ext_pblock(&newex), 2870 ext4_free_blocks(handle, inode, ext_pblock(&newex),
2765 ext4_ext_get_actual_len(&newex), 0); 2871 ext4_ext_get_actual_len(&newex), 0);
2766 goto out2; 2872 goto out2;
@@ -2824,7 +2930,7 @@ void ext4_ext_truncate(struct inode *inode)
2824 down_write(&EXT4_I(inode)->i_data_sem); 2930 down_write(&EXT4_I(inode)->i_data_sem);
2825 ext4_ext_invalidate_cache(inode); 2931 ext4_ext_invalidate_cache(inode);
2826 2932
2827 ext4_discard_reservation(inode); 2933 ext4_discard_preallocations(inode);
2828 2934
2829 /* 2935 /*
2830 * TODO: optimization is possible here. 2936 * TODO: optimization is possible here.
@@ -2877,10 +2983,11 @@ static void ext4_falloc_update_inode(struct inode *inode,
2877 * Update only when preallocation was requested beyond 2983 * Update only when preallocation was requested beyond
2878 * the file size. 2984 * the file size.
2879 */ 2985 */
2880 if (!(mode & FALLOC_FL_KEEP_SIZE) && 2986 if (!(mode & FALLOC_FL_KEEP_SIZE)) {
2881 new_size > i_size_read(inode)) { 2987 if (new_size > i_size_read(inode))
2882 i_size_write(inode, new_size); 2988 i_size_write(inode, new_size);
2883 EXT4_I(inode)->i_disksize = new_size; 2989 if (new_size > EXT4_I(inode)->i_disksize)
2990 ext4_update_i_disksize(inode, new_size);
2884 } 2991 }
2885 2992
2886} 2993}
@@ -2972,3 +3079,143 @@ retry:
2972 mutex_unlock(&inode->i_mutex); 3079 mutex_unlock(&inode->i_mutex);
2973 return ret > 0 ? ret2 : ret; 3080 return ret > 0 ? ret2 : ret;
2974} 3081}
3082
3083/*
3084 * Callback function called for each extent to gather FIEMAP information.
3085 */
3086int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3087 struct ext4_ext_cache *newex, struct ext4_extent *ex,
3088 void *data)
3089{
3090 struct fiemap_extent_info *fieinfo = data;
3091 unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
3092 __u64 logical;
3093 __u64 physical;
3094 __u64 length;
3095 __u32 flags = 0;
3096 int error;
3097
3098 logical = (__u64)newex->ec_block << blksize_bits;
3099
3100 if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
3101 pgoff_t offset;
3102 struct page *page;
3103 struct buffer_head *bh = NULL;
3104
3105 offset = logical >> PAGE_SHIFT;
3106 page = find_get_page(inode->i_mapping, offset);
3107 if (!page || !page_has_buffers(page))
3108 return EXT_CONTINUE;
3109
3110 bh = page_buffers(page);
3111
3112 if (!bh)
3113 return EXT_CONTINUE;
3114
3115 if (buffer_delay(bh)) {
3116 flags |= FIEMAP_EXTENT_DELALLOC;
3117 page_cache_release(page);
3118 } else {
3119 page_cache_release(page);
3120 return EXT_CONTINUE;
3121 }
3122 }
3123
3124 physical = (__u64)newex->ec_start << blksize_bits;
3125 length = (__u64)newex->ec_len << blksize_bits;
3126
3127 if (ex && ext4_ext_is_uninitialized(ex))
3128 flags |= FIEMAP_EXTENT_UNWRITTEN;
3129
3130 /*
3131 * If this extent reaches EXT_MAX_BLOCK, it must be last.
3132 *
3133 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
3134 * this also indicates no more allocated blocks.
3135 *
3136 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
3137 */
3138 if (logical + length - 1 == EXT_MAX_BLOCK ||
3139 ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
3140 flags |= FIEMAP_EXTENT_LAST;
3141
3142 error = fiemap_fill_next_extent(fieinfo, logical, physical,
3143 length, flags);
3144 if (error < 0)
3145 return error;
3146 if (error == 1)
3147 return EXT_BREAK;
3148
3149 return EXT_CONTINUE;
3150}
3151
3152/* fiemap flags we can handle specified here */
3153#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
3154
3155int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
3156{
3157 __u64 physical = 0;
3158 __u64 length;
3159 __u32 flags = FIEMAP_EXTENT_LAST;
3160 int blockbits = inode->i_sb->s_blocksize_bits;
3161 int error = 0;
3162
3163 /* in-inode? */
3164 if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
3165 struct ext4_iloc iloc;
3166 int offset; /* offset of xattr in inode */
3167
3168 error = ext4_get_inode_loc(inode, &iloc);
3169 if (error)
3170 return error;
3171 physical = iloc.bh->b_blocknr << blockbits;
3172 offset = EXT4_GOOD_OLD_INODE_SIZE +
3173 EXT4_I(inode)->i_extra_isize;
3174 physical += offset;
3175 length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
3176 flags |= FIEMAP_EXTENT_DATA_INLINE;
3177 } else { /* external block */
3178 physical = EXT4_I(inode)->i_file_acl << blockbits;
3179 length = inode->i_sb->s_blocksize;
3180 }
3181
3182 if (physical)
3183 error = fiemap_fill_next_extent(fieinfo, 0, physical,
3184 length, flags);
3185 return (error < 0 ? error : 0);
3186}
3187
3188int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3189 __u64 start, __u64 len)
3190{
3191 ext4_lblk_t start_blk;
3192 ext4_lblk_t len_blks;
3193 int error = 0;
3194
3195 /* fallback to generic here if not in extents fmt */
3196 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
3197 return generic_block_fiemap(inode, fieinfo, start, len,
3198 ext4_get_block);
3199
3200 if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
3201 return -EBADR;
3202
3203 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
3204 error = ext4_xattr_fiemap(inode, fieinfo);
3205 } else {
3206 start_blk = start >> inode->i_sb->s_blocksize_bits;
3207 len_blks = len >> inode->i_sb->s_blocksize_bits;
3208
3209 /*
3210 * Walk the extent tree gathering extent information.
3211 * ext4_ext_fiemap_cb will push extents back to user.
3212 */
3213 down_write(&EXT4_I(inode)->i_data_sem);
3214 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3215 ext4_ext_fiemap_cb, fieinfo);
3216 up_write(&EXT4_I(inode)->i_data_sem);
3217 }
3218
3219 return error;
3220}
3221
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 430eb7978db4..6bd11fba71f7 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -31,14 +31,14 @@
31 * from ext4_file_open: open gets called at every open, but release 31 * from ext4_file_open: open gets called at every open, but release
32 * gets called only when /all/ the files are closed. 32 * gets called only when /all/ the files are closed.
33 */ 33 */
34static int ext4_release_file (struct inode * inode, struct file * filp) 34static int ext4_release_file(struct inode *inode, struct file *filp)
35{ 35{
36 /* if we are the last writer on the inode, drop the block reservation */ 36 /* if we are the last writer on the inode, drop the block reservation */
37 if ((filp->f_mode & FMODE_WRITE) && 37 if ((filp->f_mode & FMODE_WRITE) &&
38 (atomic_read(&inode->i_writecount) == 1)) 38 (atomic_read(&inode->i_writecount) == 1))
39 { 39 {
40 down_write(&EXT4_I(inode)->i_data_sem); 40 down_write(&EXT4_I(inode)->i_data_sem);
41 ext4_discard_reservation(inode); 41 ext4_discard_preallocations(inode);
42 up_write(&EXT4_I(inode)->i_data_sem); 42 up_write(&EXT4_I(inode)->i_data_sem);
43 } 43 }
44 if (is_dx(inode) && filp->private_data) 44 if (is_dx(inode) && filp->private_data)
@@ -140,6 +140,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
140 return 0; 140 return 0;
141} 141}
142 142
143extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
144 __u64 start, __u64 len);
145
143const struct file_operations ext4_file_operations = { 146const struct file_operations ext4_file_operations = {
144 .llseek = generic_file_llseek, 147 .llseek = generic_file_llseek,
145 .read = do_sync_read, 148 .read = do_sync_read,
@@ -162,7 +165,7 @@ const struct inode_operations ext4_file_inode_operations = {
162 .truncate = ext4_truncate, 165 .truncate = ext4_truncate,
163 .setattr = ext4_setattr, 166 .setattr = ext4_setattr,
164 .getattr = ext4_getattr, 167 .getattr = ext4_getattr,
165#ifdef CONFIG_EXT4DEV_FS_XATTR 168#ifdef CONFIG_EXT4_FS_XATTR
166 .setxattr = generic_setxattr, 169 .setxattr = generic_setxattr,
167 .getxattr = generic_getxattr, 170 .getxattr = generic_getxattr,
168 .listxattr = ext4_listxattr, 171 .listxattr = ext4_listxattr,
@@ -170,5 +173,6 @@ const struct inode_operations ext4_file_inode_operations = {
170#endif 173#endif
171 .permission = ext4_permission, 174 .permission = ext4_permission,
172 .fallocate = ext4_fallocate, 175 .fallocate = ext4_fallocate,
176 .fiemap = ext4_fiemap,
173}; 177};
174 178
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a45c3737ad31..5afe4370840b 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -28,6 +28,7 @@
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/blkdev.h> 30#include <linux/blkdev.h>
31#include <linux/marker.h>
31#include "ext4.h" 32#include "ext4.h"
32#include "ext4_jbd2.h" 33#include "ext4_jbd2.h"
33 34
@@ -43,7 +44,7 @@
43 * inode to disk. 44 * inode to disk.
44 */ 45 */
45 46
46int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) 47int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
47{ 48{
48 struct inode *inode = dentry->d_inode; 49 struct inode *inode = dentry->d_inode;
49 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 50 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
@@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
51 52
52 J_ASSERT(ext4_journal_current_handle() == NULL); 53 J_ASSERT(ext4_journal_current_handle() == NULL);
53 54
55 trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
56 inode->i_sb->s_id, datasync, inode->i_ino,
57 dentry->d_parent->d_inode->i_ino);
58
54 /* 59 /*
55 * data=writeback: 60 * data=writeback:
56 * The caller's filemap_fdatawrite()/wait will sync the data. 61 * The caller's filemap_fdatawrite()/wait will sync the data.
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1d6329dbe390..556ca8eba3db 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
27 sum += DELTA; 27 sum += DELTA;
28 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); 28 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
29 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); 29 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
30 } while(--n); 30 } while (--n);
31 31
32 buf[0] += b0; 32 buf[0] += b0;
33 buf[1] += b1; 33 buf[1] += b1;
@@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[])
35 35
36 36
37/* The old legacy hash */ 37/* The old legacy hash */
38static __u32 dx_hack_hash (const char *name, int len) 38static __u32 dx_hack_hash(const char *name, int len)
39{ 39{
40 __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; 40 __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
41 while (len--) { 41 while (len--) {
@@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
59 val = pad; 59 val = pad;
60 if (len > num*4) 60 if (len > num*4)
61 len = num * 4; 61 len = num * 4;
62 for (i=0; i < len; i++) { 62 for (i = 0; i < len; i++) {
63 if ((i % 4) == 0) 63 if ((i % 4) == 0)
64 val = pad; 64 val = pad;
65 val = msg[i] + (val << 8); 65 val = msg[i] + (val << 8);
@@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
104 104
105 /* Check to see if the seed is all zero's */ 105 /* Check to see if the seed is all zero's */
106 if (hinfo->seed) { 106 if (hinfo->seed) {
107 for (i=0; i < 4; i++) { 107 for (i = 0; i < 4; i++) {
108 if (hinfo->seed[i]) 108 if (hinfo->seed[i])
109 break; 109 break;
110 } 110 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f344834bbf58..fe34d74cfb19 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
115 block_group, bitmap_blk); 115 block_group, bitmap_blk);
116 return NULL; 116 return NULL;
117 } 117 }
118 if (bh_uptodate_or_lock(bh)) 118 if (buffer_uptodate(bh) &&
119 !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
119 return bh; 120 return bh;
120 121
122 lock_buffer(bh);
121 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 123 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
122 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 124 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
123 ext4_init_inode_bitmap(sb, bh, block_group, desc); 125 ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -154,39 +156,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
154 * though), and then we'd have two inodes sharing the 156 * though), and then we'd have two inodes sharing the
155 * same inode number and space on the harddisk. 157 * same inode number and space on the harddisk.
156 */ 158 */
157void ext4_free_inode (handle_t *handle, struct inode * inode) 159void ext4_free_inode(handle_t *handle, struct inode *inode)
158{ 160{
159 struct super_block * sb = inode->i_sb; 161 struct super_block *sb = inode->i_sb;
160 int is_directory; 162 int is_directory;
161 unsigned long ino; 163 unsigned long ino;
162 struct buffer_head *bitmap_bh = NULL; 164 struct buffer_head *bitmap_bh = NULL;
163 struct buffer_head *bh2; 165 struct buffer_head *bh2;
164 ext4_group_t block_group; 166 ext4_group_t block_group;
165 unsigned long bit; 167 unsigned long bit;
166 struct ext4_group_desc * gdp; 168 struct ext4_group_desc *gdp;
167 struct ext4_super_block * es; 169 struct ext4_super_block *es;
168 struct ext4_sb_info *sbi; 170 struct ext4_sb_info *sbi;
169 int fatal = 0, err; 171 int fatal = 0, err;
170 ext4_group_t flex_group; 172 ext4_group_t flex_group;
171 173
172 if (atomic_read(&inode->i_count) > 1) { 174 if (atomic_read(&inode->i_count) > 1) {
173 printk ("ext4_free_inode: inode has count=%d\n", 175 printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
174 atomic_read(&inode->i_count)); 176 atomic_read(&inode->i_count));
175 return; 177 return;
176 } 178 }
177 if (inode->i_nlink) { 179 if (inode->i_nlink) {
178 printk ("ext4_free_inode: inode has nlink=%d\n", 180 printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
179 inode->i_nlink); 181 inode->i_nlink);
180 return; 182 return;
181 } 183 }
182 if (!sb) { 184 if (!sb) {
183 printk("ext4_free_inode: inode on nonexistent device\n"); 185 printk(KERN_ERR "ext4_free_inode: inode on "
186 "nonexistent device\n");
184 return; 187 return;
185 } 188 }
186 sbi = EXT4_SB(sb); 189 sbi = EXT4_SB(sb);
187 190
188 ino = inode->i_ino; 191 ino = inode->i_ino;
189 ext4_debug ("freeing inode %lu\n", ino); 192 ext4_debug("freeing inode %lu\n", ino);
190 193
191 /* 194 /*
192 * Note: we must free any quota before locking the superblock, 195 * Note: we must free any quota before locking the superblock,
@@ -200,12 +203,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
200 is_directory = S_ISDIR(inode->i_mode); 203 is_directory = S_ISDIR(inode->i_mode);
201 204
202 /* Do this BEFORE marking the inode not in use or returning an error */ 205 /* Do this BEFORE marking the inode not in use or returning an error */
203 clear_inode (inode); 206 clear_inode(inode);
204 207
205 es = EXT4_SB(sb)->s_es; 208 es = EXT4_SB(sb)->s_es;
206 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 209 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
207 ext4_error (sb, "ext4_free_inode", 210 ext4_error(sb, "ext4_free_inode",
208 "reserved or nonexistent inode %lu", ino); 211 "reserved or nonexistent inode %lu", ino);
209 goto error_return; 212 goto error_return;
210 } 213 }
211 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 214 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
@@ -222,10 +225,10 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
222 /* Ok, now we can actually update the inode bitmaps.. */ 225 /* Ok, now we can actually update the inode bitmaps.. */
223 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 226 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
224 bit, bitmap_bh->b_data)) 227 bit, bitmap_bh->b_data))
225 ext4_error (sb, "ext4_free_inode", 228 ext4_error(sb, "ext4_free_inode",
226 "bit already cleared for inode %lu", ino); 229 "bit already cleared for inode %lu", ino);
227 else { 230 else {
228 gdp = ext4_get_group_desc (sb, block_group, &bh2); 231 gdp = ext4_get_group_desc(sb, block_group, &bh2);
229 232
230 BUFFER_TRACE(bh2, "get_write_access"); 233 BUFFER_TRACE(bh2, "get_write_access");
231 fatal = ext4_journal_get_write_access(handle, bh2); 234 fatal = ext4_journal_get_write_access(handle, bh2);
@@ -287,7 +290,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
287 avefreei = freei / ngroups; 290 avefreei = freei / ngroups;
288 291
289 for (group = 0; group < ngroups; group++) { 292 for (group = 0; group < ngroups; group++) {
290 desc = ext4_get_group_desc (sb, group, NULL); 293 desc = ext4_get_group_desc(sb, group, NULL);
291 if (!desc || !desc->bg_free_inodes_count) 294 if (!desc || !desc->bg_free_inodes_count)
292 continue; 295 continue;
293 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) 296 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
@@ -576,16 +579,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
576 * For other inodes, search forward from the parent directory's block 579 * For other inodes, search forward from the parent directory's block
577 * group to find a free inode. 580 * group to find a free inode.
578 */ 581 */
579struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) 582struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
580{ 583{
581 struct super_block *sb; 584 struct super_block *sb;
582 struct buffer_head *bitmap_bh = NULL; 585 struct buffer_head *bitmap_bh = NULL;
583 struct buffer_head *bh2; 586 struct buffer_head *bh2;
584 ext4_group_t group = 0; 587 ext4_group_t group = 0;
585 unsigned long ino = 0; 588 unsigned long ino = 0;
586 struct inode * inode; 589 struct inode *inode;
587 struct ext4_group_desc * gdp = NULL; 590 struct ext4_group_desc *gdp = NULL;
588 struct ext4_super_block * es; 591 struct ext4_super_block *es;
589 struct ext4_inode_info *ei; 592 struct ext4_inode_info *ei;
590 struct ext4_sb_info *sbi; 593 struct ext4_sb_info *sbi;
591 int ret2, err = 0; 594 int ret2, err = 0;
@@ -613,7 +616,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
613 } 616 }
614 617
615 if (S_ISDIR(mode)) { 618 if (S_ISDIR(mode)) {
616 if (test_opt (sb, OLDALLOC)) 619 if (test_opt(sb, OLDALLOC))
617 ret2 = find_group_dir(sb, dir, &group); 620 ret2 = find_group_dir(sb, dir, &group);
618 else 621 else
619 ret2 = find_group_orlov(sb, dir, &group); 622 ret2 = find_group_orlov(sb, dir, &group);
@@ -783,7 +786,7 @@ got:
783 } 786 }
784 787
785 inode->i_uid = current->fsuid; 788 inode->i_uid = current->fsuid;
786 if (test_opt (sb, GRPID)) 789 if (test_opt(sb, GRPID))
787 inode->i_gid = dir->i_gid; 790 inode->i_gid = dir->i_gid;
788 else if (dir->i_mode & S_ISGID) { 791 else if (dir->i_mode & S_ISGID) {
789 inode->i_gid = dir->i_gid; 792 inode->i_gid = dir->i_gid;
@@ -816,7 +819,6 @@ got:
816 ei->i_flags &= ~EXT4_DIRSYNC_FL; 819 ei->i_flags &= ~EXT4_DIRSYNC_FL;
817 ei->i_file_acl = 0; 820 ei->i_file_acl = 0;
818 ei->i_dtime = 0; 821 ei->i_dtime = 0;
819 ei->i_block_alloc_info = NULL;
820 ei->i_block_group = group; 822 ei->i_block_group = group;
821 823
822 ext4_set_inode_flags(inode); 824 ext4_set_inode_flags(inode);
@@ -832,7 +834,7 @@ got:
832 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; 834 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
833 835
834 ret = inode; 836 ret = inode;
835 if(DQUOT_ALLOC_INODE(inode)) { 837 if (DQUOT_ALLOC_INODE(inode)) {
836 err = -EDQUOT; 838 err = -EDQUOT;
837 goto fail_drop; 839 goto fail_drop;
838 } 840 }
@@ -841,7 +843,7 @@ got:
841 if (err) 843 if (err)
842 goto fail_free_drop; 844 goto fail_free_drop;
843 845
844 err = ext4_init_security(handle,inode, dir); 846 err = ext4_init_security(handle, inode, dir);
845 if (err) 847 if (err)
846 goto fail_free_drop; 848 goto fail_free_drop;
847 849
@@ -959,7 +961,7 @@ error:
959 return ERR_PTR(err); 961 return ERR_PTR(err);
960} 962}
961 963
962unsigned long ext4_count_free_inodes (struct super_block * sb) 964unsigned long ext4_count_free_inodes(struct super_block *sb)
963{ 965{
964 unsigned long desc_count; 966 unsigned long desc_count;
965 struct ext4_group_desc *gdp; 967 struct ext4_group_desc *gdp;
@@ -974,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
974 bitmap_count = 0; 976 bitmap_count = 0;
975 gdp = NULL; 977 gdp = NULL;
976 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 978 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
977 gdp = ext4_get_group_desc (sb, i, NULL); 979 gdp = ext4_get_group_desc(sb, i, NULL);
978 if (!gdp) 980 if (!gdp)
979 continue; 981 continue;
980 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 982 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
@@ -989,13 +991,14 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
989 bitmap_count += x; 991 bitmap_count += x;
990 } 992 }
991 brelse(bitmap_bh); 993 brelse(bitmap_bh);
992 printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", 994 printk(KERN_DEBUG "ext4_count_free_inodes: "
993 le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); 995 "stored = %u, computed = %lu, %lu\n",
996 le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
994 return desc_count; 997 return desc_count;
995#else 998#else
996 desc_count = 0; 999 desc_count = 0;
997 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1000 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
998 gdp = ext4_get_group_desc (sb, i, NULL); 1001 gdp = ext4_get_group_desc(sb, i, NULL);
999 if (!gdp) 1002 if (!gdp)
1000 continue; 1003 continue;
1001 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 1004 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
@@ -1006,13 +1009,13 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
1006} 1009}
1007 1010
1008/* Called at mount-time, super-block is locked */ 1011/* Called at mount-time, super-block is locked */
1009unsigned long ext4_count_dirs (struct super_block * sb) 1012unsigned long ext4_count_dirs(struct super_block * sb)
1010{ 1013{
1011 unsigned long count = 0; 1014 unsigned long count = 0;
1012 ext4_group_t i; 1015 ext4_group_t i;
1013 1016
1014 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1017 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
1015 struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); 1018 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1016 if (!gdp) 1019 if (!gdp)
1017 continue; 1020 continue;
1018 count += le16_to_cpu(gdp->bg_used_dirs_count); 1021 count += le16_to_cpu(gdp->bg_used_dirs_count);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7e91913e325b..9b4ec9decfd1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -190,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
190/* 190/*
191 * Called at the last iput() if i_nlink is zero. 191 * Called at the last iput() if i_nlink is zero.
192 */ 192 */
193void ext4_delete_inode (struct inode * inode) 193void ext4_delete_inode(struct inode *inode)
194{ 194{
195 handle_t *handle; 195 handle_t *handle;
196 int err; 196 int err;
@@ -330,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode,
330 int final = 0; 330 int final = 0;
331 331
332 if (i_block < 0) { 332 if (i_block < 0) {
333 ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); 333 ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0");
334 } else if (i_block < direct_blocks) { 334 } else if (i_block < direct_blocks) {
335 offsets[n++] = i_block; 335 offsets[n++] = i_block;
336 final = direct_blocks; 336 final = direct_blocks;
337 } else if ( (i_block -= direct_blocks) < indirect_blocks) { 337 } else if ((i_block -= direct_blocks) < indirect_blocks) {
338 offsets[n++] = EXT4_IND_BLOCK; 338 offsets[n++] = EXT4_IND_BLOCK;
339 offsets[n++] = i_block; 339 offsets[n++] = i_block;
340 final = ptrs; 340 final = ptrs;
@@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
400 400
401 *err = 0; 401 *err = 0;
402 /* i_data is not going away, no lock needed */ 402 /* i_data is not going away, no lock needed */
403 add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); 403 add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
404 if (!p->key) 404 if (!p->key)
405 goto no_block; 405 goto no_block;
406 while (--depth) { 406 while (--depth) {
407 bh = sb_bread(sb, le32_to_cpu(p->key)); 407 bh = sb_bread(sb, le32_to_cpu(p->key));
408 if (!bh) 408 if (!bh)
409 goto failure; 409 goto failure;
410 add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); 410 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
411 /* Reader: end */ 411 /* Reader: end */
412 if (!p->key) 412 if (!p->key)
413 goto no_block; 413 goto no_block;
@@ -443,7 +443,7 @@ no_block:
443static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) 443static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
444{ 444{
445 struct ext4_inode_info *ei = EXT4_I(inode); 445 struct ext4_inode_info *ei = EXT4_I(inode);
446 __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; 446 __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
447 __le32 *p; 447 __le32 *p;
448 ext4_fsblk_t bg_start; 448 ext4_fsblk_t bg_start;
449 ext4_fsblk_t last_block; 449 ext4_fsblk_t last_block;
@@ -486,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
486static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, 486static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
487 Indirect *partial) 487 Indirect *partial)
488{ 488{
489 struct ext4_block_alloc_info *block_i;
490
491 block_i = EXT4_I(inode)->i_block_alloc_info;
492
493 /* 489 /*
494 * try the heuristic for sequential allocation, 490 * XXX need to get goal block from mballoc's data structures
495 * failing that at least try to get decent locality.
496 */ 491 */
497 if (block_i && (block == block_i->last_alloc_logical_block + 1)
498 && (block_i->last_alloc_physical_block != 0)) {
499 return block_i->last_alloc_physical_block + 1;
500 }
501 492
502 return ext4_find_near(inode, partial); 493 return ext4_find_near(inode, partial);
503} 494}
@@ -630,7 +621,7 @@ allocated:
630 *err = 0; 621 *err = 0;
631 return ret; 622 return ret;
632failed_out: 623failed_out:
633 for (i = 0; i <index; i++) 624 for (i = 0; i < index; i++)
634 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 625 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
635 return ret; 626 return ret;
636} 627}
@@ -703,7 +694,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
703 branch[n].p = (__le32 *) bh->b_data + offsets[n]; 694 branch[n].p = (__le32 *) bh->b_data + offsets[n];
704 branch[n].key = cpu_to_le32(new_blocks[n]); 695 branch[n].key = cpu_to_le32(new_blocks[n]);
705 *branch[n].p = branch[n].key; 696 *branch[n].p = branch[n].key;
706 if ( n == indirect_blks) { 697 if (n == indirect_blks) {
707 current_block = new_blocks[n]; 698 current_block = new_blocks[n];
708 /* 699 /*
709 * End of chain, update the last new metablock of 700 * End of chain, update the last new metablock of
@@ -730,7 +721,7 @@ failed:
730 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); 721 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
731 ext4_journal_forget(handle, branch[i].bh); 722 ext4_journal_forget(handle, branch[i].bh);
732 } 723 }
733 for (i = 0; i <indirect_blks; i++) 724 for (i = 0; i < indirect_blks; i++)
734 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 725 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
735 726
736 ext4_free_blocks(handle, inode, new_blocks[i], num, 0); 727 ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
@@ -757,10 +748,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
757{ 748{
758 int i; 749 int i;
759 int err = 0; 750 int err = 0;
760 struct ext4_block_alloc_info *block_i;
761 ext4_fsblk_t current_block; 751 ext4_fsblk_t current_block;
762 752
763 block_i = EXT4_I(inode)->i_block_alloc_info;
764 /* 753 /*
765 * If we're splicing into a [td]indirect block (as opposed to the 754 * If we're splicing into a [td]indirect block (as opposed to the
766 * inode) then we need to get write access to the [td]indirect block 755 * inode) then we need to get write access to the [td]indirect block
@@ -783,18 +772,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
783 if (num == 0 && blks > 1) { 772 if (num == 0 && blks > 1) {
784 current_block = le32_to_cpu(where->key) + 1; 773 current_block = le32_to_cpu(where->key) + 1;
785 for (i = 1; i < blks; i++) 774 for (i = 1; i < blks; i++)
786 *(where->p + i ) = cpu_to_le32(current_block++); 775 *(where->p + i) = cpu_to_le32(current_block++);
787 }
788
789 /*
790 * update the most recently allocated logical & physical block
791 * in i_block_alloc_info, to assist find the proper goal block for next
792 * allocation
793 */
794 if (block_i) {
795 block_i->last_alloc_logical_block = block + blks - 1;
796 block_i->last_alloc_physical_block =
797 le32_to_cpu(where[num].key) + blks - 1;
798 } 776 }
799 777
800 /* We are done with atomic stuff, now do the rest of housekeeping */ 778 /* We are done with atomic stuff, now do the rest of housekeeping */
@@ -914,12 +892,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
914 goto cleanup; 892 goto cleanup;
915 893
916 /* 894 /*
917 * Okay, we need to do block allocation. Lazily initialize the block 895 * Okay, we need to do block allocation.
918 * allocation info here if necessary
919 */ 896 */
920 if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
921 ext4_init_block_alloc_info(inode);
922
923 goal = ext4_find_goal(inode, iblock, partial); 897 goal = ext4_find_goal(inode, iblock, partial);
924 898
925 /* the number of blocks need to allocate for [d,t]indirect blocks */ 899 /* the number of blocks need to allocate for [d,t]indirect blocks */
@@ -1030,19 +1004,20 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1030 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1004 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1031 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1005 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1032 1006
1033 /* Account for allocated meta_blocks */ 1007 if (mdb_free) {
1034 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; 1008 /* Account for allocated meta_blocks */
1009 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1035 1010
1036 /* update fs free blocks counter for truncate case */ 1011 /* update fs dirty blocks counter */
1037 percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); 1012 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1013 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1014 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1015 }
1038 1016
1039 /* update per-inode reservations */ 1017 /* update per-inode reservations */
1040 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); 1018 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1041 EXT4_I(inode)->i_reserved_data_blocks -= used; 1019 EXT4_I(inode)->i_reserved_data_blocks -= used;
1042 1020
1043 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1044 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1045 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1046 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1021 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1047} 1022}
1048 1023
@@ -1160,8 +1135,8 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1160/* Maximum number of blocks we map for direct IO at once. */ 1135/* Maximum number of blocks we map for direct IO at once. */
1161#define DIO_MAX_BLOCKS 4096 1136#define DIO_MAX_BLOCKS 4096
1162 1137
1163static int ext4_get_block(struct inode *inode, sector_t iblock, 1138int ext4_get_block(struct inode *inode, sector_t iblock,
1164 struct buffer_head *bh_result, int create) 1139 struct buffer_head *bh_result, int create)
1165{ 1140{
1166 handle_t *handle = ext4_journal_current_handle(); 1141 handle_t *handle = ext4_journal_current_handle();
1167 int ret = 0, started = 0; 1142 int ret = 0, started = 0;
@@ -1241,7 +1216,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
1241 BUFFER_TRACE(bh, "call get_create_access"); 1216 BUFFER_TRACE(bh, "call get_create_access");
1242 fatal = ext4_journal_get_create_access(handle, bh); 1217 fatal = ext4_journal_get_create_access(handle, bh);
1243 if (!fatal && !buffer_uptodate(bh)) { 1218 if (!fatal && !buffer_uptodate(bh)) {
1244 memset(bh->b_data,0,inode->i_sb->s_blocksize); 1219 memset(bh->b_data, 0, inode->i_sb->s_blocksize);
1245 set_buffer_uptodate(bh); 1220 set_buffer_uptodate(bh);
1246 } 1221 }
1247 unlock_buffer(bh); 1222 unlock_buffer(bh);
@@ -1266,7 +1241,7 @@ err:
1266struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, 1241struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
1267 ext4_lblk_t block, int create, int *err) 1242 ext4_lblk_t block, int create, int *err)
1268{ 1243{
1269 struct buffer_head * bh; 1244 struct buffer_head *bh;
1270 1245
1271 bh = ext4_getblk(handle, inode, block, create, err); 1246 bh = ext4_getblk(handle, inode, block, create, err);
1272 if (!bh) 1247 if (!bh)
@@ -1282,13 +1257,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
1282 return NULL; 1257 return NULL;
1283} 1258}
1284 1259
1285static int walk_page_buffers( handle_t *handle, 1260static int walk_page_buffers(handle_t *handle,
1286 struct buffer_head *head, 1261 struct buffer_head *head,
1287 unsigned from, 1262 unsigned from,
1288 unsigned to, 1263 unsigned to,
1289 int *partial, 1264 int *partial,
1290 int (*fn)( handle_t *handle, 1265 int (*fn)(handle_t *handle,
1291 struct buffer_head *bh)) 1266 struct buffer_head *bh))
1292{ 1267{
1293 struct buffer_head *bh; 1268 struct buffer_head *bh;
1294 unsigned block_start, block_end; 1269 unsigned block_start, block_end;
@@ -1296,9 +1271,9 @@ static int walk_page_buffers( handle_t *handle,
1296 int err, ret = 0; 1271 int err, ret = 0;
1297 struct buffer_head *next; 1272 struct buffer_head *next;
1298 1273
1299 for ( bh = head, block_start = 0; 1274 for (bh = head, block_start = 0;
1300 ret == 0 && (bh != head || !block_start); 1275 ret == 0 && (bh != head || !block_start);
1301 block_start = block_end, bh = next) 1276 block_start = block_end, bh = next)
1302 { 1277 {
1303 next = bh->b_this_page; 1278 next = bh->b_this_page;
1304 block_end = block_start + blocksize; 1279 block_end = block_start + blocksize;
@@ -1351,23 +1326,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1351 loff_t pos, unsigned len, unsigned flags, 1326 loff_t pos, unsigned len, unsigned flags,
1352 struct page **pagep, void **fsdata) 1327 struct page **pagep, void **fsdata)
1353{ 1328{
1354 struct inode *inode = mapping->host; 1329 struct inode *inode = mapping->host;
1355 int ret, needed_blocks = ext4_writepage_trans_blocks(inode); 1330 int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
1356 handle_t *handle; 1331 handle_t *handle;
1357 int retries = 0; 1332 int retries = 0;
1358 struct page *page; 1333 struct page *page;
1359 pgoff_t index; 1334 pgoff_t index;
1360 unsigned from, to; 1335 unsigned from, to;
1361 1336
1362 index = pos >> PAGE_CACHE_SHIFT; 1337 index = pos >> PAGE_CACHE_SHIFT;
1363 from = pos & (PAGE_CACHE_SIZE - 1); 1338 from = pos & (PAGE_CACHE_SIZE - 1);
1364 to = from + len; 1339 to = from + len;
1365 1340
1366retry: 1341retry:
1367 handle = ext4_journal_start(inode, needed_blocks); 1342 handle = ext4_journal_start(inode, needed_blocks);
1368 if (IS_ERR(handle)) { 1343 if (IS_ERR(handle)) {
1369 ret = PTR_ERR(handle); 1344 ret = PTR_ERR(handle);
1370 goto out; 1345 goto out;
1371 } 1346 }
1372 1347
1373 page = __grab_cache_page(mapping, index); 1348 page = __grab_cache_page(mapping, index);
@@ -1387,9 +1362,16 @@ retry:
1387 } 1362 }
1388 1363
1389 if (ret) { 1364 if (ret) {
1390 unlock_page(page); 1365 unlock_page(page);
1391 ext4_journal_stop(handle); 1366 ext4_journal_stop(handle);
1392 page_cache_release(page); 1367 page_cache_release(page);
1368 /*
1369 * block_write_begin may have instantiated a few blocks
1370 * outside i_size. Trim these off again. Don't need
1371 * i_size_read because we hold i_mutex.
1372 */
1373 if (pos + len > inode->i_size)
1374 vmtruncate(inode, inode->i_size);
1393 } 1375 }
1394 1376
1395 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 1377 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -1426,16 +1408,18 @@ static int ext4_ordered_write_end(struct file *file,
1426 ret = ext4_jbd2_file_inode(handle, inode); 1408 ret = ext4_jbd2_file_inode(handle, inode);
1427 1409
1428 if (ret == 0) { 1410 if (ret == 0) {
1429 /*
1430 * generic_write_end() will run mark_inode_dirty() if i_size
1431 * changes. So let's piggyback the i_disksize mark_inode_dirty
1432 * into that.
1433 */
1434 loff_t new_i_size; 1411 loff_t new_i_size;
1435 1412
1436 new_i_size = pos + copied; 1413 new_i_size = pos + copied;
1437 if (new_i_size > EXT4_I(inode)->i_disksize) 1414 if (new_i_size > EXT4_I(inode)->i_disksize) {
1438 EXT4_I(inode)->i_disksize = new_i_size; 1415 ext4_update_i_disksize(inode, new_i_size);
1416 /* We need to mark inode dirty even if
1417 * new_i_size is less than inode->i_size
1418 * but greater than i_disksize. (hint delalloc)
1419 */
1420 ext4_mark_inode_dirty(handle, inode);
1421 }
1422
1439 ret2 = generic_write_end(file, mapping, pos, len, copied, 1423 ret2 = generic_write_end(file, mapping, pos, len, copied,
1440 page, fsdata); 1424 page, fsdata);
1441 copied = ret2; 1425 copied = ret2;
@@ -1460,8 +1444,14 @@ static int ext4_writeback_write_end(struct file *file,
1460 loff_t new_i_size; 1444 loff_t new_i_size;
1461 1445
1462 new_i_size = pos + copied; 1446 new_i_size = pos + copied;
1463 if (new_i_size > EXT4_I(inode)->i_disksize) 1447 if (new_i_size > EXT4_I(inode)->i_disksize) {
1464 EXT4_I(inode)->i_disksize = new_i_size; 1448 ext4_update_i_disksize(inode, new_i_size);
1449 /* We need to mark inode dirty even if
1450 * new_i_size is less than inode->i_size
1451 * but greater than i_disksize. (hint delalloc)
1452 */
1453 ext4_mark_inode_dirty(handle, inode);
1454 }
1465 1455
1466 ret2 = generic_write_end(file, mapping, pos, len, copied, 1456 ret2 = generic_write_end(file, mapping, pos, len, copied,
1467 page, fsdata); 1457 page, fsdata);
@@ -1486,6 +1476,7 @@ static int ext4_journalled_write_end(struct file *file,
1486 int ret = 0, ret2; 1476 int ret = 0, ret2;
1487 int partial = 0; 1477 int partial = 0;
1488 unsigned from, to; 1478 unsigned from, to;
1479 loff_t new_i_size;
1489 1480
1490 from = pos & (PAGE_CACHE_SIZE - 1); 1481 from = pos & (PAGE_CACHE_SIZE - 1);
1491 to = from + len; 1482 to = from + len;
@@ -1500,11 +1491,12 @@ static int ext4_journalled_write_end(struct file *file,
1500 to, &partial, write_end_fn); 1491 to, &partial, write_end_fn);
1501 if (!partial) 1492 if (!partial)
1502 SetPageUptodate(page); 1493 SetPageUptodate(page);
1503 if (pos+copied > inode->i_size) 1494 new_i_size = pos + copied;
1495 if (new_i_size > inode->i_size)
1504 i_size_write(inode, pos+copied); 1496 i_size_write(inode, pos+copied);
1505 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 1497 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
1506 if (inode->i_size > EXT4_I(inode)->i_disksize) { 1498 if (new_i_size > EXT4_I(inode)->i_disksize) {
1507 EXT4_I(inode)->i_disksize = inode->i_size; 1499 ext4_update_i_disksize(inode, new_i_size);
1508 ret2 = ext4_mark_inode_dirty(handle, inode); 1500 ret2 = ext4_mark_inode_dirty(handle, inode);
1509 if (!ret) 1501 if (!ret)
1510 ret = ret2; 1502 ret = ret2;
@@ -1521,6 +1513,7 @@ static int ext4_journalled_write_end(struct file *file,
1521 1513
1522static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1514static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1523{ 1515{
1516 int retries = 0;
1524 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1517 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1525 unsigned long md_needed, mdblocks, total = 0; 1518 unsigned long md_needed, mdblocks, total = 0;
1526 1519
@@ -1529,6 +1522,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1529 * in order to allocate nrblocks 1522 * in order to allocate nrblocks
1530 * worse case is one extent per block 1523 * worse case is one extent per block
1531 */ 1524 */
1525repeat:
1532 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1526 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1533 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1527 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
1534 mdblocks = ext4_calc_metadata_amount(inode, total); 1528 mdblocks = ext4_calc_metadata_amount(inode, total);
@@ -1537,13 +1531,14 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1537 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; 1531 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1538 total = md_needed + nrblocks; 1532 total = md_needed + nrblocks;
1539 1533
1540 if (ext4_has_free_blocks(sbi, total) < total) { 1534 if (ext4_claim_free_blocks(sbi, total)) {
1541 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1535 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1536 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1537 yield();
1538 goto repeat;
1539 }
1542 return -ENOSPC; 1540 return -ENOSPC;
1543 } 1541 }
1544 /* reduce fs free blocks counter */
1545 percpu_counter_sub(&sbi->s_freeblocks_counter, total);
1546
1547 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1542 EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
1548 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; 1543 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
1549 1544
@@ -1585,8 +1580,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1585 1580
1586 release = to_free + mdb_free; 1581 release = to_free + mdb_free;
1587 1582
1588 /* update fs free blocks counter for truncate case */ 1583 /* update fs dirty blocks counter for truncate case */
1589 percpu_counter_add(&sbi->s_freeblocks_counter, release); 1584 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1590 1585
1591 /* update per-inode reservations */ 1586 /* update per-inode reservations */
1592 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1587 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
@@ -1630,6 +1625,7 @@ struct mpage_da_data {
1630 struct writeback_control *wbc; 1625 struct writeback_control *wbc;
1631 int io_done; 1626 int io_done;
1632 long pages_written; 1627 long pages_written;
1628 int retval;
1633}; 1629};
1634 1630
1635/* 1631/*
@@ -1783,6 +1779,57 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
1783 unmap_underlying_metadata(bdev, bh->b_blocknr + i); 1779 unmap_underlying_metadata(bdev, bh->b_blocknr + i);
1784} 1780}
1785 1781
1782static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
1783 sector_t logical, long blk_cnt)
1784{
1785 int nr_pages, i;
1786 pgoff_t index, end;
1787 struct pagevec pvec;
1788 struct inode *inode = mpd->inode;
1789 struct address_space *mapping = inode->i_mapping;
1790
1791 index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
1792 end = (logical + blk_cnt - 1) >>
1793 (PAGE_CACHE_SHIFT - inode->i_blkbits);
1794 while (index <= end) {
1795 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1796 if (nr_pages == 0)
1797 break;
1798 for (i = 0; i < nr_pages; i++) {
1799 struct page *page = pvec.pages[i];
1800 index = page->index;
1801 if (index > end)
1802 break;
1803 index++;
1804
1805 BUG_ON(!PageLocked(page));
1806 BUG_ON(PageWriteback(page));
1807 block_invalidatepage(page, 0);
1808 ClearPageUptodate(page);
1809 unlock_page(page);
1810 }
1811 }
1812 return;
1813}
1814
1815static void ext4_print_free_blocks(struct inode *inode)
1816{
1817 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1818 printk(KERN_EMERG "Total free blocks count %lld\n",
1819 ext4_count_free_blocks(inode->i_sb));
1820 printk(KERN_EMERG "Free/Dirty block details\n");
1821 printk(KERN_EMERG "free_blocks=%lld\n",
1822 percpu_counter_sum(&sbi->s_freeblocks_counter));
1823 printk(KERN_EMERG "dirty_blocks=%lld\n",
1824 percpu_counter_sum(&sbi->s_dirtyblocks_counter));
1825 printk(KERN_EMERG "Block reservation details\n");
1826 printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
1827 EXT4_I(inode)->i_reserved_data_blocks);
1828 printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n",
1829 EXT4_I(inode)->i_reserved_meta_blocks);
1830 return;
1831}
1832
1786/* 1833/*
1787 * mpage_da_map_blocks - go through given space 1834 * mpage_da_map_blocks - go through given space
1788 * 1835 *
@@ -1792,32 +1839,69 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
1792 * The function skips space we know is already mapped to disk blocks. 1839 * The function skips space we know is already mapped to disk blocks.
1793 * 1840 *
1794 */ 1841 */
1795static void mpage_da_map_blocks(struct mpage_da_data *mpd) 1842static int mpage_da_map_blocks(struct mpage_da_data *mpd)
1796{ 1843{
1797 int err = 0; 1844 int err = 0;
1798 struct buffer_head *lbh = &mpd->lbh;
1799 sector_t next = lbh->b_blocknr;
1800 struct buffer_head new; 1845 struct buffer_head new;
1846 struct buffer_head *lbh = &mpd->lbh;
1847 sector_t next;
1801 1848
1802 /* 1849 /*
1803 * We consider only non-mapped and non-allocated blocks 1850 * We consider only non-mapped and non-allocated blocks
1804 */ 1851 */
1805 if (buffer_mapped(lbh) && !buffer_delay(lbh)) 1852 if (buffer_mapped(lbh) && !buffer_delay(lbh))
1806 return; 1853 return 0;
1807
1808 new.b_state = lbh->b_state; 1854 new.b_state = lbh->b_state;
1809 new.b_blocknr = 0; 1855 new.b_blocknr = 0;
1810 new.b_size = lbh->b_size; 1856 new.b_size = lbh->b_size;
1811 1857 next = lbh->b_blocknr;
1812 /* 1858 /*
1813 * If we didn't accumulate anything 1859 * If we didn't accumulate anything
1814 * to write simply return 1860 * to write simply return
1815 */ 1861 */
1816 if (!new.b_size) 1862 if (!new.b_size)
1817 return; 1863 return 0;
1818 err = mpd->get_block(mpd->inode, next, &new, 1); 1864 err = mpd->get_block(mpd->inode, next, &new, 1);
1819 if (err) 1865 if (err) {
1820 return; 1866
1867 /* If get block returns with error
1868 * we simply return. Later writepage
1869 * will redirty the page and writepages
1870 * will find the dirty page again
1871 */
1872 if (err == -EAGAIN)
1873 return 0;
1874
1875 if (err == -ENOSPC &&
1876 ext4_count_free_blocks(mpd->inode->i_sb)) {
1877 mpd->retval = err;
1878 return 0;
1879 }
1880
1881 /*
1882 * get block failure will cause us
1883 * to loop in writepages. Because
1884 * a_ops->writepage won't be able to
1885 * make progress. The page will be redirtied
1886 * by writepage and writepages will again
1887 * try to write the same.
1888 */
1889 printk(KERN_EMERG "%s block allocation failed for inode %lu "
1890 "at logical offset %llu with max blocks "
1891 "%zd with error %d\n",
1892 __func__, mpd->inode->i_ino,
1893 (unsigned long long)next,
1894 lbh->b_size >> mpd->inode->i_blkbits, err);
1895 printk(KERN_EMERG "This should not happen.!! "
1896 "Data will be lost\n");
1897 if (err == -ENOSPC) {
1898 ext4_print_free_blocks(mpd->inode);
1899 }
1900 /* invalidate all the pages */
1901 ext4_da_block_invalidatepages(mpd, next,
1902 lbh->b_size >> mpd->inode->i_blkbits);
1903 return err;
1904 }
1821 BUG_ON(new.b_size == 0); 1905 BUG_ON(new.b_size == 0);
1822 1906
1823 if (buffer_new(&new)) 1907 if (buffer_new(&new))
@@ -1830,7 +1914,7 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1830 if (buffer_delay(lbh) || buffer_unwritten(lbh)) 1914 if (buffer_delay(lbh) || buffer_unwritten(lbh))
1831 mpage_put_bnr_to_bhs(mpd, next, &new); 1915 mpage_put_bnr_to_bhs(mpd, next, &new);
1832 1916
1833 return; 1917 return 0;
1834} 1918}
1835 1919
1836#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ 1920#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -1899,8 +1983,8 @@ flush_it:
1899 * We couldn't merge the block to our extent, so we 1983 * We couldn't merge the block to our extent, so we
1900 * need to flush current extent and start new one 1984 * need to flush current extent and start new one
1901 */ 1985 */
1902 mpage_da_map_blocks(mpd); 1986 if (mpage_da_map_blocks(mpd) == 0)
1903 mpage_da_submit_io(mpd); 1987 mpage_da_submit_io(mpd);
1904 mpd->io_done = 1; 1988 mpd->io_done = 1;
1905 return; 1989 return;
1906} 1990}
@@ -1942,8 +2026,8 @@ static int __mpage_da_writepage(struct page *page,
1942 * and start IO on them using writepage() 2026 * and start IO on them using writepage()
1943 */ 2027 */
1944 if (mpd->next_page != mpd->first_page) { 2028 if (mpd->next_page != mpd->first_page) {
1945 mpage_da_map_blocks(mpd); 2029 if (mpage_da_map_blocks(mpd) == 0)
1946 mpage_da_submit_io(mpd); 2030 mpage_da_submit_io(mpd);
1947 /* 2031 /*
1948 * skip rest of the page in the page_vec 2032 * skip rest of the page in the page_vec
1949 */ 2033 */
@@ -2018,39 +2102,36 @@ static int __mpage_da_writepage(struct page *page,
2018 */ 2102 */
2019static int mpage_da_writepages(struct address_space *mapping, 2103static int mpage_da_writepages(struct address_space *mapping,
2020 struct writeback_control *wbc, 2104 struct writeback_control *wbc,
2021 get_block_t get_block) 2105 struct mpage_da_data *mpd)
2022{ 2106{
2023 struct mpage_da_data mpd;
2024 long to_write; 2107 long to_write;
2025 int ret; 2108 int ret;
2026 2109
2027 if (!get_block) 2110 if (!mpd->get_block)
2028 return generic_writepages(mapping, wbc); 2111 return generic_writepages(mapping, wbc);
2029 2112
2030 mpd.wbc = wbc; 2113 mpd->lbh.b_size = 0;
2031 mpd.inode = mapping->host; 2114 mpd->lbh.b_state = 0;
2032 mpd.lbh.b_size = 0; 2115 mpd->lbh.b_blocknr = 0;
2033 mpd.lbh.b_state = 0; 2116 mpd->first_page = 0;
2034 mpd.lbh.b_blocknr = 0; 2117 mpd->next_page = 0;
2035 mpd.first_page = 0; 2118 mpd->io_done = 0;
2036 mpd.next_page = 0; 2119 mpd->pages_written = 0;
2037 mpd.get_block = get_block; 2120 mpd->retval = 0;
2038 mpd.io_done = 0;
2039 mpd.pages_written = 0;
2040 2121
2041 to_write = wbc->nr_to_write; 2122 to_write = wbc->nr_to_write;
2042 2123
2043 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); 2124 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
2044 2125
2045 /* 2126 /*
2046 * Handle last extent of pages 2127 * Handle last extent of pages
2047 */ 2128 */
2048 if (!mpd.io_done && mpd.next_page != mpd.first_page) { 2129 if (!mpd->io_done && mpd->next_page != mpd->first_page) {
2049 mpage_da_map_blocks(&mpd); 2130 if (mpage_da_map_blocks(mpd) == 0)
2050 mpage_da_submit_io(&mpd); 2131 mpage_da_submit_io(mpd);
2051 } 2132 }
2052 2133
2053 wbc->nr_to_write = to_write - mpd.pages_written; 2134 wbc->nr_to_write = to_write - mpd->pages_written;
2054 return ret; 2135 return ret;
2055} 2136}
2056 2137
@@ -2103,18 +2184,24 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2103 handle_t *handle = NULL; 2184 handle_t *handle = NULL;
2104 2185
2105 handle = ext4_journal_current_handle(); 2186 handle = ext4_journal_current_handle();
2106 if (!handle) { 2187 BUG_ON(!handle);
2107 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, 2188 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2108 bh_result, 0, 0, 0); 2189 bh_result, create, 0, EXT4_DELALLOC_RSVED);
2109 BUG_ON(!ret);
2110 } else {
2111 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2112 bh_result, create, 0, EXT4_DELALLOC_RSVED);
2113 }
2114
2115 if (ret > 0) { 2190 if (ret > 0) {
2191
2116 bh_result->b_size = (ret << inode->i_blkbits); 2192 bh_result->b_size = (ret << inode->i_blkbits);
2117 2193
2194 if (ext4_should_order_data(inode)) {
2195 int retval;
2196 retval = ext4_jbd2_file_inode(handle, inode);
2197 if (retval)
2198 /*
2199 * Failed to add inode for ordered
2200 * mode. Don't update file size
2201 */
2202 return retval;
2203 }
2204
2118 /* 2205 /*
2119 * Update on-disk size along with block allocation 2206 * Update on-disk size along with block allocation
2120 * we don't use 'extend_disksize' as size may change 2207 * we don't use 'extend_disksize' as size may change
@@ -2124,18 +2211,9 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2124 if (disksize > i_size_read(inode)) 2211 if (disksize > i_size_read(inode))
2125 disksize = i_size_read(inode); 2212 disksize = i_size_read(inode);
2126 if (disksize > EXT4_I(inode)->i_disksize) { 2213 if (disksize > EXT4_I(inode)->i_disksize) {
2127 /* 2214 ext4_update_i_disksize(inode, disksize);
2128 * XXX: replace with spinlock if seen contended -bzzz 2215 ret = ext4_mark_inode_dirty(handle, inode);
2129 */ 2216 return ret;
2130 down_write(&EXT4_I(inode)->i_data_sem);
2131 if (disksize > EXT4_I(inode)->i_disksize)
2132 EXT4_I(inode)->i_disksize = disksize;
2133 up_write(&EXT4_I(inode)->i_data_sem);
2134
2135 if (EXT4_I(inode)->i_disksize == disksize) {
2136 ret = ext4_mark_inode_dirty(handle, inode);
2137 return ret;
2138 }
2139 } 2217 }
2140 ret = 0; 2218 ret = 0;
2141 } 2219 }
@@ -2284,6 +2362,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2284{ 2362{
2285 handle_t *handle = NULL; 2363 handle_t *handle = NULL;
2286 loff_t range_start = 0; 2364 loff_t range_start = 0;
2365 struct mpage_da_data mpd;
2287 struct inode *inode = mapping->host; 2366 struct inode *inode = mapping->host;
2288 int needed_blocks, ret = 0, nr_to_writebump = 0; 2367 int needed_blocks, ret = 0, nr_to_writebump = 0;
2289 long to_write, pages_skipped = 0; 2368 long to_write, pages_skipped = 0;
@@ -2317,6 +2396,9 @@ static int ext4_da_writepages(struct address_space *mapping,
2317 range_start = wbc->range_start; 2396 range_start = wbc->range_start;
2318 pages_skipped = wbc->pages_skipped; 2397 pages_skipped = wbc->pages_skipped;
2319 2398
2399 mpd.wbc = wbc;
2400 mpd.inode = mapping->host;
2401
2320restart_loop: 2402restart_loop:
2321 to_write = wbc->nr_to_write; 2403 to_write = wbc->nr_to_write;
2322 while (!ret && to_write > 0) { 2404 while (!ret && to_write > 0) {
@@ -2340,23 +2422,17 @@ restart_loop:
2340 dump_stack(); 2422 dump_stack();
2341 goto out_writepages; 2423 goto out_writepages;
2342 } 2424 }
2343 if (ext4_should_order_data(inode)) {
2344 /*
2345 * With ordered mode we need to add
2346 * the inode to the journal handl
2347 * when we do block allocation.
2348 */
2349 ret = ext4_jbd2_file_inode(handle, inode);
2350 if (ret) {
2351 ext4_journal_stop(handle);
2352 goto out_writepages;
2353 }
2354 }
2355
2356 to_write -= wbc->nr_to_write; 2425 to_write -= wbc->nr_to_write;
2357 ret = mpage_da_writepages(mapping, wbc, 2426
2358 ext4_da_get_block_write); 2427 mpd.get_block = ext4_da_get_block_write;
2428 ret = mpage_da_writepages(mapping, wbc, &mpd);
2429
2359 ext4_journal_stop(handle); 2430 ext4_journal_stop(handle);
2431
2432 if (mpd.retval == -ENOSPC)
2433 jbd2_journal_force_commit_nested(sbi->s_journal);
2434
2435 /* reset the retry count */
2360 if (ret == MPAGE_DA_EXTENT_TAIL) { 2436 if (ret == MPAGE_DA_EXTENT_TAIL) {
2361 /* 2437 /*
2362 * got one extent now try with 2438 * got one extent now try with
@@ -2391,6 +2467,33 @@ out_writepages:
2391 return ret; 2467 return ret;
2392} 2468}
2393 2469
2470#define FALL_BACK_TO_NONDELALLOC 1
2471static int ext4_nonda_switch(struct super_block *sb)
2472{
2473 s64 free_blocks, dirty_blocks;
2474 struct ext4_sb_info *sbi = EXT4_SB(sb);
2475
2476 /*
2477 * switch to non delalloc mode if we are running low
2478 * on free block. The free block accounting via percpu
2479 * counters can get slightly wrong with FBC_BATCH getting
2480 * accumulated on each CPU without updating global counters
2481 * Delalloc need an accurate free block accounting. So switch
2482 * to non delalloc when we are near to error range.
2483 */
2484 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
2485 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
2486 if (2 * free_blocks < 3 * dirty_blocks ||
2487 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2488 /*
2489 * free block count is less that 150% of dirty blocks
2490 * or free blocks is less that watermark
2491 */
2492 return 1;
2493 }
2494 return 0;
2495}
2496
2394static int ext4_da_write_begin(struct file *file, struct address_space *mapping, 2497static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2395 loff_t pos, unsigned len, unsigned flags, 2498 loff_t pos, unsigned len, unsigned flags,
2396 struct page **pagep, void **fsdata) 2499 struct page **pagep, void **fsdata)
@@ -2406,6 +2509,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2406 from = pos & (PAGE_CACHE_SIZE - 1); 2509 from = pos & (PAGE_CACHE_SIZE - 1);
2407 to = from + len; 2510 to = from + len;
2408 2511
2512 if (ext4_nonda_switch(inode->i_sb)) {
2513 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
2514 return ext4_write_begin(file, mapping, pos,
2515 len, flags, pagep, fsdata);
2516 }
2517 *fsdata = (void *)0;
2409retry: 2518retry:
2410 /* 2519 /*
2411 * With delayed allocation, we don't log the i_disksize update 2520 * With delayed allocation, we don't log the i_disksize update
@@ -2433,6 +2542,13 @@ retry:
2433 unlock_page(page); 2542 unlock_page(page);
2434 ext4_journal_stop(handle); 2543 ext4_journal_stop(handle);
2435 page_cache_release(page); 2544 page_cache_release(page);
2545 /*
2546 * block_write_begin may have instantiated a few blocks
2547 * outside i_size. Trim these off again. Don't need
2548 * i_size_read because we hold i_mutex.
2549 */
2550 if (pos + len > inode->i_size)
2551 vmtruncate(inode, inode->i_size);
2436 } 2552 }
2437 2553
2438 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2554 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2456,7 +2572,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
2456 bh = page_buffers(page); 2572 bh = page_buffers(page);
2457 idx = offset >> inode->i_blkbits; 2573 idx = offset >> inode->i_blkbits;
2458 2574
2459 for (i=0; i < idx; i++) 2575 for (i = 0; i < idx; i++)
2460 bh = bh->b_this_page; 2576 bh = bh->b_this_page;
2461 2577
2462 if (!buffer_mapped(bh) || (buffer_delay(bh))) 2578 if (!buffer_mapped(bh) || (buffer_delay(bh)))
@@ -2474,9 +2590,22 @@ static int ext4_da_write_end(struct file *file,
2474 handle_t *handle = ext4_journal_current_handle(); 2590 handle_t *handle = ext4_journal_current_handle();
2475 loff_t new_i_size; 2591 loff_t new_i_size;
2476 unsigned long start, end; 2592 unsigned long start, end;
2593 int write_mode = (int)(unsigned long)fsdata;
2594
2595 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
2596 if (ext4_should_order_data(inode)) {
2597 return ext4_ordered_write_end(file, mapping, pos,
2598 len, copied, page, fsdata);
2599 } else if (ext4_should_writeback_data(inode)) {
2600 return ext4_writeback_write_end(file, mapping, pos,
2601 len, copied, page, fsdata);
2602 } else {
2603 BUG();
2604 }
2605 }
2477 2606
2478 start = pos & (PAGE_CACHE_SIZE - 1); 2607 start = pos & (PAGE_CACHE_SIZE - 1);
2479 end = start + copied -1; 2608 end = start + copied - 1;
2480 2609
2481 /* 2610 /*
2482 * generic_write_end() will run mark_inode_dirty() if i_size 2611 * generic_write_end() will run mark_inode_dirty() if i_size
@@ -2500,6 +2629,11 @@ static int ext4_da_write_end(struct file *file,
2500 EXT4_I(inode)->i_disksize = new_i_size; 2629 EXT4_I(inode)->i_disksize = new_i_size;
2501 } 2630 }
2502 up_write(&EXT4_I(inode)->i_data_sem); 2631 up_write(&EXT4_I(inode)->i_data_sem);
2632 /* We need to mark inode dirty even if
2633 * new_i_size is less that inode->i_size
2634 * bu greater than i_disksize.(hint delalloc)
2635 */
2636 ext4_mark_inode_dirty(handle, inode);
2503 } 2637 }
2504 } 2638 }
2505 ret2 = generic_write_end(file, mapping, pos, len, copied, 2639 ret2 = generic_write_end(file, mapping, pos, len, copied,
@@ -2591,7 +2725,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
2591 return 0; 2725 return 0;
2592 } 2726 }
2593 2727
2594 return generic_block_bmap(mapping,block,ext4_get_block); 2728 return generic_block_bmap(mapping, block, ext4_get_block);
2595} 2729}
2596 2730
2597static int bget_one(handle_t *handle, struct buffer_head *bh) 2731static int bget_one(handle_t *handle, struct buffer_head *bh)
@@ -3197,7 +3331,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
3197 if (!partial->key && *partial->p) 3331 if (!partial->key && *partial->p)
3198 /* Writer: end */ 3332 /* Writer: end */
3199 goto no_top; 3333 goto no_top;
3200 for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) 3334 for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
3201 ; 3335 ;
3202 /* 3336 /*
3203 * OK, we've found the last block that must survive. The rest of our 3337 * OK, we've found the last block that must survive. The rest of our
@@ -3216,7 +3350,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
3216 } 3350 }
3217 /* Writer: end */ 3351 /* Writer: end */
3218 3352
3219 while(partial > p) { 3353 while (partial > p) {
3220 brelse(partial->bh); 3354 brelse(partial->bh);
3221 partial--; 3355 partial--;
3222 } 3356 }
@@ -3408,9 +3542,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
3408 /* This zaps the entire block. Bottom up. */ 3542 /* This zaps the entire block. Bottom up. */
3409 BUFFER_TRACE(bh, "free child branches"); 3543 BUFFER_TRACE(bh, "free child branches");
3410 ext4_free_branches(handle, inode, bh, 3544 ext4_free_branches(handle, inode, bh,
3411 (__le32*)bh->b_data, 3545 (__le32 *) bh->b_data,
3412 (__le32*)bh->b_data + addr_per_block, 3546 (__le32 *) bh->b_data + addr_per_block,
3413 depth); 3547 depth);
3414 3548
3415 /* 3549 /*
3416 * We've probably journalled the indirect block several 3550 * We've probably journalled the indirect block several
@@ -3578,7 +3712,7 @@ void ext4_truncate(struct inode *inode)
3578 */ 3712 */
3579 down_write(&ei->i_data_sem); 3713 down_write(&ei->i_data_sem);
3580 3714
3581 ext4_discard_reservation(inode); 3715 ext4_discard_preallocations(inode);
3582 3716
3583 /* 3717 /*
3584 * The orphan list entry will now protect us from any crash which 3718 * The orphan list entry will now protect us from any crash which
@@ -3673,41 +3807,6 @@ out_stop:
3673 ext4_journal_stop(handle); 3807 ext4_journal_stop(handle);
3674} 3808}
3675 3809
3676static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
3677 unsigned long ino, struct ext4_iloc *iloc)
3678{
3679 ext4_group_t block_group;
3680 unsigned long offset;
3681 ext4_fsblk_t block;
3682 struct ext4_group_desc *gdp;
3683
3684 if (!ext4_valid_inum(sb, ino)) {
3685 /*
3686 * This error is already checked for in namei.c unless we are
3687 * looking at an NFS filehandle, in which case no error
3688 * report is needed
3689 */
3690 return 0;
3691 }
3692
3693 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
3694 gdp = ext4_get_group_desc(sb, block_group, NULL);
3695 if (!gdp)
3696 return 0;
3697
3698 /*
3699 * Figure out the offset within the block group inode table
3700 */
3701 offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
3702 EXT4_INODE_SIZE(sb);
3703 block = ext4_inode_table(sb, gdp) +
3704 (offset >> EXT4_BLOCK_SIZE_BITS(sb));
3705
3706 iloc->block_group = block_group;
3707 iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
3708 return block;
3709}
3710
3711/* 3810/*
3712 * ext4_get_inode_loc returns with an extra refcount against the inode's 3811 * ext4_get_inode_loc returns with an extra refcount against the inode's
3713 * underlying buffer_head on success. If 'in_mem' is true, we have all 3812 * underlying buffer_head on success. If 'in_mem' is true, we have all
@@ -3717,19 +3816,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
3717static int __ext4_get_inode_loc(struct inode *inode, 3816static int __ext4_get_inode_loc(struct inode *inode,
3718 struct ext4_iloc *iloc, int in_mem) 3817 struct ext4_iloc *iloc, int in_mem)
3719{ 3818{
3720 ext4_fsblk_t block; 3819 struct ext4_group_desc *gdp;
3721 struct buffer_head *bh; 3820 struct buffer_head *bh;
3821 struct super_block *sb = inode->i_sb;
3822 ext4_fsblk_t block;
3823 int inodes_per_block, inode_offset;
3824
3825 iloc->bh = 0;
3826 if (!ext4_valid_inum(sb, inode->i_ino))
3827 return -EIO;
3722 3828
3723 block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); 3829 iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
3724 if (!block) 3830 gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
3831 if (!gdp)
3725 return -EIO; 3832 return -EIO;
3726 3833
3727 bh = sb_getblk(inode->i_sb, block); 3834 /*
3835 * Figure out the offset within the block group inode table
3836 */
3837 inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
3838 inode_offset = ((inode->i_ino - 1) %
3839 EXT4_INODES_PER_GROUP(sb));
3840 block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
3841 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
3842
3843 bh = sb_getblk(sb, block);
3728 if (!bh) { 3844 if (!bh) {
3729 ext4_error (inode->i_sb, "ext4_get_inode_loc", 3845 ext4_error(sb, "ext4_get_inode_loc", "unable to read "
3730 "unable to read inode block - " 3846 "inode block - inode=%lu, block=%llu",
3731 "inode=%lu, block=%llu", 3847 inode->i_ino, block);
3732 inode->i_ino, block);
3733 return -EIO; 3848 return -EIO;
3734 } 3849 }
3735 if (!buffer_uptodate(bh)) { 3850 if (!buffer_uptodate(bh)) {
@@ -3757,28 +3872,12 @@ static int __ext4_get_inode_loc(struct inode *inode,
3757 */ 3872 */
3758 if (in_mem) { 3873 if (in_mem) {
3759 struct buffer_head *bitmap_bh; 3874 struct buffer_head *bitmap_bh;
3760 struct ext4_group_desc *desc; 3875 int i, start;
3761 int inodes_per_buffer;
3762 int inode_offset, i;
3763 ext4_group_t block_group;
3764 int start;
3765
3766 block_group = (inode->i_ino - 1) /
3767 EXT4_INODES_PER_GROUP(inode->i_sb);
3768 inodes_per_buffer = bh->b_size /
3769 EXT4_INODE_SIZE(inode->i_sb);
3770 inode_offset = ((inode->i_ino - 1) %
3771 EXT4_INODES_PER_GROUP(inode->i_sb));
3772 start = inode_offset & ~(inodes_per_buffer - 1);
3773 3876
3774 /* Is the inode bitmap in cache? */ 3877 start = inode_offset & ~(inodes_per_block - 1);
3775 desc = ext4_get_group_desc(inode->i_sb,
3776 block_group, NULL);
3777 if (!desc)
3778 goto make_io;
3779 3878
3780 bitmap_bh = sb_getblk(inode->i_sb, 3879 /* Is the inode bitmap in cache? */
3781 ext4_inode_bitmap(inode->i_sb, desc)); 3880 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
3782 if (!bitmap_bh) 3881 if (!bitmap_bh)
3783 goto make_io; 3882 goto make_io;
3784 3883
@@ -3791,14 +3890,14 @@ static int __ext4_get_inode_loc(struct inode *inode,
3791 brelse(bitmap_bh); 3890 brelse(bitmap_bh);
3792 goto make_io; 3891 goto make_io;
3793 } 3892 }
3794 for (i = start; i < start + inodes_per_buffer; i++) { 3893 for (i = start; i < start + inodes_per_block; i++) {
3795 if (i == inode_offset) 3894 if (i == inode_offset)
3796 continue; 3895 continue;
3797 if (ext4_test_bit(i, bitmap_bh->b_data)) 3896 if (ext4_test_bit(i, bitmap_bh->b_data))
3798 break; 3897 break;
3799 } 3898 }
3800 brelse(bitmap_bh); 3899 brelse(bitmap_bh);
3801 if (i == start + inodes_per_buffer) { 3900 if (i == start + inodes_per_block) {
3802 /* all other inodes are free, so skip I/O */ 3901 /* all other inodes are free, so skip I/O */
3803 memset(bh->b_data, 0, bh->b_size); 3902 memset(bh->b_data, 0, bh->b_size);
3804 set_buffer_uptodate(bh); 3903 set_buffer_uptodate(bh);
@@ -3809,6 +3908,36 @@ static int __ext4_get_inode_loc(struct inode *inode,
3809 3908
3810make_io: 3909make_io:
3811 /* 3910 /*
3911 * If we need to do any I/O, try to pre-readahead extra
3912 * blocks from the inode table.
3913 */
3914 if (EXT4_SB(sb)->s_inode_readahead_blks) {
3915 ext4_fsblk_t b, end, table;
3916 unsigned num;
3917
3918 table = ext4_inode_table(sb, gdp);
3919 /* Make sure s_inode_readahead_blks is a power of 2 */
3920 while (EXT4_SB(sb)->s_inode_readahead_blks &
3921 (EXT4_SB(sb)->s_inode_readahead_blks-1))
3922 EXT4_SB(sb)->s_inode_readahead_blks =
3923 (EXT4_SB(sb)->s_inode_readahead_blks &
3924 (EXT4_SB(sb)->s_inode_readahead_blks-1));
3925 b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
3926 if (table > b)
3927 b = table;
3928 end = b + EXT4_SB(sb)->s_inode_readahead_blks;
3929 num = EXT4_INODES_PER_GROUP(sb);
3930 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3931 EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3932 num -= le16_to_cpu(gdp->bg_itable_unused);
3933 table += num / inodes_per_block;
3934 if (end > table)
3935 end = table;
3936 while (b <= end)
3937 sb_breadahead(sb, b++);
3938 }
3939
3940 /*
3812 * There are other valid inodes in the buffer, this inode 3941 * There are other valid inodes in the buffer, this inode
3813 * has in-inode xattrs, or we don't have this inode in memory. 3942 * has in-inode xattrs, or we don't have this inode in memory.
3814 * Read the block from disk. 3943 * Read the block from disk.
@@ -3818,10 +3947,9 @@ make_io:
3818 submit_bh(READ_META, bh); 3947 submit_bh(READ_META, bh);
3819 wait_on_buffer(bh); 3948 wait_on_buffer(bh);
3820 if (!buffer_uptodate(bh)) { 3949 if (!buffer_uptodate(bh)) {
3821 ext4_error(inode->i_sb, "ext4_get_inode_loc", 3950 ext4_error(sb, __func__,
3822 "unable to read inode block - " 3951 "unable to read inode block - inode=%lu, "
3823 "inode=%lu, block=%llu", 3952 "block=%llu", inode->i_ino, block);
3824 inode->i_ino, block);
3825 brelse(bh); 3953 brelse(bh);
3826 return -EIO; 3954 return -EIO;
3827 } 3955 }
@@ -3913,11 +4041,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3913 return inode; 4041 return inode;
3914 4042
3915 ei = EXT4_I(inode); 4043 ei = EXT4_I(inode);
3916#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 4044#ifdef CONFIG_EXT4_FS_POSIX_ACL
3917 ei->i_acl = EXT4_ACL_NOT_CACHED; 4045 ei->i_acl = EXT4_ACL_NOT_CACHED;
3918 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 4046 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
3919#endif 4047#endif
3920 ei->i_block_alloc_info = NULL;
3921 4048
3922 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4049 ret = __ext4_get_inode_loc(inode, &iloc, 0);
3923 if (ret < 0) 4050 if (ret < 0)
@@ -3927,7 +4054,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3927 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 4054 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
3928 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 4055 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
3929 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); 4056 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
3930 if(!(test_opt (inode->i_sb, NO_UID32))) { 4057 if (!(test_opt(inode->i_sb, NO_UID32))) {
3931 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 4058 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3932 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 4059 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3933 } 4060 }
@@ -3945,7 +4072,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3945 if (inode->i_mode == 0 || 4072 if (inode->i_mode == 0 ||
3946 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { 4073 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
3947 /* this inode is deleted */ 4074 /* this inode is deleted */
3948 brelse (bh); 4075 brelse(bh);
3949 ret = -ESTALE; 4076 ret = -ESTALE;
3950 goto bad_inode; 4077 goto bad_inode;
3951 } 4078 }
@@ -3978,7 +4105,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3978 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 4105 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
3979 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 4106 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
3980 EXT4_INODE_SIZE(inode->i_sb)) { 4107 EXT4_INODE_SIZE(inode->i_sb)) {
3981 brelse (bh); 4108 brelse(bh);
3982 ret = -EIO; 4109 ret = -EIO;
3983 goto bad_inode; 4110 goto bad_inode;
3984 } 4111 }
@@ -4031,7 +4158,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4031 init_special_inode(inode, inode->i_mode, 4158 init_special_inode(inode, inode->i_mode,
4032 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 4159 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4033 } 4160 }
4034 brelse (iloc.bh); 4161 brelse(iloc.bh);
4035 ext4_set_inode_flags(inode); 4162 ext4_set_inode_flags(inode);
4036 unlock_new_inode(inode); 4163 unlock_new_inode(inode);
4037 return inode; 4164 return inode;
@@ -4113,14 +4240,14 @@ static int ext4_do_update_inode(handle_t *handle,
4113 4240
4114 ext4_get_inode_flags(ei); 4241 ext4_get_inode_flags(ei);
4115 raw_inode->i_mode = cpu_to_le16(inode->i_mode); 4242 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
4116 if(!(test_opt(inode->i_sb, NO_UID32))) { 4243 if (!(test_opt(inode->i_sb, NO_UID32))) {
4117 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); 4244 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
4118 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); 4245 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
4119/* 4246/*
4120 * Fix up interoperability with old kernels. Otherwise, old inodes get 4247 * Fix up interoperability with old kernels. Otherwise, old inodes get
4121 * re-used with the upper 16 bits of the uid/gid intact 4248 * re-used with the upper 16 bits of the uid/gid intact
4122 */ 4249 */
4123 if(!ei->i_dtime) { 4250 if (!ei->i_dtime) {
4124 raw_inode->i_uid_high = 4251 raw_inode->i_uid_high =
4125 cpu_to_le16(high_16_bits(inode->i_uid)); 4252 cpu_to_le16(high_16_bits(inode->i_uid));
4126 raw_inode->i_gid_high = 4253 raw_inode->i_gid_high =
@@ -4208,7 +4335,7 @@ static int ext4_do_update_inode(handle_t *handle,
4208 ei->i_state &= ~EXT4_STATE_NEW; 4335 ei->i_state &= ~EXT4_STATE_NEW;
4209 4336
4210out_brelse: 4337out_brelse:
4211 brelse (bh); 4338 brelse(bh);
4212 ext4_std_error(inode->i_sb, err); 4339 ext4_std_error(inode->i_sb, err);
4213 return err; 4340 return err;
4214} 4341}
@@ -4811,6 +4938,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
4811 loff_t size; 4938 loff_t size;
4812 unsigned long len; 4939 unsigned long len;
4813 int ret = -EINVAL; 4940 int ret = -EINVAL;
4941 void *fsdata;
4814 struct file *file = vma->vm_file; 4942 struct file *file = vma->vm_file;
4815 struct inode *inode = file->f_path.dentry->d_inode; 4943 struct inode *inode = file->f_path.dentry->d_inode;
4816 struct address_space *mapping = inode->i_mapping; 4944 struct address_space *mapping = inode->i_mapping;
@@ -4849,11 +4977,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
4849 * on the same page though 4977 * on the same page though
4850 */ 4978 */
4851 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), 4979 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
4852 len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); 4980 len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
4853 if (ret < 0) 4981 if (ret < 0)
4854 goto out_unlock; 4982 goto out_unlock;
4855 ret = mapping->a_ops->write_end(file, mapping, page_offset(page), 4983 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
4856 len, len, page, NULL); 4984 len, len, page, fsdata);
4857 if (ret < 0) 4985 if (ret < 0)
4858 goto out_unlock; 4986 goto out_unlock;
4859 ret = 0; 4987 ret = 0;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7a6c2f1faba6..ea27eaa0cfe5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
23 struct inode *inode = filp->f_dentry->d_inode; 23 struct inode *inode = filp->f_dentry->d_inode;
24 struct ext4_inode_info *ei = EXT4_I(inode); 24 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 25 unsigned int flags;
26 unsigned short rsv_window_size;
27 26
28 ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); 27 ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
29 28
30 switch (cmd) { 29 switch (cmd) {
31 case EXT4_IOC_GETFLAGS: 30 case EXT4_IOC_GETFLAGS:
@@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
34 return put_user(flags, (int __user *) arg); 33 return put_user(flags, (int __user *) arg);
35 case EXT4_IOC_SETFLAGS: { 34 case EXT4_IOC_SETFLAGS: {
36 handle_t *handle = NULL; 35 handle_t *handle = NULL;
37 int err; 36 int err, migrate = 0;
38 struct ext4_iloc iloc; 37 struct ext4_iloc iloc;
39 unsigned int oldflags; 38 unsigned int oldflags;
40 unsigned int jflag; 39 unsigned int jflag;
@@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
82 if (!capable(CAP_SYS_RESOURCE)) 81 if (!capable(CAP_SYS_RESOURCE))
83 goto flags_out; 82 goto flags_out;
84 } 83 }
84 if (oldflags & EXT4_EXTENTS_FL) {
85 /* We don't support clearning extent flags */
86 if (!(flags & EXT4_EXTENTS_FL)) {
87 err = -EOPNOTSUPP;
88 goto flags_out;
89 }
90 } else if (flags & EXT4_EXTENTS_FL) {
91 /* migrate the file */
92 migrate = 1;
93 flags &= ~EXT4_EXTENTS_FL;
94 }
85 95
86 handle = ext4_journal_start(inode, 1); 96 handle = ext4_journal_start(inode, 1);
87 if (IS_ERR(handle)) { 97 if (IS_ERR(handle)) {
@@ -109,6 +119,10 @@ flags_err:
109 119
110 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) 120 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
111 err = ext4_change_inode_journal_flag(inode, jflag); 121 err = ext4_change_inode_journal_flag(inode, jflag);
122 if (err)
123 goto flags_out;
124 if (migrate)
125 err = ext4_ext_migrate(inode);
112flags_out: 126flags_out:
113 mutex_unlock(&inode->i_mutex); 127 mutex_unlock(&inode->i_mutex);
114 mnt_drop_write(filp->f_path.mnt); 128 mnt_drop_write(filp->f_path.mnt);
@@ -175,49 +189,6 @@ setversion_out:
175 return ret; 189 return ret;
176 } 190 }
177#endif 191#endif
178 case EXT4_IOC_GETRSVSZ:
179 if (test_opt(inode->i_sb, RESERVATION)
180 && S_ISREG(inode->i_mode)
181 && ei->i_block_alloc_info) {
182 rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size;
183 return put_user(rsv_window_size, (int __user *)arg);
184 }
185 return -ENOTTY;
186 case EXT4_IOC_SETRSVSZ: {
187 int err;
188
189 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
190 return -ENOTTY;
191
192 if (!is_owner_or_cap(inode))
193 return -EACCES;
194
195 if (get_user(rsv_window_size, (int __user *)arg))
196 return -EFAULT;
197
198 err = mnt_want_write(filp->f_path.mnt);
199 if (err)
200 return err;
201
202 if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
203 rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
204
205 /*
206 * need to allocate reservation structure for this inode
207 * before set the window size
208 */
209 down_write(&ei->i_data_sem);
210 if (!ei->i_block_alloc_info)
211 ext4_init_block_alloc_info(inode);
212
213 if (ei->i_block_alloc_info){
214 struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
215 rsv->rsv_goal_size = rsv_window_size;
216 }
217 up_write(&ei->i_data_sem);
218 mnt_drop_write(filp->f_path.mnt);
219 return 0;
220 }
221 case EXT4_IOC_GROUP_EXTEND: { 192 case EXT4_IOC_GROUP_EXTEND: {
222 ext4_fsblk_t n_blocks_count; 193 ext4_fsblk_t n_blocks_count;
223 struct super_block *sb = inode->i_sb; 194 struct super_block *sb = inode->i_sb;
@@ -267,7 +238,26 @@ setversion_out:
267 } 238 }
268 239
269 case EXT4_IOC_MIGRATE: 240 case EXT4_IOC_MIGRATE:
270 return ext4_ext_migrate(inode, filp, cmd, arg); 241 {
242 int err;
243 if (!is_owner_or_cap(inode))
244 return -EACCES;
245
246 err = mnt_want_write(filp->f_path.mnt);
247 if (err)
248 return err;
249 /*
250 * inode_mutex prevent write and truncate on the file.
251 * Read still goes through. We take i_data_sem in
252 * ext4_ext_swap_inode_data before we switch the
253 * inode format to prevent read.
254 */
255 mutex_lock(&(inode->i_mutex));
256 err = ext4_ext_migrate(inode);
257 mutex_unlock(&(inode->i_mutex));
258 mnt_drop_write(filp->f_path.mnt);
259 return err;
260 }
271 261
272 default: 262 default:
273 return -ENOTTY; 263 return -ENOTTY;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e0e3a5eb1ddb..b580714f0d85 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
477 b2 = (unsigned char *) bitmap; 477 b2 = (unsigned char *) bitmap;
478 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { 478 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
479 if (b1[i] != b2[i]) { 479 if (b1[i] != b2[i]) {
480 printk("corruption in group %lu at byte %u(%u):" 480 printk(KERN_ERR "corruption in group %lu "
481 " %x in copy != %x on disk/prealloc\n", 481 "at byte %u(%u): %x in copy != %x "
482 e4b->bd_group, i, i * 8, b1[i], b2[i]); 482 "on disk/prealloc\n",
483 e4b->bd_group, i, i * 8, b1[i], b2[i]);
483 BUG(); 484 BUG();
484 } 485 }
485 } 486 }
@@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
533 void *buddy; 534 void *buddy;
534 void *buddy2; 535 void *buddy2;
535 536
536 if (!test_opt(sb, MBALLOC))
537 return 0;
538
539 { 537 {
540 static int mb_check_counter; 538 static int mb_check_counter;
541 if (mb_check_counter++ % 100 != 0) 539 if (mb_check_counter++ % 100 != 0)
@@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
784 if (bh[i] == NULL) 782 if (bh[i] == NULL)
785 goto out; 783 goto out;
786 784
787 if (bh_uptodate_or_lock(bh[i])) 785 if (buffer_uptodate(bh[i]) &&
786 !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
788 continue; 787 continue;
789 788
789 lock_buffer(bh[i]);
790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
792 ext4_init_block_bitmap(sb, bh[i], 792 ext4_init_block_bitmap(sb, bh[i],
@@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb)
2169{ 2169{
2170 struct ext4_sb_info *sbi = EXT4_SB(sb); 2170 struct ext4_sb_info *sbi = EXT4_SB(sb);
2171 2171
2172 remove_proc_entry("mb_groups", sbi->s_mb_proc); 2172 if (sbi->s_proc != NULL) {
2173 remove_proc_entry("mb_history", sbi->s_mb_proc); 2173 remove_proc_entry("mb_groups", sbi->s_proc);
2174 2174 remove_proc_entry("mb_history", sbi->s_proc);
2175 }
2175 kfree(sbi->s_mb_history); 2176 kfree(sbi->s_mb_history);
2176} 2177}
2177 2178
@@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb)
2180 struct ext4_sb_info *sbi = EXT4_SB(sb); 2181 struct ext4_sb_info *sbi = EXT4_SB(sb);
2181 int i; 2182 int i;
2182 2183
2183 if (sbi->s_mb_proc != NULL) { 2184 if (sbi->s_proc != NULL) {
2184 proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, 2185 proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
2185 &ext4_mb_seq_history_fops, sb); 2186 &ext4_mb_seq_history_fops, sb);
2186 proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, 2187 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2187 &ext4_mb_seq_groups_fops, sb); 2188 &ext4_mb_seq_groups_fops, sb);
2188 } 2189 }
2189 2190
@@ -2485,19 +2486,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2485 unsigned max; 2486 unsigned max;
2486 int ret; 2487 int ret;
2487 2488
2488 if (!test_opt(sb, MBALLOC))
2489 return 0;
2490
2491 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); 2489 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short);
2492 2490
2493 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); 2491 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2494 if (sbi->s_mb_offsets == NULL) { 2492 if (sbi->s_mb_offsets == NULL) {
2495 clear_opt(sbi->s_mount_opt, MBALLOC);
2496 return -ENOMEM; 2493 return -ENOMEM;
2497 } 2494 }
2498 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2495 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2499 if (sbi->s_mb_maxs == NULL) { 2496 if (sbi->s_mb_maxs == NULL) {
2500 clear_opt(sbi->s_mount_opt, MBALLOC);
2501 kfree(sbi->s_mb_maxs); 2497 kfree(sbi->s_mb_maxs);
2502 return -ENOMEM; 2498 return -ENOMEM;
2503 } 2499 }
@@ -2520,7 +2516,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2520 /* init file for buddy data */ 2516 /* init file for buddy data */
2521 ret = ext4_mb_init_backend(sb); 2517 ret = ext4_mb_init_backend(sb);
2522 if (ret != 0) { 2518 if (ret != 0) {
2523 clear_opt(sbi->s_mount_opt, MBALLOC);
2524 kfree(sbi->s_mb_offsets); 2519 kfree(sbi->s_mb_offsets);
2525 kfree(sbi->s_mb_maxs); 2520 kfree(sbi->s_mb_maxs);
2526 return ret; 2521 return ret;
@@ -2540,17 +2535,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2540 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; 2535 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
2541 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2536 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2542 2537
2543 i = sizeof(struct ext4_locality_group) * nr_cpu_ids; 2538 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2544 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
2545 if (sbi->s_locality_groups == NULL) { 2539 if (sbi->s_locality_groups == NULL) {
2546 clear_opt(sbi->s_mount_opt, MBALLOC);
2547 kfree(sbi->s_mb_offsets); 2540 kfree(sbi->s_mb_offsets);
2548 kfree(sbi->s_mb_maxs); 2541 kfree(sbi->s_mb_maxs);
2549 return -ENOMEM; 2542 return -ENOMEM;
2550 } 2543 }
2551 for (i = 0; i < nr_cpu_ids; i++) { 2544 for_each_possible_cpu(i) {
2552 struct ext4_locality_group *lg; 2545 struct ext4_locality_group *lg;
2553 lg = &sbi->s_locality_groups[i]; 2546 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2554 mutex_init(&lg->lg_mutex); 2547 mutex_init(&lg->lg_mutex);
2555 for (j = 0; j < PREALLOC_TB_SIZE; j++) 2548 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2556 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); 2549 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
@@ -2560,7 +2553,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2560 ext4_mb_init_per_dev_proc(sb); 2553 ext4_mb_init_per_dev_proc(sb);
2561 ext4_mb_history_init(sb); 2554 ext4_mb_history_init(sb);
2562 2555
2563 printk("EXT4-fs: mballoc enabled\n"); 2556 printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
2564 return 0; 2557 return 0;
2565} 2558}
2566 2559
@@ -2589,9 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
2589 struct ext4_group_info *grinfo; 2582 struct ext4_group_info *grinfo;
2590 struct ext4_sb_info *sbi = EXT4_SB(sb); 2583 struct ext4_sb_info *sbi = EXT4_SB(sb);
2591 2584
2592 if (!test_opt(sb, MBALLOC))
2593 return 0;
2594
2595 /* release freed, non-committed blocks */ 2585 /* release freed, non-committed blocks */
2596 spin_lock(&sbi->s_md_lock); 2586 spin_lock(&sbi->s_md_lock);
2597 list_splice_init(&sbi->s_closed_transaction, 2587 list_splice_init(&sbi->s_closed_transaction,
@@ -2647,8 +2637,7 @@ int ext4_mb_release(struct super_block *sb)
2647 atomic_read(&sbi->s_mb_discarded)); 2637 atomic_read(&sbi->s_mb_discarded));
2648 } 2638 }
2649 2639
2650 kfree(sbi->s_locality_groups); 2640 free_percpu(sbi->s_locality_groups);
2651
2652 ext4_mb_history_release(sb); 2641 ext4_mb_history_release(sb);
2653 ext4_mb_destroy_per_dev_proc(sb); 2642 ext4_mb_destroy_per_dev_proc(sb);
2654 2643
@@ -2721,118 +2710,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
2721#define EXT4_MB_STREAM_REQ "stream_req" 2710#define EXT4_MB_STREAM_REQ "stream_req"
2722#define EXT4_MB_GROUP_PREALLOC "group_prealloc" 2711#define EXT4_MB_GROUP_PREALLOC "group_prealloc"
2723 2712
2724
2725
2726#define MB_PROC_FOPS(name) \
2727static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \
2728{ \
2729 struct ext4_sb_info *sbi = m->private; \
2730 \
2731 seq_printf(m, "%ld\n", sbi->s_mb_##name); \
2732 return 0; \
2733} \
2734 \
2735static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
2736{ \
2737 return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
2738} \
2739 \
2740static ssize_t ext4_mb_##name##_proc_write(struct file *file, \
2741 const char __user *buf, size_t cnt, loff_t *ppos) \
2742{ \
2743 struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
2744 char str[32]; \
2745 long value; \
2746 if (cnt >= sizeof(str)) \
2747 return -EINVAL; \
2748 if (copy_from_user(str, buf, cnt)) \
2749 return -EFAULT; \
2750 value = simple_strtol(str, NULL, 0); \
2751 if (value <= 0) \
2752 return -ERANGE; \
2753 sbi->s_mb_##name = value; \
2754 return cnt; \
2755} \
2756 \
2757static const struct file_operations ext4_mb_##name##_proc_fops = { \
2758 .owner = THIS_MODULE, \
2759 .open = ext4_mb_##name##_proc_open, \
2760 .read = seq_read, \
2761 .llseek = seq_lseek, \
2762 .release = single_release, \
2763 .write = ext4_mb_##name##_proc_write, \
2764};
2765
2766MB_PROC_FOPS(stats);
2767MB_PROC_FOPS(max_to_scan);
2768MB_PROC_FOPS(min_to_scan);
2769MB_PROC_FOPS(order2_reqs);
2770MB_PROC_FOPS(stream_request);
2771MB_PROC_FOPS(group_prealloc);
2772
2773#define MB_PROC_HANDLER(name, var) \
2774do { \
2775 proc = proc_create_data(name, mode, sbi->s_mb_proc, \
2776 &ext4_mb_##var##_proc_fops, sbi); \
2777 if (proc == NULL) { \
2778 printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
2779 goto err_out; \
2780 } \
2781} while (0)
2782
2783static int ext4_mb_init_per_dev_proc(struct super_block *sb) 2713static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2784{ 2714{
2785 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; 2715 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
2786 struct ext4_sb_info *sbi = EXT4_SB(sb); 2716 struct ext4_sb_info *sbi = EXT4_SB(sb);
2787 struct proc_dir_entry *proc; 2717 struct proc_dir_entry *proc;
2788 char devname[64];
2789 2718
2790 if (proc_root_ext4 == NULL) { 2719 if (sbi->s_proc == NULL)
2791 sbi->s_mb_proc = NULL;
2792 return -EINVAL; 2720 return -EINVAL;
2793 }
2794 bdevname(sb->s_bdev, devname);
2795 sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
2796
2797 MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
2798 MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
2799 MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan);
2800 MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs);
2801 MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request);
2802 MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc);
2803 2721
2722 EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
2723 EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
2724 EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
2725 EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
2726 EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
2727 EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
2804 return 0; 2728 return 0;
2805 2729
2806err_out: 2730err_out:
2807 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); 2731 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2808 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); 2732 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2809 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); 2733 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2810 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); 2734 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2811 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); 2735 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2812 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); 2736 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2813 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
2814 remove_proc_entry(devname, proc_root_ext4);
2815 sbi->s_mb_proc = NULL;
2816
2817 return -ENOMEM; 2737 return -ENOMEM;
2818} 2738}
2819 2739
2820static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) 2740static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2821{ 2741{
2822 struct ext4_sb_info *sbi = EXT4_SB(sb); 2742 struct ext4_sb_info *sbi = EXT4_SB(sb);
2823 char devname[64];
2824 2743
2825 if (sbi->s_mb_proc == NULL) 2744 if (sbi->s_proc == NULL)
2826 return -EINVAL; 2745 return -EINVAL;
2827 2746
2828 bdevname(sb->s_bdev, devname); 2747 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2829 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); 2748 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2830 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); 2749 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2831 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); 2750 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2832 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); 2751 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2833 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); 2752 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2834 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
2835 remove_proc_entry(devname, proc_root_ext4);
2836 2753
2837 return 0; 2754 return 0;
2838} 2755}
@@ -2854,11 +2771,6 @@ int __init init_ext4_mballoc(void)
2854 kmem_cache_destroy(ext4_pspace_cachep); 2771 kmem_cache_destroy(ext4_pspace_cachep);
2855 return -ENOMEM; 2772 return -ENOMEM;
2856 } 2773 }
2857#ifdef CONFIG_PROC_FS
2858 proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
2859 if (proc_root_ext4 == NULL)
2860 printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
2861#endif
2862 return 0; 2774 return 0;
2863} 2775}
2864 2776
@@ -2867,9 +2779,6 @@ void exit_ext4_mballoc(void)
2867 /* XXX: synchronize_rcu(); */ 2779 /* XXX: synchronize_rcu(); */
2868 kmem_cache_destroy(ext4_pspace_cachep); 2780 kmem_cache_destroy(ext4_pspace_cachep);
2869 kmem_cache_destroy(ext4_ac_cachep); 2781 kmem_cache_destroy(ext4_ac_cachep);
2870#ifdef CONFIG_PROC_FS
2871 remove_proc_entry("fs/ext4", NULL);
2872#endif
2873} 2782}
2874 2783
2875 2784
@@ -2879,7 +2788,7 @@ void exit_ext4_mballoc(void)
2879 */ 2788 */
2880static noinline_for_stack int 2789static noinline_for_stack int
2881ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2790ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2882 handle_t *handle) 2791 handle_t *handle, unsigned long reserv_blks)
2883{ 2792{
2884 struct buffer_head *bitmap_bh = NULL; 2793 struct buffer_head *bitmap_bh = NULL;
2885 struct ext4_super_block *es; 2794 struct ext4_super_block *es;
@@ -2968,15 +2877,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2968 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); 2877 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
2969 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2878 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2970 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2879 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
2971 2880 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
2972 /* 2881 /*
2973 * free blocks account has already be reduced/reserved 2882 * Now reduce the dirty block count also. Should not go negative
2974 * at write_begin() time for delayed allocation
2975 * do not double accounting
2976 */ 2883 */
2977 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2884 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2978 percpu_counter_sub(&sbi->s_freeblocks_counter, 2885 /* release all the reserved blocks if non delalloc */
2979 ac->ac_b_ex.fe_len); 2886 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2887 else
2888 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
2889 ac->ac_b_ex.fe_len);
2980 2890
2981 if (sbi->s_log_groups_per_flex) { 2891 if (sbi->s_log_groups_per_flex) {
2982 ext4_group_t flex_group = ext4_flex_group(sbi, 2892 ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -3884,7 +3794,7 @@ out:
3884 * 3794 *
3885 * FIXME!! Make sure it is valid at all the call sites 3795 * FIXME!! Make sure it is valid at all the call sites
3886 */ 3796 */
3887void ext4_mb_discard_inode_preallocations(struct inode *inode) 3797void ext4_discard_preallocations(struct inode *inode)
3888{ 3798{
3889 struct ext4_inode_info *ei = EXT4_I(inode); 3799 struct ext4_inode_info *ei = EXT4_I(inode);
3890 struct super_block *sb = inode->i_sb; 3800 struct super_block *sb = inode->i_sb;
@@ -3896,7 +3806,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3896 struct ext4_buddy e4b; 3806 struct ext4_buddy e4b;
3897 int err; 3807 int err;
3898 3808
3899 if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { 3809 if (!S_ISREG(inode->i_mode)) {
3900 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ 3810 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
3901 return; 3811 return;
3902 } 3812 }
@@ -4094,8 +4004,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4094 * per cpu locality group is to reduce the contention between block 4004 * per cpu locality group is to reduce the contention between block
4095 * request from multiple CPUs. 4005 * request from multiple CPUs.
4096 */ 4006 */
4097 ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; 4007 ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
4098 put_cpu();
4099 4008
4100 /* we're going to use group allocation */ 4009 /* we're going to use group allocation */
4101 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; 4010 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@ -4369,33 +4278,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4369ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, 4278ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4370 struct ext4_allocation_request *ar, int *errp) 4279 struct ext4_allocation_request *ar, int *errp)
4371{ 4280{
4281 int freed;
4372 struct ext4_allocation_context *ac = NULL; 4282 struct ext4_allocation_context *ac = NULL;
4373 struct ext4_sb_info *sbi; 4283 struct ext4_sb_info *sbi;
4374 struct super_block *sb; 4284 struct super_block *sb;
4375 ext4_fsblk_t block = 0; 4285 ext4_fsblk_t block = 0;
4376 int freed; 4286 unsigned long inquota;
4377 int inquota; 4287 unsigned long reserv_blks = 0;
4378 4288
4379 sb = ar->inode->i_sb; 4289 sb = ar->inode->i_sb;
4380 sbi = EXT4_SB(sb); 4290 sbi = EXT4_SB(sb);
4381 4291
4382 if (!test_opt(sb, MBALLOC)) {
4383 block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
4384 &(ar->len), errp);
4385 return block;
4386 }
4387 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { 4292 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
4388 /* 4293 /*
4389 * With delalloc we already reserved the blocks 4294 * With delalloc we already reserved the blocks
4390 */ 4295 */
4391 ar->len = ext4_has_free_blocks(sbi, ar->len); 4296 while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
4392 } 4297 /* let others to free the space */
4393 4298 yield();
4394 if (ar->len == 0) { 4299 ar->len = ar->len >> 1;
4395 *errp = -ENOSPC; 4300 }
4396 return 0; 4301 if (!ar->len) {
4302 *errp = -ENOSPC;
4303 return 0;
4304 }
4305 reserv_blks = ar->len;
4397 } 4306 }
4398
4399 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { 4307 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
4400 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4308 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4401 ar->len--; 4309 ar->len--;
@@ -4441,7 +4349,7 @@ repeat:
4441 } 4349 }
4442 4350
4443 if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4351 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4444 *errp = ext4_mb_mark_diskspace_used(ac, handle); 4352 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
4445 if (*errp == -EAGAIN) { 4353 if (*errp == -EAGAIN) {
4446 ac->ac_b_ex.fe_group = 0; 4354 ac->ac_b_ex.fe_group = 0;
4447 ac->ac_b_ex.fe_start = 0; 4355 ac->ac_b_ex.fe_start = 0;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index c7c9906c2a75..b3b4828f8b89 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -257,7 +257,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac);
257 257
258#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 258#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
259 259
260static struct proc_dir_entry *proc_root_ext4;
261struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); 260struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
262 261
263static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 262static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 46fc0b5b12ba..f2a9cf498ecd 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -447,8 +447,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
447 447
448} 448}
449 449
450int ext4_ext_migrate(struct inode *inode, struct file *filp, 450int ext4_ext_migrate(struct inode *inode)
451 unsigned int cmd, unsigned long arg)
452{ 451{
453 handle_t *handle; 452 handle_t *handle;
454 int retval = 0, i; 453 int retval = 0, i;
@@ -516,12 +515,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
516 * when we add extents we extent the journal 515 * when we add extents we extent the journal
517 */ 516 */
518 /* 517 /*
519 * inode_mutex prevent write and truncate on the file. Read still goes
520 * through. We take i_data_sem in ext4_ext_swap_inode_data before we
521 * switch the inode format to prevent read.
522 */
523 mutex_lock(&(inode->i_mutex));
524 /*
525 * Even though we take i_mutex we can still cause block allocation 518 * Even though we take i_mutex we can still cause block allocation
526 * via mmap write to holes. If we have allocated new blocks we fail 519 * via mmap write to holes. If we have allocated new blocks we fail
527 * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. 520 * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag.
@@ -623,7 +616,6 @@ err_out:
623 tmp_inode->i_nlink = 0; 616 tmp_inode->i_nlink = 0;
624 617
625 ext4_journal_stop(handle); 618 ext4_journal_stop(handle);
626 mutex_unlock(&(inode->i_mutex));
627 619
628 if (tmp_inode) 620 if (tmp_inode)
629 iput(tmp_inode); 621 iput(tmp_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 387ad98350c3..92db9e945147 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -151,34 +151,36 @@ struct dx_map_entry
151 151
152static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); 152static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
153static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); 153static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
154static inline unsigned dx_get_hash (struct dx_entry *entry); 154static inline unsigned dx_get_hash(struct dx_entry *entry);
155static void dx_set_hash (struct dx_entry *entry, unsigned value); 155static void dx_set_hash(struct dx_entry *entry, unsigned value);
156static unsigned dx_get_count (struct dx_entry *entries); 156static unsigned dx_get_count(struct dx_entry *entries);
157static unsigned dx_get_limit (struct dx_entry *entries); 157static unsigned dx_get_limit(struct dx_entry *entries);
158static void dx_set_count (struct dx_entry *entries, unsigned value); 158static void dx_set_count(struct dx_entry *entries, unsigned value);
159static void dx_set_limit (struct dx_entry *entries, unsigned value); 159static void dx_set_limit(struct dx_entry *entries, unsigned value);
160static unsigned dx_root_limit (struct inode *dir, unsigned infosize); 160static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
161static unsigned dx_node_limit (struct inode *dir); 161static unsigned dx_node_limit(struct inode *dir);
162static struct dx_frame *dx_probe(struct dentry *dentry, 162static struct dx_frame *dx_probe(const struct qstr *d_name,
163 struct inode *dir, 163 struct inode *dir,
164 struct dx_hash_info *hinfo, 164 struct dx_hash_info *hinfo,
165 struct dx_frame *frame, 165 struct dx_frame *frame,
166 int *err); 166 int *err);
167static void dx_release (struct dx_frame *frames); 167static void dx_release(struct dx_frame *frames);
168static int dx_make_map (struct ext4_dir_entry_2 *de, int size, 168static int dx_make_map(struct ext4_dir_entry_2 *de, int size,
169 struct dx_hash_info *hinfo, struct dx_map_entry map[]); 169 struct dx_hash_info *hinfo, struct dx_map_entry map[]);
170static void dx_sort_map(struct dx_map_entry *map, unsigned count); 170static void dx_sort_map(struct dx_map_entry *map, unsigned count);
171static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, 171static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
172 struct dx_map_entry *offsets, int count); 172 struct dx_map_entry *offsets, int count);
173static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); 173static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size);
174static void dx_insert_block(struct dx_frame *frame, 174static void dx_insert_block(struct dx_frame *frame,
175 u32 hash, ext4_lblk_t block); 175 u32 hash, ext4_lblk_t block);
176static int ext4_htree_next_block(struct inode *dir, __u32 hash, 176static int ext4_htree_next_block(struct inode *dir, __u32 hash,
177 struct dx_frame *frame, 177 struct dx_frame *frame,
178 struct dx_frame *frames, 178 struct dx_frame *frames,
179 __u32 *start_hash); 179 __u32 *start_hash);
180static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, 180static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
181 struct ext4_dir_entry_2 **res_dir, int *err); 181 const struct qstr *d_name,
182 struct ext4_dir_entry_2 **res_dir,
183 int *err);
182static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, 184static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
183 struct inode *inode); 185 struct inode *inode);
184 186
@@ -207,44 +209,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
207 entry->block = cpu_to_le32(value); 209 entry->block = cpu_to_le32(value);
208} 210}
209 211
210static inline unsigned dx_get_hash (struct dx_entry *entry) 212static inline unsigned dx_get_hash(struct dx_entry *entry)
211{ 213{
212 return le32_to_cpu(entry->hash); 214 return le32_to_cpu(entry->hash);
213} 215}
214 216
215static inline void dx_set_hash (struct dx_entry *entry, unsigned value) 217static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
216{ 218{
217 entry->hash = cpu_to_le32(value); 219 entry->hash = cpu_to_le32(value);
218} 220}
219 221
220static inline unsigned dx_get_count (struct dx_entry *entries) 222static inline unsigned dx_get_count(struct dx_entry *entries)
221{ 223{
222 return le16_to_cpu(((struct dx_countlimit *) entries)->count); 224 return le16_to_cpu(((struct dx_countlimit *) entries)->count);
223} 225}
224 226
225static inline unsigned dx_get_limit (struct dx_entry *entries) 227static inline unsigned dx_get_limit(struct dx_entry *entries)
226{ 228{
227 return le16_to_cpu(((struct dx_countlimit *) entries)->limit); 229 return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
228} 230}
229 231
230static inline void dx_set_count (struct dx_entry *entries, unsigned value) 232static inline void dx_set_count(struct dx_entry *entries, unsigned value)
231{ 233{
232 ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); 234 ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
233} 235}
234 236
235static inline void dx_set_limit (struct dx_entry *entries, unsigned value) 237static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
236{ 238{
237 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); 239 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
238} 240}
239 241
240static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) 242static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
241{ 243{
242 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - 244 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
243 EXT4_DIR_REC_LEN(2) - infosize; 245 EXT4_DIR_REC_LEN(2) - infosize;
244 return entry_space / sizeof(struct dx_entry); 246 return entry_space / sizeof(struct dx_entry);
245} 247}
246 248
247static inline unsigned dx_node_limit (struct inode *dir) 249static inline unsigned dx_node_limit(struct inode *dir)
248{ 250{
249 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); 251 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
250 return entry_space / sizeof(struct dx_entry); 252 return entry_space / sizeof(struct dx_entry);
@@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (struct inode *dir)
254 * Debug 256 * Debug
255 */ 257 */
256#ifdef DX_DEBUG 258#ifdef DX_DEBUG
257static void dx_show_index (char * label, struct dx_entry *entries) 259static void dx_show_index(char * label, struct dx_entry *entries)
258{ 260{
259 int i, n = dx_get_count (entries); 261 int i, n = dx_get_count (entries);
260 printk("%s index ", label); 262 printk(KERN_DEBUG "%s index ", label);
261 for (i = 0; i < n; i++) { 263 for (i = 0; i < n; i++) {
262 printk("%x->%lu ", i? dx_get_hash(entries + i) : 264 printk("%x->%lu ", i ? dx_get_hash(entries + i) :
263 0, (unsigned long)dx_get_block(entries + i)); 265 0, (unsigned long)dx_get_block(entries + i));
264 } 266 }
265 printk("\n"); 267 printk("\n");
@@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
306 struct dx_entry *entries, int levels) 308 struct dx_entry *entries, int levels)
307{ 309{
308 unsigned blocksize = dir->i_sb->s_blocksize; 310 unsigned blocksize = dir->i_sb->s_blocksize;
309 unsigned count = dx_get_count (entries), names = 0, space = 0, i; 311 unsigned count = dx_get_count(entries), names = 0, space = 0, i;
310 unsigned bcount = 0; 312 unsigned bcount = 0;
311 struct buffer_head *bh; 313 struct buffer_head *bh;
312 int err; 314 int err;
@@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
325 names += stats.names; 327 names += stats.names;
326 space += stats.space; 328 space += stats.space;
327 bcount += stats.bcount; 329 bcount += stats.bcount;
328 brelse (bh); 330 brelse(bh);
329 } 331 }
330 if (bcount) 332 if (bcount)
331 printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", 333 printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
332 names, space/bcount,(space/bcount)*100/blocksize); 334 levels ? "" : " ", names, space/bcount,
335 (space/bcount)*100/blocksize);
333 return (struct stats) { names, space, bcount}; 336 return (struct stats) { names, space, bcount};
334} 337}
335#endif /* DX_DEBUG */ 338#endif /* DX_DEBUG */
@@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
344 * back to userspace. 347 * back to userspace.
345 */ 348 */
346static struct dx_frame * 349static struct dx_frame *
347dx_probe(struct dentry *dentry, struct inode *dir, 350dx_probe(const struct qstr *d_name, struct inode *dir,
348 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) 351 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
349{ 352{
350 unsigned count, indirect; 353 unsigned count, indirect;
@@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct inode *dir,
355 u32 hash; 358 u32 hash;
356 359
357 frame->bh = NULL; 360 frame->bh = NULL;
358 if (dentry)
359 dir = dentry->d_parent->d_inode;
360 if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) 361 if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
361 goto fail; 362 goto fail;
362 root = (struct dx_root *) bh->b_data; 363 root = (struct dx_root *) bh->b_data;
@@ -372,8 +373,8 @@ dx_probe(struct dentry *dentry, struct inode *dir,
372 } 373 }
373 hinfo->hash_version = root->info.hash_version; 374 hinfo->hash_version = root->info.hash_version;
374 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; 375 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
375 if (dentry) 376 if (d_name)
376 ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); 377 ext4fs_dirhash(d_name->name, d_name->len, hinfo);
377 hash = hinfo->hash; 378 hash = hinfo->hash;
378 379
379 if (root->info.unused_flags & 1) { 380 if (root->info.unused_flags & 1) {
@@ -406,7 +407,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
406 goto fail; 407 goto fail;
407 } 408 }
408 409
409 dxtrace (printk("Look up %x", hash)); 410 dxtrace(printk("Look up %x", hash));
410 while (1) 411 while (1)
411 { 412 {
412 count = dx_get_count(entries); 413 count = dx_get_count(entries);
@@ -555,7 +556,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
555 0, &err))) 556 0, &err)))
556 return err; /* Failure */ 557 return err; /* Failure */
557 p++; 558 p++;
558 brelse (p->bh); 559 brelse(p->bh);
559 p->bh = bh; 560 p->bh = bh;
560 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; 561 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
561 } 562 }
@@ -593,7 +594,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
593 /* On error, skip the f_pos to the next block. */ 594 /* On error, skip the f_pos to the next block. */
594 dir_file->f_pos = (dir_file->f_pos | 595 dir_file->f_pos = (dir_file->f_pos |
595 (dir->i_sb->s_blocksize - 1)) + 1; 596 (dir->i_sb->s_blocksize - 1)) + 1;
596 brelse (bh); 597 brelse(bh);
597 return count; 598 return count;
598 } 599 }
599 ext4fs_dirhash(de->name, de->name_len, hinfo); 600 ext4fs_dirhash(de->name, de->name_len, hinfo);
@@ -635,8 +636,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
635 int ret, err; 636 int ret, err;
636 __u32 hashval; 637 __u32 hashval;
637 638
638 dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, 639 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
639 start_minor_hash)); 640 start_hash, start_minor_hash));
640 dir = dir_file->f_path.dentry->d_inode; 641 dir = dir_file->f_path.dentry->d_inode;
641 if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { 642 if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
642 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 643 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
@@ -648,7 +649,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
648 } 649 }
649 hinfo.hash = start_hash; 650 hinfo.hash = start_hash;
650 hinfo.minor_hash = 0; 651 hinfo.minor_hash = 0;
651 frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); 652 frame = dx_probe(NULL, dir, &hinfo, frames, &err);
652 if (!frame) 653 if (!frame)
653 return err; 654 return err;
654 655
@@ -694,8 +695,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
694 break; 695 break;
695 } 696 }
696 dx_release(frames); 697 dx_release(frames);
697 dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", 698 dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
698 count, *next_hash)); 699 "next hash: %x\n", count, *next_hash));
699 return count; 700 return count;
700errout: 701errout:
701 dx_release(frames); 702 dx_release(frames);
@@ -802,17 +803,17 @@ static inline int ext4_match (int len, const char * const name,
802/* 803/*
803 * Returns 0 if not found, -1 on failure, and 1 on success 804 * Returns 0 if not found, -1 on failure, and 1 on success
804 */ 805 */
805static inline int search_dirblock(struct buffer_head * bh, 806static inline int search_dirblock(struct buffer_head *bh,
806 struct inode *dir, 807 struct inode *dir,
807 struct dentry *dentry, 808 const struct qstr *d_name,
808 unsigned long offset, 809 unsigned long offset,
809 struct ext4_dir_entry_2 ** res_dir) 810 struct ext4_dir_entry_2 ** res_dir)
810{ 811{
811 struct ext4_dir_entry_2 * de; 812 struct ext4_dir_entry_2 * de;
812 char * dlimit; 813 char * dlimit;
813 int de_len; 814 int de_len;
814 const char *name = dentry->d_name.name; 815 const char *name = d_name->name;
815 int namelen = dentry->d_name.len; 816 int namelen = d_name->len;
816 817
817 de = (struct ext4_dir_entry_2 *) bh->b_data; 818 de = (struct ext4_dir_entry_2 *) bh->b_data;
818 dlimit = bh->b_data + dir->i_sb->s_blocksize; 819 dlimit = bh->b_data + dir->i_sb->s_blocksize;
@@ -851,12 +852,13 @@ static inline int search_dirblock(struct buffer_head * bh,
851 * The returned buffer_head has ->b_count elevated. The caller is expected 852 * The returned buffer_head has ->b_count elevated. The caller is expected
852 * to brelse() it when appropriate. 853 * to brelse() it when appropriate.
853 */ 854 */
854static struct buffer_head * ext4_find_entry (struct dentry *dentry, 855static struct buffer_head * ext4_find_entry (struct inode *dir,
856 const struct qstr *d_name,
855 struct ext4_dir_entry_2 ** res_dir) 857 struct ext4_dir_entry_2 ** res_dir)
856{ 858{
857 struct super_block * sb; 859 struct super_block *sb;
858 struct buffer_head * bh_use[NAMEI_RA_SIZE]; 860 struct buffer_head *bh_use[NAMEI_RA_SIZE];
859 struct buffer_head * bh, *ret = NULL; 861 struct buffer_head *bh, *ret = NULL;
860 ext4_lblk_t start, block, b; 862 ext4_lblk_t start, block, b;
861 int ra_max = 0; /* Number of bh's in the readahead 863 int ra_max = 0; /* Number of bh's in the readahead
862 buffer, bh_use[] */ 864 buffer, bh_use[] */
@@ -865,16 +867,15 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
865 int num = 0; 867 int num = 0;
866 ext4_lblk_t nblocks; 868 ext4_lblk_t nblocks;
867 int i, err; 869 int i, err;
868 struct inode *dir = dentry->d_parent->d_inode;
869 int namelen; 870 int namelen;
870 871
871 *res_dir = NULL; 872 *res_dir = NULL;
872 sb = dir->i_sb; 873 sb = dir->i_sb;
873 namelen = dentry->d_name.len; 874 namelen = d_name->len;
874 if (namelen > EXT4_NAME_LEN) 875 if (namelen > EXT4_NAME_LEN)
875 return NULL; 876 return NULL;
876 if (is_dx(dir)) { 877 if (is_dx(dir)) {
877 bh = ext4_dx_find_entry(dentry, res_dir, &err); 878 bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
878 /* 879 /*
879 * On success, or if the error was file not found, 880 * On success, or if the error was file not found,
880 * return. Otherwise, fall back to doing a search the 881 * return. Otherwise, fall back to doing a search the
@@ -882,7 +883,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
882 */ 883 */
883 if (bh || (err != ERR_BAD_DX_DIR)) 884 if (bh || (err != ERR_BAD_DX_DIR))
884 return bh; 885 return bh;
885 dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); 886 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
887 "falling back\n"));
886 } 888 }
887 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); 889 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
888 start = EXT4_I(dir)->i_dir_start_lookup; 890 start = EXT4_I(dir)->i_dir_start_lookup;
@@ -926,7 +928,7 @@ restart:
926 brelse(bh); 928 brelse(bh);
927 goto next; 929 goto next;
928 } 930 }
929 i = search_dirblock(bh, dir, dentry, 931 i = search_dirblock(bh, dir, d_name,
930 block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); 932 block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
931 if (i == 1) { 933 if (i == 1) {
932 EXT4_I(dir)->i_dir_start_lookup = block; 934 EXT4_I(dir)->i_dir_start_lookup = block;
@@ -956,11 +958,11 @@ restart:
956cleanup_and_exit: 958cleanup_and_exit:
957 /* Clean up the read-ahead blocks */ 959 /* Clean up the read-ahead blocks */
958 for (; ra_ptr < ra_max; ra_ptr++) 960 for (; ra_ptr < ra_max; ra_ptr++)
959 brelse (bh_use[ra_ptr]); 961 brelse(bh_use[ra_ptr]);
960 return ret; 962 return ret;
961} 963}
962 964
963static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, 965static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
964 struct ext4_dir_entry_2 **res_dir, int *err) 966 struct ext4_dir_entry_2 **res_dir, int *err)
965{ 967{
966 struct super_block * sb; 968 struct super_block * sb;
@@ -971,14 +973,13 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
971 struct buffer_head *bh; 973 struct buffer_head *bh;
972 ext4_lblk_t block; 974 ext4_lblk_t block;
973 int retval; 975 int retval;
974 int namelen = dentry->d_name.len; 976 int namelen = d_name->len;
975 const u8 *name = dentry->d_name.name; 977 const u8 *name = d_name->name;
976 struct inode *dir = dentry->d_parent->d_inode;
977 978
978 sb = dir->i_sb; 979 sb = dir->i_sb;
979 /* NFS may look up ".." - look at dx_root directory block */ 980 /* NFS may look up ".." - look at dx_root directory block */
980 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ 981 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
981 if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) 982 if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
982 return NULL; 983 return NULL;
983 } else { 984 } else {
984 frame = frames; 985 frame = frames;
@@ -1010,7 +1011,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
1010 return bh; 1011 return bh;
1011 } 1012 }
1012 } 1013 }
1013 brelse (bh); 1014 brelse(bh);
1014 /* Check to see if we should continue to search */ 1015 /* Check to see if we should continue to search */
1015 retval = ext4_htree_next_block(dir, hash, frame, 1016 retval = ext4_htree_next_block(dir, hash, frame,
1016 frames, NULL); 1017 frames, NULL);
@@ -1025,25 +1026,25 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
1025 1026
1026 *err = -ENOENT; 1027 *err = -ENOENT;
1027errout: 1028errout:
1028 dxtrace(printk("%s not found\n", name)); 1029 dxtrace(printk(KERN_DEBUG "%s not found\n", name));
1029 dx_release (frames); 1030 dx_release (frames);
1030 return NULL; 1031 return NULL;
1031} 1032}
1032 1033
1033static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 1034static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1034{ 1035{
1035 struct inode * inode; 1036 struct inode *inode;
1036 struct ext4_dir_entry_2 * de; 1037 struct ext4_dir_entry_2 *de;
1037 struct buffer_head * bh; 1038 struct buffer_head *bh;
1038 1039
1039 if (dentry->d_name.len > EXT4_NAME_LEN) 1040 if (dentry->d_name.len > EXT4_NAME_LEN)
1040 return ERR_PTR(-ENAMETOOLONG); 1041 return ERR_PTR(-ENAMETOOLONG);
1041 1042
1042 bh = ext4_find_entry(dentry, &de); 1043 bh = ext4_find_entry(dir, &dentry->d_name, &de);
1043 inode = NULL; 1044 inode = NULL;
1044 if (bh) { 1045 if (bh) {
1045 unsigned long ino = le32_to_cpu(de->inode); 1046 unsigned long ino = le32_to_cpu(de->inode);
1046 brelse (bh); 1047 brelse(bh);
1047 if (!ext4_valid_inum(dir->i_sb, ino)) { 1048 if (!ext4_valid_inum(dir->i_sb, ino)) {
1048 ext4_error(dir->i_sb, "ext4_lookup", 1049 ext4_error(dir->i_sb, "ext4_lookup",
1049 "bad inode number: %lu", ino); 1050 "bad inode number: %lu", ino);
@@ -1062,15 +1063,14 @@ struct dentry *ext4_get_parent(struct dentry *child)
1062 unsigned long ino; 1063 unsigned long ino;
1063 struct dentry *parent; 1064 struct dentry *parent;
1064 struct inode *inode; 1065 struct inode *inode;
1065 struct dentry dotdot; 1066 static const struct qstr dotdot = {
1067 .name = "..",
1068 .len = 2,
1069 };
1066 struct ext4_dir_entry_2 * de; 1070 struct ext4_dir_entry_2 * de;
1067 struct buffer_head *bh; 1071 struct buffer_head *bh;
1068 1072
1069 dotdot.d_name.name = ".."; 1073 bh = ext4_find_entry(child->d_inode, &dotdot, &de);
1070 dotdot.d_name.len = 2;
1071 dotdot.d_parent = child; /* confusing, isn't it! */
1072
1073 bh = ext4_find_entry(&dotdot, &de);
1074 inode = NULL; 1074 inode = NULL;
1075 if (!bh) 1075 if (!bh)
1076 return ERR_PTR(-ENOENT); 1076 return ERR_PTR(-ENOENT);
@@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1201 1201
1202 /* create map in the end of data2 block */ 1202 /* create map in the end of data2 block */
1203 map = (struct dx_map_entry *) (data2 + blocksize); 1203 map = (struct dx_map_entry *) (data2 + blocksize);
1204 count = dx_make_map ((struct ext4_dir_entry_2 *) data1, 1204 count = dx_make_map((struct ext4_dir_entry_2 *) data1,
1205 blocksize, hinfo, map); 1205 blocksize, hinfo, map);
1206 map -= count; 1206 map -= count;
1207 dx_sort_map (map, count); 1207 dx_sort_map(map, count);
1208 /* Split the existing block in the middle, size-wise */ 1208 /* Split the existing block in the middle, size-wise */
1209 size = 0; 1209 size = 0;
1210 move = 0; 1210 move = 0;
@@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1225 1225
1226 /* Fancy dance to stay within two buffers */ 1226 /* Fancy dance to stay within two buffers */
1227 de2 = dx_move_dirents(data1, data2, map + split, count - split); 1227 de2 = dx_move_dirents(data1, data2, map + split, count - split);
1228 de = dx_pack_dirents(data1,blocksize); 1228 de = dx_pack_dirents(data1, blocksize);
1229 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); 1229 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
1230 de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); 1230 de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
1231 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); 1231 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
@@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1237 swap(*bh, bh2); 1237 swap(*bh, bh2);
1238 de = de2; 1238 de = de2;
1239 } 1239 }
1240 dx_insert_block (frame, hash2 + continued, newblock); 1240 dx_insert_block(frame, hash2 + continued, newblock);
1241 err = ext4_journal_dirty_metadata (handle, bh2); 1241 err = ext4_journal_dirty_metadata(handle, bh2);
1242 if (err) 1242 if (err)
1243 goto journal_error; 1243 goto journal_error;
1244 err = ext4_journal_dirty_metadata (handle, frame->bh); 1244 err = ext4_journal_dirty_metadata(handle, frame->bh);
1245 if (err) 1245 if (err)
1246 goto journal_error; 1246 goto journal_error;
1247 brelse (bh2); 1247 brelse(bh2);
1248 dxtrace(dx_show_index ("frame", frame->entries)); 1248 dxtrace(dx_show_index("frame", frame->entries));
1249 return de; 1249 return de;
1250 1250
1251journal_error: 1251journal_error:
@@ -1271,7 +1271,7 @@ errout:
1271 */ 1271 */
1272static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, 1272static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1273 struct inode *inode, struct ext4_dir_entry_2 *de, 1273 struct inode *inode, struct ext4_dir_entry_2 *de,
1274 struct buffer_head * bh) 1274 struct buffer_head *bh)
1275{ 1275{
1276 struct inode *dir = dentry->d_parent->d_inode; 1276 struct inode *dir = dentry->d_parent->d_inode;
1277 const char *name = dentry->d_name.name; 1277 const char *name = dentry->d_name.name;
@@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1288 while ((char *) de <= top) { 1288 while ((char *) de <= top) {
1289 if (!ext4_check_dir_entry("ext4_add_entry", dir, de, 1289 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
1290 bh, offset)) { 1290 bh, offset)) {
1291 brelse (bh); 1291 brelse(bh);
1292 return -EIO; 1292 return -EIO;
1293 } 1293 }
1294 if (ext4_match (namelen, name, de)) { 1294 if (ext4_match(namelen, name, de)) {
1295 brelse (bh); 1295 brelse(bh);
1296 return -EEXIST; 1296 return -EEXIST;
1297 } 1297 }
1298 nlen = EXT4_DIR_REC_LEN(de->name_len); 1298 nlen = EXT4_DIR_REC_LEN(de->name_len);
@@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1329 } else 1329 } else
1330 de->inode = 0; 1330 de->inode = 0;
1331 de->name_len = namelen; 1331 de->name_len = namelen;
1332 memcpy (de->name, name, namelen); 1332 memcpy(de->name, name, namelen);
1333 /* 1333 /*
1334 * XXX shouldn't update any times until successful 1334 * XXX shouldn't update any times until successful
1335 * completion of syscall, but too many callers depend 1335 * completion of syscall, but too many callers depend
@@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1377 struct fake_dirent *fde; 1377 struct fake_dirent *fde;
1378 1378
1379 blocksize = dir->i_sb->s_blocksize; 1379 blocksize = dir->i_sb->s_blocksize;
1380 dxtrace(printk("Creating index\n")); 1380 dxtrace(printk(KERN_DEBUG "Creating index\n"));
1381 retval = ext4_journal_get_write_access(handle, bh); 1381 retval = ext4_journal_get_write_access(handle, bh);
1382 if (retval) { 1382 if (retval) {
1383 ext4_std_error(dir->i_sb, retval); 1383 ext4_std_error(dir->i_sb, retval);
@@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1386 } 1386 }
1387 root = (struct dx_root *) bh->b_data; 1387 root = (struct dx_root *) bh->b_data;
1388 1388
1389 bh2 = ext4_append (handle, dir, &block, &retval); 1389 bh2 = ext4_append(handle, dir, &block, &retval);
1390 if (!(bh2)) { 1390 if (!(bh2)) {
1391 brelse(bh); 1391 brelse(bh);
1392 return retval; 1392 return retval;
@@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1412 root->info.info_length = sizeof(root->info); 1412 root->info.info_length = sizeof(root->info);
1413 root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 1413 root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
1414 entries = root->entries; 1414 entries = root->entries;
1415 dx_set_block (entries, 1); 1415 dx_set_block(entries, 1);
1416 dx_set_count (entries, 1); 1416 dx_set_count(entries, 1);
1417 dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); 1417 dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
1418 1418
1419 /* Initialize as for dx_probe */ 1419 /* Initialize as for dx_probe */
1420 hinfo.hash_version = root->info.hash_version; 1420 hinfo.hash_version = root->info.hash_version;
@@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1443 * may not sleep between calling this and putting something into 1443 * may not sleep between calling this and putting something into
1444 * the entry, as someone else might have used it while you slept. 1444 * the entry, as someone else might have used it while you slept.
1445 */ 1445 */
1446static int ext4_add_entry (handle_t *handle, struct dentry *dentry, 1446static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1447 struct inode *inode) 1447 struct inode *inode)
1448{ 1448{
1449 struct inode *dir = dentry->d_parent->d_inode; 1449 struct inode *dir = dentry->d_parent->d_inode;
1450 unsigned long offset; 1450 unsigned long offset;
1451 struct buffer_head * bh; 1451 struct buffer_head *bh;
1452 struct ext4_dir_entry_2 *de; 1452 struct ext4_dir_entry_2 *de;
1453 struct super_block * sb; 1453 struct super_block *sb;
1454 int retval; 1454 int retval;
1455 int dx_fallback=0; 1455 int dx_fallback=0;
1456 unsigned blocksize; 1456 unsigned blocksize;
@@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1500 struct dx_frame frames[2], *frame; 1500 struct dx_frame frames[2], *frame;
1501 struct dx_entry *entries, *at; 1501 struct dx_entry *entries, *at;
1502 struct dx_hash_info hinfo; 1502 struct dx_hash_info hinfo;
1503 struct buffer_head * bh; 1503 struct buffer_head *bh;
1504 struct inode *dir = dentry->d_parent->d_inode; 1504 struct inode *dir = dentry->d_parent->d_inode;
1505 struct super_block * sb = dir->i_sb; 1505 struct super_block *sb = dir->i_sb;
1506 struct ext4_dir_entry_2 *de; 1506 struct ext4_dir_entry_2 *de;
1507 int err; 1507 int err;
1508 1508
1509 frame = dx_probe(dentry, NULL, &hinfo, frames, &err); 1509 frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
1510 if (!frame) 1510 if (!frame)
1511 return err; 1511 return err;
1512 entries = frame->entries; 1512 entries = frame->entries;
@@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1527 } 1527 }
1528 1528
1529 /* Block full, should compress but for now just split */ 1529 /* Block full, should compress but for now just split */
1530 dxtrace(printk("using %u of %u node entries\n", 1530 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
1531 dx_get_count(entries), dx_get_limit(entries))); 1531 dx_get_count(entries), dx_get_limit(entries)));
1532 /* Need to split index? */ 1532 /* Need to split index? */
1533 if (dx_get_count(entries) == dx_get_limit(entries)) { 1533 if (dx_get_count(entries) == dx_get_limit(entries)) {
@@ -1559,7 +1559,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1559 if (levels) { 1559 if (levels) {
1560 unsigned icount1 = icount/2, icount2 = icount - icount1; 1560 unsigned icount1 = icount/2, icount2 = icount - icount1;
1561 unsigned hash2 = dx_get_hash(entries + icount1); 1561 unsigned hash2 = dx_get_hash(entries + icount1);
1562 dxtrace(printk("Split index %i/%i\n", icount1, icount2)); 1562 dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
1563 icount1, icount2));
1563 1564
1564 BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ 1565 BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
1565 err = ext4_journal_get_write_access(handle, 1566 err = ext4_journal_get_write_access(handle,
@@ -1567,11 +1568,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1567 if (err) 1568 if (err)
1568 goto journal_error; 1569 goto journal_error;
1569 1570
1570 memcpy ((char *) entries2, (char *) (entries + icount1), 1571 memcpy((char *) entries2, (char *) (entries + icount1),
1571 icount2 * sizeof(struct dx_entry)); 1572 icount2 * sizeof(struct dx_entry));
1572 dx_set_count (entries, icount1); 1573 dx_set_count(entries, icount1);
1573 dx_set_count (entries2, icount2); 1574 dx_set_count(entries2, icount2);
1574 dx_set_limit (entries2, dx_node_limit(dir)); 1575 dx_set_limit(entries2, dx_node_limit(dir));
1575 1576
1576 /* Which index block gets the new entry? */ 1577 /* Which index block gets the new entry? */
1577 if (at - entries >= icount1) { 1578 if (at - entries >= icount1) {
@@ -1579,16 +1580,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1579 frame->entries = entries = entries2; 1580 frame->entries = entries = entries2;
1580 swap(frame->bh, bh2); 1581 swap(frame->bh, bh2);
1581 } 1582 }
1582 dx_insert_block (frames + 0, hash2, newblock); 1583 dx_insert_block(frames + 0, hash2, newblock);
1583 dxtrace(dx_show_index ("node", frames[1].entries)); 1584 dxtrace(dx_show_index("node", frames[1].entries));
1584 dxtrace(dx_show_index ("node", 1585 dxtrace(dx_show_index("node",
1585 ((struct dx_node *) bh2->b_data)->entries)); 1586 ((struct dx_node *) bh2->b_data)->entries));
1586 err = ext4_journal_dirty_metadata(handle, bh2); 1587 err = ext4_journal_dirty_metadata(handle, bh2);
1587 if (err) 1588 if (err)
1588 goto journal_error; 1589 goto journal_error;
1589 brelse (bh2); 1590 brelse (bh2);
1590 } else { 1591 } else {
1591 dxtrace(printk("Creating second level index...\n")); 1592 dxtrace(printk(KERN_DEBUG
1593 "Creating second level index...\n"));
1592 memcpy((char *) entries2, (char *) entries, 1594 memcpy((char *) entries2, (char *) entries,
1593 icount * sizeof(struct dx_entry)); 1595 icount * sizeof(struct dx_entry));
1594 dx_set_limit(entries2, dx_node_limit(dir)); 1596 dx_set_limit(entries2, dx_node_limit(dir));
@@ -1630,12 +1632,12 @@ cleanup:
1630 * ext4_delete_entry deletes a directory entry by merging it with the 1632 * ext4_delete_entry deletes a directory entry by merging it with the
1631 * previous entry 1633 * previous entry
1632 */ 1634 */
1633static int ext4_delete_entry (handle_t *handle, 1635static int ext4_delete_entry(handle_t *handle,
1634 struct inode * dir, 1636 struct inode *dir,
1635 struct ext4_dir_entry_2 * de_del, 1637 struct ext4_dir_entry_2 *de_del,
1636 struct buffer_head * bh) 1638 struct buffer_head *bh)
1637{ 1639{
1638 struct ext4_dir_entry_2 * de, * pde; 1640 struct ext4_dir_entry_2 *de, *pde;
1639 int i; 1641 int i;
1640 1642
1641 i = 0; 1643 i = 0;
@@ -1716,11 +1718,11 @@ static int ext4_add_nondir(handle_t *handle,
1716 * If the create succeeds, we fill in the inode information 1718 * If the create succeeds, we fill in the inode information
1717 * with d_instantiate(). 1719 * with d_instantiate().
1718 */ 1720 */
1719static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, 1721static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
1720 struct nameidata *nd) 1722 struct nameidata *nd)
1721{ 1723{
1722 handle_t *handle; 1724 handle_t *handle;
1723 struct inode * inode; 1725 struct inode *inode;
1724 int err, retries = 0; 1726 int err, retries = 0;
1725 1727
1726retry: 1728retry:
@@ -1747,8 +1749,8 @@ retry:
1747 return err; 1749 return err;
1748} 1750}
1749 1751
1750static int ext4_mknod (struct inode * dir, struct dentry *dentry, 1752static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1751 int mode, dev_t rdev) 1753 int mode, dev_t rdev)
1752{ 1754{
1753 handle_t *handle; 1755 handle_t *handle;
1754 struct inode *inode; 1756 struct inode *inode;
@@ -1767,11 +1769,11 @@ retry:
1767 if (IS_DIRSYNC(dir)) 1769 if (IS_DIRSYNC(dir))
1768 handle->h_sync = 1; 1770 handle->h_sync = 1;
1769 1771
1770 inode = ext4_new_inode (handle, dir, mode); 1772 inode = ext4_new_inode(handle, dir, mode);
1771 err = PTR_ERR(inode); 1773 err = PTR_ERR(inode);
1772 if (!IS_ERR(inode)) { 1774 if (!IS_ERR(inode)) {
1773 init_special_inode(inode, inode->i_mode, rdev); 1775 init_special_inode(inode, inode->i_mode, rdev);
1774#ifdef CONFIG_EXT4DEV_FS_XATTR 1776#ifdef CONFIG_EXT4_FS_XATTR
1775 inode->i_op = &ext4_special_inode_operations; 1777 inode->i_op = &ext4_special_inode_operations;
1776#endif 1778#endif
1777 err = ext4_add_nondir(handle, dentry, inode); 1779 err = ext4_add_nondir(handle, dentry, inode);
@@ -1782,12 +1784,12 @@ retry:
1782 return err; 1784 return err;
1783} 1785}
1784 1786
1785static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) 1787static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1786{ 1788{
1787 handle_t *handle; 1789 handle_t *handle;
1788 struct inode * inode; 1790 struct inode *inode;
1789 struct buffer_head * dir_block; 1791 struct buffer_head *dir_block;
1790 struct ext4_dir_entry_2 * de; 1792 struct ext4_dir_entry_2 *de;
1791 int err, retries = 0; 1793 int err, retries = 0;
1792 1794
1793 if (EXT4_DIR_LINK_MAX(dir)) 1795 if (EXT4_DIR_LINK_MAX(dir))
@@ -1803,7 +1805,7 @@ retry:
1803 if (IS_DIRSYNC(dir)) 1805 if (IS_DIRSYNC(dir))
1804 handle->h_sync = 1; 1806 handle->h_sync = 1;
1805 1807
1806 inode = ext4_new_inode (handle, dir, S_IFDIR | mode); 1808 inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
1807 err = PTR_ERR(inode); 1809 err = PTR_ERR(inode);
1808 if (IS_ERR(inode)) 1810 if (IS_ERR(inode))
1809 goto out_stop; 1811 goto out_stop;
@@ -1811,7 +1813,7 @@ retry:
1811 inode->i_op = &ext4_dir_inode_operations; 1813 inode->i_op = &ext4_dir_inode_operations;
1812 inode->i_fop = &ext4_dir_operations; 1814 inode->i_fop = &ext4_dir_operations;
1813 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1815 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
1814 dir_block = ext4_bread (handle, inode, 0, 1, &err); 1816 dir_block = ext4_bread(handle, inode, 0, 1, &err);
1815 if (!dir_block) 1817 if (!dir_block)
1816 goto out_clear_inode; 1818 goto out_clear_inode;
1817 BUFFER_TRACE(dir_block, "get_write_access"); 1819 BUFFER_TRACE(dir_block, "get_write_access");
@@ -1820,26 +1822,26 @@ retry:
1820 de->inode = cpu_to_le32(inode->i_ino); 1822 de->inode = cpu_to_le32(inode->i_ino);
1821 de->name_len = 1; 1823 de->name_len = 1;
1822 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); 1824 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
1823 strcpy (de->name, "."); 1825 strcpy(de->name, ".");
1824 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1826 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1825 de = ext4_next_entry(de); 1827 de = ext4_next_entry(de);
1826 de->inode = cpu_to_le32(dir->i_ino); 1828 de->inode = cpu_to_le32(dir->i_ino);
1827 de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - 1829 de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
1828 EXT4_DIR_REC_LEN(1)); 1830 EXT4_DIR_REC_LEN(1));
1829 de->name_len = 2; 1831 de->name_len = 2;
1830 strcpy (de->name, ".."); 1832 strcpy(de->name, "..");
1831 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1833 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1832 inode->i_nlink = 2; 1834 inode->i_nlink = 2;
1833 BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); 1835 BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
1834 ext4_journal_dirty_metadata(handle, dir_block); 1836 ext4_journal_dirty_metadata(handle, dir_block);
1835 brelse (dir_block); 1837 brelse(dir_block);
1836 ext4_mark_inode_dirty(handle, inode); 1838 ext4_mark_inode_dirty(handle, inode);
1837 err = ext4_add_entry (handle, dentry, inode); 1839 err = ext4_add_entry(handle, dentry, inode);
1838 if (err) { 1840 if (err) {
1839out_clear_inode: 1841out_clear_inode:
1840 clear_nlink(inode); 1842 clear_nlink(inode);
1841 ext4_mark_inode_dirty(handle, inode); 1843 ext4_mark_inode_dirty(handle, inode);
1842 iput (inode); 1844 iput(inode);
1843 goto out_stop; 1845 goto out_stop;
1844 } 1846 }
1845 ext4_inc_count(handle, dir); 1847 ext4_inc_count(handle, dir);
@@ -1856,17 +1858,17 @@ out_stop:
1856/* 1858/*
1857 * routine to check that the specified directory is empty (for rmdir) 1859 * routine to check that the specified directory is empty (for rmdir)
1858 */ 1860 */
1859static int empty_dir (struct inode * inode) 1861static int empty_dir(struct inode *inode)
1860{ 1862{
1861 unsigned long offset; 1863 unsigned long offset;
1862 struct buffer_head * bh; 1864 struct buffer_head *bh;
1863 struct ext4_dir_entry_2 * de, * de1; 1865 struct ext4_dir_entry_2 *de, *de1;
1864 struct super_block * sb; 1866 struct super_block *sb;
1865 int err = 0; 1867 int err = 0;
1866 1868
1867 sb = inode->i_sb; 1869 sb = inode->i_sb;
1868 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1870 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1869 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { 1871 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
1870 if (err) 1872 if (err)
1871 ext4_error(inode->i_sb, __func__, 1873 ext4_error(inode->i_sb, __func__,
1872 "error %d reading directory #%lu offset 0", 1874 "error %d reading directory #%lu offset 0",
@@ -1881,23 +1883,23 @@ static int empty_dir (struct inode * inode)
1881 de1 = ext4_next_entry(de); 1883 de1 = ext4_next_entry(de);
1882 if (le32_to_cpu(de->inode) != inode->i_ino || 1884 if (le32_to_cpu(de->inode) != inode->i_ino ||
1883 !le32_to_cpu(de1->inode) || 1885 !le32_to_cpu(de1->inode) ||
1884 strcmp (".", de->name) || 1886 strcmp(".", de->name) ||
1885 strcmp ("..", de1->name)) { 1887 strcmp("..", de1->name)) {
1886 ext4_warning (inode->i_sb, "empty_dir", 1888 ext4_warning(inode->i_sb, "empty_dir",
1887 "bad directory (dir #%lu) - no `.' or `..'", 1889 "bad directory (dir #%lu) - no `.' or `..'",
1888 inode->i_ino); 1890 inode->i_ino);
1889 brelse (bh); 1891 brelse(bh);
1890 return 1; 1892 return 1;
1891 } 1893 }
1892 offset = ext4_rec_len_from_disk(de->rec_len) + 1894 offset = ext4_rec_len_from_disk(de->rec_len) +
1893 ext4_rec_len_from_disk(de1->rec_len); 1895 ext4_rec_len_from_disk(de1->rec_len);
1894 de = ext4_next_entry(de1); 1896 de = ext4_next_entry(de1);
1895 while (offset < inode->i_size ) { 1897 while (offset < inode->i_size) {
1896 if (!bh || 1898 if (!bh ||
1897 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { 1899 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
1898 err = 0; 1900 err = 0;
1899 brelse (bh); 1901 brelse(bh);
1900 bh = ext4_bread (NULL, inode, 1902 bh = ext4_bread(NULL, inode,
1901 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1903 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1902 if (!bh) { 1904 if (!bh) {
1903 if (err) 1905 if (err)
@@ -1917,13 +1919,13 @@ static int empty_dir (struct inode * inode)
1917 continue; 1919 continue;
1918 } 1920 }
1919 if (le32_to_cpu(de->inode)) { 1921 if (le32_to_cpu(de->inode)) {
1920 brelse (bh); 1922 brelse(bh);
1921 return 0; 1923 return 0;
1922 } 1924 }
1923 offset += ext4_rec_len_from_disk(de->rec_len); 1925 offset += ext4_rec_len_from_disk(de->rec_len);
1924 de = ext4_next_entry(de); 1926 de = ext4_next_entry(de);
1925 } 1927 }
1926 brelse (bh); 1928 brelse(bh);
1927 return 1; 1929 return 1;
1928} 1930}
1929 1931
@@ -1954,8 +1956,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
1954 * ->i_nlink. For, say it, character device. Not a regular file, 1956 * ->i_nlink. For, say it, character device. Not a regular file,
1955 * not a directory, not a symlink and ->i_nlink > 0. 1957 * not a directory, not a symlink and ->i_nlink > 0.
1956 */ 1958 */
1957 J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1959 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1958 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); 1960 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
1959 1961
1960 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); 1962 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
1961 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 1963 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
@@ -2069,12 +2071,12 @@ out_brelse:
2069 goto out_err; 2071 goto out_err;
2070} 2072}
2071 2073
2072static int ext4_rmdir (struct inode * dir, struct dentry *dentry) 2074static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2073{ 2075{
2074 int retval; 2076 int retval;
2075 struct inode * inode; 2077 struct inode *inode;
2076 struct buffer_head * bh; 2078 struct buffer_head *bh;
2077 struct ext4_dir_entry_2 * de; 2079 struct ext4_dir_entry_2 *de;
2078 handle_t *handle; 2080 handle_t *handle;
2079 2081
2080 /* Initialize quotas before so that eventual writes go in 2082 /* Initialize quotas before so that eventual writes go in
@@ -2085,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
2085 return PTR_ERR(handle); 2087 return PTR_ERR(handle);
2086 2088
2087 retval = -ENOENT; 2089 retval = -ENOENT;
2088 bh = ext4_find_entry (dentry, &de); 2090 bh = ext4_find_entry(dir, &dentry->d_name, &de);
2089 if (!bh) 2091 if (!bh)
2090 goto end_rmdir; 2092 goto end_rmdir;
2091 2093
@@ -2099,16 +2101,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
2099 goto end_rmdir; 2101 goto end_rmdir;
2100 2102
2101 retval = -ENOTEMPTY; 2103 retval = -ENOTEMPTY;
2102 if (!empty_dir (inode)) 2104 if (!empty_dir(inode))
2103 goto end_rmdir; 2105 goto end_rmdir;
2104 2106
2105 retval = ext4_delete_entry(handle, dir, de, bh); 2107 retval = ext4_delete_entry(handle, dir, de, bh);
2106 if (retval) 2108 if (retval)
2107 goto end_rmdir; 2109 goto end_rmdir;
2108 if (!EXT4_DIR_LINK_EMPTY(inode)) 2110 if (!EXT4_DIR_LINK_EMPTY(inode))
2109 ext4_warning (inode->i_sb, "ext4_rmdir", 2111 ext4_warning(inode->i_sb, "ext4_rmdir",
2110 "empty directory has too many links (%d)", 2112 "empty directory has too many links (%d)",
2111 inode->i_nlink); 2113 inode->i_nlink);
2112 inode->i_version++; 2114 inode->i_version++;
2113 clear_nlink(inode); 2115 clear_nlink(inode);
2114 /* There's no need to set i_disksize: the fact that i_nlink is 2116 /* There's no need to set i_disksize: the fact that i_nlink is
@@ -2124,16 +2126,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
2124 2126
2125end_rmdir: 2127end_rmdir:
2126 ext4_journal_stop(handle); 2128 ext4_journal_stop(handle);
2127 brelse (bh); 2129 brelse(bh);
2128 return retval; 2130 return retval;
2129} 2131}
2130 2132
2131static int ext4_unlink(struct inode * dir, struct dentry *dentry) 2133static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2132{ 2134{
2133 int retval; 2135 int retval;
2134 struct inode * inode; 2136 struct inode *inode;
2135 struct buffer_head * bh; 2137 struct buffer_head *bh;
2136 struct ext4_dir_entry_2 * de; 2138 struct ext4_dir_entry_2 *de;
2137 handle_t *handle; 2139 handle_t *handle;
2138 2140
2139 /* Initialize quotas before so that eventual writes go 2141 /* Initialize quotas before so that eventual writes go
@@ -2147,7 +2149,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
2147 handle->h_sync = 1; 2149 handle->h_sync = 1;
2148 2150
2149 retval = -ENOENT; 2151 retval = -ENOENT;
2150 bh = ext4_find_entry (dentry, &de); 2152 bh = ext4_find_entry(dir, &dentry->d_name, &de);
2151 if (!bh) 2153 if (!bh)
2152 goto end_unlink; 2154 goto end_unlink;
2153 2155
@@ -2158,9 +2160,9 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
2158 goto end_unlink; 2160 goto end_unlink;
2159 2161
2160 if (!inode->i_nlink) { 2162 if (!inode->i_nlink) {
2161 ext4_warning (inode->i_sb, "ext4_unlink", 2163 ext4_warning(inode->i_sb, "ext4_unlink",
2162 "Deleting nonexistent file (%lu), %d", 2164 "Deleting nonexistent file (%lu), %d",
2163 inode->i_ino, inode->i_nlink); 2165 inode->i_ino, inode->i_nlink);
2164 inode->i_nlink = 1; 2166 inode->i_nlink = 1;
2165 } 2167 }
2166 retval = ext4_delete_entry(handle, dir, de, bh); 2168 retval = ext4_delete_entry(handle, dir, de, bh);
@@ -2178,15 +2180,15 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
2178 2180
2179end_unlink: 2181end_unlink:
2180 ext4_journal_stop(handle); 2182 ext4_journal_stop(handle);
2181 brelse (bh); 2183 brelse(bh);
2182 return retval; 2184 return retval;
2183} 2185}
2184 2186
2185static int ext4_symlink (struct inode * dir, 2187static int ext4_symlink(struct inode *dir,
2186 struct dentry *dentry, const char * symname) 2188 struct dentry *dentry, const char *symname)
2187{ 2189{
2188 handle_t *handle; 2190 handle_t *handle;
2189 struct inode * inode; 2191 struct inode *inode;
2190 int l, err, retries = 0; 2192 int l, err, retries = 0;
2191 2193
2192 l = strlen(symname)+1; 2194 l = strlen(symname)+1;
@@ -2203,12 +2205,12 @@ retry:
2203 if (IS_DIRSYNC(dir)) 2205 if (IS_DIRSYNC(dir))
2204 handle->h_sync = 1; 2206 handle->h_sync = 1;
2205 2207
2206 inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); 2208 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
2207 err = PTR_ERR(inode); 2209 err = PTR_ERR(inode);
2208 if (IS_ERR(inode)) 2210 if (IS_ERR(inode))
2209 goto out_stop; 2211 goto out_stop;
2210 2212
2211 if (l > sizeof (EXT4_I(inode)->i_data)) { 2213 if (l > sizeof(EXT4_I(inode)->i_data)) {
2212 inode->i_op = &ext4_symlink_inode_operations; 2214 inode->i_op = &ext4_symlink_inode_operations;
2213 ext4_set_aops(inode); 2215 ext4_set_aops(inode);
2214 /* 2216 /*
@@ -2221,14 +2223,14 @@ retry:
2221 if (err) { 2223 if (err) {
2222 clear_nlink(inode); 2224 clear_nlink(inode);
2223 ext4_mark_inode_dirty(handle, inode); 2225 ext4_mark_inode_dirty(handle, inode);
2224 iput (inode); 2226 iput(inode);
2225 goto out_stop; 2227 goto out_stop;
2226 } 2228 }
2227 } else { 2229 } else {
2228 /* clear the extent format for fast symlink */ 2230 /* clear the extent format for fast symlink */
2229 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; 2231 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
2230 inode->i_op = &ext4_fast_symlink_inode_operations; 2232 inode->i_op = &ext4_fast_symlink_inode_operations;
2231 memcpy((char*)&EXT4_I(inode)->i_data,symname,l); 2233 memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
2232 inode->i_size = l-1; 2234 inode->i_size = l-1;
2233 } 2235 }
2234 EXT4_I(inode)->i_disksize = inode->i_size; 2236 EXT4_I(inode)->i_disksize = inode->i_size;
@@ -2240,8 +2242,8 @@ out_stop:
2240 return err; 2242 return err;
2241} 2243}
2242 2244
2243static int ext4_link (struct dentry * old_dentry, 2245static int ext4_link(struct dentry *old_dentry,
2244 struct inode * dir, struct dentry *dentry) 2246 struct inode *dir, struct dentry *dentry)
2245{ 2247{
2246 handle_t *handle; 2248 handle_t *handle;
2247 struct inode *inode = old_dentry->d_inode; 2249 struct inode *inode = old_dentry->d_inode;
@@ -2284,13 +2286,13 @@ retry:
2284 * Anybody can rename anything with this: the permission checks are left to the 2286 * Anybody can rename anything with this: the permission checks are left to the
2285 * higher-level routines. 2287 * higher-level routines.
2286 */ 2288 */
2287static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, 2289static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2288 struct inode * new_dir,struct dentry *new_dentry) 2290 struct inode *new_dir, struct dentry *new_dentry)
2289{ 2291{
2290 handle_t *handle; 2292 handle_t *handle;
2291 struct inode * old_inode, * new_inode; 2293 struct inode *old_inode, *new_inode;
2292 struct buffer_head * old_bh, * new_bh, * dir_bh; 2294 struct buffer_head *old_bh, *new_bh, *dir_bh;
2293 struct ext4_dir_entry_2 * old_de, * new_de; 2295 struct ext4_dir_entry_2 *old_de, *new_de;
2294 int retval; 2296 int retval;
2295 2297
2296 old_bh = new_bh = dir_bh = NULL; 2298 old_bh = new_bh = dir_bh = NULL;
@@ -2308,7 +2310,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2308 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) 2310 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
2309 handle->h_sync = 1; 2311 handle->h_sync = 1;
2310 2312
2311 old_bh = ext4_find_entry (old_dentry, &old_de); 2313 old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
2312 /* 2314 /*
2313 * Check for inode number is _not_ due to possible IO errors. 2315 * Check for inode number is _not_ due to possible IO errors.
2314 * We might rmdir the source, keep it as pwd of some process 2316 * We might rmdir the source, keep it as pwd of some process
@@ -2321,32 +2323,32 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2321 goto end_rename; 2323 goto end_rename;
2322 2324
2323 new_inode = new_dentry->d_inode; 2325 new_inode = new_dentry->d_inode;
2324 new_bh = ext4_find_entry (new_dentry, &new_de); 2326 new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de);
2325 if (new_bh) { 2327 if (new_bh) {
2326 if (!new_inode) { 2328 if (!new_inode) {
2327 brelse (new_bh); 2329 brelse(new_bh);
2328 new_bh = NULL; 2330 new_bh = NULL;
2329 } 2331 }
2330 } 2332 }
2331 if (S_ISDIR(old_inode->i_mode)) { 2333 if (S_ISDIR(old_inode->i_mode)) {
2332 if (new_inode) { 2334 if (new_inode) {
2333 retval = -ENOTEMPTY; 2335 retval = -ENOTEMPTY;
2334 if (!empty_dir (new_inode)) 2336 if (!empty_dir(new_inode))
2335 goto end_rename; 2337 goto end_rename;
2336 } 2338 }
2337 retval = -EIO; 2339 retval = -EIO;
2338 dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); 2340 dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
2339 if (!dir_bh) 2341 if (!dir_bh)
2340 goto end_rename; 2342 goto end_rename;
2341 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) 2343 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
2342 goto end_rename; 2344 goto end_rename;
2343 retval = -EMLINK; 2345 retval = -EMLINK;
2344 if (!new_inode && new_dir!=old_dir && 2346 if (!new_inode && new_dir != old_dir &&
2345 new_dir->i_nlink >= EXT4_LINK_MAX) 2347 new_dir->i_nlink >= EXT4_LINK_MAX)
2346 goto end_rename; 2348 goto end_rename;
2347 } 2349 }
2348 if (!new_bh) { 2350 if (!new_bh) {
2349 retval = ext4_add_entry (handle, new_dentry, old_inode); 2351 retval = ext4_add_entry(handle, new_dentry, old_inode);
2350 if (retval) 2352 if (retval)
2351 goto end_rename; 2353 goto end_rename;
2352 } else { 2354 } else {
@@ -2388,7 +2390,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2388 struct buffer_head *old_bh2; 2390 struct buffer_head *old_bh2;
2389 struct ext4_dir_entry_2 *old_de2; 2391 struct ext4_dir_entry_2 *old_de2;
2390 2392
2391 old_bh2 = ext4_find_entry(old_dentry, &old_de2); 2393 old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2);
2392 if (old_bh2) { 2394 if (old_bh2) {
2393 retval = ext4_delete_entry(handle, old_dir, 2395 retval = ext4_delete_entry(handle, old_dir,
2394 old_de2, old_bh2); 2396 old_de2, old_bh2);
@@ -2433,9 +2435,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2433 retval = 0; 2435 retval = 0;
2434 2436
2435end_rename: 2437end_rename:
2436 brelse (dir_bh); 2438 brelse(dir_bh);
2437 brelse (old_bh); 2439 brelse(old_bh);
2438 brelse (new_bh); 2440 brelse(new_bh);
2439 ext4_journal_stop(handle); 2441 ext4_journal_stop(handle);
2440 return retval; 2442 return retval;
2441} 2443}
@@ -2454,7 +2456,7 @@ const struct inode_operations ext4_dir_inode_operations = {
2454 .mknod = ext4_mknod, 2456 .mknod = ext4_mknod,
2455 .rename = ext4_rename, 2457 .rename = ext4_rename,
2456 .setattr = ext4_setattr, 2458 .setattr = ext4_setattr,
2457#ifdef CONFIG_EXT4DEV_FS_XATTR 2459#ifdef CONFIG_EXT4_FS_XATTR
2458 .setxattr = generic_setxattr, 2460 .setxattr = generic_setxattr,
2459 .getxattr = generic_getxattr, 2461 .getxattr = generic_getxattr,
2460 .listxattr = ext4_listxattr, 2462 .listxattr = ext4_listxattr,
@@ -2465,7 +2467,7 @@ const struct inode_operations ext4_dir_inode_operations = {
2465 2467
2466const struct inode_operations ext4_special_inode_operations = { 2468const struct inode_operations ext4_special_inode_operations = {
2467 .setattr = ext4_setattr, 2469 .setattr = ext4_setattr,
2468#ifdef CONFIG_EXT4DEV_FS_XATTR 2470#ifdef CONFIG_EXT4_FS_XATTR
2469 .setxattr = generic_setxattr, 2471 .setxattr = generic_setxattr,
2470 .getxattr = generic_getxattr, 2472 .getxattr = generic_getxattr,
2471 .listxattr = ext4_listxattr, 2473 .listxattr = ext4_listxattr,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b3d35604ea18..b6ec1843a015 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", 416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
417 gdb_num); 417 gdb_num);
418 418
419 /* 419 /*
420 * If we are not using the primary superblock/GDT copy don't resize, 420 * If we are not using the primary superblock/GDT copy don't resize,
421 * because the user tools have no way of handling this. Probably a 421 * because the user tools have no way of handling this. Probably a
422 * bad time to do it anyways. 422 * bad time to do it anyways.
423 */ 423 */
@@ -870,11 +870,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
870 * We can allocate memory for mb_alloc based on the new group 870 * We can allocate memory for mb_alloc based on the new group
871 * descriptor 871 * descriptor
872 */ 872 */
873 if (test_opt(sb, MBALLOC)) { 873 err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
874 err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); 874 if (err)
875 if (err) 875 goto exit_journal;
876 goto exit_journal; 876
877 }
878 /* 877 /*
879 * Make the new blocks and inodes valid next. We do this before 878 * Make the new blocks and inodes valid next. We do this before
880 * increasing the group count so that once the group is enabled, 879 * increasing the group count so that once the group is enabled,
@@ -929,6 +928,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
929 percpu_counter_add(&sbi->s_freeinodes_counter, 928 percpu_counter_add(&sbi->s_freeinodes_counter,
930 EXT4_INODES_PER_GROUP(sb)); 929 EXT4_INODES_PER_GROUP(sb));
931 930
931 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
932 ext4_group_t flex_group;
933 flex_group = ext4_flex_group(sbi, input->group);
934 sbi->s_flex_groups[flex_group].free_blocks +=
935 input->free_blocks_count;
936 sbi->s_flex_groups[flex_group].free_inodes +=
937 EXT4_INODES_PER_GROUP(sb);
938 }
939
932 ext4_journal_dirty_metadata(handle, sbi->s_sbh); 940 ext4_journal_dirty_metadata(handle, sbi->s_sbh);
933 sb->s_dirt = 1; 941 sb->s_dirt = 1;
934 942
@@ -964,7 +972,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
964 ext4_group_t o_groups_count; 972 ext4_group_t o_groups_count;
965 ext4_grpblk_t last; 973 ext4_grpblk_t last;
966 ext4_grpblk_t add; 974 ext4_grpblk_t add;
967 struct buffer_head * bh; 975 struct buffer_head *bh;
968 handle_t *handle; 976 handle_t *handle;
969 int err; 977 int err;
970 unsigned long freed_blocks; 978 unsigned long freed_blocks;
@@ -1077,8 +1085,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1077 /* 1085 /*
1078 * Mark mballoc pages as not up to date so that they will be updated 1086 * Mark mballoc pages as not up to date so that they will be updated
1079 * next time they are loaded by ext4_mb_load_buddy. 1087 * next time they are loaded by ext4_mb_load_buddy.
1088 *
1089 * XXX Bad, Bad, BAD!!! We should not be overloading the
1090 * Uptodate flag, particularly on the bitmap bh, as a way of
1091 * hinting to ext4_mb_load_buddy() that it needs to be
1092 * reloaded. A user could take an LVM snapshot, then do an
1093 * on-line fsck, and clear the uptodate flag, and this would
1094 * not be a bug in userspace, but a bug in the kernel. FIXME!!!
1080 */ 1095 */
1081 if (test_opt(sb, MBALLOC)) { 1096 {
1082 struct ext4_sb_info *sbi = EXT4_SB(sb); 1097 struct ext4_sb_info *sbi = EXT4_SB(sb);
1083 struct inode *inode = sbi->s_buddy_cache; 1098 struct inode *inode = sbi->s_buddy_cache;
1084 int blocks_per_page; 1099 int blocks_per_page;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 566344b926b7..0e661c569660 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -34,6 +34,8 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/quotaops.h> 35#include <linux/quotaops.h>
36#include <linux/seq_file.h> 36#include <linux/seq_file.h>
37#include <linux/proc_fs.h>
38#include <linux/marker.h>
37#include <linux/log2.h> 39#include <linux/log2.h>
38#include <linux/crc16.h> 40#include <linux/crc16.h>
39#include <asm/uaccess.h> 41#include <asm/uaccess.h>
@@ -45,6 +47,8 @@
45#include "namei.h" 47#include "namei.h"
46#include "group.h" 48#include "group.h"
47 49
50struct proc_dir_entry *ext4_proc_root;
51
48static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 52static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
49 unsigned long journal_devnum); 53 unsigned long journal_devnum);
50static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 54static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
@@ -508,10 +512,12 @@ static void ext4_put_super(struct super_block *sb)
508 if (!(sb->s_flags & MS_RDONLY)) { 512 if (!(sb->s_flags & MS_RDONLY)) {
509 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 513 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
510 es->s_state = cpu_to_le16(sbi->s_mount_state); 514 es->s_state = cpu_to_le16(sbi->s_mount_state);
511 BUFFER_TRACE(sbi->s_sbh, "marking dirty");
512 mark_buffer_dirty(sbi->s_sbh);
513 ext4_commit_super(sb, es, 1); 515 ext4_commit_super(sb, es, 1);
514 } 516 }
517 if (sbi->s_proc) {
518 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
519 remove_proc_entry(sb->s_id, ext4_proc_root);
520 }
515 521
516 for (i = 0; i < sbi->s_gdb_count; i++) 522 for (i = 0; i < sbi->s_gdb_count; i++)
517 brelse(sbi->s_group_desc[i]); 523 brelse(sbi->s_group_desc[i]);
@@ -520,6 +526,7 @@ static void ext4_put_super(struct super_block *sb)
520 percpu_counter_destroy(&sbi->s_freeblocks_counter); 526 percpu_counter_destroy(&sbi->s_freeblocks_counter);
521 percpu_counter_destroy(&sbi->s_freeinodes_counter); 527 percpu_counter_destroy(&sbi->s_freeinodes_counter);
522 percpu_counter_destroy(&sbi->s_dirs_counter); 528 percpu_counter_destroy(&sbi->s_dirs_counter);
529 percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
523 brelse(sbi->s_sbh); 530 brelse(sbi->s_sbh);
524#ifdef CONFIG_QUOTA 531#ifdef CONFIG_QUOTA
525 for (i = 0; i < MAXQUOTAS; i++) 532 for (i = 0; i < MAXQUOTAS; i++)
@@ -562,11 +569,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
562 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 569 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
563 if (!ei) 570 if (!ei)
564 return NULL; 571 return NULL;
565#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 572#ifdef CONFIG_EXT4_FS_POSIX_ACL
566 ei->i_acl = EXT4_ACL_NOT_CACHED; 573 ei->i_acl = EXT4_ACL_NOT_CACHED;
567 ei->i_default_acl = EXT4_ACL_NOT_CACHED; 574 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
568#endif 575#endif
569 ei->i_block_alloc_info = NULL;
570 ei->vfs_inode.i_version = 1; 576 ei->vfs_inode.i_version = 1;
571 ei->vfs_inode.i_data.writeback_index = 0; 577 ei->vfs_inode.i_data.writeback_index = 0;
572 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 578 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -599,7 +605,7 @@ static void init_once(void *foo)
599 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 605 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
600 606
601 INIT_LIST_HEAD(&ei->i_orphan); 607 INIT_LIST_HEAD(&ei->i_orphan);
602#ifdef CONFIG_EXT4DEV_FS_XATTR 608#ifdef CONFIG_EXT4_FS_XATTR
603 init_rwsem(&ei->xattr_sem); 609 init_rwsem(&ei->xattr_sem);
604#endif 610#endif
605 init_rwsem(&ei->i_data_sem); 611 init_rwsem(&ei->i_data_sem);
@@ -625,8 +631,7 @@ static void destroy_inodecache(void)
625 631
626static void ext4_clear_inode(struct inode *inode) 632static void ext4_clear_inode(struct inode *inode)
627{ 633{
628 struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; 634#ifdef CONFIG_EXT4_FS_POSIX_ACL
629#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
630 if (EXT4_I(inode)->i_acl && 635 if (EXT4_I(inode)->i_acl &&
631 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { 636 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
632 posix_acl_release(EXT4_I(inode)->i_acl); 637 posix_acl_release(EXT4_I(inode)->i_acl);
@@ -638,10 +643,7 @@ static void ext4_clear_inode(struct inode *inode)
638 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; 643 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
639 } 644 }
640#endif 645#endif
641 ext4_discard_reservation(inode); 646 ext4_discard_preallocations(inode);
642 EXT4_I(inode)->i_block_alloc_info = NULL;
643 if (unlikely(rsv))
644 kfree(rsv);
645 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 647 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
646 &EXT4_I(inode)->jinode); 648 &EXT4_I(inode)->jinode);
647} 649}
@@ -654,7 +656,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
654 656
655 if (sbi->s_jquota_fmt) 657 if (sbi->s_jquota_fmt)
656 seq_printf(seq, ",jqfmt=%s", 658 seq_printf(seq, ",jqfmt=%s",
657 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); 659 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
658 660
659 if (sbi->s_qf_names[USRQUOTA]) 661 if (sbi->s_qf_names[USRQUOTA])
660 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 662 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -718,7 +720,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
718 seq_puts(seq, ",debug"); 720 seq_puts(seq, ",debug");
719 if (test_opt(sb, OLDALLOC)) 721 if (test_opt(sb, OLDALLOC))
720 seq_puts(seq, ",oldalloc"); 722 seq_puts(seq, ",oldalloc");
721#ifdef CONFIG_EXT4DEV_FS_XATTR 723#ifdef CONFIG_EXT4_FS_XATTR
722 if (test_opt(sb, XATTR_USER) && 724 if (test_opt(sb, XATTR_USER) &&
723 !(def_mount_opts & EXT4_DEFM_XATTR_USER)) 725 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
724 seq_puts(seq, ",user_xattr"); 726 seq_puts(seq, ",user_xattr");
@@ -727,7 +729,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
727 seq_puts(seq, ",nouser_xattr"); 729 seq_puts(seq, ",nouser_xattr");
728 } 730 }
729#endif 731#endif
730#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 732#ifdef CONFIG_EXT4_FS_POSIX_ACL
731 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 733 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
732 seq_puts(seq, ",acl"); 734 seq_puts(seq, ",acl");
733 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 735 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
@@ -752,8 +754,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
752 seq_puts(seq, ",nobh"); 754 seq_puts(seq, ",nobh");
753 if (!test_opt(sb, EXTENTS)) 755 if (!test_opt(sb, EXTENTS))
754 seq_puts(seq, ",noextents"); 756 seq_puts(seq, ",noextents");
755 if (!test_opt(sb, MBALLOC))
756 seq_puts(seq, ",nomballoc");
757 if (test_opt(sb, I_VERSION)) 757 if (test_opt(sb, I_VERSION))
758 seq_puts(seq, ",i_version"); 758 seq_puts(seq, ",i_version");
759 if (!test_opt(sb, DELALLOC)) 759 if (!test_opt(sb, DELALLOC))
@@ -773,6 +773,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
773 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 773 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
774 seq_puts(seq, ",data=writeback"); 774 seq_puts(seq, ",data=writeback");
775 775
776 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
777 seq_printf(seq, ",inode_readahead_blks=%u",
778 sbi->s_inode_readahead_blks);
779
776 ext4_show_quota_options(seq, sb); 780 ext4_show_quota_options(seq, sb);
777 return 0; 781 return 0;
778} 782}
@@ -822,7 +826,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
822} 826}
823 827
824#ifdef CONFIG_QUOTA 828#ifdef CONFIG_QUOTA
825#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") 829#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
826#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 830#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
827 831
828static int ext4_dquot_initialize(struct inode *inode, int type); 832static int ext4_dquot_initialize(struct inode *inode, int type);
@@ -907,6 +911,7 @@ enum {
907 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 911 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
908 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, 912 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
909 Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, 913 Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
914 Opt_inode_readahead_blks
910}; 915};
911 916
912static match_table_t tokens = { 917static match_table_t tokens = {
@@ -967,6 +972,7 @@ static match_table_t tokens = {
967 {Opt_resize, "resize"}, 972 {Opt_resize, "resize"},
968 {Opt_delalloc, "delalloc"}, 973 {Opt_delalloc, "delalloc"},
969 {Opt_nodelalloc, "nodelalloc"}, 974 {Opt_nodelalloc, "nodelalloc"},
975 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
970 {Opt_err, NULL}, 976 {Opt_err, NULL},
971}; 977};
972 978
@@ -981,7 +987,7 @@ static ext4_fsblk_t get_sb_block(void **data)
981 /*todo: use simple_strtoll with >32bit ext4 */ 987 /*todo: use simple_strtoll with >32bit ext4 */
982 sb_block = simple_strtoul(options, &options, 0); 988 sb_block = simple_strtoul(options, &options, 0);
983 if (*options && *options != ',') { 989 if (*options && *options != ',') {
984 printk("EXT4-fs: Invalid sb specification: %s\n", 990 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
985 (char *) *data); 991 (char *) *data);
986 return 1; 992 return 1;
987 } 993 }
@@ -1072,7 +1078,7 @@ static int parse_options(char *options, struct super_block *sb,
1072 case Opt_orlov: 1078 case Opt_orlov:
1073 clear_opt(sbi->s_mount_opt, OLDALLOC); 1079 clear_opt(sbi->s_mount_opt, OLDALLOC);
1074 break; 1080 break;
1075#ifdef CONFIG_EXT4DEV_FS_XATTR 1081#ifdef CONFIG_EXT4_FS_XATTR
1076 case Opt_user_xattr: 1082 case Opt_user_xattr:
1077 set_opt(sbi->s_mount_opt, XATTR_USER); 1083 set_opt(sbi->s_mount_opt, XATTR_USER);
1078 break; 1084 break;
@@ -1082,10 +1088,11 @@ static int parse_options(char *options, struct super_block *sb,
1082#else 1088#else
1083 case Opt_user_xattr: 1089 case Opt_user_xattr:
1084 case Opt_nouser_xattr: 1090 case Opt_nouser_xattr:
1085 printk("EXT4 (no)user_xattr options not supported\n"); 1091 printk(KERN_ERR "EXT4 (no)user_xattr options "
1092 "not supported\n");
1086 break; 1093 break;
1087#endif 1094#endif
1088#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 1095#ifdef CONFIG_EXT4_FS_POSIX_ACL
1089 case Opt_acl: 1096 case Opt_acl:
1090 set_opt(sbi->s_mount_opt, POSIX_ACL); 1097 set_opt(sbi->s_mount_opt, POSIX_ACL);
1091 break; 1098 break;
@@ -1095,7 +1102,8 @@ static int parse_options(char *options, struct super_block *sb,
1095#else 1102#else
1096 case Opt_acl: 1103 case Opt_acl:
1097 case Opt_noacl: 1104 case Opt_noacl:
1098 printk("EXT4 (no)acl options not supported\n"); 1105 printk(KERN_ERR "EXT4 (no)acl options "
1106 "not supported\n");
1099 break; 1107 break;
1100#endif 1108#endif
1101 case Opt_reservation: 1109 case Opt_reservation:
@@ -1189,8 +1197,8 @@ set_qf_name:
1189 sb_any_quota_suspended(sb)) && 1197 sb_any_quota_suspended(sb)) &&
1190 !sbi->s_qf_names[qtype]) { 1198 !sbi->s_qf_names[qtype]) {
1191 printk(KERN_ERR 1199 printk(KERN_ERR
1192 "EXT4-fs: Cannot change journaled " 1200 "EXT4-fs: Cannot change journaled "
1193 "quota options when quota turned on.\n"); 1201 "quota options when quota turned on.\n");
1194 return 0; 1202 return 0;
1195 } 1203 }
1196 qname = match_strdup(&args[0]); 1204 qname = match_strdup(&args[0]);
@@ -1357,12 +1365,6 @@ set_qf_format:
1357 case Opt_nodelalloc: 1365 case Opt_nodelalloc:
1358 clear_opt(sbi->s_mount_opt, DELALLOC); 1366 clear_opt(sbi->s_mount_opt, DELALLOC);
1359 break; 1367 break;
1360 case Opt_mballoc:
1361 set_opt(sbi->s_mount_opt, MBALLOC);
1362 break;
1363 case Opt_nomballoc:
1364 clear_opt(sbi->s_mount_opt, MBALLOC);
1365 break;
1366 case Opt_stripe: 1368 case Opt_stripe:
1367 if (match_int(&args[0], &option)) 1369 if (match_int(&args[0], &option))
1368 return 0; 1370 return 0;
@@ -1373,6 +1375,13 @@ set_qf_format:
1373 case Opt_delalloc: 1375 case Opt_delalloc:
1374 set_opt(sbi->s_mount_opt, DELALLOC); 1376 set_opt(sbi->s_mount_opt, DELALLOC);
1375 break; 1377 break;
1378 case Opt_inode_readahead_blks:
1379 if (match_int(&args[0], &option))
1380 return 0;
1381 if (option < 0 || option > (1 << 30))
1382 return 0;
1383 sbi->s_inode_readahead_blks = option;
1384 break;
1376 default: 1385 default:
1377 printk(KERN_ERR 1386 printk(KERN_ERR
1378 "EXT4-fs: Unrecognized mount option \"%s\" " 1387 "EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1473,15 +1482,9 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1473 EXT4_INODES_PER_GROUP(sb), 1482 EXT4_INODES_PER_GROUP(sb),
1474 sbi->s_mount_opt); 1483 sbi->s_mount_opt);
1475 1484
1476 printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); 1485 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
1477 if (EXT4_SB(sb)->s_journal->j_inode == NULL) { 1486 sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1478 char b[BDEVNAME_SIZE]; 1487 "external", EXT4_SB(sb)->s_journal->j_devname);
1479
1480 printk("external journal on %s\n",
1481 bdevname(EXT4_SB(sb)->s_journal->j_dev, b));
1482 } else {
1483 printk("internal journal\n");
1484 }
1485 return res; 1488 return res;
1486} 1489}
1487 1490
@@ -1504,8 +1507,11 @@ static int ext4_fill_flex_info(struct super_block *sb)
1504 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1507 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1505 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1508 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1506 1509
1507 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / 1510 /* We allocate both existing and potentially added groups */
1508 groups_per_flex; 1511 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1512 ((sbi->s_es->s_reserved_gdt_blocks +1 ) <<
1513 EXT4_DESC_PER_BLOCK_BITS(sb))) /
1514 groups_per_flex;
1509 sbi->s_flex_groups = kzalloc(flex_group_count * 1515 sbi->s_flex_groups = kzalloc(flex_group_count *
1510 sizeof(struct flex_groups), GFP_KERNEL); 1516 sizeof(struct flex_groups), GFP_KERNEL);
1511 if (sbi->s_flex_groups == NULL) { 1517 if (sbi->s_flex_groups == NULL) {
@@ -1584,7 +1590,7 @@ static int ext4_check_descriptors(struct super_block *sb)
1584 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 1590 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1585 flexbg_flag = 1; 1591 flexbg_flag = 1;
1586 1592
1587 ext4_debug ("Checking group descriptors"); 1593 ext4_debug("Checking group descriptors");
1588 1594
1589 for (i = 0; i < sbi->s_groups_count; i++) { 1595 for (i = 0; i < sbi->s_groups_count; i++) {
1590 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1596 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
@@ -1623,8 +1629,10 @@ static int ext4_check_descriptors(struct super_block *sb)
1623 "Checksum for group %lu failed (%u!=%u)\n", 1629 "Checksum for group %lu failed (%u!=%u)\n",
1624 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1630 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1625 gdp)), le16_to_cpu(gdp->bg_checksum)); 1631 gdp)), le16_to_cpu(gdp->bg_checksum));
1626 if (!(sb->s_flags & MS_RDONLY)) 1632 if (!(sb->s_flags & MS_RDONLY)) {
1633 spin_unlock(sb_bgl_lock(sbi, i));
1627 return 0; 1634 return 0;
1635 }
1628 } 1636 }
1629 spin_unlock(sb_bgl_lock(sbi, i)); 1637 spin_unlock(sb_bgl_lock(sbi, i));
1630 if (!flexbg_flag) 1638 if (!flexbg_flag)
@@ -1714,9 +1722,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1714 DQUOT_INIT(inode); 1722 DQUOT_INIT(inode);
1715 if (inode->i_nlink) { 1723 if (inode->i_nlink) {
1716 printk(KERN_DEBUG 1724 printk(KERN_DEBUG
1717 "%s: truncating inode %lu to %Ld bytes\n", 1725 "%s: truncating inode %lu to %lld bytes\n",
1718 __func__, inode->i_ino, inode->i_size); 1726 __func__, inode->i_ino, inode->i_size);
1719 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1727 jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1720 inode->i_ino, inode->i_size); 1728 inode->i_ino, inode->i_size);
1721 ext4_truncate(inode); 1729 ext4_truncate(inode);
1722 nr_truncates++; 1730 nr_truncates++;
@@ -1914,6 +1922,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1914 unsigned long journal_devnum = 0; 1922 unsigned long journal_devnum = 0;
1915 unsigned long def_mount_opts; 1923 unsigned long def_mount_opts;
1916 struct inode *root; 1924 struct inode *root;
1925 char *cp;
1917 int ret = -EINVAL; 1926 int ret = -EINVAL;
1918 int blocksize; 1927 int blocksize;
1919 int db_count; 1928 int db_count;
@@ -1930,10 +1939,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1930 sbi->s_mount_opt = 0; 1939 sbi->s_mount_opt = 0;
1931 sbi->s_resuid = EXT4_DEF_RESUID; 1940 sbi->s_resuid = EXT4_DEF_RESUID;
1932 sbi->s_resgid = EXT4_DEF_RESGID; 1941 sbi->s_resgid = EXT4_DEF_RESGID;
1942 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
1933 sbi->s_sb_block = sb_block; 1943 sbi->s_sb_block = sb_block;
1934 1944
1935 unlock_kernel(); 1945 unlock_kernel();
1936 1946
1947 /* Cleanup superblock name */
1948 for (cp = sb->s_id; (cp = strchr(cp, '/'));)
1949 *cp = '!';
1950
1937 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); 1951 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
1938 if (!blocksize) { 1952 if (!blocksize) {
1939 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); 1953 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
@@ -1973,11 +1987,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1973 set_opt(sbi->s_mount_opt, GRPID); 1987 set_opt(sbi->s_mount_opt, GRPID);
1974 if (def_mount_opts & EXT4_DEFM_UID16) 1988 if (def_mount_opts & EXT4_DEFM_UID16)
1975 set_opt(sbi->s_mount_opt, NO_UID32); 1989 set_opt(sbi->s_mount_opt, NO_UID32);
1976#ifdef CONFIG_EXT4DEV_FS_XATTR 1990#ifdef CONFIG_EXT4_FS_XATTR
1977 if (def_mount_opts & EXT4_DEFM_XATTR_USER) 1991 if (def_mount_opts & EXT4_DEFM_XATTR_USER)
1978 set_opt(sbi->s_mount_opt, XATTR_USER); 1992 set_opt(sbi->s_mount_opt, XATTR_USER);
1979#endif 1993#endif
1980#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 1994#ifdef CONFIG_EXT4_FS_POSIX_ACL
1981 if (def_mount_opts & EXT4_DEFM_ACL) 1995 if (def_mount_opts & EXT4_DEFM_ACL)
1982 set_opt(sbi->s_mount_opt, POSIX_ACL); 1996 set_opt(sbi->s_mount_opt, POSIX_ACL);
1983#endif 1997#endif
@@ -2012,11 +2026,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2012 ext4_warning(sb, __func__, 2026 ext4_warning(sb, __func__,
2013 "extents feature not enabled on this filesystem, " 2027 "extents feature not enabled on this filesystem, "
2014 "use tune2fs.\n"); 2028 "use tune2fs.\n");
2015 /*
2016 * turn on mballoc code by default in ext4 filesystem
2017 * Use -o nomballoc to turn it off
2018 */
2019 set_opt(sbi->s_mount_opt, MBALLOC);
2020 2029
2021 /* 2030 /*
2022 * enable delayed allocation by default 2031 * enable delayed allocation by default
@@ -2041,16 +2050,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2041 "running e2fsck is recommended\n"); 2050 "running e2fsck is recommended\n");
2042 2051
2043 /* 2052 /*
2044 * Since ext4 is still considered development code, we require
2045 * that the TEST_FILESYS flag in s->flags be set.
2046 */
2047 if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) {
2048 printk(KERN_WARNING "EXT4-fs: %s: not marked "
2049 "OK to use with test code.\n", sb->s_id);
2050 goto failed_mount;
2051 }
2052
2053 /*
2054 * Check feature flags regardless of the revision level, since we 2053 * Check feature flags regardless of the revision level, since we
2055 * previously didn't change the revision level when setting the flags, 2054 * previously didn't change the revision level when setting the flags,
2056 * so there is a chance incompat flags are set on a rev 0 filesystem. 2055 * so there is a chance incompat flags are set on a rev 0 filesystem.
@@ -2219,6 +2218,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2219 goto failed_mount; 2218 goto failed_mount;
2220 } 2219 }
2221 2220
2221 if (ext4_proc_root)
2222 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2223
2224 if (sbi->s_proc)
2225 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
2226 &ext4_ui_proc_fops,
2227 &sbi->s_inode_readahead_blks);
2228
2222 bgl_lock_init(&sbi->s_blockgroup_lock); 2229 bgl_lock_init(&sbi->s_blockgroup_lock);
2223 2230
2224 for (i = 0; i < db_count; i++) { 2231 for (i = 0; i < db_count; i++) {
@@ -2257,24 +2264,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2257 err = percpu_counter_init(&sbi->s_dirs_counter, 2264 err = percpu_counter_init(&sbi->s_dirs_counter,
2258 ext4_count_dirs(sb)); 2265 ext4_count_dirs(sb));
2259 } 2266 }
2267 if (!err) {
2268 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2269 }
2260 if (err) { 2270 if (err) {
2261 printk(KERN_ERR "EXT4-fs: insufficient memory\n"); 2271 printk(KERN_ERR "EXT4-fs: insufficient memory\n");
2262 goto failed_mount3; 2272 goto failed_mount3;
2263 } 2273 }
2264 2274
2265 /* per filesystem reservation list head & lock */
2266 spin_lock_init(&sbi->s_rsv_window_lock);
2267 sbi->s_rsv_window_root = RB_ROOT;
2268 /* Add a single, static dummy reservation to the start of the
2269 * reservation window list --- it gives us a placeholder for
2270 * append-at-start-of-list which makes the allocation logic
2271 * _much_ simpler. */
2272 sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
2273 sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
2274 sbi->s_rsv_window_head.rsv_alloc_hit = 0;
2275 sbi->s_rsv_window_head.rsv_goal_size = 0;
2276 ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
2277
2278 sbi->s_stripe = ext4_get_stripe_size(sbi); 2275 sbi->s_stripe = ext4_get_stripe_size(sbi);
2279 2276
2280 /* 2277 /*
@@ -2471,7 +2468,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2471 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); 2468 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2472 2469
2473 ext4_ext_init(sb); 2470 ext4_ext_init(sb);
2474 ext4_mb_init(sb, needs_recovery); 2471 err = ext4_mb_init(sb, needs_recovery);
2472 if (err) {
2473 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
2474 err);
2475 goto failed_mount4;
2476 }
2475 2477
2476 lock_kernel(); 2478 lock_kernel();
2477 return 0; 2479 return 0;
@@ -2489,11 +2491,16 @@ failed_mount3:
2489 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2491 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2490 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2492 percpu_counter_destroy(&sbi->s_freeinodes_counter);
2491 percpu_counter_destroy(&sbi->s_dirs_counter); 2493 percpu_counter_destroy(&sbi->s_dirs_counter);
2494 percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
2492failed_mount2: 2495failed_mount2:
2493 for (i = 0; i < db_count; i++) 2496 for (i = 0; i < db_count; i++)
2494 brelse(sbi->s_group_desc[i]); 2497 brelse(sbi->s_group_desc[i]);
2495 kfree(sbi->s_group_desc); 2498 kfree(sbi->s_group_desc);
2496failed_mount: 2499failed_mount:
2500 if (sbi->s_proc) {
2501 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
2502 remove_proc_entry(sb->s_id, ext4_proc_root);
2503 }
2497#ifdef CONFIG_QUOTA 2504#ifdef CONFIG_QUOTA
2498 for (i = 0; i < MAXQUOTAS; i++) 2505 for (i = 0; i < MAXQUOTAS; i++)
2499 kfree(sbi->s_qf_names[i]); 2506 kfree(sbi->s_qf_names[i]);
@@ -2552,7 +2559,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
2552 return NULL; 2559 return NULL;
2553 } 2560 }
2554 2561
2555 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 2562 jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2556 journal_inode, journal_inode->i_size); 2563 journal_inode, journal_inode->i_size);
2557 if (!S_ISREG(journal_inode->i_mode)) { 2564 if (!S_ISREG(journal_inode->i_mode)) {
2558 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); 2565 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
@@ -2715,6 +2722,11 @@ static int ext4_load_journal(struct super_block *sb,
2715 return -EINVAL; 2722 return -EINVAL;
2716 } 2723 }
2717 2724
2725 if (journal->j_flags & JBD2_BARRIER)
2726 printk(KERN_INFO "EXT4-fs: barriers enabled\n");
2727 else
2728 printk(KERN_INFO "EXT4-fs: barriers disabled\n");
2729
2718 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2730 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2719 err = jbd2_journal_update_format(journal); 2731 err = jbd2_journal_update_format(journal);
2720 if (err) { 2732 if (err) {
@@ -2799,13 +2811,34 @@ static void ext4_commit_super(struct super_block *sb,
2799 2811
2800 if (!sbh) 2812 if (!sbh)
2801 return; 2813 return;
2814 if (buffer_write_io_error(sbh)) {
2815 /*
2816 * Oh, dear. A previous attempt to write the
2817 * superblock failed. This could happen because the
2818 * USB device was yanked out. Or it could happen to
2819 * be a transient write error and maybe the block will
2820 * be remapped. Nothing we can do but to retry the
2821 * write and hope for the best.
2822 */
2823 printk(KERN_ERR "ext4: previous I/O error to "
2824 "superblock detected for %s.\n", sb->s_id);
2825 clear_buffer_write_io_error(sbh);
2826 set_buffer_uptodate(sbh);
2827 }
2802 es->s_wtime = cpu_to_le32(get_seconds()); 2828 es->s_wtime = cpu_to_le32(get_seconds());
2803 ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); 2829 ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
2804 es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 2830 es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
2805 BUFFER_TRACE(sbh, "marking dirty"); 2831 BUFFER_TRACE(sbh, "marking dirty");
2806 mark_buffer_dirty(sbh); 2832 mark_buffer_dirty(sbh);
2807 if (sync) 2833 if (sync) {
2808 sync_dirty_buffer(sbh); 2834 sync_dirty_buffer(sbh);
2835 if (buffer_write_io_error(sbh)) {
2836 printk(KERN_ERR "ext4: I/O error while writing "
2837 "superblock for %s.\n", sb->s_id);
2838 clear_buffer_write_io_error(sbh);
2839 set_buffer_uptodate(sbh);
2840 }
2841 }
2809} 2842}
2810 2843
2811 2844
@@ -2907,6 +2940,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
2907{ 2940{
2908 tid_t target; 2941 tid_t target;
2909 2942
2943 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
2910 sb->s_dirt = 0; 2944 sb->s_dirt = 0;
2911 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 2945 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
2912 if (wait) 2946 if (wait)
@@ -3162,7 +3196,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3162 buf->f_type = EXT4_SUPER_MAGIC; 3196 buf->f_type = EXT4_SUPER_MAGIC;
3163 buf->f_bsize = sb->s_blocksize; 3197 buf->f_bsize = sb->s_blocksize;
3164 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3198 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3165 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); 3199 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3200 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3166 ext4_free_blocks_count_set(es, buf->f_bfree); 3201 ext4_free_blocks_count_set(es, buf->f_bfree);
3167 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3202 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3168 if (buf->f_bfree < ext4_r_blocks_count(es)) 3203 if (buf->f_bfree < ext4_r_blocks_count(es))
@@ -3432,7 +3467,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3432 handle_t *handle = journal_current_handle(); 3467 handle_t *handle = journal_current_handle();
3433 3468
3434 if (!handle) { 3469 if (!handle) {
3435 printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" 3470 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
3436 " cancelled because transaction is not started.\n", 3471 " cancelled because transaction is not started.\n",
3437 (unsigned long long)off, (unsigned long long)len); 3472 (unsigned long long)off, (unsigned long long)len);
3438 return -EIO; 3473 return -EIO;
@@ -3493,18 +3528,82 @@ static int ext4_get_sb(struct file_system_type *fs_type,
3493 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3528 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3494} 3529}
3495 3530
3531#ifdef CONFIG_PROC_FS
3532static int ext4_ui_proc_show(struct seq_file *m, void *v)
3533{
3534 unsigned int *p = m->private;
3535
3536 seq_printf(m, "%u\n", *p);
3537 return 0;
3538}
3539
3540static int ext4_ui_proc_open(struct inode *inode, struct file *file)
3541{
3542 return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
3543}
3544
3545static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
3546 size_t cnt, loff_t *ppos)
3547{
3548 unsigned int *p = PDE(file->f_path.dentry->d_inode)->data;
3549 char str[32];
3550 unsigned long value;
3551
3552 if (cnt >= sizeof(str))
3553 return -EINVAL;
3554 if (copy_from_user(str, buf, cnt))
3555 return -EFAULT;
3556 value = simple_strtol(str, NULL, 0);
3557 if (value < 0)
3558 return -ERANGE;
3559 *p = value;
3560 return cnt;
3561}
3562
3563const struct file_operations ext4_ui_proc_fops = {
3564 .owner = THIS_MODULE,
3565 .open = ext4_ui_proc_open,
3566 .read = seq_read,
3567 .llseek = seq_lseek,
3568 .release = single_release,
3569 .write = ext4_ui_proc_write,
3570};
3571#endif
3572
3573static struct file_system_type ext4_fs_type = {
3574 .owner = THIS_MODULE,
3575 .name = "ext4",
3576 .get_sb = ext4_get_sb,
3577 .kill_sb = kill_block_super,
3578 .fs_flags = FS_REQUIRES_DEV,
3579};
3580
3581#ifdef CONFIG_EXT4DEV_COMPAT
3582static int ext4dev_get_sb(struct file_system_type *fs_type,
3583 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3584{
3585 printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
3586 "to mount using ext4\n");
3587 printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
3588 "will go away by 2.6.31\n");
3589 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3590}
3591
3496static struct file_system_type ext4dev_fs_type = { 3592static struct file_system_type ext4dev_fs_type = {
3497 .owner = THIS_MODULE, 3593 .owner = THIS_MODULE,
3498 .name = "ext4dev", 3594 .name = "ext4dev",
3499 .get_sb = ext4_get_sb, 3595 .get_sb = ext4dev_get_sb,
3500 .kill_sb = kill_block_super, 3596 .kill_sb = kill_block_super,
3501 .fs_flags = FS_REQUIRES_DEV, 3597 .fs_flags = FS_REQUIRES_DEV,
3502}; 3598};
3599MODULE_ALIAS("ext4dev");
3600#endif
3503 3601
3504static int __init init_ext4_fs(void) 3602static int __init init_ext4_fs(void)
3505{ 3603{
3506 int err; 3604 int err;
3507 3605
3606 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3508 err = init_ext4_mballoc(); 3607 err = init_ext4_mballoc();
3509 if (err) 3608 if (err)
3510 return err; 3609 return err;
@@ -3515,9 +3614,16 @@ static int __init init_ext4_fs(void)
3515 err = init_inodecache(); 3614 err = init_inodecache();
3516 if (err) 3615 if (err)
3517 goto out1; 3616 goto out1;
3518 err = register_filesystem(&ext4dev_fs_type); 3617 err = register_filesystem(&ext4_fs_type);
3519 if (err) 3618 if (err)
3520 goto out; 3619 goto out;
3620#ifdef CONFIG_EXT4DEV_COMPAT
3621 err = register_filesystem(&ext4dev_fs_type);
3622 if (err) {
3623 unregister_filesystem(&ext4_fs_type);
3624 goto out;
3625 }
3626#endif
3521 return 0; 3627 return 0;
3522out: 3628out:
3523 destroy_inodecache(); 3629 destroy_inodecache();
@@ -3530,10 +3636,14 @@ out2:
3530 3636
3531static void __exit exit_ext4_fs(void) 3637static void __exit exit_ext4_fs(void)
3532{ 3638{
3639 unregister_filesystem(&ext4_fs_type);
3640#ifdef CONFIG_EXT4DEV_COMPAT
3533 unregister_filesystem(&ext4dev_fs_type); 3641 unregister_filesystem(&ext4dev_fs_type);
3642#endif
3534 destroy_inodecache(); 3643 destroy_inodecache();
3535 exit_ext4_xattr(); 3644 exit_ext4_xattr();
3536 exit_ext4_mballoc(); 3645 exit_ext4_mballoc();
3646 remove_proc_entry("fs/ext4", NULL);
3537} 3647}
3538 3648
3539MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3649MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e9178643dc01..00740cb32be3 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -23,10 +23,10 @@
23#include "ext4.h" 23#include "ext4.h"
24#include "xattr.h" 24#include "xattr.h"
25 25
26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
27{ 27{
28 struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); 28 struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
29 nd_set_link(nd, (char*)ei->i_data); 29 nd_set_link(nd, (char *) ei->i_data);
30 return NULL; 30 return NULL;
31} 31}
32 32
@@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
34 .readlink = generic_readlink, 34 .readlink = generic_readlink,
35 .follow_link = page_follow_link_light, 35 .follow_link = page_follow_link_light,
36 .put_link = page_put_link, 36 .put_link = page_put_link,
37#ifdef CONFIG_EXT4DEV_FS_XATTR 37#ifdef CONFIG_EXT4_FS_XATTR
38 .setxattr = generic_setxattr, 38 .setxattr = generic_setxattr,
39 .getxattr = generic_getxattr, 39 .getxattr = generic_getxattr,
40 .listxattr = ext4_listxattr, 40 .listxattr = ext4_listxattr,
@@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
45const struct inode_operations ext4_fast_symlink_inode_operations = { 45const struct inode_operations ext4_fast_symlink_inode_operations = {
46 .readlink = generic_readlink, 46 .readlink = generic_readlink,
47 .follow_link = ext4_follow_link, 47 .follow_link = ext4_follow_link,
48#ifdef CONFIG_EXT4DEV_FS_XATTR 48#ifdef CONFIG_EXT4_FS_XATTR
49 .setxattr = generic_setxattr, 49 .setxattr = generic_setxattr,
50 .getxattr = generic_getxattr, 50 .getxattr = generic_getxattr,
51 .listxattr = ext4_listxattr, 51 .listxattr = ext4_listxattr,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 8954208b4893..80626d516fee 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache;
99 99
100static struct xattr_handler *ext4_xattr_handler_map[] = { 100static struct xattr_handler *ext4_xattr_handler_map[] = {
101 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, 101 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
102#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 102#ifdef CONFIG_EXT4_FS_POSIX_ACL
103 [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, 103 [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler,
104 [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, 104 [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
105#endif 105#endif
106 [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, 106 [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler,
107#ifdef CONFIG_EXT4DEV_FS_SECURITY 107#ifdef CONFIG_EXT4_FS_SECURITY
108 [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, 108 [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler,
109#endif 109#endif
110}; 110};
@@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = {
112struct xattr_handler *ext4_xattr_handlers[] = { 112struct xattr_handler *ext4_xattr_handlers[] = {
113 &ext4_xattr_user_handler, 113 &ext4_xattr_user_handler,
114 &ext4_xattr_trusted_handler, 114 &ext4_xattr_trusted_handler,
115#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL 115#ifdef CONFIG_EXT4_FS_POSIX_ACL
116 &ext4_xattr_acl_access_handler, 116 &ext4_xattr_acl_access_handler,
117 &ext4_xattr_acl_default_handler, 117 &ext4_xattr_acl_default_handler,
118#endif 118#endif
119#ifdef CONFIG_EXT4DEV_FS_SECURITY 119#ifdef CONFIG_EXT4_FS_SECURITY
120 &ext4_xattr_security_handler, 120 &ext4_xattr_security_handler,
121#endif 121#endif
122 NULL 122 NULL
@@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
959 struct ext4_xattr_block_find bs = { 959 struct ext4_xattr_block_find bs = {
960 .s = { .not_found = -ENODATA, }, 960 .s = { .not_found = -ENODATA, },
961 }; 961 };
962 unsigned long no_expand;
962 int error; 963 int error;
963 964
964 if (!name) 965 if (!name)
@@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
966 if (strlen(name) > 255) 967 if (strlen(name) > 255)
967 return -ERANGE; 968 return -ERANGE;
968 down_write(&EXT4_I(inode)->xattr_sem); 969 down_write(&EXT4_I(inode)->xattr_sem);
970 no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND;
971 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
972
969 error = ext4_get_inode_loc(inode, &is.iloc); 973 error = ext4_get_inode_loc(inode, &is.iloc);
970 if (error) 974 if (error)
971 goto cleanup; 975 goto cleanup;
@@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1042cleanup: 1046cleanup:
1043 brelse(is.iloc.bh); 1047 brelse(is.iloc.bh);
1044 brelse(bs.bh); 1048 brelse(bs.bh);
1049 if (no_expand == 0)
1050 EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND;
1045 up_write(&EXT4_I(inode)->xattr_sem); 1051 up_write(&EXT4_I(inode)->xattr_sem);
1046 return error; 1052 return error;
1047} 1053}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 5992fe979bb9..8ede88b18c29 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -51,8 +51,8 @@ struct ext4_xattr_entry {
51 (((name_len) + EXT4_XATTR_ROUND + \ 51 (((name_len) + EXT4_XATTR_ROUND + \
52 sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) 52 sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
53#define EXT4_XATTR_NEXT(entry) \ 53#define EXT4_XATTR_NEXT(entry) \
54 ( (struct ext4_xattr_entry *)( \ 54 ((struct ext4_xattr_entry *)( \
55 (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) 55 (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
56#define EXT4_XATTR_SIZE(size) \ 56#define EXT4_XATTR_SIZE(size) \
57 (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) 57 (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
58 58
@@ -63,7 +63,7 @@ struct ext4_xattr_entry {
63 EXT4_I(inode)->i_extra_isize)) 63 EXT4_I(inode)->i_extra_isize))
64#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) 64#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
65 65
66# ifdef CONFIG_EXT4DEV_FS_XATTR 66# ifdef CONFIG_EXT4_FS_XATTR
67 67
68extern struct xattr_handler ext4_xattr_user_handler; 68extern struct xattr_handler ext4_xattr_user_handler;
69extern struct xattr_handler ext4_xattr_trusted_handler; 69extern struct xattr_handler ext4_xattr_trusted_handler;
@@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void);
88 88
89extern struct xattr_handler *ext4_xattr_handlers[]; 89extern struct xattr_handler *ext4_xattr_handlers[];
90 90
91# else /* CONFIG_EXT4DEV_FS_XATTR */ 91# else /* CONFIG_EXT4_FS_XATTR */
92 92
93static inline int 93static inline int
94ext4_xattr_get(struct inode *inode, int name_index, const char *name, 94ext4_xattr_get(struct inode *inode, int name_index, const char *name,
@@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
141 141
142#define ext4_xattr_handlers NULL 142#define ext4_xattr_handlers NULL
143 143
144# endif /* CONFIG_EXT4DEV_FS_XATTR */ 144# endif /* CONFIG_EXT4_FS_XATTR */
145 145
146#ifdef CONFIG_EXT4DEV_FS_SECURITY 146#ifdef CONFIG_EXT4_FS_SECURITY
147extern int ext4_init_security(handle_t *handle, struct inode *inode, 147extern int ext4_init_security(handle_t *handle, struct inode *inode,
148 struct inode *dir); 148 struct inode *dir);
149#else 149#else
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 7db32b3382d3..33a6b7ecb8b8 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -13,9 +13,14 @@
13#include <linux/security.h> 13#include <linux/security.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/writeback.h>
17#include <linux/buffer_head.h>
16 18
17#include <asm/ioctls.h> 19#include <asm/ioctls.h>
18 20
21/* So that the fiemap access checks can't overflow on 32 bit machines. */
22#define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent))
23
19/** 24/**
20 * vfs_ioctl - call filesystem specific ioctl methods 25 * vfs_ioctl - call filesystem specific ioctl methods
21 * @filp: open file to invoke ioctl method on 26 * @filp: open file to invoke ioctl method on
@@ -71,6 +76,272 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
71 return put_user(res, p); 76 return put_user(res, p);
72} 77}
73 78
79/**
80 * fiemap_fill_next_extent - Fiemap helper function
81 * @fieinfo: Fiemap context passed into ->fiemap
82 * @logical: Extent logical start offset, in bytes
83 * @phys: Extent physical start offset, in bytes
84 * @len: Extent length, in bytes
85 * @flags: FIEMAP_EXTENT flags that describe this extent
86 *
87 * Called from file system ->fiemap callback. Will populate extent
88 * info as passed in via arguments and copy to user memory. On
89 * success, extent count on fieinfo is incremented.
90 *
91 * Returns 0 on success, -errno on error, 1 if this was the last
92 * extent that will fit in user array.
93 */
94#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC)
95#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED)
96#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
97int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
98 u64 phys, u64 len, u32 flags)
99{
100 struct fiemap_extent extent;
101 struct fiemap_extent *dest = fieinfo->fi_extents_start;
102
103 /* only count the extents */
104 if (fieinfo->fi_extents_max == 0) {
105 fieinfo->fi_extents_mapped++;
106 return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
107 }
108
109 if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
110 return 1;
111
112 if (flags & SET_UNKNOWN_FLAGS)
113 flags |= FIEMAP_EXTENT_UNKNOWN;
114 if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
115 flags |= FIEMAP_EXTENT_ENCODED;
116 if (flags & SET_NOT_ALIGNED_FLAGS)
117 flags |= FIEMAP_EXTENT_NOT_ALIGNED;
118
119 memset(&extent, 0, sizeof(extent));
120 extent.fe_logical = logical;
121 extent.fe_physical = phys;
122 extent.fe_length = len;
123 extent.fe_flags = flags;
124
125 dest += fieinfo->fi_extents_mapped;
126 if (copy_to_user(dest, &extent, sizeof(extent)))
127 return -EFAULT;
128
129 fieinfo->fi_extents_mapped++;
130 if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
131 return 1;
132 return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
133}
134EXPORT_SYMBOL(fiemap_fill_next_extent);
135
136/**
137 * fiemap_check_flags - check validity of requested flags for fiemap
138 * @fieinfo: Fiemap context passed into ->fiemap
139 * @fs_flags: Set of fiemap flags that the file system understands
140 *
141 * Called from file system ->fiemap callback. This will compute the
142 * intersection of valid fiemap flags and those that the fs supports. That
143 * value is then compared against the user supplied flags. In case of bad user
144 * flags, the invalid values will be written into the fieinfo structure, and
145 * -EBADR is returned, which tells ioctl_fiemap() to return those values to
146 * userspace. For this reason, a return code of -EBADR should be preserved.
147 *
148 * Returns 0 on success, -EBADR on bad flags.
149 */
150int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags)
151{
152 u32 incompat_flags;
153
154 incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags);
155 if (incompat_flags) {
156 fieinfo->fi_flags = incompat_flags;
157 return -EBADR;
158 }
159 return 0;
160}
161EXPORT_SYMBOL(fiemap_check_flags);
162
163static int fiemap_check_ranges(struct super_block *sb,
164 u64 start, u64 len, u64 *new_len)
165{
166 *new_len = len;
167
168 if (len == 0)
169 return -EINVAL;
170
171 if (start > sb->s_maxbytes)
172 return -EFBIG;
173
174 /*
175 * Shrink request scope to what the fs can actually handle.
176 */
177 if ((len > sb->s_maxbytes) ||
178 (sb->s_maxbytes - len) < start)
179 *new_len = sb->s_maxbytes - start;
180
181 return 0;
182}
183
184static int ioctl_fiemap(struct file *filp, unsigned long arg)
185{
186 struct fiemap fiemap;
187 struct fiemap_extent_info fieinfo = { 0, };
188 struct inode *inode = filp->f_path.dentry->d_inode;
189 struct super_block *sb = inode->i_sb;
190 u64 len;
191 int error;
192
193 if (!inode->i_op->fiemap)
194 return -EOPNOTSUPP;
195
196 if (copy_from_user(&fiemap, (struct fiemap __user *)arg,
197 sizeof(struct fiemap)))
198 return -EFAULT;
199
200 if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
201 return -EINVAL;
202
203 error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length,
204 &len);
205 if (error)
206 return error;
207
208 fieinfo.fi_flags = fiemap.fm_flags;
209 fieinfo.fi_extents_max = fiemap.fm_extent_count;
210 fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap));
211
212 if (fiemap.fm_extent_count != 0 &&
213 !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start,
214 fieinfo.fi_extents_max * sizeof(struct fiemap_extent)))
215 return -EFAULT;
216
217 if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC)
218 filemap_write_and_wait(inode->i_mapping);
219
220 error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len);
221 fiemap.fm_flags = fieinfo.fi_flags;
222 fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
223 if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap)))
224 error = -EFAULT;
225
226 return error;
227}
228
/*
 * Convert between block numbers and byte offsets using the inode's block
 * size shift.  blk_to_logical() widens to 64 bits before shifting so that a
 * 32-bit block number cannot overflow; logical_to_blk() carries no trailing
 * semicolon so it can be used inside larger expressions.
 */
#define blk_to_logical(inode, blk) ((long long)(blk) << (inode)->i_blkbits)
#define logical_to_blk(inode, offset) ((offset) >> (inode)->i_blkbits)
231
232/*
233 * @inode - the inode to map
234 * @arg - the pointer to userspace where we copy everything to
235 * @get_block - the fs's get_block function
236 *
237 * This does FIEMAP for block based inodes. Basically it will just loop
238 * through get_block until we hit the number of extents we want to map, or we
239 * go past the end of the file and hit a hole.
240 *
241 * If it is possible to have data blocks beyond a hole past @inode->i_size, then
242 * please do not use this function, it will stop at the first unmapped block
243 * beyond i_size
244 */
245int generic_block_fiemap(struct inode *inode,
246 struct fiemap_extent_info *fieinfo, u64 start,
247 u64 len, get_block_t *get_block)
248{
249 struct buffer_head tmp;
250 unsigned int start_blk;
251 long long length = 0, map_len = 0;
252 u64 logical = 0, phys = 0, size = 0;
253 u32 flags = FIEMAP_EXTENT_MERGED;
254 int ret = 0;
255
256 if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
257 return ret;
258
259 start_blk = logical_to_blk(inode, start);
260
261 /* guard against change */
262 mutex_lock(&inode->i_mutex);
263
264 length = (long long)min_t(u64, len, i_size_read(inode));
265 map_len = length;
266
267 do {
268 /*
269 * we set b_size to the total size we want so it will map as
270 * many contiguous blocks as possible at once
271 */
272 memset(&tmp, 0, sizeof(struct buffer_head));
273 tmp.b_size = map_len;
274
275 ret = get_block(inode, start_blk, &tmp, 0);
276 if (ret)
277 break;
278
279 /* HOLE */
280 if (!buffer_mapped(&tmp)) {
281 /*
282 * first hole after going past the EOF, this is our
283 * last extent
284 */
285 if (length <= 0) {
286 flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST;
287 ret = fiemap_fill_next_extent(fieinfo, logical,
288 phys, size,
289 flags);
290 break;
291 }
292
293 length -= blk_to_logical(inode, 1);
294
295 /* if we have holes up to/past EOF then we're done */
296 if (length <= 0)
297 break;
298
299 start_blk++;
300 } else {
301 if (length <= 0 && size) {
302 ret = fiemap_fill_next_extent(fieinfo, logical,
303 phys, size,
304 flags);
305 if (ret)
306 break;
307 }
308
309 logical = blk_to_logical(inode, start_blk);
310 phys = blk_to_logical(inode, tmp.b_blocknr);
311 size = tmp.b_size;
312 flags = FIEMAP_EXTENT_MERGED;
313
314 length -= tmp.b_size;
315 start_blk += logical_to_blk(inode, size);
316
317 /*
318 * if we are past the EOF we need to loop again to see
319 * if there is a hole so we can mark this extent as the
320 * last one, and if not keep mapping things until we
321 * find a hole, or we run out of slots in the extent
322 * array
323 */
324 if (length <= 0)
325 continue;
326
327 ret = fiemap_fill_next_extent(fieinfo, logical, phys,
328 size, flags);
329 if (ret)
330 break;
331 }
332 cond_resched();
333 } while (1);
334
335 mutex_unlock(&inode->i_mutex);
336
337 /* if ret is 1 then we just hit the end of the extent array */
338 if (ret == 1)
339 ret = 0;
340
341 return ret;
342}
343EXPORT_SYMBOL(generic_block_fiemap);
344
74static int file_ioctl(struct file *filp, unsigned int cmd, 345static int file_ioctl(struct file *filp, unsigned int cmd,
75 unsigned long arg) 346 unsigned long arg)
76{ 347{
@@ -80,6 +351,8 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
80 switch (cmd) { 351 switch (cmd) {
81 case FIBMAP: 352 case FIBMAP:
82 return ioctl_fibmap(filp, p); 353 return ioctl_fibmap(filp, p);
354 case FS_IOC_FIEMAP:
355 return ioctl_fiemap(filp, arg);
83 case FIGETBSZ: 356 case FIGETBSZ:
84 return put_user(inode->i_sb->s_blocksize, p); 357 return put_user(inode->i_sb->s_blocksize, p);
85 case FIONREAD: 358 case FIONREAD:
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 91389c8aee8a..42895d369458 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -20,6 +20,7 @@
20#include <linux/time.h> 20#include <linux/time.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/marker.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
24#include <linux/slab.h> 25#include <linux/slab.h>
25 26
@@ -126,14 +127,29 @@ void __jbd2_log_wait_for_space(journal_t *journal)
126 127
127 /* 128 /*
128 * Test again, another process may have checkpointed while we 129 * Test again, another process may have checkpointed while we
129 * were waiting for the checkpoint lock 130 * were waiting for the checkpoint lock. If there are no
131 * outstanding transactions there is nothing to checkpoint and
132 * we can't make progress. Abort the journal in this case.
130 */ 133 */
131 spin_lock(&journal->j_state_lock); 134 spin_lock(&journal->j_state_lock);
135 spin_lock(&journal->j_list_lock);
132 nblocks = jbd_space_needed(journal); 136 nblocks = jbd_space_needed(journal);
133 if (__jbd2_log_space_left(journal) < nblocks) { 137 if (__jbd2_log_space_left(journal) < nblocks) {
138 int chkpt = journal->j_checkpoint_transactions != NULL;
139
140 spin_unlock(&journal->j_list_lock);
134 spin_unlock(&journal->j_state_lock); 141 spin_unlock(&journal->j_state_lock);
135 jbd2_log_do_checkpoint(journal); 142 if (chkpt) {
143 jbd2_log_do_checkpoint(journal);
144 } else {
145 printk(KERN_ERR "%s: no transactions\n",
146 __func__);
147 jbd2_journal_abort(journal, 0);
148 }
149
136 spin_lock(&journal->j_state_lock); 150 spin_lock(&journal->j_state_lock);
151 } else {
152 spin_unlock(&journal->j_list_lock);
137 } 153 }
138 mutex_unlock(&journal->j_checkpoint_mutex); 154 mutex_unlock(&journal->j_checkpoint_mutex);
139 } 155 }
@@ -313,6 +329,8 @@ int jbd2_log_do_checkpoint(journal_t *journal)
313 * journal straight away. 329 * journal straight away.
314 */ 330 */
315 result = jbd2_cleanup_journal_tail(journal); 331 result = jbd2_cleanup_journal_tail(journal);
332 trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d",
333 journal->j_devname, result);
316 jbd_debug(1, "cleanup_journal_tail returned %d\n", result); 334 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
317 if (result <= 0) 335 if (result <= 0)
318 return result; 336 return result;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f2ad061e95ec..0d3814a35ed1 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -16,6 +16,7 @@
16#include <linux/time.h> 16#include <linux/time.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/jbd2.h> 18#include <linux/jbd2.h>
19#include <linux/marker.h>
19#include <linux/errno.h> 20#include <linux/errno.h>
20#include <linux/slab.h> 21#include <linux/slab.h>
21#include <linux/mm.h> 22#include <linux/mm.h>
@@ -126,8 +127,7 @@ static int journal_submit_commit_record(journal_t *journal,
126 127
127 JBUFFER_TRACE(descriptor, "submit commit block"); 128 JBUFFER_TRACE(descriptor, "submit commit block");
128 lock_buffer(bh); 129 lock_buffer(bh);
129 get_bh(bh); 130 clear_buffer_dirty(bh);
130 set_buffer_dirty(bh);
131 set_buffer_uptodate(bh); 131 set_buffer_uptodate(bh);
132 bh->b_end_io = journal_end_buffer_io_sync; 132 bh->b_end_io = journal_end_buffer_io_sync;
133 133
@@ -147,12 +147,9 @@ static int journal_submit_commit_record(journal_t *journal,
147 * to remember if we sent a barrier request 147 * to remember if we sent a barrier request
148 */ 148 */
149 if (ret == -EOPNOTSUPP && barrier_done) { 149 if (ret == -EOPNOTSUPP && barrier_done) {
150 char b[BDEVNAME_SIZE];
151
152 printk(KERN_WARNING 150 printk(KERN_WARNING
153 "JBD: barrier-based sync failed on %s - " 151 "JBD: barrier-based sync failed on %s - "
154 "disabling barriers\n", 152 "disabling barriers\n", journal->j_devname);
155 bdevname(journal->j_dev, b));
156 spin_lock(&journal->j_state_lock); 153 spin_lock(&journal->j_state_lock);
157 journal->j_flags &= ~JBD2_BARRIER; 154 journal->j_flags &= ~JBD2_BARRIER;
158 spin_unlock(&journal->j_state_lock); 155 spin_unlock(&journal->j_state_lock);
@@ -160,7 +157,7 @@ static int journal_submit_commit_record(journal_t *journal,
160 /* And try again, without the barrier */ 157 /* And try again, without the barrier */
161 lock_buffer(bh); 158 lock_buffer(bh);
162 set_buffer_uptodate(bh); 159 set_buffer_uptodate(bh);
163 set_buffer_dirty(bh); 160 clear_buffer_dirty(bh);
164 ret = submit_bh(WRITE, bh); 161 ret = submit_bh(WRITE, bh);
165 } 162 }
166 *cbh = bh; 163 *cbh = bh;
@@ -371,6 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
371 commit_transaction = journal->j_running_transaction; 368 commit_transaction = journal->j_running_transaction;
372 J_ASSERT(commit_transaction->t_state == T_RUNNING); 369 J_ASSERT(commit_transaction->t_state == T_RUNNING);
373 370
371 trace_mark(jbd2_start_commit, "dev %s transaction %d",
372 journal->j_devname, commit_transaction->t_tid);
374 jbd_debug(1, "JBD: starting commit of transaction %d\n", 373 jbd_debug(1, "JBD: starting commit of transaction %d\n",
375 commit_transaction->t_tid); 374 commit_transaction->t_tid);
376 375
@@ -681,11 +680,9 @@ start_journal_io:
681 */ 680 */
682 err = journal_finish_inode_data_buffers(journal, commit_transaction); 681 err = journal_finish_inode_data_buffers(journal, commit_transaction);
683 if (err) { 682 if (err) {
684 char b[BDEVNAME_SIZE];
685
686 printk(KERN_WARNING 683 printk(KERN_WARNING
687 "JBD2: Detected IO errors while flushing file data " 684 "JBD2: Detected IO errors while flushing file data "
688 "on %s\n", bdevname(journal->j_fs_dev, b)); 685 "on %s\n", journal->j_devname);
689 err = 0; 686 err = 0;
690 } 687 }
691 688
@@ -990,6 +987,9 @@ restart_loop:
990 } 987 }
991 spin_unlock(&journal->j_list_lock); 988 spin_unlock(&journal->j_list_lock);
992 989
990 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
991 journal->j_devname, commit_transaction->t_tid,
992 journal->j_tail_sequence);
993 jbd_debug(1, "JBD: commit %d complete, head %d\n", 993 jbd_debug(1, "JBD: commit %d complete, head %d\n",
994 journal->j_commit_sequence, journal->j_tail_sequence); 994 journal->j_commit_sequence, journal->j_tail_sequence);
995 995
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8207a01c4edb..01c3901c3a07 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -597,13 +597,9 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
597 if (ret) 597 if (ret)
598 *retp = ret; 598 *retp = ret;
599 else { 599 else {
600 char b[BDEVNAME_SIZE];
601
602 printk(KERN_ALERT "%s: journal block not found " 600 printk(KERN_ALERT "%s: journal block not found "
603 "at offset %lu on %s\n", 601 "at offset %lu on %s\n",
604 __func__, 602 __func__, blocknr, journal->j_devname);
605 blocknr,
606 bdevname(journal->j_dev, b));
607 err = -EIO; 603 err = -EIO;
608 __journal_abort_soft(journal, err); 604 __journal_abort_soft(journal, err);
609 } 605 }
@@ -901,10 +897,7 @@ static struct proc_dir_entry *proc_jbd2_stats;
901 897
902static void jbd2_stats_proc_init(journal_t *journal) 898static void jbd2_stats_proc_init(journal_t *journal)
903{ 899{
904 char name[BDEVNAME_SIZE]; 900 journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
905
906 bdevname(journal->j_dev, name);
907 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
908 if (journal->j_proc_entry) { 901 if (journal->j_proc_entry) {
909 proc_create_data("history", S_IRUGO, journal->j_proc_entry, 902 proc_create_data("history", S_IRUGO, journal->j_proc_entry,
910 &jbd2_seq_history_fops, journal); 903 &jbd2_seq_history_fops, journal);
@@ -915,12 +908,9 @@ static void jbd2_stats_proc_init(journal_t *journal)
915 908
916static void jbd2_stats_proc_exit(journal_t *journal) 909static void jbd2_stats_proc_exit(journal_t *journal)
917{ 910{
918 char name[BDEVNAME_SIZE];
919
920 bdevname(journal->j_dev, name);
921 remove_proc_entry("info", journal->j_proc_entry); 911 remove_proc_entry("info", journal->j_proc_entry);
922 remove_proc_entry("history", journal->j_proc_entry); 912 remove_proc_entry("history", journal->j_proc_entry);
923 remove_proc_entry(name, proc_jbd2_stats); 913 remove_proc_entry(journal->j_devname, proc_jbd2_stats);
924} 914}
925 915
926static void journal_init_stats(journal_t *journal) 916static void journal_init_stats(journal_t *journal)
@@ -1018,6 +1008,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
1018{ 1008{
1019 journal_t *journal = journal_init_common(); 1009 journal_t *journal = journal_init_common();
1020 struct buffer_head *bh; 1010 struct buffer_head *bh;
1011 char *p;
1021 int n; 1012 int n;
1022 1013
1023 if (!journal) 1014 if (!journal)
@@ -1039,6 +1030,10 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
1039 journal->j_fs_dev = fs_dev; 1030 journal->j_fs_dev = fs_dev;
1040 journal->j_blk_offset = start; 1031 journal->j_blk_offset = start;
1041 journal->j_maxlen = len; 1032 journal->j_maxlen = len;
1033 bdevname(journal->j_dev, journal->j_devname);
1034 p = journal->j_devname;
1035 while ((p = strchr(p, '/')))
1036 *p = '!';
1042 jbd2_stats_proc_init(journal); 1037 jbd2_stats_proc_init(journal);
1043 1038
1044 bh = __getblk(journal->j_dev, start, journal->j_blocksize); 1039 bh = __getblk(journal->j_dev, start, journal->j_blocksize);
@@ -1061,6 +1056,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1061{ 1056{
1062 struct buffer_head *bh; 1057 struct buffer_head *bh;
1063 journal_t *journal = journal_init_common(); 1058 journal_t *journal = journal_init_common();
1059 char *p;
1064 int err; 1060 int err;
1065 int n; 1061 int n;
1066 unsigned long long blocknr; 1062 unsigned long long blocknr;
@@ -1070,6 +1066,12 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1070 1066
1071 journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; 1067 journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
1072 journal->j_inode = inode; 1068 journal->j_inode = inode;
1069 bdevname(journal->j_dev, journal->j_devname);
1070 p = journal->j_devname;
1071 while ((p = strchr(p, '/')))
1072 *p = '!';
1073 p = journal->j_devname + strlen(journal->j_devname);
1074 sprintf(p, ":%lu", journal->j_inode->i_ino);
1073 jbd_debug(1, 1075 jbd_debug(1,
1074 "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", 1076 "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
1075 journal, inode->i_sb->s_id, inode->i_ino, 1077 journal, inode->i_sb->s_id, inode->i_ino,
@@ -1253,6 +1255,22 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1253 goto out; 1255 goto out;
1254 } 1256 }
1255 1257
1258 if (buffer_write_io_error(bh)) {
1259 /*
1260 * Oh, dear. A previous attempt to write the journal
1261 * superblock failed. This could happen because the
1262 * USB device was yanked out. Or it could happen to
1263 * be a transient write error and maybe the block will
1264 * be remapped. Nothing we can do but to retry the
1265 * write and hope for the best.
1266 */
1267 printk(KERN_ERR "JBD2: previous I/O error detected "
1268 "for journal superblock update for %s.\n",
1269 journal->j_devname);
1270 clear_buffer_write_io_error(bh);
1271 set_buffer_uptodate(bh);
1272 }
1273
1256 spin_lock(&journal->j_state_lock); 1274 spin_lock(&journal->j_state_lock);
1257 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", 1275 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
1258 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1276 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
@@ -1264,9 +1282,16 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1264 1282
1265 BUFFER_TRACE(bh, "marking dirty"); 1283 BUFFER_TRACE(bh, "marking dirty");
1266 mark_buffer_dirty(bh); 1284 mark_buffer_dirty(bh);
1267 if (wait) 1285 if (wait) {
1268 sync_dirty_buffer(bh); 1286 sync_dirty_buffer(bh);
1269 else 1287 if (buffer_write_io_error(bh)) {
1288 printk(KERN_ERR "JBD2: I/O error detected "
1289 "when updating journal superblock for %s.\n",
1290 journal->j_devname);
1291 clear_buffer_write_io_error(bh);
1292 set_buffer_uptodate(bh);
1293 }
1294 } else
1270 ll_rw_block(SWRITE, 1, &bh); 1295 ll_rw_block(SWRITE, 1, &bh);
1271 1296
1272out: 1297out:
@@ -1761,23 +1786,6 @@ int jbd2_journal_wipe(journal_t *journal, int write)
1761} 1786}
1762 1787
1763/* 1788/*
1764 * journal_dev_name: format a character string to describe on what
1765 * device this journal is present.
1766 */
1767
1768static const char *journal_dev_name(journal_t *journal, char *buffer)
1769{
1770 struct block_device *bdev;
1771
1772 if (journal->j_inode)
1773 bdev = journal->j_inode->i_sb->s_bdev;
1774 else
1775 bdev = journal->j_dev;
1776
1777 return bdevname(bdev, buffer);
1778}
1779
1780/*
1781 * Journal abort has very specific semantics, which we describe 1789 * Journal abort has very specific semantics, which we describe
1782 * for journal abort. 1790 * for journal abort.
1783 * 1791 *
@@ -1793,13 +1801,12 @@ static const char *journal_dev_name(journal_t *journal, char *buffer)
1793void __jbd2_journal_abort_hard(journal_t *journal) 1801void __jbd2_journal_abort_hard(journal_t *journal)
1794{ 1802{
1795 transaction_t *transaction; 1803 transaction_t *transaction;
1796 char b[BDEVNAME_SIZE];
1797 1804
1798 if (journal->j_flags & JBD2_ABORT) 1805 if (journal->j_flags & JBD2_ABORT)
1799 return; 1806 return;
1800 1807
1801 printk(KERN_ERR "Aborting journal on device %s.\n", 1808 printk(KERN_ERR "Aborting journal on device %s.\n",
1802 journal_dev_name(journal, b)); 1809 journal->j_devname);
1803 1810
1804 spin_lock(&journal->j_state_lock); 1811 spin_lock(&journal->j_state_lock);
1805 journal->j_flags |= JBD2_ABORT; 1812 journal->j_flags |= JBD2_ABORT;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 10bfb466e068..29ff57ec5d1f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -990,15 +990,6 @@ out:
990} 990}
991 991
992/* 992/*
993 * This is only valid for leaf nodes, which are the only ones that can
994 * have empty extents anyway.
995 */
996static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
997{
998 return !rec->e_leaf_clusters;
999}
1000
1001/*
1002 * This function will discard the rightmost extent record. 993 * This function will discard the rightmost extent record.
1003 */ 994 */
1004static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) 995static void ocfs2_shift_records_right(struct ocfs2_extent_list *el)
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 42ff94bd8011..60cd3d59230c 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -146,4 +146,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el,
146 return le16_to_cpu(rec->e_leaf_clusters); 146 return le16_to_cpu(rec->e_leaf_clusters);
147} 147}
148 148
149/*
150 * This is only valid for leaf nodes, which are the only ones that can
151 * have empty extents anyway.
152 */
153static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
154{
155 return !rec->e_leaf_clusters;
156}
157
149#endif /* OCFS2_ALLOC_H */ 158#endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index c58668a326fe..aed268e80b49 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -25,6 +25,7 @@
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/fiemap.h>
28 29
29#define MLOG_MASK_PREFIX ML_EXTENT_MAP 30#define MLOG_MASK_PREFIX ML_EXTENT_MAP
30#include <cluster/masklog.h> 31#include <cluster/masklog.h>
@@ -32,6 +33,7 @@
32#include "ocfs2.h" 33#include "ocfs2.h"
33 34
34#include "alloc.h" 35#include "alloc.h"
36#include "dlmglue.h"
35#include "extent_map.h" 37#include "extent_map.h"
36#include "inode.h" 38#include "inode.h"
37#include "super.h" 39#include "super.h"
@@ -282,6 +284,51 @@ out:
282 kfree(new_emi); 284 kfree(new_emi);
283} 285}
284 286
287static int ocfs2_last_eb_is_empty(struct inode *inode,
288 struct ocfs2_dinode *di)
289{
290 int ret, next_free;
291 u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
292 struct buffer_head *eb_bh = NULL;
293 struct ocfs2_extent_block *eb;
294 struct ocfs2_extent_list *el;
295
296 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk,
297 &eb_bh, OCFS2_BH_CACHED, inode);
298 if (ret) {
299 mlog_errno(ret);
300 goto out;
301 }
302
303 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
304 el = &eb->h_list;
305
306 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
307 ret = -EROFS;
308 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
309 goto out;
310 }
311
312 if (el->l_tree_depth) {
313 ocfs2_error(inode->i_sb,
314 "Inode %lu has non zero tree depth in "
315 "leaf block %llu\n", inode->i_ino,
316 (unsigned long long)eb_bh->b_blocknr);
317 ret = -EROFS;
318 goto out;
319 }
320
321 next_free = le16_to_cpu(el->l_next_free_rec);
322
323 if (next_free == 0 ||
324 (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
325 ret = 1;
326
327out:
328 brelse(eb_bh);
329 return ret;
330}
331
285/* 332/*
286 * Return the 1st index within el which contains an extent start 333 * Return the 1st index within el which contains an extent start
287 * larger than v_cluster. 334 * larger than v_cluster.
@@ -373,42 +420,28 @@ out:
373 return ret; 420 return ret;
374} 421}
375 422
376int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, 423static int ocfs2_get_clusters_nocache(struct inode *inode,
377 u32 *p_cluster, u32 *num_clusters, 424 struct buffer_head *di_bh,
378 unsigned int *extent_flags) 425 u32 v_cluster, unsigned int *hole_len,
426 struct ocfs2_extent_rec *ret_rec,
427 unsigned int *is_last)
379{ 428{
380 int ret, i; 429 int i, ret, tree_height, len;
381 unsigned int flags = 0;
382 struct buffer_head *di_bh = NULL;
383 struct buffer_head *eb_bh = NULL;
384 struct ocfs2_dinode *di; 430 struct ocfs2_dinode *di;
385 struct ocfs2_extent_block *eb; 431 struct ocfs2_extent_block *uninitialized_var(eb);
386 struct ocfs2_extent_list *el; 432 struct ocfs2_extent_list *el;
387 struct ocfs2_extent_rec *rec; 433 struct ocfs2_extent_rec *rec;
388 u32 coff; 434 struct buffer_head *eb_bh = NULL;
389
390 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
391 ret = -ERANGE;
392 mlog_errno(ret);
393 goto out;
394 }
395
396 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
397 num_clusters, extent_flags);
398 if (ret == 0)
399 goto out;
400 435
401 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, 436 memset(ret_rec, 0, sizeof(*ret_rec));
402 &di_bh, OCFS2_BH_CACHED, inode); 437 if (is_last)
403 if (ret) { 438 *is_last = 0;
404 mlog_errno(ret);
405 goto out;
406 }
407 439
408 di = (struct ocfs2_dinode *) di_bh->b_data; 440 di = (struct ocfs2_dinode *) di_bh->b_data;
409 el = &di->id2.i_list; 441 el = &di->id2.i_list;
442 tree_height = le16_to_cpu(el->l_tree_depth);
410 443
411 if (el->l_tree_depth) { 444 if (tree_height > 0) {
412 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); 445 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
413 if (ret) { 446 if (ret) {
414 mlog_errno(ret); 447 mlog_errno(ret);
@@ -431,46 +464,143 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
431 i = ocfs2_search_extent_list(el, v_cluster); 464 i = ocfs2_search_extent_list(el, v_cluster);
432 if (i == -1) { 465 if (i == -1) {
433 /* 466 /*
434 * A hole was found. Return some canned values that 467 * Holes can be larger than the maximum size of an
435 * callers can key on. If asked for, num_clusters will 468 * extent, so we return their lengths in a seperate
436 * be populated with the size of the hole. 469 * field.
437 */ 470 */
438 *p_cluster = 0; 471 if (hole_len) {
439 if (num_clusters) {
440 ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, 472 ret = ocfs2_figure_hole_clusters(inode, el, eb_bh,
441 v_cluster, 473 v_cluster, &len);
442 num_clusters);
443 if (ret) { 474 if (ret) {
444 mlog_errno(ret); 475 mlog_errno(ret);
445 goto out; 476 goto out;
446 } 477 }
478
479 *hole_len = len;
447 } 480 }
448 } else { 481 goto out_hole;
449 rec = &el->l_recs[i]; 482 }
450 483
451 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); 484 rec = &el->l_recs[i];
452 485
453 if (!rec->e_blkno) { 486 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
454 ocfs2_error(inode->i_sb, "Inode %lu has bad extent " 487
455 "record (%u, %u, 0)", inode->i_ino, 488 if (!rec->e_blkno) {
456 le32_to_cpu(rec->e_cpos), 489 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
457 ocfs2_rec_clusters(el, rec)); 490 "record (%u, %u, 0)", inode->i_ino,
458 ret = -EROFS; 491 le32_to_cpu(rec->e_cpos),
459 goto out; 492 ocfs2_rec_clusters(el, rec));
493 ret = -EROFS;
494 goto out;
495 }
496
497 *ret_rec = *rec;
498
499 /*
500 * Checking for last extent is potentially expensive - we
501 * might have to look at the next leaf over to see if it's
502 * empty.
503 *
504 * The first two checks are to see whether the caller even
505 * cares for this information, and if the extent is at least
506 * the last in it's list.
507 *
508 * If those hold true, then the extent is last if any of the
509 * additional conditions hold true:
510 * - Extent list is in-inode
511 * - Extent list is right-most
512 * - Extent list is 2nd to rightmost, with empty right-most
513 */
514 if (is_last) {
515 if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
516 if (tree_height == 0)
517 *is_last = 1;
518 else if (eb->h_blkno == di->i_last_eb_blk)
519 *is_last = 1;
520 else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
521 ret = ocfs2_last_eb_is_empty(inode, di);
522 if (ret < 0) {
523 mlog_errno(ret);
524 goto out;
525 }
526 if (ret == 1)
527 *is_last = 1;
528 }
460 } 529 }
530 }
531
532out_hole:
533 ret = 0;
534out:
535 brelse(eb_bh);
536 return ret;
537}
538
539static void ocfs2_relative_extent_offsets(struct super_block *sb,
540 u32 v_cluster,
541 struct ocfs2_extent_rec *rec,
542 u32 *p_cluster, u32 *num_clusters)
543
544{
545 u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
546
547 *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
548 *p_cluster = *p_cluster + coff;
549
550 if (num_clusters)
551 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
552}
553
554int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
555 u32 *p_cluster, u32 *num_clusters,
556 unsigned int *extent_flags)
557{
558 int ret;
559 unsigned int uninitialized_var(hole_len), flags = 0;
560 struct buffer_head *di_bh = NULL;
561 struct ocfs2_extent_rec rec;
461 562
462 coff = v_cluster - le32_to_cpu(rec->e_cpos); 563 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
564 ret = -ERANGE;
565 mlog_errno(ret);
566 goto out;
567 }
463 568
464 *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, 569 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
465 le64_to_cpu(rec->e_blkno)); 570 num_clusters, extent_flags);
466 *p_cluster = *p_cluster + coff; 571 if (ret == 0)
572 goto out;
467 573
468 if (num_clusters) 574 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
469 *num_clusters = ocfs2_rec_clusters(el, rec) - coff; 575 &di_bh, OCFS2_BH_CACHED, inode);
576 if (ret) {
577 mlog_errno(ret);
578 goto out;
579 }
470 580
471 flags = rec->e_flags; 581 ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
582 &rec, NULL);
583 if (ret) {
584 mlog_errno(ret);
585 goto out;
586 }
472 587
473 ocfs2_extent_map_insert_rec(inode, rec); 588 if (rec.e_blkno == 0ULL) {
589 /*
590 * A hole was found. Return some canned values that
591 * callers can key on. If asked for, num_clusters will
592 * be populated with the size of the hole.
593 */
594 *p_cluster = 0;
595 if (num_clusters) {
596 *num_clusters = hole_len;
597 }
598 } else {
599 ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
600 p_cluster, num_clusters);
601 flags = rec.e_flags;
602
603 ocfs2_extent_map_insert_rec(inode, &rec);
474 } 604 }
475 605
476 if (extent_flags) 606 if (extent_flags)
@@ -478,7 +608,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
478 608
479out: 609out:
480 brelse(di_bh); 610 brelse(di_bh);
481 brelse(eb_bh);
482 return ret; 611 return ret;
483} 612}
484 613
@@ -521,3 +650,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
521out: 650out:
522 return ret; 651 return ret;
523} 652}
653
654static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
655 struct fiemap_extent_info *fieinfo,
656 u64 map_start)
657{
658 int ret;
659 unsigned int id_count;
660 struct ocfs2_dinode *di;
661 u64 phys;
662 u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
663 struct ocfs2_inode_info *oi = OCFS2_I(inode);
664
665 di = (struct ocfs2_dinode *)di_bh->b_data;
666 id_count = le16_to_cpu(di->id2.i_data.id_count);
667
668 if (map_start < id_count) {
669 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
670 phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data);
671
672 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
673 flags);
674 if (ret < 0)
675 return ret;
676 }
677
678 return 0;
679}
680
681#define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
682
683int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
684 u64 map_start, u64 map_len)
685{
686 int ret, is_last;
687 u32 mapping_end, cpos;
688 unsigned int hole_size;
689 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
690 u64 len_bytes, phys_bytes, virt_bytes;
691 struct buffer_head *di_bh = NULL;
692 struct ocfs2_extent_rec rec;
693
694 ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
695 if (ret)
696 return ret;
697
698 ret = ocfs2_inode_lock(inode, &di_bh, 0);
699 if (ret) {
700 mlog_errno(ret);
701 goto out;
702 }
703
704 down_read(&OCFS2_I(inode)->ip_alloc_sem);
705
706 /*
707 * Handle inline-data separately.
708 */
709 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
710 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
711 goto out_unlock;
712 }
713
714 cpos = map_start >> osb->s_clustersize_bits;
715 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
716 map_start + map_len);
717 mapping_end -= cpos;
718 is_last = 0;
719 while (cpos < mapping_end && !is_last) {
720 u32 fe_flags;
721
722 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
723 &hole_size, &rec, &is_last);
724 if (ret) {
725 mlog_errno(ret);
726 goto out;
727 }
728
729 if (rec.e_blkno == 0ULL) {
730 cpos += hole_size;
731 continue;
732 }
733
734 fe_flags = 0;
735 if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
736 fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
737 if (is_last)
738 fe_flags |= FIEMAP_EXTENT_LAST;
739 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
740 phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
741 virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
742
743 ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
744 len_bytes, fe_flags);
745 if (ret)
746 break;
747
748 cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
749 }
750
751 if (ret > 0)
752 ret = 0;
753
754out_unlock:
755 brelse(di_bh);
756
757 up_read(&OCFS2_I(inode)->ip_alloc_sem);
758
759 ocfs2_inode_unlock(inode, 0);
760out:
761
762 return ret;
763}
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index de91e3e41a22..1b97490e1ea8 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -50,4 +50,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster,
50int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, 50int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
51 u64 *ret_count, unsigned int *extent_flags); 51 u64 *ret_count, unsigned int *extent_flags);
52 52
53int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
54 u64 map_start, u64 map_len);
55
53#endif /* _EXTENT_MAP_H */ 56#endif /* _EXTENT_MAP_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ec2ed15c3daa..ed38796052d2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2228,6 +2228,7 @@ const struct inode_operations ocfs2_file_iops = {
2228 .getattr = ocfs2_getattr, 2228 .getattr = ocfs2_getattr,
2229 .permission = ocfs2_permission, 2229 .permission = ocfs2_permission,
2230 .fallocate = ocfs2_fallocate, 2230 .fallocate = ocfs2_fallocate,
2231 .fiemap = ocfs2_fiemap,
2231}; 2232};
2232 2233
2233const struct inode_operations ocfs2_special_file_iops = { 2234const struct inode_operations ocfs2_special_file_iops = {
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index cb752ba72466..7440a0dceddb 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -385,6 +385,7 @@
385 . = ALIGN(align); \ 385 . = ALIGN(align); \
386 VMLINUX_SYMBOL(__per_cpu_start) = .; \ 386 VMLINUX_SYMBOL(__per_cpu_start) = .; \
387 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ 387 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
388 *(.data.percpu.page_aligned) \
388 *(.data.percpu) \ 389 *(.data.percpu) \
389 *(.data.percpu.shared_aligned) \ 390 *(.data.percpu.shared_aligned) \
390 } \ 391 } \
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 65590c9aecd4..d76a0839abe9 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -9,6 +9,8 @@
9#include <asm/apicdef.h> 9#include <asm/apicdef.h>
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/system.h> 11#include <asm/system.h>
12#include <asm/cpufeature.h>
13#include <asm/msr.h>
12 14
13#define ARCH_APICTIMER_STOPS_ON_C3 1 15#define ARCH_APICTIMER_STOPS_ON_C3 1
14 16
@@ -47,8 +49,6 @@ extern int disable_apic;
47#ifdef CONFIG_PARAVIRT 49#ifdef CONFIG_PARAVIRT
48#include <asm/paravirt.h> 50#include <asm/paravirt.h>
49#else 51#else
50#define apic_write native_apic_write
51#define apic_read native_apic_read
52#define setup_boot_clock setup_boot_APIC_clock 52#define setup_boot_clock setup_boot_APIC_clock
53#define setup_secondary_clock setup_secondary_APIC_clock 53#define setup_secondary_clock setup_secondary_APIC_clock
54#endif 54#endif
@@ -60,7 +60,7 @@ extern u64 xapic_icr_read(void);
60extern void xapic_icr_write(u32, u32); 60extern void xapic_icr_write(u32, u32);
61extern int setup_profiling_timer(unsigned int); 61extern int setup_profiling_timer(unsigned int);
62 62
63static inline void native_apic_write(unsigned long reg, u32 v) 63static inline void native_apic_mem_write(u32 reg, u32 v)
64{ 64{
65 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); 65 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
66 66
@@ -69,15 +69,68 @@ static inline void native_apic_write(unsigned long reg, u32 v)
69 ASM_OUTPUT2("0" (v), "m" (*addr))); 69 ASM_OUTPUT2("0" (v), "m" (*addr)));
70} 70}
71 71
72static inline u32 native_apic_read(unsigned long reg) 72static inline u32 native_apic_mem_read(u32 reg)
73{ 73{
74 return *((volatile u32 *)(APIC_BASE + reg)); 74 return *((volatile u32 *)(APIC_BASE + reg));
75} 75}
76 76
77extern void apic_wait_icr_idle(void); 77static inline void native_apic_msr_write(u32 reg, u32 v)
78extern u32 safe_apic_wait_icr_idle(void); 78{
79 if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
80 reg == APIC_LVR)
81 return;
82
83 wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
84}
85
86static inline u32 native_apic_msr_read(u32 reg)
87{
88 u32 low, high;
89
90 if (reg == APIC_DFR)
91 return -1;
92
93 rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
94 return low;
95}
96
97#ifndef CONFIG_X86_32
98extern int x2apic, x2apic_preenabled;
99extern void check_x2apic(void);
100extern void enable_x2apic(void);
101extern void enable_IR_x2apic(void);
102extern void x2apic_icr_write(u32 low, u32 id);
103#endif
104
105struct apic_ops {
106 u32 (*read)(u32 reg);
107 void (*write)(u32 reg, u32 v);
108 u64 (*icr_read)(void);
109 void (*icr_write)(u32 low, u32 high);
110 void (*wait_icr_idle)(void);
111 u32 (*safe_wait_icr_idle)(void);
112};
113
114extern struct apic_ops *apic_ops;
115
116#define apic_read (apic_ops->read)
117#define apic_write (apic_ops->write)
118#define apic_icr_read (apic_ops->icr_read)
119#define apic_icr_write (apic_ops->icr_write)
120#define apic_wait_icr_idle (apic_ops->wait_icr_idle)
121#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
122
79extern int get_physical_broadcast(void); 123extern int get_physical_broadcast(void);
80 124
125#ifdef CONFIG_X86_64
126static inline void ack_x2APIC_irq(void)
127{
128 /* Docs say use 0 for future compatibility */
129 native_apic_msr_write(APIC_EOI, 0);
130}
131#endif
132
133
81static inline void ack_APIC_irq(void) 134static inline void ack_APIC_irq(void)
82{ 135{
83 /* 136 /*
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index c40687da20fc..b922c85ac91d 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -105,6 +105,7 @@
105#define APIC_TMICT 0x380 105#define APIC_TMICT 0x380
106#define APIC_TMCCT 0x390 106#define APIC_TMCCT 0x390
107#define APIC_TDCR 0x3E0 107#define APIC_TDCR 0x3E0
108#define APIC_SELF_IPI 0x3F0
108#define APIC_TDR_DIV_TMBASE (1 << 2) 109#define APIC_TDR_DIV_TMBASE (1 << 2)
109#define APIC_TDR_DIV_1 0xB 110#define APIC_TDR_DIV_1 0xB
110#define APIC_TDR_DIV_2 0x0 111#define APIC_TDR_DIV_2 0x0
@@ -128,6 +129,8 @@
128#define APIC_EILVT3 0x530 129#define APIC_EILVT3 0x530
129 130
130#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) 131#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
132#define APIC_BASE_MSR 0x800
133#define X2APIC_ENABLE (1UL << 10)
131 134
132#ifdef CONFIG_X86_32 135#ifdef CONFIG_X86_32
133# define MAX_IO_APICS 64 136# define MAX_IO_APICS 64
diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h
index 72adc3a109cc..de4596b24c23 100644
--- a/include/asm-x86/arch_hooks.h
+++ b/include/asm-x86/arch_hooks.h
@@ -12,8 +12,6 @@
12/* these aren't arch hooks, they are generic routines 12/* these aren't arch hooks, they are generic routines
13 * that can be used by the hooks */ 13 * that can be used by the hooks */
14extern void init_ISA_irqs(void); 14extern void init_ISA_irqs(void);
15extern void apic_intr_init(void);
16extern void smp_intr_init(void);
17extern irqreturn_t timer_interrupt(int irq, void *dev_id); 15extern irqreturn_t timer_interrupt(int irq, void *dev_id);
18 16
19/* these are the defined hooks */ 17/* these are the defined hooks */
diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/bigsmp/apic.h
index 05362d44a3ee..0a9cd7c5ca0c 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/bigsmp/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_BIGSMP__MACH_APIC_H 1#ifndef __ASM_MACH_APIC_H
2#define ASM_X86__MACH_BIGSMP__MACH_APIC_H 2#define __ASM_MACH_APIC_H
3 3
4#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) 4#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu))
5#define esr_disable (1) 5#define esr_disable (1)
@@ -11,7 +11,7 @@ static inline int apic_id_registered(void)
11 11
12/* Round robin the irqs amoung the online cpus */ 12/* Round robin the irqs amoung the online cpus */
13static inline cpumask_t target_cpus(void) 13static inline cpumask_t target_cpus(void)
14{ 14{
15 static unsigned long cpu = NR_CPUS; 15 static unsigned long cpu = NR_CPUS;
16 do { 16 do {
17 if (cpu >= NR_CPUS) 17 if (cpu >= NR_CPUS)
@@ -23,7 +23,7 @@ static inline cpumask_t target_cpus(void)
23} 23}
24 24
25#undef APIC_DEST_LOGICAL 25#undef APIC_DEST_LOGICAL
26#define APIC_DEST_LOGICAL 0 26#define APIC_DEST_LOGICAL 0
27#define TARGET_CPUS (target_cpus()) 27#define TARGET_CPUS (target_cpus())
28#define APIC_DFR_VALUE (APIC_DFR_FLAT) 28#define APIC_DFR_VALUE (APIC_DFR_FLAT)
29#define INT_DELIVERY_MODE (dest_Fixed) 29#define INT_DELIVERY_MODE (dest_Fixed)
@@ -141,4 +141,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
141 return cpuid_apic >> index_msb; 141 return cpuid_apic >> index_msb;
142} 142}
143 143
144#endif /* ASM_X86__MACH_BIGSMP__MACH_APIC_H */ 144#endif /* __ASM_MACH_APIC_H */
diff --git a/include/asm-x86/bigsmp/apicdef.h b/include/asm-x86/bigsmp/apicdef.h
new file mode 100644
index 000000000000..392c3f5ef2fe
--- /dev/null
+++ b/include/asm-x86/bigsmp/apicdef.h
@@ -0,0 +1,13 @@
1#ifndef __ASM_MACH_APICDEF_H
2#define __ASM_MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif
diff --git a/include/asm-x86/mach-bigsmp/mach_ipi.h b/include/asm-x86/bigsmp/ipi.h
index b1b0f966a009..9404c535b7ec 100644
--- a/include/asm-x86/mach-bigsmp/mach_ipi.h
+++ b/include/asm-x86/bigsmp/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_BIGSMP__MACH_IPI_H 1#ifndef __ASM_MACH_IPI_H
2#define ASM_X86__MACH_BIGSMP__MACH_IPI_H 2#define __ASM_MACH_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(cpumask_t mask, int vector);
5 5
@@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector)
22 send_IPI_mask(cpu_online_map, vector); 22 send_IPI_mask(cpu_online_map, vector);
23} 23}
24 24
25#endif /* ASM_X86__MACH_BIGSMP__MACH_IPI_H */ 25#endif /* __ASM_MACH_IPI_H */
diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h
index 4761c461d23a..dc604985f2ad 100644
--- a/include/asm-x86/bugs.h
+++ b/include/asm-x86/bugs.h
@@ -2,6 +2,11 @@
2#define ASM_X86__BUGS_H 2#define ASM_X86__BUGS_H
3 3
4extern void check_bugs(void); 4extern void check_bugs(void);
5
6#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
5int ppro_with_ram_bug(void); 7int ppro_with_ram_bug(void);
8#else
9static inline int ppro_with_ram_bug(void) { return 0; }
10#endif
6 11
7#endif /* ASM_X86__BUGS_H */ 12#endif /* ASM_X86__BUGS_H */
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 250fa0cb144b..adfeae6586e1 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -6,7 +6,13 @@
6 6
7#include <asm/required-features.h> 7#include <asm/required-features.h>
8 8
9#define NCAPINTS 8 /* N 32-bit words worth of info */ 9#define NCAPINTS 9 /* N 32-bit words worth of info */
10
11/*
12 * Note: If the comment begins with a quoted string, that string is used
13 * in /proc/cpuinfo instead of the macro name. If the string is "",
14 * this feature bit is not displayed in /proc/cpuinfo at all.
15 */
10 16
11/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ 17/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
12#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ 18#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
@@ -14,7 +20,7 @@
14#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ 20#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
15#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ 21#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
16#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ 22#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
17#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ 23#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */
18#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ 24#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
19#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ 25#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
20#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ 26#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
@@ -23,22 +29,23 @@
23#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ 29#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
24#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ 30#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
25#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ 31#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
26#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ 32#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions */
33 /* (plus FCMOVcc, FCOMI with FPU) */
27#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ 34#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
28#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ 35#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
29#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ 36#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
30#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ 37#define X86_FEATURE_CLFLSH (0*32+19) /* "clflush" CLFLUSH instruction */
31#define X86_FEATURE_DS (0*32+21) /* Debug Store */ 38#define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */
32#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ 39#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
33#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ 40#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
34#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ 41#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
35 /* of FPU context), and CR4.OSFXSR available */ 42#define X86_FEATURE_XMM (0*32+25) /* "sse" */
36#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ 43#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */
37#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ 44#define X86_FEATURE_SELFSNOOP (0*32+27) /* "ss" CPU self snoop */
38#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
39#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ 45#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
40#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ 46#define X86_FEATURE_ACC (0*32+29) /* "tm" Automatic clock control */
41#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ 47#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
48#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */
42 49
43/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ 50/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
44/* Don't duplicate feature flags which are redundant with Intel! */ 51/* Don't duplicate feature flags which are redundant with Intel! */
@@ -46,7 +53,8 @@
46#define X86_FEATURE_MP (1*32+19) /* MP Capable. */ 53#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
47#define X86_FEATURE_NX (1*32+20) /* Execute Disable */ 54#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
48#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ 55#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
49#define X86_FEATURE_GBPAGES (1*32+26) /* GB pages */ 56#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSAVE/FXRSTOR optimizations */
57#define X86_FEATURE_GBPAGES (1*32+26) /* "pdpe1gb" GB pages */
50#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ 58#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
51#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ 59#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
52#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ 60#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
@@ -64,53 +72,79 @@
64#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ 72#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
65#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ 73#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
66/* cpu types for specific tunings: */ 74/* cpu types for specific tunings: */
67#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */ 75#define X86_FEATURE_K8 (3*32+ 4) /* "" Opteron, Athlon64 */
68#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */ 76#define X86_FEATURE_K7 (3*32+ 5) /* "" Athlon */
69#define X86_FEATURE_P3 (3*32+ 6) /* P3 */ 77#define X86_FEATURE_P3 (3*32+ 6) /* "" P3 */
70#define X86_FEATURE_P4 (3*32+ 7) /* P4 */ 78#define X86_FEATURE_P4 (3*32+ 7) /* "" P4 */
71#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ 79#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
72#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ 80#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */
73#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ 81#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
74#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ 82#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
83#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
75#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ 84#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
76#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ 85#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
77#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */ 86#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */
78#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */ 87#define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in ia32 userspace */
79#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ 88#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well */
80#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ 89#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */
81#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ 90#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
82#define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ 91#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */
83#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ 92#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
84#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ 93#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */
94#define X86_FEATURE_XTOPOLOGY (3*32+21) /* cpu topology enum extensions */
85 95
86/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 96/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
87#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ 97#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
88#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */ 98#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */
89#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */ 99#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */
100#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */
101#define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
102#define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */
103#define X86_FEATURE_SMX (4*32+ 6) /* Safer mode */
90#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ 104#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
91#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ 105#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
106#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
92#define X86_FEATURE_CID (4*32+10) /* Context ID */ 107#define X86_FEATURE_CID (4*32+10) /* Context ID */
108#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */
93#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ 109#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
94#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ 110#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
111#define X86_FEATURE_PDCM (4*32+15) /* Performance Capabilities */
95#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ 112#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
96#define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */ 113#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */
114#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */
115#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
116#define X86_FEATURE_AES (4*32+25) /* AES instructions */
117#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
118#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
119#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
97 120
98/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ 121/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
99#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ 122#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */
100#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */ 123#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* "rng_en" RNG enabled */
101#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */ 124#define X86_FEATURE_XCRYPT (5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
102#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */ 125#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* "ace_en" on-CPU crypto enabled */
103#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */ 126#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */
104#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */ 127#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */
105#define X86_FEATURE_PHE (5*32+ 10) /* PadLock Hash Engine */ 128#define X86_FEATURE_PHE (5*32+10) /* PadLock Hash Engine */
106#define X86_FEATURE_PHE_EN (5*32+ 11) /* PHE enabled */ 129#define X86_FEATURE_PHE_EN (5*32+11) /* PHE enabled */
107#define X86_FEATURE_PMM (5*32+ 12) /* PadLock Montgomery Multiplier */ 130#define X86_FEATURE_PMM (5*32+12) /* PadLock Montgomery Multiplier */
108#define X86_FEATURE_PMM_EN (5*32+ 13) /* PMM enabled */ 131#define X86_FEATURE_PMM_EN (5*32+13) /* PMM enabled */
109 132
110/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ 133/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
111#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ 134#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
112#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ 135#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
113#define X86_FEATURE_IBS (6*32+ 10) /* Instruction Based Sampling */ 136#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */
137#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */
138#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */
139#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */
140#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */
141#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */
142#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
143#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
144#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
145#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */
146#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
147#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
114 148
115/* 149/*
116 * Auxiliary flags: Linux defined - For features scattered in various 150 * Auxiliary flags: Linux defined - For features scattered in various
@@ -118,6 +152,13 @@
118 */ 152 */
119#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ 153#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */
120 154
155/* Virtualization flags: Linux defined */
156#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
157#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */
158#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */
159#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */
160#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */
161
121#if defined(__KERNEL__) && !defined(__ASSEMBLY__) 162#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
122 163
123#include <linux/bitops.h> 164#include <linux/bitops.h>
@@ -151,7 +192,7 @@ extern const char * const x86_power_flags[32];
151} while (0) 192} while (0)
152#define setup_force_cpu_cap(bit) do { \ 193#define setup_force_cpu_cap(bit) do { \
153 set_cpu_cap(&boot_cpu_data, bit); \ 194 set_cpu_cap(&boot_cpu_data, bit); \
154 clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ 195 clear_bit(bit, (unsigned long *)cleared_cpu_caps); \
155} while (0) 196} while (0)
156 197
157#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) 198#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
@@ -192,7 +233,10 @@ extern const char * const x86_power_flags[32];
192#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) 233#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)
193#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) 234#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
194#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) 235#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
236#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1)
195#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) 237#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
238#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
239#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
196 240
197#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 241#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
198# define cpu_has_invlpg 1 242# define cpu_has_invlpg 1
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index f52daf176bcb..5abbdec06bd2 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -43,6 +43,7 @@
43#define E820_RESERVED 2 43#define E820_RESERVED 2
44#define E820_ACPI 3 44#define E820_ACPI 3
45#define E820_NVS 4 45#define E820_NVS 4
46#define E820_UNUSABLE 5
46 47
47/* reserved RAM used by kernel itself */ 48/* reserved RAM used by kernel itself */
48#define E820_RESERVED_KERN 128 49#define E820_RESERVED_KERN 128
@@ -121,6 +122,7 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn,
121extern u64 e820_hole_size(u64 start, u64 end); 122extern u64 e820_hole_size(u64 start, u64 end);
122extern void finish_e820_parsing(void); 123extern void finish_e820_parsing(void);
123extern void e820_reserve_resources(void); 124extern void e820_reserve_resources(void);
125extern void e820_reserve_resources_late(void);
124extern void setup_memory_map(void); 126extern void setup_memory_map(void);
125extern char *default_machine_specific_memory_setup(void); 127extern char *default_machine_specific_memory_setup(void);
126extern char *machine_specific_memory_setup(void); 128extern char *machine_specific_memory_setup(void);
diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/es7000/apic.h
index c1f6f682d619..bd2c44d1f7ac 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/es7000/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_APIC_H 1#ifndef __ASM_ES7000_APIC_H
2#define ASM_X86__MACH_ES7000__MACH_APIC_H 2#define __ASM_ES7000_APIC_H
3 3
4#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) 4#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu)
5#define esr_disable (1) 5#define esr_disable (1)
@@ -10,7 +10,7 @@ static inline int apic_id_registered(void)
10} 10}
11 11
12static inline cpumask_t target_cpus(void) 12static inline cpumask_t target_cpus(void)
13{ 13{
14#if defined CONFIG_ES7000_CLUSTERED_APIC 14#if defined CONFIG_ES7000_CLUSTERED_APIC
15 return CPU_MASK_ALL; 15 return CPU_MASK_ALL;
16#else 16#else
@@ -23,24 +23,24 @@ static inline cpumask_t target_cpus(void)
23#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 23#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
24#define INT_DELIVERY_MODE (dest_LowestPrio) 24#define INT_DELIVERY_MODE (dest_LowestPrio)
25#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ 25#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */
26#define NO_BALANCE_IRQ (1) 26#define NO_BALANCE_IRQ (1)
27#undef WAKE_SECONDARY_VIA_INIT 27#undef WAKE_SECONDARY_VIA_INIT
28#define WAKE_SECONDARY_VIA_MIP 28#define WAKE_SECONDARY_VIA_MIP
29#else 29#else
30#define APIC_DFR_VALUE (APIC_DFR_FLAT) 30#define APIC_DFR_VALUE (APIC_DFR_FLAT)
31#define INT_DELIVERY_MODE (dest_Fixed) 31#define INT_DELIVERY_MODE (dest_Fixed)
32#define INT_DEST_MODE (0) /* phys delivery to target procs */ 32#define INT_DEST_MODE (0) /* phys delivery to target procs */
33#define NO_BALANCE_IRQ (0) 33#define NO_BALANCE_IRQ (0)
34#undef APIC_DEST_LOGICAL 34#undef APIC_DEST_LOGICAL
35#define APIC_DEST_LOGICAL 0x0 35#define APIC_DEST_LOGICAL 0x0
36#define WAKE_SECONDARY_VIA_INIT 36#define WAKE_SECONDARY_VIA_INIT
37#endif 37#endif
38 38
39static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 39static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
40{ 40{
41 return 0; 41 return 0;
42} 42}
43static inline unsigned long check_apicid_present(int bit) 43static inline unsigned long check_apicid_present(int bit)
44{ 44{
45 return physid_isset(bit, phys_cpu_present_map); 45 return physid_isset(bit, phys_cpu_present_map);
46} 46}
@@ -80,7 +80,7 @@ static inline void setup_apic_routing(void)
80{ 80{
81 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); 81 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
82 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", 82 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
83 (apic_version[apic] == 0x14) ? 83 (apic_version[apic] == 0x14) ?
84 "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]); 84 "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]);
85} 85}
86 86
@@ -141,7 +141,7 @@ static inline void setup_portio_remap(void)
141extern unsigned int boot_cpu_physical_apicid; 141extern unsigned int boot_cpu_physical_apicid;
142static inline int check_phys_apicid_present(int cpu_physical_apicid) 142static inline int check_phys_apicid_present(int cpu_physical_apicid)
143{ 143{
144 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 144 boot_cpu_physical_apicid = read_apic_id();
145 return (1); 145 return (1);
146} 146}
147 147
@@ -150,7 +150,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
150 int num_bits_set; 150 int num_bits_set;
151 int cpus_found = 0; 151 int cpus_found = 0;
152 int cpu; 152 int cpu;
153 int apicid; 153 int apicid;
154 154
155 num_bits_set = cpus_weight(cpumask); 155 num_bits_set = cpus_weight(cpumask);
156 /* Return id to all */ 156 /* Return id to all */
@@ -160,16 +160,16 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
160#else 160#else
161 return cpu_to_logical_apicid(0); 161 return cpu_to_logical_apicid(0);
162#endif 162#endif
163 /* 163 /*
164 * The cpus in the mask must all be on the apic cluster. If are not 164 * The cpus in the mask must all be on the apic cluster. If are not
165 * on the same apicid cluster return default value of TARGET_CPUS. 165 * on the same apicid cluster return default value of TARGET_CPUS.
166 */ 166 */
167 cpu = first_cpu(cpumask); 167 cpu = first_cpu(cpumask);
168 apicid = cpu_to_logical_apicid(cpu); 168 apicid = cpu_to_logical_apicid(cpu);
169 while (cpus_found < num_bits_set) { 169 while (cpus_found < num_bits_set) {
170 if (cpu_isset(cpu, cpumask)) { 170 if (cpu_isset(cpu, cpumask)) {
171 int new_apicid = cpu_to_logical_apicid(cpu); 171 int new_apicid = cpu_to_logical_apicid(cpu);
172 if (apicid_cluster(apicid) != 172 if (apicid_cluster(apicid) !=
173 apicid_cluster(new_apicid)){ 173 apicid_cluster(new_apicid)){
174 printk ("%s: Not a valid mask!\n",__FUNCTION__); 174 printk ("%s: Not a valid mask!\n",__FUNCTION__);
175#if defined CONFIG_ES7000_CLUSTERED_APIC 175#if defined CONFIG_ES7000_CLUSTERED_APIC
@@ -191,4 +191,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
191 return cpuid_apic >> index_msb; 191 return cpuid_apic >> index_msb;
192} 192}
193 193
194#endif /* ASM_X86__MACH_ES7000__MACH_APIC_H */ 194#endif /* __ASM_ES7000_APIC_H */
diff --git a/include/asm-x86/es7000/apicdef.h b/include/asm-x86/es7000/apicdef.h
new file mode 100644
index 000000000000..8b234a3cb851
--- /dev/null
+++ b/include/asm-x86/es7000/apicdef.h
@@ -0,0 +1,13 @@
1#ifndef __ASM_ES7000_APICDEF_H
2#define __ASM_ES7000_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif
diff --git a/include/asm-x86/mach-es7000/mach_ipi.h b/include/asm-x86/es7000/ipi.h
index 3a21240e03dc..632a955fcc0a 100644
--- a/include/asm-x86/mach-es7000/mach_ipi.h
+++ b/include/asm-x86/es7000/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_IPI_H 1#ifndef __ASM_ES7000_IPI_H
2#define ASM_X86__MACH_ES7000__MACH_IPI_H 2#define __ASM_ES7000_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(cpumask_t mask, int vector);
5 5
@@ -21,4 +21,4 @@ static inline void send_IPI_all(int vector)
21 send_IPI_mask(cpu_online_map, vector); 21 send_IPI_mask(cpu_online_map, vector);
22} 22}
23 23
24#endif /* ASM_X86__MACH_ES7000__MACH_IPI_H */ 24#endif /* __ASM_ES7000_IPI_H */
diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/es7000/mpparse.h
index befde24705b7..7b5c889d8e7d 100644
--- a/include/asm-x86/mach-es7000/mach_mpparse.h
+++ b/include/asm-x86/es7000/mpparse.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_MPPARSE_H 1#ifndef __ASM_ES7000_MPPARSE_H
2#define ASM_X86__MACH_ES7000__MACH_MPPARSE_H 2#define __ASM_ES7000_MPPARSE_H
3 3
4#include <linux/acpi.h> 4#include <linux/acpi.h>
5 5
@@ -26,4 +26,4 @@ static inline int es7000_check_dsdt(void)
26} 26}
27#endif 27#endif
28 28
29#endif /* ASM_X86__MACH_ES7000__MACH_MPPARSE_H */ 29#endif /* __ASM_MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-es7000/mach_wakecpu.h b/include/asm-x86/es7000/wakecpu.h
index 97c776ce13f2..3ffc5a7bf667 100644
--- a/include/asm-x86/mach-es7000/mach_wakecpu.h
+++ b/include/asm-x86/es7000/wakecpu.h
@@ -1,7 +1,7 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_WAKECPU_H 1#ifndef __ASM_ES7000_WAKECPU_H
2#define ASM_X86__MACH_ES7000__MACH_WAKECPU_H 2#define __ASM_ES7000_WAKECPU_H
3 3
4/* 4/*
5 * This file copes with machines that wakeup secondary CPUs by the 5 * This file copes with machines that wakeup secondary CPUs by the
6 * INIT, INIT, STARTUP sequence. 6 * INIT, INIT, STARTUP sequence.
7 */ 7 */
@@ -56,4 +56,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
56 #define inquire_remote_apic(apicid) {} 56 #define inquire_remote_apic(apicid) {}
57#endif 57#endif
58 58
59#endif /* ASM_X86__MACH_ES7000__MACH_WAKECPU_H */ 59#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index 25097a8cc5ef..ed6a4886c082 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -14,6 +14,7 @@
14 14
15struct genapic { 15struct genapic {
16 char *name; 16 char *name;
17 int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
17 u32 int_delivery_mode; 18 u32 int_delivery_mode;
18 u32 int_dest_mode; 19 u32 int_dest_mode;
19 int (*apic_id_registered)(void); 20 int (*apic_id_registered)(void);
@@ -24,17 +25,24 @@ struct genapic {
24 void (*send_IPI_mask)(cpumask_t mask, int vector); 25 void (*send_IPI_mask)(cpumask_t mask, int vector);
25 void (*send_IPI_allbutself)(int vector); 26 void (*send_IPI_allbutself)(int vector);
26 void (*send_IPI_all)(int vector); 27 void (*send_IPI_all)(int vector);
28 void (*send_IPI_self)(int vector);
27 /* */ 29 /* */
28 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 30 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
29 unsigned int (*phys_pkg_id)(int index_msb); 31 unsigned int (*phys_pkg_id)(int index_msb);
32 unsigned int (*get_apic_id)(unsigned long x);
33 unsigned long (*set_apic_id)(unsigned int id);
34 unsigned long apic_id_mask;
30}; 35};
31 36
32extern struct genapic *genapic; 37extern struct genapic *genapic;
33 38
34extern struct genapic apic_flat; 39extern struct genapic apic_flat;
35extern struct genapic apic_physflat; 40extern struct genapic apic_physflat;
41extern struct genapic apic_x2apic_cluster;
42extern struct genapic apic_x2apic_phys;
36extern int acpi_madt_oem_check(char *, char *); 43extern int acpi_madt_oem_check(char *, char *);
37 44
45extern void apic_send_IPI_self(int vector);
38enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 46enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
39extern enum uv_system_type get_uv_system_type(void); 47extern enum uv_system_type get_uv_system_type(void);
40extern int is_uv_system(void); 48extern int is_uv_system(void);
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 65997b15d56a..50f6e0316b50 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -64,7 +64,6 @@ extern unsigned long io_apic_irqs;
64extern void init_VISWS_APIC_irqs(void); 64extern void init_VISWS_APIC_irqs(void);
65extern void setup_IO_APIC(void); 65extern void setup_IO_APIC(void);
66extern void disable_IO_APIC(void); 66extern void disable_IO_APIC(void);
67extern void print_IO_APIC(void);
68extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); 67extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
69extern void setup_ioapic_dest(void); 68extern void setup_ioapic_dest(void);
70 69
@@ -73,7 +72,9 @@ extern void enable_IO_APIC(void);
73#endif 72#endif
74 73
75/* IPI functions */ 74/* IPI functions */
75#ifdef CONFIG_X86_32
76extern void send_IPI_self(int vector); 76extern void send_IPI_self(int vector);
77#endif
77extern void send_IPI(int dest, int vector); 78extern void send_IPI(int dest, int vector);
78 79
79/* Statistics */ 80/* Statistics */
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 1ecdc3ed96e4..9ba862a4eac0 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -19,7 +19,9 @@
19#include <asm/sigcontext.h> 19#include <asm/sigcontext.h>
20#include <asm/user.h> 20#include <asm/user.h>
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22#include <asm/xsave.h>
22 23
24extern unsigned int sig_xstate_size;
23extern void fpu_init(void); 25extern void fpu_init(void);
24extern void mxcsr_feature_mask_init(void); 26extern void mxcsr_feature_mask_init(void);
25extern int init_fpu(struct task_struct *child); 27extern int init_fpu(struct task_struct *child);
@@ -31,12 +33,18 @@ extern user_regset_active_fn fpregs_active, xfpregs_active;
31extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; 33extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
32extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; 34extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set;
33 35
36extern struct _fpx_sw_bytes fx_sw_reserved;
34#ifdef CONFIG_IA32_EMULATION 37#ifdef CONFIG_IA32_EMULATION
38extern unsigned int sig_xstate_ia32_size;
39extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
35struct _fpstate_ia32; 40struct _fpstate_ia32;
36extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); 41struct _xstate_ia32;
37extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); 42extern int save_i387_xstate_ia32(void __user *buf);
43extern int restore_i387_xstate_ia32(void __user *buf);
38#endif 44#endif
39 45
46#define X87_FSW_ES (1 << 7) /* Exception Summary */
47
40#ifdef CONFIG_X86_64 48#ifdef CONFIG_X86_64
41 49
42/* Ignore delayed exceptions from user space */ 50/* Ignore delayed exceptions from user space */
@@ -47,7 +55,7 @@ static inline void tolerant_fwait(void)
47 _ASM_EXTABLE(1b, 2b)); 55 _ASM_EXTABLE(1b, 2b));
48} 56}
49 57
50static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) 58static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
51{ 59{
52 int err; 60 int err;
53 61
@@ -67,15 +75,31 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
67 return err; 75 return err;
68} 76}
69 77
70#define X87_FSW_ES (1 << 7) /* Exception Summary */ 78static inline int restore_fpu_checking(struct task_struct *tsk)
79{
80 if (task_thread_info(tsk)->status & TS_XSAVE)
81 return xrstor_checking(&tsk->thread.xstate->xsave);
82 else
83 return fxrstor_checking(&tsk->thread.xstate->fxsave);
84}
71 85
72/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception 86/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
73 is pending. Clear the x87 state here by setting it to fixed 87 is pending. Clear the x87 state here by setting it to fixed
74 values. The kernel data segment can be sometimes 0 and sometimes 88 values. The kernel data segment can be sometimes 0 and sometimes
75 new user value. Both should be ok. 89 new user value. Both should be ok.
76 Use the PDA as safe address because it should be already in L1. */ 90 Use the PDA as safe address because it should be already in L1. */
77static inline void clear_fpu_state(struct i387_fxsave_struct *fx) 91static inline void clear_fpu_state(struct task_struct *tsk)
78{ 92{
93 struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
94 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
95
96 /*
97 * xsave header may indicate the init state of the FP.
98 */
99 if ((task_thread_info(tsk)->status & TS_XSAVE) &&
100 !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
101 return;
102
79 if (unlikely(fx->swd & X87_FSW_ES)) 103 if (unlikely(fx->swd & X87_FSW_ES))
80 asm volatile("fnclex"); 104 asm volatile("fnclex");
81 alternative_input(ASM_NOP8 ASM_NOP2, 105 alternative_input(ASM_NOP8 ASM_NOP2,
@@ -84,7 +108,7 @@ static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
84 X86_FEATURE_FXSAVE_LEAK); 108 X86_FEATURE_FXSAVE_LEAK);
85} 109}
86 110
87static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) 111static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
88{ 112{
89 int err; 113 int err;
90 114
@@ -108,7 +132,7 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
108 return err; 132 return err;
109} 133}
110 134
111static inline void __save_init_fpu(struct task_struct *tsk) 135static inline void fxsave(struct task_struct *tsk)
112{ 136{
113 /* Using "rex64; fxsave %0" is broken because, if the memory operand 137 /* Using "rex64; fxsave %0" is broken because, if the memory operand
114 uses any extended registers for addressing, a second REX prefix 138 uses any extended registers for addressing, a second REX prefix
@@ -133,7 +157,16 @@ static inline void __save_init_fpu(struct task_struct *tsk)
133 : "=m" (tsk->thread.xstate->fxsave) 157 : "=m" (tsk->thread.xstate->fxsave)
134 : "cdaSDb" (&tsk->thread.xstate->fxsave)); 158 : "cdaSDb" (&tsk->thread.xstate->fxsave));
135#endif 159#endif
136 clear_fpu_state(&tsk->thread.xstate->fxsave); 160}
161
162static inline void __save_init_fpu(struct task_struct *tsk)
163{
164 if (task_thread_info(tsk)->status & TS_XSAVE)
165 xsave(tsk);
166 else
167 fxsave(tsk);
168
169 clear_fpu_state(tsk);
137 task_thread_info(tsk)->status &= ~TS_USEDFPU; 170 task_thread_info(tsk)->status &= ~TS_USEDFPU;
138} 171}
139 172
@@ -148,6 +181,10 @@ static inline void tolerant_fwait(void)
148 181
149static inline void restore_fpu(struct task_struct *tsk) 182static inline void restore_fpu(struct task_struct *tsk)
150{ 183{
184 if (task_thread_info(tsk)->status & TS_XSAVE) {
185 xrstor_checking(&tsk->thread.xstate->xsave);
186 return;
187 }
151 /* 188 /*
152 * The "nop" is needed to make the instructions the same 189 * The "nop" is needed to make the instructions the same
153 * length. 190 * length.
@@ -173,6 +210,27 @@ static inline void restore_fpu(struct task_struct *tsk)
173 */ 210 */
174static inline void __save_init_fpu(struct task_struct *tsk) 211static inline void __save_init_fpu(struct task_struct *tsk)
175{ 212{
213 if (task_thread_info(tsk)->status & TS_XSAVE) {
214 struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
215 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
216
217 xsave(tsk);
218
219 /*
220 * xsave header may indicate the init state of the FP.
221 */
222 if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
223 goto end;
224
225 if (unlikely(fx->swd & X87_FSW_ES))
226 asm volatile("fnclex");
227
228 /*
229 * we can do a simple return here or be paranoid :)
230 */
231 goto clear_state;
232 }
233
176 /* Use more nops than strictly needed in case the compiler 234 /* Use more nops than strictly needed in case the compiler
177 varies code */ 235 varies code */
178 alternative_input( 236 alternative_input(
@@ -182,6 +240,7 @@ static inline void __save_init_fpu(struct task_struct *tsk)
182 X86_FEATURE_FXSR, 240 X86_FEATURE_FXSR,
183 [fx] "m" (tsk->thread.xstate->fxsave), 241 [fx] "m" (tsk->thread.xstate->fxsave),
184 [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); 242 [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
243clear_state:
185 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 244 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
186 is pending. Clear the x87 state here by setting it to fixed 245 is pending. Clear the x87 state here by setting it to fixed
187 values. safe_address is a random variable that should be in L1 */ 246 values. safe_address is a random variable that should be in L1 */
@@ -191,16 +250,17 @@ static inline void __save_init_fpu(struct task_struct *tsk)
191 "fildl %[addr]", /* set F?P to defined value */ 250 "fildl %[addr]", /* set F?P to defined value */
192 X86_FEATURE_FXSAVE_LEAK, 251 X86_FEATURE_FXSAVE_LEAK,
193 [addr] "m" (safe_address)); 252 [addr] "m" (safe_address));
253end:
194 task_thread_info(tsk)->status &= ~TS_USEDFPU; 254 task_thread_info(tsk)->status &= ~TS_USEDFPU;
195} 255}
196 256
257#endif /* CONFIG_X86_64 */
258
197/* 259/*
198 * Signal frame handlers... 260 * Signal frame handlers...
199 */ 261 */
200extern int save_i387(struct _fpstate __user *buf); 262extern int save_i387_xstate(void __user *buf);
201extern int restore_i387(struct _fpstate __user *buf); 263extern int restore_i387_xstate(void __user *buf);
202
203#endif /* CONFIG_X86_64 */
204 264
205static inline void __unlazy_fpu(struct task_struct *tsk) 265static inline void __unlazy_fpu(struct task_struct *tsk)
206{ 266{
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index c586559a6957..23c1b3baaecd 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port)
57 57
58extern struct irq_chip i8259A_chip; 58extern struct irq_chip i8259A_chip;
59 59
60extern void mask_8259A(void);
61extern void unmask_8259A(void);
62
60#endif /* ASM_X86__I8259_H */ 63#endif /* ASM_X86__I8259_H */
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index be62847ab07e..8ec68a50cf10 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -107,6 +107,20 @@ struct IO_APIC_route_entry {
107 107
108} __attribute__ ((packed)); 108} __attribute__ ((packed));
109 109
110struct IR_IO_APIC_route_entry {
111 __u64 vector : 8,
112 zero : 3,
113 index2 : 1,
114 delivery_status : 1,
115 polarity : 1,
116 irr : 1,
117 trigger : 1,
118 mask : 1,
119 reserved : 31,
120 format : 1,
121 index : 15;
122} __attribute__ ((packed));
123
110#ifdef CONFIG_X86_IO_APIC 124#ifdef CONFIG_X86_IO_APIC
111 125
112/* 126/*
@@ -183,6 +197,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
183extern int (*ioapic_renumber_irq)(int ioapic, int irq); 197extern int (*ioapic_renumber_irq)(int ioapic, int irq);
184extern void ioapic_init_mappings(void); 198extern void ioapic_init_mappings(void);
185 199
200#ifdef CONFIG_X86_64
201extern int save_mask_IO_APIC_setup(void);
202extern void restore_IO_APIC_setup(void);
203extern void reinit_intr_remapped_IO_APIC(int);
204#endif
205
186#else /* !CONFIG_X86_IO_APIC */ 206#else /* !CONFIG_X86_IO_APIC */
187#define io_apic_assign_pci_irqs 0 207#define io_apic_assign_pci_irqs 0
188static const int timer_through_8259 = 0; 208static const int timer_through_8259 = 0;
diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index c1b226797518..30a692cfaff8 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask)
49 return SET_APIC_DEST_FIELD(mask); 49 return SET_APIC_DEST_FIELD(mask);
50} 50}
51 51
52static inline void __xapic_wait_icr_idle(void)
53{
54 while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
55 cpu_relax();
56}
57
52static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, 58static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
53 unsigned int dest) 59 unsigned int dest)
54{ 60{
@@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
64 /* 70 /*
65 * Wait for idle. 71 * Wait for idle.
66 */ 72 */
67 apic_wait_icr_idle(); 73 __xapic_wait_icr_idle();
68 74
69 /* 75 /*
70 * No need to touch the target chip field 76 * No need to touch the target chip field
@@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
74 /* 80 /*
75 * Send the IPI. The write to APIC_ICR fires this off. 81 * Send the IPI. The write to APIC_ICR fires this off.
76 */ 82 */
77 apic_write(APIC_ICR, cfg); 83 native_apic_mem_write(APIC_ICR, cfg);
78} 84}
79 85
80/* 86/*
@@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
92 if (unlikely(vector == NMI_VECTOR)) 98 if (unlikely(vector == NMI_VECTOR))
93 safe_apic_wait_icr_idle(); 99 safe_apic_wait_icr_idle();
94 else 100 else
95 apic_wait_icr_idle(); 101 __xapic_wait_icr_idle();
96 102
97 /* 103 /*
98 * prepare target chip field 104 * prepare target chip field
99 */ 105 */
100 cfg = __prepare_ICR2(mask); 106 cfg = __prepare_ICR2(mask);
101 apic_write(APIC_ICR2, cfg); 107 native_apic_mem_write(APIC_ICR2, cfg);
102 108
103 /* 109 /*
104 * program the ICR 110 * program the ICR
@@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
108 /* 114 /*
109 * Send the IPI. The write to APIC_ICR fires this off. 115 * Send the IPI. The write to APIC_ICR fires this off.
110 */ 116 */
111 apic_write(APIC_ICR, cfg); 117 native_apic_mem_write(APIC_ICR, cfg);
112} 118}
113 119
114static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) 120static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h
new file mode 100644
index 000000000000..78242c6ffa58
--- /dev/null
+++ b/include/asm-x86/irq_remapping.h
@@ -0,0 +1,8 @@
1#ifndef _ASM_IRQ_REMAPPING_H
2#define _ASM_IRQ_REMAPPING_H
3
4extern int x2apic;
5
6#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
7
8#endif
diff --git a/include/asm-x86/mach-bigsmp/mach_apicdef.h b/include/asm-x86/mach-bigsmp/mach_apicdef.h
deleted file mode 100644
index 811935d9d49b..000000000000
--- a/include/asm-x86/mach-bigsmp/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef ASM_X86__MACH_BIGSMP__MACH_APICDEF_H
2#define ASM_X86__MACH_BIGSMP__MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif /* ASM_X86__MACH_BIGSMP__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index b615f40736be..2a330a41b3dd 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -30,6 +30,8 @@ static inline cpumask_t target_cpus(void)
30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) 30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
31#define phys_pkg_id (genapic->phys_pkg_id) 31#define phys_pkg_id (genapic->phys_pkg_id)
32#define vector_allocation_domain (genapic->vector_allocation_domain) 32#define vector_allocation_domain (genapic->vector_allocation_domain)
33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
34#define send_IPI_self (genapic->send_IPI_self)
33extern void setup_apic_routing(void); 35extern void setup_apic_routing(void);
34#else 36#else
35#define INT_DELIVERY_MODE dest_LowestPrio 37#define INT_DELIVERY_MODE dest_LowestPrio
@@ -54,7 +56,7 @@ static inline void init_apic_ldr(void)
54 56
55static inline int apic_id_registered(void) 57static inline int apic_id_registered(void)
56{ 58{
57 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 59 return physid_isset(read_apic_id(), phys_cpu_present_map);
58} 60}
59 61
60static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 62static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h
index 936704f816d6..0c2d41c41b20 100644
--- a/include/asm-x86/mach-default/mach_apicdef.h
+++ b/include/asm-x86/mach-default/mach_apicdef.h
@@ -4,9 +4,9 @@
4#include <asm/apic.h> 4#include <asm/apic.h>
5 5
6#ifdef CONFIG_X86_64 6#ifdef CONFIG_X86_64
7#define APIC_ID_MASK (0xFFu<<24) 7#define APIC_ID_MASK (genapic->apic_id_mask)
8#define GET_APIC_ID(x) (((x)>>24)&0xFFu) 8#define GET_APIC_ID(x) (genapic->get_apic_id(x))
9#define SET_APIC_ID(x) (((x)<<24)) 9#define SET_APIC_ID(x) (genapic->set_apic_id(x))
10#else 10#else
11#define APIC_ID_MASK (0xF<<24) 11#define APIC_ID_MASK (0xF<<24)
12static inline unsigned get_apic_id(unsigned long x) 12static inline unsigned get_apic_id(unsigned long x)
diff --git a/include/asm-x86/mach-es7000/mach_apicdef.h b/include/asm-x86/mach-es7000/mach_apicdef.h
deleted file mode 100644
index a07e56744028..000000000000
--- a/include/asm-x86/mach-es7000/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_APICDEF_H
2#define ASM_X86__MACH_ES7000__MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif /* ASM_X86__MACH_ES7000__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h
deleted file mode 100644
index 74ade184920b..000000000000
--- a/include/asm-x86/mach-numaq/mach_mpparse.h
+++ /dev/null
@@ -1,7 +0,0 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H
2#define ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H
3
4extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
5 char *productid);
6
7#endif /* ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-summit/mach_apicdef.h b/include/asm-x86/mach-summit/mach_apicdef.h
deleted file mode 100644
index d4bc8590c4f6..000000000000
--- a/include/asm-x86/mach-summit/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_APICDEF_H
2#define ASM_X86__MACH_SUMMIT__MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif /* ASM_X86__MACH_SUMMIT__MACH_APICDEF_H */
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 118da365e371..be2241a818f1 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -5,11 +5,12 @@
5 5
6#include <asm/mpspec_def.h> 6#include <asm/mpspec_def.h>
7 7
8extern int apic_version[MAX_APICS];
9
8#ifdef CONFIG_X86_32 10#ifdef CONFIG_X86_32
9#include <mach_mpspec.h> 11#include <mach_mpspec.h>
10 12
11extern unsigned int def_to_bigsmp; 13extern unsigned int def_to_bigsmp;
12extern int apic_version[MAX_APICS];
13extern u8 apicid_2_node[]; 14extern u8 apicid_2_node[];
14extern int pic_mode; 15extern int pic_mode;
15 16
diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h
index 3139666a94fa..ed9190246876 100644
--- a/include/asm-x86/msidef.h
+++ b/include/asm-x86/msidef.h
@@ -48,4 +48,8 @@
48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ 48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
49 MSI_ADDR_DEST_ID_MASK) 49 MSI_ADDR_DEST_ID_MASK)
50 50
51#define MSI_ADDR_IR_EXT_INT (1 << 4)
52#define MSI_ADDR_IR_SHV (1 << 3)
53#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
54#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
51#endif /* ASM_X86__MSIDEF_H */ 55#endif /* ASM_X86__MSIDEF_H */
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index 3052f058ab06..0bb43301a202 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -176,6 +176,7 @@
176#define MSR_IA32_TSC 0x00000010 176#define MSR_IA32_TSC 0x00000010
177#define MSR_IA32_PLATFORM_ID 0x00000017 177#define MSR_IA32_PLATFORM_ID 0x00000017
178#define MSR_IA32_EBL_CR_POWERON 0x0000002a 178#define MSR_IA32_EBL_CR_POWERON 0x0000002a
179#define MSR_IA32_FEATURE_CONTROL 0x0000003a
179 180
180#define MSR_IA32_APICBASE 0x0000001b 181#define MSR_IA32_APICBASE 0x0000001b
181#define MSR_IA32_APICBASE_BSP (1<<8) 182#define MSR_IA32_APICBASE_BSP (1<<8)
@@ -310,4 +311,19 @@
310/* Geode defined MSRs */ 311/* Geode defined MSRs */
311#define MSR_GEODE_BUSCONT_CONF0 0x00001900 312#define MSR_GEODE_BUSCONT_CONF0 0x00001900
312 313
314/* Intel VT MSRs */
315#define MSR_IA32_VMX_BASIC 0x00000480
316#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
317#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
318#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
319#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
320#define MSR_IA32_VMX_MISC 0x00000485
321#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
322#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
323#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
324#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
325#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
326#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
327#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
328
313#endif /* ASM_X86__MSR_INDEX_H */ 329#endif /* ASM_X86__MSR_INDEX_H */
diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/numaq/apic.h
index 7a0d39edfcfa..a8344ba6ea15 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/numaq/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_APIC_H 1#ifndef __ASM_NUMAQ_APIC_H
2#define ASM_X86__MACH_NUMAQ__MACH_APIC_H 2#define __ASM_NUMAQ_APIC_H
3 3
4#include <asm/io.h> 4#include <asm/io.h>
5#include <linux/mmzone.h> 5#include <linux/mmzone.h>
@@ -135,4 +135,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
135 return cpuid_apic >> index_msb; 135 return cpuid_apic >> index_msb;
136} 136}
137 137
138#endif /* ASM_X86__MACH_NUMAQ__MACH_APIC_H */ 138#endif /* __ASM_NUMAQ_APIC_H */
diff --git a/include/asm-x86/mach-numaq/mach_apicdef.h b/include/asm-x86/numaq/apicdef.h
index f870ec5f7782..e012a46cc22a 100644
--- a/include/asm-x86/mach-numaq/mach_apicdef.h
+++ b/include/asm-x86/numaq/apicdef.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_APICDEF_H 1#ifndef __ASM_NUMAQ_APICDEF_H
2#define ASM_X86__MACH_NUMAQ__MACH_APICDEF_H 2#define __ASM_NUMAQ_APICDEF_H
3 3
4 4
5#define APIC_ID_MASK (0xF<<24) 5#define APIC_ID_MASK (0xF<<24)
@@ -11,4 +11,4 @@ static inline unsigned get_apic_id(unsigned long x)
11 11
12#define GET_APIC_ID(x) get_apic_id(x) 12#define GET_APIC_ID(x) get_apic_id(x)
13 13
14#endif /* ASM_X86__MACH_NUMAQ__MACH_APICDEF_H */ 14#endif
diff --git a/include/asm-x86/mach-numaq/mach_ipi.h b/include/asm-x86/numaq/ipi.h
index 1e835823f4bc..935588d286cf 100644
--- a/include/asm-x86/mach-numaq/mach_ipi.h
+++ b/include/asm-x86/numaq/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_IPI_H 1#ifndef __ASM_NUMAQ_IPI_H
2#define ASM_X86__MACH_NUMAQ__MACH_IPI_H 2#define __ASM_NUMAQ_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t, int vector); 4void send_IPI_mask_sequence(cpumask_t, int vector);
5 5
@@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector)
22 send_IPI_mask(cpu_online_map, vector); 22 send_IPI_mask(cpu_online_map, vector);
23} 23}
24 24
25#endif /* ASM_X86__MACH_NUMAQ__MACH_IPI_H */ 25#endif /* __ASM_NUMAQ_IPI_H */
diff --git a/include/asm-x86/numaq/mpparse.h b/include/asm-x86/numaq/mpparse.h
new file mode 100644
index 000000000000..252292e077b6
--- /dev/null
+++ b/include/asm-x86/numaq/mpparse.h
@@ -0,0 +1,7 @@
1#ifndef __ASM_NUMAQ_MPPARSE_H
2#define __ASM_NUMAQ_MPPARSE_H
3
4extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
5 char *productid);
6
7#endif /* __ASM_NUMAQ_MPPARSE_H */
diff --git a/include/asm-x86/mach-numaq/mach_wakecpu.h b/include/asm-x86/numaq/wakecpu.h
index 0db8cea643c0..c577bda5b1c5 100644
--- a/include/asm-x86/mach-numaq/mach_wakecpu.h
+++ b/include/asm-x86/numaq/wakecpu.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H 1#ifndef __ASM_NUMAQ_WAKECPU_H
2#define ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H 2#define __ASM_NUMAQ_WAKECPU_H
3 3
4/* This file copes with machines that wakeup secondary CPUs by NMIs */ 4/* This file copes with machines that wakeup secondary CPUs by NMIs */
5 5
@@ -40,4 +40,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
40 40
41#define inquire_remote_apic(apicid) {} 41#define inquire_remote_apic(apicid) {}
42 42
43#endif /* ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H */ 43#endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 891971f57d35..d7d358a43996 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -201,12 +201,6 @@ struct pv_irq_ops {
201 201
202struct pv_apic_ops { 202struct pv_apic_ops {
203#ifdef CONFIG_X86_LOCAL_APIC 203#ifdef CONFIG_X86_LOCAL_APIC
204 /*
205 * Direct APIC operations, principally for VMI. Ideally
206 * these shouldn't be in this interface.
207 */
208 void (*apic_write)(unsigned long reg, u32 v);
209 u32 (*apic_read)(unsigned long reg);
210 void (*setup_boot_clock)(void); 204 void (*setup_boot_clock)(void);
211 void (*setup_secondary_clock)(void); 205 void (*setup_secondary_clock)(void);
212 206
@@ -910,19 +904,6 @@ static inline void slow_down_io(void)
910} 904}
911 905
912#ifdef CONFIG_X86_LOCAL_APIC 906#ifdef CONFIG_X86_LOCAL_APIC
913/*
914 * Basic functions accessing APICs.
915 */
916static inline void apic_write(unsigned long reg, u32 v)
917{
918 PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
919}
920
921static inline u32 apic_read(unsigned long reg)
922{
923 return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
924}
925
926static inline void setup_boot_clock(void) 907static inline void setup_boot_clock(void)
927{ 908{
928 PVOP_VCALL0(pv_apic_ops.setup_boot_clock); 909 PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
diff --git a/include/asm-x86/processor-cyrix.h b/include/asm-x86/processor-cyrix.h
index 97568ada1f97..1198f2a0e42c 100644
--- a/include/asm-x86/processor-cyrix.h
+++ b/include/asm-x86/processor-cyrix.h
@@ -28,3 +28,11 @@ static inline void setCx86(u8 reg, u8 data)
28 outb(reg, 0x22); 28 outb(reg, 0x22);
29 outb(data, 0x23); 29 outb(data, 0x23);
30} 30}
31
32#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); })
33
34#define setCx86_old(reg, data) do { \
35 outb((reg), 0x22); \
36 outb((data), 0x23); \
37} while (0)
38
diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h
index 5dd79774f693..dc5f0712f9fa 100644
--- a/include/asm-x86/processor-flags.h
+++ b/include/asm-x86/processor-flags.h
@@ -59,6 +59,7 @@
59#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ 59#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */
60#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ 60#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
61#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ 61#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
62#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
62 63
63/* 64/*
64 * x86-64 Task Priority Register, CR8 65 * x86-64 Task Priority Register, CR8
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 5eaf9bf0a623..c7d35464a4bb 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -76,11 +76,11 @@ struct cpuinfo_x86 {
76 int x86_tlbsize; 76 int x86_tlbsize;
77 __u8 x86_virt_bits; 77 __u8 x86_virt_bits;
78 __u8 x86_phys_bits; 78 __u8 x86_phys_bits;
79#endif
79 /* CPUID returned core id bits: */ 80 /* CPUID returned core id bits: */
80 __u8 x86_coreid_bits; 81 __u8 x86_coreid_bits;
81 /* Max extended CPUID function supported: */ 82 /* Max extended CPUID function supported: */
82 __u32 extended_cpuid_level; 83 __u32 extended_cpuid_level;
83#endif
84 /* Maximum supported CPUID level, -1=no CPUID: */ 84 /* Maximum supported CPUID level, -1=no CPUID: */
85 int cpuid_level; 85 int cpuid_level;
86 __u32 x86_capability[NCAPINTS]; 86 __u32 x86_capability[NCAPINTS];
@@ -166,11 +166,8 @@ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
166extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 166extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
167extern unsigned short num_cache_leaves; 167extern unsigned short num_cache_leaves;
168 168
169#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64) 169extern void detect_extended_topology(struct cpuinfo_x86 *c);
170extern void detect_ht(struct cpuinfo_x86 *c); 170extern void detect_ht(struct cpuinfo_x86 *c);
171#else
172static inline void detect_ht(struct cpuinfo_x86 *c) {}
173#endif
174 171
175static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, 172static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
176 unsigned int *ecx, unsigned int *edx) 173 unsigned int *ecx, unsigned int *edx)
@@ -327,7 +324,12 @@ struct i387_fxsave_struct {
327 /* 16*16 bytes for each XMM-reg = 256 bytes: */ 324 /* 16*16 bytes for each XMM-reg = 256 bytes: */
328 u32 xmm_space[64]; 325 u32 xmm_space[64];
329 326
330 u32 padding[24]; 327 u32 padding[12];
328
329 union {
330 u32 padding1[12];
331 u32 sw_reserved[12];
332 };
331 333
332} __attribute__((aligned(16))); 334} __attribute__((aligned(16)));
333 335
@@ -351,10 +353,23 @@ struct i387_soft_struct {
351 u32 entry_eip; 353 u32 entry_eip;
352}; 354};
353 355
356struct xsave_hdr_struct {
357 u64 xstate_bv;
358 u64 reserved1[2];
359 u64 reserved2[5];
360} __attribute__((packed));
361
362struct xsave_struct {
363 struct i387_fxsave_struct i387;
364 struct xsave_hdr_struct xsave_hdr;
365 /* new processor state extensions will go here */
366} __attribute__ ((packed, aligned (64)));
367
354union thread_xstate { 368union thread_xstate {
355 struct i387_fsave_struct fsave; 369 struct i387_fsave_struct fsave;
356 struct i387_fxsave_struct fxsave; 370 struct i387_fxsave_struct fxsave;
357 struct i387_soft_struct soft; 371 struct i387_soft_struct soft;
372 struct xsave_struct xsave;
358}; 373};
359 374
360#ifdef CONFIG_X86_64 375#ifdef CONFIG_X86_64
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 9030cb73c4d7..11b6cc14b289 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -38,6 +38,7 @@ struct x86_quirks {
38 void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); 38 void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
39 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, 39 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
40 unsigned short oemsize); 40 unsigned short oemsize);
41 int (*setup_ioapic_ids)(void);
41}; 42};
42 43
43extern struct x86_quirks *x86_quirks; 44extern struct x86_quirks *x86_quirks;
diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h
index 24879c85b291..ee813f4fe5d5 100644
--- a/include/asm-x86/sigcontext.h
+++ b/include/asm-x86/sigcontext.h
@@ -4,6 +4,40 @@
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <asm/types.h> 5#include <asm/types.h>
6 6
7#define FP_XSTATE_MAGIC1 0x46505853U
8#define FP_XSTATE_MAGIC2 0x46505845U
9#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
10
11/*
12 * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame
13 * are reserved for SW usage. On CPUs supporting xsave/xrstor, these bytes
14 * are used to extend the fpstate pointer in the sigcontext, which now
15 * includes the extended state information along with fpstate information.
16 *
17 * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved
18 * area and FP_XSTATE_MAGIC2 at the end of memory layout
19 * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the
20 * extended state information in the memory layout pointed by the fpstate
21 * pointer in sigcontext.
22 */
23struct _fpx_sw_bytes {
24 __u32 magic1; /* FP_XSTATE_MAGIC1 */
25 __u32 extended_size; /* total size of the layout referred by
26 * fpstate pointer in the sigcontext.
27 */
28 __u64 xstate_bv;
29 /* feature bit mask (including fp/sse/extended
30 * state) that is present in the memory
31 * layout.
32 */
33 __u32 xstate_size; /* actual xsave state size, based on the
34 * features saved in the layout.
35 * 'extended_size' will be greater than
36 * 'xstate_size'.
37 */
38 __u32 padding[7]; /* for future use. */
39};
40
7#ifdef __i386__ 41#ifdef __i386__
8/* 42/*
9 * As documented in the iBCS2 standard.. 43 * As documented in the iBCS2 standard..
@@ -53,7 +87,13 @@ struct _fpstate {
53 unsigned long reserved; 87 unsigned long reserved;
54 struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ 88 struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
55 struct _xmmreg _xmm[8]; 89 struct _xmmreg _xmm[8];
56 unsigned long padding[56]; 90 unsigned long padding1[44];
91
92 union {
93 unsigned long padding2[12];
94 struct _fpx_sw_bytes sw_reserved; /* represents the extended
95 * state info */
96 };
57}; 97};
58 98
59#define X86_FXSR_MAGIC 0x0000 99#define X86_FXSR_MAGIC 0x0000
@@ -79,7 +119,15 @@ struct sigcontext {
79 unsigned long flags; 119 unsigned long flags;
80 unsigned long sp_at_signal; 120 unsigned long sp_at_signal;
81 unsigned short ss, __ssh; 121 unsigned short ss, __ssh;
82 struct _fpstate __user *fpstate; 122
123 /*
124 * fpstate is really (struct _fpstate *) or (struct _xstate *)
125 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
126 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
127 * of extended memory layout. See comments at the defintion of
128 * (struct _fpx_sw_bytes)
129 */
130 void __user *fpstate; /* zero when no FPU/extended context */
83 unsigned long oldmask; 131 unsigned long oldmask;
84 unsigned long cr2; 132 unsigned long cr2;
85}; 133};
@@ -130,7 +178,12 @@ struct _fpstate {
130 __u32 mxcsr_mask; 178 __u32 mxcsr_mask;
131 __u32 st_space[32]; /* 8*16 bytes for each FP-reg */ 179 __u32 st_space[32]; /* 8*16 bytes for each FP-reg */
132 __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */ 180 __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */
133 __u32 reserved2[24]; 181 __u32 reserved2[12];
182 union {
183 __u32 reserved3[12];
184 struct _fpx_sw_bytes sw_reserved; /* represents the extended
185 * state information */
186 };
134}; 187};
135 188
136#ifdef __KERNEL__ 189#ifdef __KERNEL__
@@ -161,7 +214,15 @@ struct sigcontext {
161 unsigned long trapno; 214 unsigned long trapno;
162 unsigned long oldmask; 215 unsigned long oldmask;
163 unsigned long cr2; 216 unsigned long cr2;
164 struct _fpstate __user *fpstate; /* zero when no FPU context */ 217
218 /*
219 * fpstate is really (struct _fpstate *) or (struct _xstate *)
220 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
221 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
222 * of extended memory layout. See comments at the defintion of
223 * (struct _fpx_sw_bytes)
224 */
225 void __user *fpstate; /* zero when no FPU/extended context */
165 unsigned long reserved1[8]; 226 unsigned long reserved1[8];
166}; 227};
167#else /* __KERNEL__ */ 228#else /* __KERNEL__ */
@@ -202,4 +263,22 @@ struct sigcontext {
202 263
203#endif /* !__i386__ */ 264#endif /* !__i386__ */
204 265
266struct _xsave_hdr {
267 __u64 xstate_bv;
268 __u64 reserved1[2];
269 __u64 reserved2[5];
270};
271
272/*
273 * Extended state pointed to by the fpstate pointer in the sigcontext.
274 * In addition to the fpstate, information encoded in the xstate_hdr
275 * indicates the presence of other extended state information
276 * supported by the processor and OS.
277 */
278struct _xstate {
279 struct _fpstate fpstate;
280 struct _xsave_hdr xstate_hdr;
281 /* new processor state extensions go here */
282};
283
205#endif /* ASM_X86__SIGCONTEXT_H */ 284#endif /* ASM_X86__SIGCONTEXT_H */
diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h
index 4e2ec732dd01..8c347032c2f2 100644
--- a/include/asm-x86/sigcontext32.h
+++ b/include/asm-x86/sigcontext32.h
@@ -40,7 +40,11 @@ struct _fpstate_ia32 {
40 __u32 reserved; 40 __u32 reserved;
41 struct _fpxreg _fxsr_st[8]; 41 struct _fpxreg _fxsr_st[8];
42 struct _xmmreg _xmm[8]; /* It's actually 16 */ 42 struct _xmmreg _xmm[8]; /* It's actually 16 */
43 __u32 padding[56]; 43 __u32 padding[44];
44 union {
45 __u32 padding2[12];
46 struct _fpx_sw_bytes sw_reserved;
47 };
44}; 48};
45 49
46struct sigcontext_ia32 { 50struct sigcontext_ia32 {
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 04f84f4e2c8b..29324c103341 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -167,30 +167,33 @@ extern int safe_smp_processor_id(void);
167 167
168#ifdef CONFIG_X86_LOCAL_APIC 168#ifdef CONFIG_X86_LOCAL_APIC
169 169
170#ifndef CONFIG_X86_64
170static inline int logical_smp_processor_id(void) 171static inline int logical_smp_processor_id(void)
171{ 172{
172 /* we don't want to mark this access volatile - bad code generation */ 173 /* we don't want to mark this access volatile - bad code generation */
173 return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); 174 return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
174} 175}
175 176
176#ifndef CONFIG_X86_64 177#include <mach_apicdef.h>
177static inline unsigned int read_apic_id(void) 178static inline unsigned int read_apic_id(void)
178{ 179{
179 return *(u32 *)(APIC_BASE + APIC_ID); 180 unsigned int reg;
181
182 reg = *(u32 *)(APIC_BASE + APIC_ID);
183
184 return GET_APIC_ID(reg);
180} 185}
181#else
182extern unsigned int read_apic_id(void);
183#endif 186#endif
184 187
185 188
186# ifdef APIC_DEFINITION 189# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
187extern int hard_smp_processor_id(void); 190extern int hard_smp_processor_id(void);
188# else 191# else
189# include <mach_apicdef.h> 192#include <mach_apicdef.h>
190static inline int hard_smp_processor_id(void) 193static inline int hard_smp_processor_id(void)
191{ 194{
192 /* we don't want to mark this access volatile - bad code generation */ 195 /* we don't want to mark this access volatile - bad code generation */
193 return GET_APIC_ID(read_apic_id()); 196 return read_apic_id();
194} 197}
195# endif /* APIC_DEFINITION */ 198# endif /* APIC_DEFINITION */
196 199
diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/summit/apic.h
index 7a66758d701d..c5b2e4b10358 100644
--- a/include/asm-x86/mach-summit/mach_apic.h
+++ b/include/asm-x86/summit/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_APIC_H 1#ifndef __ASM_SUMMIT_APIC_H
2#define ASM_X86__MACH_SUMMIT__MACH_APIC_H 2#define __ASM_SUMMIT_APIC_H
3 3
4#include <asm/smp.h> 4#include <asm/smp.h>
5 5
@@ -21,7 +21,7 @@ static inline cpumask_t target_cpus(void)
21 * Just start on cpu 0. IRQ balancing will spread load 21 * Just start on cpu 0. IRQ balancing will spread load
22 */ 22 */
23 return cpumask_of_cpu(0); 23 return cpumask_of_cpu(0);
24} 24}
25#define TARGET_CPUS (target_cpus()) 25#define TARGET_CPUS (target_cpus())
26 26
27#define INT_DELIVERY_MODE (dest_LowestPrio) 27#define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -30,10 +30,10 @@ static inline cpumask_t target_cpus(void)
30static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 30static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
31{ 31{
32 return 0; 32 return 0;
33} 33}
34 34
35/* we don't use the phys_cpu_present_map to indicate apicid presence */ 35/* we don't use the phys_cpu_present_map to indicate apicid presence */
36static inline unsigned long check_apicid_present(int bit) 36static inline unsigned long check_apicid_present(int bit)
37{ 37{
38 return 1; 38 return 1;
39} 39}
@@ -122,7 +122,7 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map)
122 122
123static inline physid_mask_t apicid_to_cpu_present(int apicid) 123static inline physid_mask_t apicid_to_cpu_present(int apicid)
124{ 124{
125 return physid_mask_of_physid(apicid); 125 return physid_mask_of_physid(0);
126} 126}
127 127
128static inline void setup_portio_remap(void) 128static inline void setup_portio_remap(void)
@@ -143,22 +143,22 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
143 int num_bits_set; 143 int num_bits_set;
144 int cpus_found = 0; 144 int cpus_found = 0;
145 int cpu; 145 int cpu;
146 int apicid; 146 int apicid;
147 147
148 num_bits_set = cpus_weight(cpumask); 148 num_bits_set = cpus_weight(cpumask);
149 /* Return id to all */ 149 /* Return id to all */
150 if (num_bits_set == NR_CPUS) 150 if (num_bits_set == NR_CPUS)
151 return (int) 0xFF; 151 return (int) 0xFF;
152 /* 152 /*
153 * The cpus in the mask must all be on the apic cluster. If are not 153 * The cpus in the mask must all be on the apic cluster. If are not
154 * on the same apicid cluster return default value of TARGET_CPUS. 154 * on the same apicid cluster return default value of TARGET_CPUS.
155 */ 155 */
156 cpu = first_cpu(cpumask); 156 cpu = first_cpu(cpumask);
157 apicid = cpu_to_logical_apicid(cpu); 157 apicid = cpu_to_logical_apicid(cpu);
158 while (cpus_found < num_bits_set) { 158 while (cpus_found < num_bits_set) {
159 if (cpu_isset(cpu, cpumask)) { 159 if (cpu_isset(cpu, cpumask)) {
160 int new_apicid = cpu_to_logical_apicid(cpu); 160 int new_apicid = cpu_to_logical_apicid(cpu);
161 if (apicid_cluster(apicid) != 161 if (apicid_cluster(apicid) !=
162 apicid_cluster(new_apicid)){ 162 apicid_cluster(new_apicid)){
163 printk ("%s: Not a valid mask!\n",__FUNCTION__); 163 printk ("%s: Not a valid mask!\n",__FUNCTION__);
164 return 0xFF; 164 return 0xFF;
@@ -182,4 +182,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
182 return hard_smp_processor_id() >> index_msb; 182 return hard_smp_processor_id() >> index_msb;
183} 183}
184 184
185#endif /* ASM_X86__MACH_SUMMIT__MACH_APIC_H */ 185#endif /* __ASM_SUMMIT_APIC_H */
diff --git a/include/asm-x86/summit/apicdef.h b/include/asm-x86/summit/apicdef.h
new file mode 100644
index 000000000000..f3fbca1f61c1
--- /dev/null
+++ b/include/asm-x86/summit/apicdef.h
@@ -0,0 +1,13 @@
1#ifndef __ASM_SUMMIT_APICDEF_H
2#define __ASM_SUMMIT_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (x>>24)&0xFF;
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif
diff --git a/include/asm-x86/mach-summit/mach_ipi.h b/include/asm-x86/summit/ipi.h
index a3b31c528d90..53bd1e7bd7b4 100644
--- a/include/asm-x86/mach-summit/mach_ipi.h
+++ b/include/asm-x86/summit/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_IPI_H 1#ifndef __ASM_SUMMIT_IPI_H
2#define ASM_X86__MACH_SUMMIT__MACH_IPI_H 2#define __ASM_SUMMIT_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(cpumask_t mask, int vector);
5 5
@@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector)
22 send_IPI_mask(cpu_online_map, vector); 22 send_IPI_mask(cpu_online_map, vector);
23} 23}
24 24
25#endif /* ASM_X86__MACH_SUMMIT__MACH_IPI_H */ 25#endif /* __ASM_SUMMIT_IPI_H */
diff --git a/include/asm-x86/mach-summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h
index 22f376ad68e1..890ce3f5e09a 100644
--- a/include/asm-x86/mach-summit/irq_vectors_limits.h
+++ b/include/asm-x86/summit/irq_vectors_limits.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H 1#ifndef _ASM_IRQ_VECTORS_LIMITS_H
2#define ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H 2#define _ASM_IRQ_VECTORS_LIMITS_H
3 3
4/* 4/*
5 * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, 5 * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
@@ -11,4 +11,4 @@
11#define NR_IRQS 224 11#define NR_IRQS 224
12#define NR_IRQ_VECTORS 1024 12#define NR_IRQ_VECTORS 1024
13 13
14#endif /* ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H */ 14#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/mach-summit/mach_mpparse.h b/include/asm-x86/summit/mpparse.h
index 92396f28772b..013ce6fab2d5 100644
--- a/include/asm-x86/mach-summit/mach_mpparse.h
+++ b/include/asm-x86/summit/mpparse.h
@@ -1,7 +1,6 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H 1#ifndef __ASM_SUMMIT_MPPARSE_H
2#define ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H 2#define __ASM_SUMMIT_MPPARSE_H
3 3
4#include <mach_apic.h>
5#include <asm/tsc.h> 4#include <asm/tsc.h>
6 5
7extern int use_cyclone; 6extern int use_cyclone;
@@ -12,11 +11,11 @@ extern void setup_summit(void);
12#define setup_summit() {} 11#define setup_summit() {}
13#endif 12#endif
14 13
15static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, 14static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
16 char *productid) 15 char *productid)
17{ 16{
18 if (!strncmp(oem, "IBM ENSW", 8) && 17 if (!strncmp(oem, "IBM ENSW", 8) &&
19 (!strncmp(productid, "VIGIL SMP", 9) 18 (!strncmp(productid, "VIGIL SMP", 9)
20 || !strncmp(productid, "EXA", 3) 19 || !strncmp(productid, "EXA", 3)
21 || !strncmp(productid, "RUTHLESS SMP", 12))){ 20 || !strncmp(productid, "RUTHLESS SMP", 12))){
22 mark_tsc_unstable("Summit based system"); 21 mark_tsc_unstable("Summit based system");
@@ -107,4 +106,4 @@ static inline int is_WPEG(struct rio_detail *rio){
107 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); 106 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
108} 107}
109 108
110#endif /* ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H */ 109#endif /* __ASM_SUMMIT_MPPARSE_H */
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 4db0066a3a35..3f4e52bb77f5 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -241,6 +241,7 @@ static inline struct thread_info *stack_thread_info(void)
241#define TS_POLLING 0x0004 /* true if in idle loop 241#define TS_POLLING 0x0004 /* true if in idle loop
242 and not sleeping */ 242 and not sleeping */
243#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ 243#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
244#define TS_XSAVE 0x0010 /* Use xsave/xrstor */
244 245
245#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) 246#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
246 247
diff --git a/include/asm-x86/ucontext.h b/include/asm-x86/ucontext.h
index 9948dd328084..89eaa5456a7e 100644
--- a/include/asm-x86/ucontext.h
+++ b/include/asm-x86/ucontext.h
@@ -1,6 +1,12 @@
1#ifndef ASM_X86__UCONTEXT_H 1#ifndef ASM_X86__UCONTEXT_H
2#define ASM_X86__UCONTEXT_H 2#define ASM_X86__UCONTEXT_H
3 3
4#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state
5 * information in the memory layout pointed
6 * by the fpstate pointer in the ucontext's
7 * sigcontext struct (uc_mcontext).
8 */
9
4struct ucontext { 10struct ucontext {
5 unsigned long uc_flags; 11 unsigned long uc_flags;
6 struct ucontext *uc_link; 12 struct ucontext *uc_link;
diff --git a/include/asm-x86/xcr.h b/include/asm-x86/xcr.h
new file mode 100644
index 000000000000..f2cba4e79a23
--- /dev/null
+++ b/include/asm-x86/xcr.h
@@ -0,0 +1,49 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2008 rPath, Inc. - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * asm-x86/xcr.h
13 *
14 * Definitions for the eXtended Control Register instructions
15 */
16
17#ifndef _ASM_X86_XCR_H
18#define _ASM_X86_XCR_H
19
20#define XCR_XFEATURE_ENABLED_MASK 0x00000000
21
22#ifdef __KERNEL__
23# ifndef __ASSEMBLY__
24
25#include <linux/types.h>
26
27static inline u64 xgetbv(u32 index)
28{
29 u32 eax, edx;
30
31 asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
32 : "=a" (eax), "=d" (edx)
33 : "c" (index));
34 return eax + ((u64)edx << 32);
35}
36
37static inline void xsetbv(u32 index, u64 value)
38{
39 u32 eax = value;
40 u32 edx = value >> 32;
41
42 asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
43 : : "a" (eax), "d" (edx), "c" (index));
44}
45
46# endif /* __ASSEMBLY__ */
47#endif /* __KERNEL__ */
48
49#endif /* _ASM_X86_XCR_H */
diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h
new file mode 100644
index 000000000000..08e9a1ac07a9
--- /dev/null
+++ b/include/asm-x86/xsave.h
@@ -0,0 +1,118 @@
1#ifndef __ASM_X86_XSAVE_H
2#define __ASM_X86_XSAVE_H
3
4#include <linux/types.h>
5#include <asm/processor.h>
6#include <asm/i387.h>
7
8#define XSTATE_FP 0x1
9#define XSTATE_SSE 0x2
10
11#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
12
13#define FXSAVE_SIZE 512
14
15/*
16 * These are the features that the OS can handle currently.
17 */
18#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE)
19
20#ifdef CONFIG_X86_64
21#define REX_PREFIX "0x48, "
22#else
23#define REX_PREFIX
24#endif
25
26extern unsigned int xstate_size;
27extern u64 pcntxt_mask;
28extern struct xsave_struct *init_xstate_buf;
29
30extern void xsave_cntxt_init(void);
31extern void xsave_init(void);
32extern int init_fpu(struct task_struct *child);
33extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
34 void __user *fpstate,
35 struct _fpx_sw_bytes *sw);
36
37static inline int xrstor_checking(struct xsave_struct *fx)
38{
39 int err;
40
41 asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
42 "2:\n"
43 ".section .fixup,\"ax\"\n"
44 "3: movl $-1,%[err]\n"
45 " jmp 2b\n"
46 ".previous\n"
47 _ASM_EXTABLE(1b, 3b)
48 : [err] "=r" (err)
49 : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
50 : "memory");
51
52 return err;
53}
54
55static inline int xsave_user(struct xsave_struct __user *buf)
56{
57 int err;
58 __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
59 "2:\n"
60 ".section .fixup,\"ax\"\n"
61 "3: movl $-1,%[err]\n"
62 " jmp 2b\n"
63 ".previous\n"
64 ".section __ex_table,\"a\"\n"
65 _ASM_ALIGN "\n"
66 _ASM_PTR "1b,3b\n"
67 ".previous"
68 : [err] "=r" (err)
69 : "D" (buf), "a" (-1), "d" (-1), "0" (0)
70 : "memory");
71 if (unlikely(err) && __clear_user(buf, xstate_size))
72 err = -EFAULT;
73 /* No need to clear here because the caller clears USED_MATH */
74 return err;
75}
76
77static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
78{
79 int err;
80 struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
81 u32 lmask = mask;
82 u32 hmask = mask >> 32;
83
84 __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
85 "2:\n"
86 ".section .fixup,\"ax\"\n"
87 "3: movl $-1,%[err]\n"
88 " jmp 2b\n"
89 ".previous\n"
90 ".section __ex_table,\"a\"\n"
91 _ASM_ALIGN "\n"
92 _ASM_PTR "1b,3b\n"
93 ".previous"
94 : [err] "=r" (err)
95 : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
96 : "memory"); /* memory required? */
97 return err;
98}
99
100static inline void xrstor_state(struct xsave_struct *fx, u64 mask)
101{
102 u32 lmask = mask;
103 u32 hmask = mask >> 32;
104
105 asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
106 : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
107 : "memory");
108}
109
110static inline void xsave(struct task_struct *tsk)
111{
112 /* This, however, we can work around by forcing the compiler to select
113 an addressing mode that doesn't require extended registers. */
114 __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27"
115 : : "D" (&(tsk->thread.xstate->xsave)),
116 "a" (-1), "d"(-1) : "memory");
117}
118#endif
diff --git a/include/linux/ata.h b/include/linux/ata.h
index be00973d1a8c..a53318b8cbd0 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -30,6 +30,7 @@
30#define __LINUX_ATA_H__ 30#define __LINUX_ATA_H__
31 31
32#include <linux/types.h> 32#include <linux/types.h>
33#include <asm/byteorder.h>
33 34
34/* defines only for the constants which don't work well as enums */ 35/* defines only for the constants which don't work well as enums */
35#define ATA_DMA_BOUNDARY 0xffffUL 36#define ATA_DMA_BOUNDARY 0xffffUL
@@ -558,6 +559,15 @@ static inline int ata_id_has_flush(const u16 *id)
558 return id[ATA_ID_COMMAND_SET_2] & (1 << 12); 559 return id[ATA_ID_COMMAND_SET_2] & (1 << 12);
559} 560}
560 561
562static inline int ata_id_flush_enabled(const u16 *id)
563{
564 if (ata_id_has_flush(id) == 0)
565 return 0;
566 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
567 return 0;
568 return id[ATA_ID_CFS_ENABLE_2] & (1 << 12);
569}
570
561static inline int ata_id_has_flush_ext(const u16 *id) 571static inline int ata_id_has_flush_ext(const u16 *id)
562{ 572{
563 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) 573 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
@@ -565,6 +575,19 @@ static inline int ata_id_has_flush_ext(const u16 *id)
565 return id[ATA_ID_COMMAND_SET_2] & (1 << 13); 575 return id[ATA_ID_COMMAND_SET_2] & (1 << 13);
566} 576}
567 577
578static inline int ata_id_flush_ext_enabled(const u16 *id)
579{
580 if (ata_id_has_flush_ext(id) == 0)
581 return 0;
582 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
583 return 0;
584 /*
585 * some Maxtor disks have bit 13 defined incorrectly
586 * so check bit 10 too
587 */
588 return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400;
589}
590
568static inline int ata_id_has_lba48(const u16 *id) 591static inline int ata_id_has_lba48(const u16 *id)
569{ 592{
570 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) 593 if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
@@ -574,6 +597,15 @@ static inline int ata_id_has_lba48(const u16 *id)
574 return id[ATA_ID_COMMAND_SET_2] & (1 << 10); 597 return id[ATA_ID_COMMAND_SET_2] & (1 << 10);
575} 598}
576 599
600static inline int ata_id_lba48_enabled(const u16 *id)
601{
602 if (ata_id_has_lba48(id) == 0)
603 return 0;
604 if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
605 return 0;
606 return id[ATA_ID_CFS_ENABLE_2] & (1 << 10);
607}
608
577static inline int ata_id_hpa_enabled(const u16 *id) 609static inline int ata_id_hpa_enabled(const u16 *id)
578{ 610{
579 /* Yes children, word 83 valid bits cover word 82 data */ 611 /* Yes children, word 83 valid bits cover word 82 data */
@@ -645,7 +677,15 @@ static inline unsigned int ata_id_major_version(const u16 *id)
645 677
646static inline int ata_id_is_sata(const u16 *id) 678static inline int ata_id_is_sata(const u16 *id)
647{ 679{
648 return ata_id_major_version(id) >= 5 && id[ATA_ID_HW_CONFIG] == 0; 680 /*
681 * See if word 93 is 0 AND drive is at least ATA-5 compatible
682 * verifying that word 80 by casting it to a signed type --
683 * this trick allows us to filter out the reserved values of
684 * 0x0000 and 0xffff along with the earlier ATA revisions...
685 */
686 if (id[ATA_ID_HW_CONFIG] == 0 && (short)id[ATA_ID_MAJOR_VER] >= 0x0020)
687 return 1;
688 return 0;
649} 689}
650 690
651static inline int ata_id_has_tpm(const u16 *id) 691static inline int ata_id_has_tpm(const u16 *id)
@@ -742,6 +782,76 @@ static inline int atapi_id_dmadir(const u16 *dev_id)
742 return ata_id_major_version(dev_id) >= 7 && (dev_id[62] & 0x8000); 782 return ata_id_major_version(dev_id) >= 7 && (dev_id[62] & 0x8000);
743} 783}
744 784
785/*
786 * ata_id_is_lba_capacity_ok() performs a sanity check on
787 * the claimed LBA capacity value for the device.
788 *
789 * Returns 1 if LBA capacity looks sensible, 0 otherwise.
790 *
791 * It is called only once for each device.
792 */
793static inline int ata_id_is_lba_capacity_ok(u16 *id)
794{
795 unsigned long lba_sects, chs_sects, head, tail;
796
797 /* No non-LBA info .. so valid! */
798 if (id[ATA_ID_CYLS] == 0)
799 return 1;
800
801 lba_sects = ata_id_u32(id, ATA_ID_LBA_CAPACITY);
802
803 /*
804 * The ATA spec tells large drives to return
805 * C/H/S = 16383/16/63 independent of their size.
806 * Some drives can be jumpered to use 15 heads instead of 16.
807 * Some drives can be jumpered to use 4092 cyls instead of 16383.
808 */
809 if ((id[ATA_ID_CYLS] == 16383 ||
810 (id[ATA_ID_CYLS] == 4092 && id[ATA_ID_CUR_CYLS] == 16383)) &&
811 id[ATA_ID_SECTORS] == 63 &&
812 (id[ATA_ID_HEADS] == 15 || id[ATA_ID_HEADS] == 16) &&
813 (lba_sects >= 16383 * 63 * id[ATA_ID_HEADS]))
814 return 1;
815
816 chs_sects = id[ATA_ID_CYLS] * id[ATA_ID_HEADS] * id[ATA_ID_SECTORS];
817
818 /* perform a rough sanity check on lba_sects: within 10% is OK */
819 if (lba_sects - chs_sects < chs_sects/10)
820 return 1;
821
822 /* some drives have the word order reversed */
823 head = (lba_sects >> 16) & 0xffff;
824 tail = lba_sects & 0xffff;
825 lba_sects = head | (tail << 16);
826
827 if (lba_sects - chs_sects < chs_sects/10) {
828 *(__le32 *)&id[ATA_ID_LBA_CAPACITY] = __cpu_to_le32(lba_sects);
829 return 1; /* LBA capacity is (now) good */
830 }
831
832 return 0; /* LBA capacity value may be bad */
833}
834
835static inline void ata_id_to_hd_driveid(u16 *id)
836{
837#ifdef __BIG_ENDIAN
838 /* accessed in struct hd_driveid as 8-bit values */
839 id[ATA_ID_MAX_MULTSECT] = __cpu_to_le16(id[ATA_ID_MAX_MULTSECT]);
840 id[ATA_ID_CAPABILITY] = __cpu_to_le16(id[ATA_ID_CAPABILITY]);
841 id[ATA_ID_OLD_PIO_MODES] = __cpu_to_le16(id[ATA_ID_OLD_PIO_MODES]);
842 id[ATA_ID_OLD_DMA_MODES] = __cpu_to_le16(id[ATA_ID_OLD_DMA_MODES]);
843 id[ATA_ID_MULTSECT] = __cpu_to_le16(id[ATA_ID_MULTSECT]);
844
845 /* as 32-bit values */
846 *(u32 *)&id[ATA_ID_LBA_CAPACITY] = ata_id_u32(id, ATA_ID_LBA_CAPACITY);
847 *(u32 *)&id[ATA_ID_SPG] = ata_id_u32(id, ATA_ID_SPG);
848
849 /* as 64-bit value */
850 *(u64 *)&id[ATA_ID_LBA_CAPACITY_2] =
851 ata_id_u64(id, ATA_ID_LBA_CAPACITY_2);
852#endif
853}
854
745static inline int is_multi_taskfile(struct ata_taskfile *tf) 855static inline int is_multi_taskfile(struct ata_taskfile *tf)
746{ 856{
747 return (tf->command == ATA_CMD_READ_MULTI) || 857 return (tf->command == ATA_CMD_READ_MULTI) ||
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 56c73b847551..c360c558e59e 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -25,9 +25,99 @@
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/msi.h> 26#include <linux/msi.h>
27 27
28#ifdef CONFIG_DMAR 28#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
29struct intel_iommu; 29struct intel_iommu;
30 30
31struct dmar_drhd_unit {
32 struct list_head list; /* list of drhd units */
33 struct acpi_dmar_header *hdr; /* ACPI header */
34 u64 reg_base_addr; /* register base address*/
35 struct pci_dev **devices; /* target device array */
36 int devices_cnt; /* target device count */
37 u8 ignored:1; /* ignore drhd */
38 u8 include_all:1;
39 struct intel_iommu *iommu;
40};
41
42extern struct list_head dmar_drhd_units;
43
44#define for_each_drhd_unit(drhd) \
45 list_for_each_entry(drhd, &dmar_drhd_units, list)
46
47extern int dmar_table_init(void);
48extern int early_dmar_detect(void);
49extern int dmar_dev_scope_init(void);
50
51/* Intel IOMMU detection */
52extern void detect_intel_iommu(void);
53
54
55extern int parse_ioapics_under_ir(void);
56extern int alloc_iommu(struct dmar_drhd_unit *);
57#else
58static inline void detect_intel_iommu(void)
59{
60 return;
61}
62
63static inline int dmar_table_init(void)
64{
65 return -ENODEV;
66}
67#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */
68
69#ifdef CONFIG_INTR_REMAP
70extern int intr_remapping_enabled;
71extern int enable_intr_remapping(int);
72
73struct irte {
74 union {
75 struct {
76 __u64 present : 1,
77 fpd : 1,
78 dst_mode : 1,
79 redir_hint : 1,
80 trigger_mode : 1,
81 dlvry_mode : 3,
82 avail : 4,
83 __reserved_1 : 4,
84 vector : 8,
85 __reserved_2 : 8,
86 dest_id : 32;
87 };
88 __u64 low;
89 };
90
91 union {
92 struct {
93 __u64 sid : 16,
94 sq : 2,
95 svt : 2,
96 __reserved_3 : 44;
97 };
98 __u64 high;
99 };
100};
101extern int get_irte(int irq, struct irte *entry);
102extern int modify_irte(int irq, struct irte *irte_modified);
103extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
104extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
105 u16 sub_handle);
106extern int map_irq_to_irte_handle(int irq, u16 *sub_handle);
107extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index);
108extern int flush_irte(int irq);
109extern int free_irte(int irq);
110
111extern int irq_remapped(int irq);
112extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
113extern struct intel_iommu *map_ioapic_to_ir(int apic);
114#else
115#define irq_remapped(irq) (0)
116#define enable_intr_remapping(mode) (-1)
117#define intr_remapping_enabled (0)
118#endif
119
120#ifdef CONFIG_DMAR
31extern const char *dmar_get_fault_reason(u8 fault_reason); 121extern const char *dmar_get_fault_reason(u8 fault_reason);
32 122
33/* Can't use the common MSI interrupt functions 123/* Can't use the common MSI interrupt functions
@@ -40,47 +130,30 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg);
40extern int dmar_set_interrupt(struct intel_iommu *iommu); 130extern int dmar_set_interrupt(struct intel_iommu *iommu);
41extern int arch_setup_dmar_msi(unsigned int irq); 131extern int arch_setup_dmar_msi(unsigned int irq);
42 132
43/* Intel IOMMU detection and initialization functions */ 133extern int iommu_detected, no_iommu;
44extern void detect_intel_iommu(void);
45extern int intel_iommu_init(void);
46
47extern int dmar_table_init(void);
48extern int early_dmar_detect(void);
49
50extern struct list_head dmar_drhd_units;
51extern struct list_head dmar_rmrr_units; 134extern struct list_head dmar_rmrr_units;
52
53struct dmar_drhd_unit {
54 struct list_head list; /* list of drhd units */
55 u64 reg_base_addr; /* register base address*/
56 struct pci_dev **devices; /* target device array */
57 int devices_cnt; /* target device count */
58 u8 ignored:1; /* ignore drhd */
59 u8 include_all:1;
60 struct intel_iommu *iommu;
61};
62
63struct dmar_rmrr_unit { 135struct dmar_rmrr_unit {
64 struct list_head list; /* list of rmrr units */ 136 struct list_head list; /* list of rmrr units */
137 struct acpi_dmar_header *hdr; /* ACPI header */
65 u64 base_address; /* reserved base address*/ 138 u64 base_address; /* reserved base address*/
66 u64 end_address; /* reserved end address */ 139 u64 end_address; /* reserved end address */
67 struct pci_dev **devices; /* target devices */ 140 struct pci_dev **devices; /* target devices */
68 int devices_cnt; /* target device count */ 141 int devices_cnt; /* target device count */
69}; 142};
70 143
71#define for_each_drhd_unit(drhd) \
72 list_for_each_entry(drhd, &dmar_drhd_units, list)
73#define for_each_rmrr_units(rmrr) \ 144#define for_each_rmrr_units(rmrr) \
74 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 145 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
146/* Intel DMAR initialization functions */
147extern int intel_iommu_init(void);
148extern int dmar_disabled;
75#else 149#else
76static inline void detect_intel_iommu(void)
77{
78 return;
79}
80static inline int intel_iommu_init(void) 150static inline int intel_iommu_init(void)
81{ 151{
152#ifdef CONFIG_INTR_REMAP
153 return dmar_dev_scope_init();
154#else
82 return -ENODEV; 155 return -ENODEV;
156#endif
83} 157}
84
85#endif /* !CONFIG_DMAR */ 158#endif /* !CONFIG_DMAR */
86#endif /* __DMAR_H__ */ 159#endif /* __DMAR_H__ */
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 80171ee89a22..8120fa1bc235 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -837,6 +837,8 @@ extern void ext3_truncate (struct inode *);
837extern void ext3_set_inode_flags(struct inode *); 837extern void ext3_set_inode_flags(struct inode *);
838extern void ext3_get_inode_flags(struct ext3_inode_info *); 838extern void ext3_get_inode_flags(struct ext3_inode_info *);
839extern void ext3_set_aops(struct inode *inode); 839extern void ext3_set_aops(struct inode *inode);
840extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
841 u64 start, u64 len);
840 842
841/* ioctl.c */ 843/* ioctl.c */
842extern int ext3_ioctl (struct inode *, struct file *, unsigned int, 844extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h
new file mode 100644
index 000000000000..671decbd2aeb
--- /dev/null
+++ b/include/linux/fiemap.h
@@ -0,0 +1,64 @@
1/*
2 * FS_IOC_FIEMAP ioctl infrastructure.
3 *
4 * Some portions copyright (C) 2007 Cluster File Systems, Inc
5 *
6 * Authors: Mark Fasheh <mfasheh@suse.com>
7 * Kalpak Shah <kalpak.shah@sun.com>
8 * Andreas Dilger <adilger@sun.com>
9 */
10
11#ifndef _LINUX_FIEMAP_H
12#define _LINUX_FIEMAP_H
13
14struct fiemap_extent {
15 __u64 fe_logical; /* logical offset in bytes for the start of
16 * the extent from the beginning of the file */
17 __u64 fe_physical; /* physical offset in bytes for the start
18 * of the extent from the beginning of the disk */
19 __u64 fe_length; /* length in bytes for this extent */
20 __u64 fe_reserved64[2];
21 __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
22 __u32 fe_reserved[3];
23};
24
25struct fiemap {
26 __u64 fm_start; /* logical offset (inclusive) at
27 * which to start mapping (in) */
28 __u64 fm_length; /* logical length of mapping which
29 * userspace wants (in) */
30 __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
31 __u32 fm_mapped_extents;/* number of extents that were mapped (out) */
32 __u32 fm_extent_count; /* size of fm_extents array (in) */
33 __u32 fm_reserved;
34 struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
35};
36
37#define FIEMAP_MAX_OFFSET (~0ULL)
38
39#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
40#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
41
42#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
43
44#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
45#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
46#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
47 * Sets EXTENT_UNKNOWN. */
48#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read
49 * while fs is unmounted */
50#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs.
51 * Sets EXTENT_NO_BYPASS. */
52#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be
53 * block aligned. */
54#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata.
55 * Sets EXTENT_NOT_ALIGNED.*/
56#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block.
57 * Sets EXTENT_NOT_ALIGNED.*/
58#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but
59 * no data (i.e. zero). */
60#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively
61 * support extents. Result
62 * merged for efficiency. */
63
64#endif /* _LINUX_FIEMAP_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 32477e8872d5..44e3cb2f1966 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -234,6 +234,7 @@ extern int dir_notify_enable;
234#define FS_IOC_SETFLAGS _IOW('f', 2, long) 234#define FS_IOC_SETFLAGS _IOW('f', 2, long)
235#define FS_IOC_GETVERSION _IOR('v', 1, long) 235#define FS_IOC_GETVERSION _IOR('v', 1, long)
236#define FS_IOC_SETVERSION _IOW('v', 2, long) 236#define FS_IOC_SETVERSION _IOW('v', 2, long)
237#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap)
237#define FS_IOC32_GETFLAGS _IOR('f', 1, int) 238#define FS_IOC32_GETFLAGS _IOR('f', 1, int)
238#define FS_IOC32_SETFLAGS _IOW('f', 2, int) 239#define FS_IOC32_SETFLAGS _IOW('f', 2, int)
239#define FS_IOC32_GETVERSION _IOR('v', 1, int) 240#define FS_IOC32_GETVERSION _IOR('v', 1, int)
@@ -294,6 +295,7 @@ extern int dir_notify_enable;
294#include <linux/mutex.h> 295#include <linux/mutex.h>
295#include <linux/capability.h> 296#include <linux/capability.h>
296#include <linux/semaphore.h> 297#include <linux/semaphore.h>
298#include <linux/fiemap.h>
297 299
298#include <asm/atomic.h> 300#include <asm/atomic.h>
299#include <asm/byteorder.h> 301#include <asm/byteorder.h>
@@ -1182,6 +1184,20 @@ extern void dentry_unhash(struct dentry *dentry);
1182extern int file_permission(struct file *, int); 1184extern int file_permission(struct file *, int);
1183 1185
1184/* 1186/*
1187 * VFS FS_IOC_FIEMAP helper definitions.
1188 */
1189struct fiemap_extent_info {
1190 unsigned int fi_flags; /* Flags as passed from user */
1191 unsigned int fi_extents_mapped; /* Number of mapped extents */
1192 unsigned int fi_extents_max; /* Size of fiemap_extent array */
1193 struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent
1194 * array */
1195};
1196int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
1197 u64 phys, u64 len, u32 flags);
1198int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
1199
1200/*
1185 * File types 1201 * File types
1186 * 1202 *
1187 * NOTE! These match bits 12..15 of stat.st_mode 1203 * NOTE! These match bits 12..15 of stat.st_mode
@@ -1290,6 +1306,8 @@ struct inode_operations {
1290 void (*truncate_range)(struct inode *, loff_t, loff_t); 1306 void (*truncate_range)(struct inode *, loff_t, loff_t);
1291 long (*fallocate)(struct inode *inode, int mode, loff_t offset, 1307 long (*fallocate)(struct inode *inode, int mode, loff_t offset,
1292 loff_t len); 1308 loff_t len);
1309 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
1310 u64 len);
1293}; 1311};
1294 1312
1295struct seq_file; 1313struct seq_file;
@@ -1987,6 +2005,9 @@ extern int vfs_fstat(unsigned int, struct kstat *);
1987 2005
1988extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, 2006extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
1989 unsigned long arg); 2007 unsigned long arg);
2008extern int generic_block_fiemap(struct inode *inode,
2009 struct fiemap_extent_info *fieinfo, u64 start,
2010 u64 len, get_block_t *get_block);
1990 2011
1991extern void get_filesystem(struct file_system_type *fs); 2012extern void get_filesystem(struct file_system_type *fs);
1992extern void put_filesystem(struct file_system_type *fs); 2013extern void put_filesystem(struct file_system_type *fs);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 6514db8fd2e4..a9d82d6e6bdd 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -8,7 +8,7 @@
8 8
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/ioport.h> 10#include <linux/ioport.h>
11#include <linux/hdreg.h> 11#include <linux/ata.h>
12#include <linux/blkdev.h> 12#include <linux/blkdev.h>
13#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
14#include <linux/interrupt.h> 14#include <linux/interrupt.h>
@@ -17,6 +17,7 @@
17#include <linux/device.h> 17#include <linux/device.h>
18#include <linux/pci.h> 18#include <linux/pci.h>
19#include <linux/completion.h> 19#include <linux/completion.h>
20#include <linux/pm.h>
20#ifdef CONFIG_BLK_DEV_IDEACPI 21#ifdef CONFIG_BLK_DEV_IDEACPI
21#include <acpi/acpi.h> 22#include <acpi/acpi.h>
22#endif 23#endif
@@ -87,12 +88,13 @@ struct ide_io_ports {
87}; 88};
88 89
89#define OK_STAT(stat,good,bad) (((stat)&((good)|(bad)))==(good)) 90#define OK_STAT(stat,good,bad) (((stat)&((good)|(bad)))==(good))
90#define BAD_R_STAT (BUSY_STAT | ERR_STAT)
91#define BAD_W_STAT (BAD_R_STAT | WRERR_STAT)
92#define BAD_STAT (BAD_R_STAT | DRQ_STAT)
93#define DRIVE_READY (READY_STAT | SEEK_STAT)
94 91
95#define BAD_CRC (ABRT_ERR | ICRC_ERR) 92#define BAD_R_STAT (ATA_BUSY | ATA_ERR)
93#define BAD_W_STAT (BAD_R_STAT | ATA_DF)
94#define BAD_STAT (BAD_R_STAT | ATA_DRQ)
95#define DRIVE_READY (ATA_DRDY | ATA_DSC)
96
97#define BAD_CRC (ATA_ABORTED | ATA_ICRC)
96 98
97#define SATA_NR_PORTS (3) /* 16 possible ?? */ 99#define SATA_NR_PORTS (3) /* 16 possible ?? */
98 100
@@ -125,24 +127,41 @@ struct ide_io_ports {
125#define PARTN_BITS 6 /* number of minor dev bits for partitions */ 127#define PARTN_BITS 6 /* number of minor dev bits for partitions */
126#define MAX_DRIVES 2 /* per interface; 2 assumed by lots of code */ 128#define MAX_DRIVES 2 /* per interface; 2 assumed by lots of code */
127#define SECTOR_SIZE 512 129#define SECTOR_SIZE 512
128#define SECTOR_WORDS (SECTOR_SIZE / 4) /* number of 32bit words per sector */ 130
129#define IDE_LARGE_SEEK(b1,b2,t) (((b1) > (b2) + (t)) || ((b2) > (b1) + (t))) 131#define IDE_LARGE_SEEK(b1,b2,t) (((b1) > (b2) + (t)) || ((b2) > (b1) + (t)))
130 132
131/* 133/*
132 * Timeouts for various operations: 134 * Timeouts for various operations:
133 */ 135 */
134#define WAIT_DRQ (HZ/10) /* 100msec - spec allows up to 20ms */ 136enum {
135#define WAIT_READY (5*HZ) /* 5sec - some laptops are very slow */ 137 /* spec allows up to 20ms */
136#define WAIT_PIDENTIFY (10*HZ) /* 10sec - should be less than 3ms (?), if all ATAPI CD is closed at boot */ 138 WAIT_DRQ = HZ / 10, /* 100ms */
137#define WAIT_WORSTCASE (30*HZ) /* 30sec - worst case when spinning up */ 139 /* some laptops are very slow */
138#define WAIT_CMD (10*HZ) /* 10sec - maximum wait for an IRQ to happen */ 140 WAIT_READY = 5 * HZ, /* 5s */
139#define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */ 141 /* should be less than 3ms (?), if all ATAPI CD is closed at boot */
142 WAIT_PIDENTIFY = 10 * HZ, /* 10s */
143 /* worst case when spinning up */
144 WAIT_WORSTCASE = 30 * HZ, /* 30s */
145 /* maximum wait for an IRQ to happen */
146 WAIT_CMD = 10 * HZ, /* 10s */
147 /* Some drives require a longer IRQ timeout. */
148 WAIT_FLOPPY_CMD = 50 * HZ, /* 50s */
149 /*
150 * Some drives (for example, Seagate STT3401A Travan) require a very
151 * long timeout, because they don't return an interrupt or clear their
152 * BSY bit until after the command completes (even retension commands).
153 */
154 WAIT_TAPE_CMD = 900 * HZ, /* 900s */
155 /* minimum sleep time */
156 WAIT_MIN_SLEEP = HZ / 50, /* 20ms */
157};
140 158
141/* 159/*
142 * Op codes for special requests to be handled by ide_special_rq(). 160 * Op codes for special requests to be handled by ide_special_rq().
143 * Values should be in the range of 0x20 to 0x3f. 161 * Values should be in the range of 0x20 to 0x3f.
144 */ 162 */
145#define REQ_DRIVE_RESET 0x20 163#define REQ_DRIVE_RESET 0x20
164#define REQ_DEVSET_EXEC 0x21
146 165
147/* 166/*
148 * Check for an interrupt and acknowledge the interrupt status 167 * Check for an interrupt and acknowledge the interrupt status
@@ -303,8 +322,8 @@ typedef enum {
303 ide_started, /* a drive operation was started, handler was set */ 322 ide_started, /* a drive operation was started, handler was set */
304} ide_startstop_t; 323} ide_startstop_t;
305 324
325struct ide_devset;
306struct ide_driver_s; 326struct ide_driver_s;
307struct ide_settings_s;
308 327
309#ifdef CONFIG_BLK_DEV_IDEACPI 328#ifdef CONFIG_BLK_DEV_IDEACPI
310struct ide_acpi_drive_link; 329struct ide_acpi_drive_link;
@@ -315,10 +334,10 @@ struct ide_acpi_hwif_link;
315enum { 334enum {
316 IDE_AFLAG_DRQ_INTERRUPT = (1 << 0), 335 IDE_AFLAG_DRQ_INTERRUPT = (1 << 0),
317 IDE_AFLAG_MEDIA_CHANGED = (1 << 1), 336 IDE_AFLAG_MEDIA_CHANGED = (1 << 1),
318
319 /* ide-cd */
320 /* Drive cannot lock the door. */ 337 /* Drive cannot lock the door. */
321 IDE_AFLAG_NO_DOORLOCK = (1 << 2), 338 IDE_AFLAG_NO_DOORLOCK = (1 << 2),
339
340 /* ide-cd */
322 /* Drive cannot eject the disc. */ 341 /* Drive cannot eject the disc. */
323 IDE_AFLAG_NO_EJECT = (1 << 3), 342 IDE_AFLAG_NO_EJECT = (1 << 3),
324 /* Drive is a pre ATAPI 1.2 drive. */ 343 /* Drive is a pre ATAPI 1.2 drive. */
@@ -354,21 +373,25 @@ enum {
354 IDE_AFLAG_CLIK_DRIVE = (1 << 19), 373 IDE_AFLAG_CLIK_DRIVE = (1 << 19),
355 /* Requires BH algorithm for packets */ 374 /* Requires BH algorithm for packets */
356 IDE_AFLAG_ZIP_DRIVE = (1 << 20), 375 IDE_AFLAG_ZIP_DRIVE = (1 << 20),
376 /* Write protect */
377 IDE_AFLAG_WP = (1 << 21),
378 /* Supports format progress report */
379 IDE_AFLAG_SRFP = (1 << 22),
357 380
358 /* ide-tape */ 381 /* ide-tape */
359 IDE_AFLAG_IGNORE_DSC = (1 << 21), 382 IDE_AFLAG_IGNORE_DSC = (1 << 23),
360 /* 0 When the tape position is unknown */ 383 /* 0 When the tape position is unknown */
361 IDE_AFLAG_ADDRESS_VALID = (1 << 22), 384 IDE_AFLAG_ADDRESS_VALID = (1 << 24),
362 /* Device already opened */ 385 /* Device already opened */
363 IDE_AFLAG_BUSY = (1 << 23), 386 IDE_AFLAG_BUSY = (1 << 25),
364 /* Attempt to auto-detect the current user block size */ 387 /* Attempt to auto-detect the current user block size */
365 IDE_AFLAG_DETECT_BS = (1 << 24), 388 IDE_AFLAG_DETECT_BS = (1 << 26),
366 /* Currently on a filemark */ 389 /* Currently on a filemark */
367 IDE_AFLAG_FILEMARK = (1 << 25), 390 IDE_AFLAG_FILEMARK = (1 << 27),
368 /* 0 = no tape is loaded, so we don't rewind after ejecting */ 391 /* 0 = no tape is loaded, so we don't rewind after ejecting */
369 IDE_AFLAG_MEDIUM_PRESENT = (1 << 26), 392 IDE_AFLAG_MEDIUM_PRESENT = (1 << 28),
370 393
371 IDE_AFLAG_NO_AUTOCLOSE = (1 << 27), 394 IDE_AFLAG_NO_AUTOCLOSE = (1 << 29),
372}; 395};
373 396
374struct ide_drive_s { 397struct ide_drive_s {
@@ -380,10 +403,10 @@ struct ide_drive_s {
380 struct request *rq; /* current request */ 403 struct request *rq; /* current request */
381 struct ide_drive_s *next; /* circular list of hwgroup drives */ 404 struct ide_drive_s *next; /* circular list of hwgroup drives */
382 void *driver_data; /* extra driver data */ 405 void *driver_data; /* extra driver data */
383 struct hd_driveid *id; /* drive model identification info */ 406 u16 *id; /* identification info */
384#ifdef CONFIG_IDE_PROC_FS 407#ifdef CONFIG_IDE_PROC_FS
385 struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ 408 struct proc_dir_entry *proc; /* /proc/ide/ directory entry */
386 struct ide_settings_s *settings;/* /proc/ide/ drive settings */ 409 const struct ide_proc_devset *settings; /* /proc/ide/ drive settings */
387#endif 410#endif
388 struct hwif_s *hwif; /* actually (ide_hwif_t *) */ 411 struct hwif_s *hwif; /* actually (ide_hwif_t *) */
389 412
@@ -395,16 +418,16 @@ struct ide_drive_s {
395 special_t special; /* special action flags */ 418 special_t special; /* special action flags */
396 select_t select; /* basic drive/head select reg value */ 419 select_t select; /* basic drive/head select reg value */
397 420
398 u8 keep_settings; /* restore settings after drive reset */
399 u8 using_dma; /* disk is using dma for read/write */
400 u8 retry_pio; /* retrying dma capable host in pio */ 421 u8 retry_pio; /* retrying dma capable host in pio */
401 u8 state; /* retry state */ 422 u8 state; /* retry state */
402 u8 waiting_for_dma; /* dma currently in progress */ 423 u8 waiting_for_dma; /* dma currently in progress */
403 u8 unmask; /* okay to unmask other irqs */
404 u8 noflush; /* don't attempt flushes */
405 u8 dsc_overlap; /* DSC overlap */
406 u8 nice1; /* give potential excess bandwidth */
407 424
425 unsigned keep_settings : 1; /* restore settings after drive reset */
426 unsigned using_dma : 1; /* disk is using dma for read/write */
427 unsigned unmask : 1; /* okay to unmask other irqs */
428 unsigned noflush : 1; /* don't attempt flushes */
429 unsigned dsc_overlap : 1; /* DSC overlap */
430 unsigned nice1 : 1; /* give potential excess bandwidth */
408 unsigned present : 1; /* drive is physically present */ 431 unsigned present : 1; /* drive is physically present */
409 unsigned dead : 1; /* device ejected hint */ 432 unsigned dead : 1; /* device ejected hint */
410 unsigned id_read : 1; /* 1=id read from disk 0 = synthetic */ 433 unsigned id_read : 1; /* 1=id read from disk 0 = synthetic */
@@ -414,23 +437,22 @@ struct ide_drive_s {
414 unsigned forced_geom : 1; /* 1 if hdx=c,h,s was given at boot */ 437 unsigned forced_geom : 1; /* 1 if hdx=c,h,s was given at boot */
415 unsigned no_unmask : 1; /* disallow setting unmask bit */ 438 unsigned no_unmask : 1; /* disallow setting unmask bit */
416 unsigned no_io_32bit : 1; /* disallow enabling 32bit I/O */ 439 unsigned no_io_32bit : 1; /* disallow enabling 32bit I/O */
417 unsigned atapi_overlap : 1; /* ATAPI overlap (not supported) */
418 unsigned doorlocking : 1; /* for removable only: door lock/unlock works */ 440 unsigned doorlocking : 1; /* for removable only: door lock/unlock works */
419 unsigned nodma : 1; /* disallow DMA */ 441 unsigned nodma : 1; /* disallow DMA */
420 unsigned remap_0_to_1 : 1; /* 0=noremap, 1=remap 0->1 (for EZDrive) */
421 unsigned blocked : 1; /* 1=powermanagment told us not to do anything, so sleep nicely */ 442 unsigned blocked : 1; /* 1=powermanagment told us not to do anything, so sleep nicely */
422 unsigned scsi : 1; /* 0=default, 1=ide-scsi emulation */ 443 unsigned scsi : 1; /* 0=default, 1=ide-scsi emulation */
423 unsigned sleeping : 1; /* 1=sleeping & sleep field valid */ 444 unsigned sleeping : 1; /* 1=sleeping & sleep field valid */
424 unsigned post_reset : 1; 445 unsigned post_reset : 1;
425 unsigned udma33_warned : 1; 446 unsigned udma33_warned : 1;
447 unsigned addressing : 2; /* 0=28-bit, 1=48-bit, 2=48-bit doing 28-bit */
448 unsigned wcache : 1; /* status of write cache */
449 unsigned nowerr : 1; /* used for ignoring ATA_DF */
426 450
427 u8 addressing; /* 0=28-bit, 1=48-bit, 2=48-bit doing 28-bit */
428 u8 quirk_list; /* considered quirky, set for a specific host */ 451 u8 quirk_list; /* considered quirky, set for a specific host */
429 u8 init_speed; /* transfer rate set at boot */ 452 u8 init_speed; /* transfer rate set at boot */
430 u8 current_speed; /* current transfer rate set */ 453 u8 current_speed; /* current transfer rate set */
431 u8 desired_speed; /* desired transfer rate set */ 454 u8 desired_speed; /* desired transfer rate set */
432 u8 dn; /* now wide spread use */ 455 u8 dn; /* now wide spread use */
433 u8 wcache; /* status of write cache */
434 u8 acoustic; /* acoustic management */ 456 u8 acoustic; /* acoustic management */
435 u8 media; /* disk, cdrom, tape, floppy, ... */ 457 u8 media; /* disk, cdrom, tape, floppy, ... */
436 u8 ready_stat; /* min status value for drive ready */ 458 u8 ready_stat; /* min status value for drive ready */
@@ -438,9 +460,7 @@ struct ide_drive_s {
438 u8 mult_req; /* requested multiple sector setting */ 460 u8 mult_req; /* requested multiple sector setting */
439 u8 tune_req; /* requested drive tuning setting */ 461 u8 tune_req; /* requested drive tuning setting */
440 u8 io_32bit; /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */ 462 u8 io_32bit; /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */
441 u8 bad_wstat; /* used for ignoring WRERR_STAT */ 463 u8 bad_wstat; /* used for ignoring ATA_DF */
442 u8 nowerr; /* used for ignoring WRERR_STAT */
443 u8 sect0; /* offset of first sector for DM6:DDO */
444 u8 head; /* "real" number of heads */ 464 u8 head; /* "real" number of heads */
445 u8 sect; /* "real" sectors per track */ 465 u8 sect; /* "real" sectors per track */
446 u8 bios_head; /* BIOS/fdisk/LILO number of heads */ 466 u8 bios_head; /* BIOS/fdisk/LILO number of heads */
@@ -474,10 +494,6 @@ typedef struct ide_drive_s ide_drive_t;
474 494
475#define to_ide_device(dev)container_of(dev, ide_drive_t, gendev) 495#define to_ide_device(dev)container_of(dev, ide_drive_t, gendev)
476 496
477#define IDE_CHIPSET_PCI_MASK \
478 ((1<<ide_pci)|(1<<ide_cmd646)|(1<<ide_ali14xx))
479#define IDE_CHIPSET_IS_PCI(c) ((IDE_CHIPSET_PCI_MASK >> (c)) & 1)
480
481struct ide_task_s; 497struct ide_task_s;
482struct ide_port_info; 498struct ide_port_info;
483 499
@@ -567,7 +583,6 @@ typedef struct hwif_s {
567 u8 major; /* our major number */ 583 u8 major; /* our major number */
568 u8 index; /* 0 for ide0; 1 for ide1; ... */ 584 u8 index; /* 0 for ide0; 1 for ide1; ... */
569 u8 channel; /* for dual-port chips: 0=primary, 1=secondary */ 585 u8 channel; /* for dual-port chips: 0=primary, 1=secondary */
570 u8 bus_state; /* power state of the IDE bus */
571 586
572 u32 host_flags; 587 u32 host_flags;
573 588
@@ -645,6 +660,7 @@ struct ide_host {
645 ide_hwif_t *ports[MAX_HWIFS]; 660 ide_hwif_t *ports[MAX_HWIFS];
646 unsigned int n_ports; 661 unsigned int n_ports;
647 struct device *dev[2]; 662 struct device *dev[2];
663 unsigned int (*init_chipset)(struct pci_dev *);
648 unsigned long host_flags; 664 unsigned long host_flags;
649 void *host_priv; 665 void *host_priv;
650}; 666};
@@ -692,9 +708,61 @@ typedef struct ide_driver_s ide_driver_t;
692 708
693extern struct mutex ide_setting_mtx; 709extern struct mutex ide_setting_mtx;
694 710
695int set_io_32bit(ide_drive_t *, int); 711/*
696int set_pio_mode(ide_drive_t *, int); 712 * configurable drive settings
697int set_using_dma(ide_drive_t *, int); 713 */
714
715#define DS_SYNC (1 << 0)
716
717struct ide_devset {
718 int (*get)(ide_drive_t *);
719 int (*set)(ide_drive_t *, int);
720 unsigned int flags;
721};
722
723#define __DEVSET(_flags, _get, _set) { \
724 .flags = _flags, \
725 .get = _get, \
726 .set = _set, \
727}
728
729#define ide_devset_get(name, field) \
730static int get_##name(ide_drive_t *drive) \
731{ \
732 return drive->field; \
733}
734
735#define ide_devset_set(name, field) \
736static int set_##name(ide_drive_t *drive, int arg) \
737{ \
738 drive->field = arg; \
739 return 0; \
740}
741
742#define __IDE_DEVSET(_name, _flags, _get, _set) \
743const struct ide_devset ide_devset_##_name = \
744 __DEVSET(_flags, _get, _set)
745
746#define IDE_DEVSET(_name, _flags, _get, _set) \
747static __IDE_DEVSET(_name, _flags, _get, _set)
748
749#define ide_devset_rw(_name, _func) \
750IDE_DEVSET(_name, 0, get_##_func, set_##_func)
751
752#define ide_devset_w(_name, _func) \
753IDE_DEVSET(_name, 0, NULL, set_##_func)
754
755#define ide_devset_rw_sync(_name, _func) \
756IDE_DEVSET(_name, DS_SYNC, get_##_func, set_##_func)
757
758#define ide_decl_devset(_name) \
759extern const struct ide_devset ide_devset_##_name
760
761ide_decl_devset(io_32bit);
762ide_decl_devset(keepsettings);
763ide_decl_devset(pio_mode);
764ide_decl_devset(unmaskirq);
765ide_decl_devset(using_dma);
698 766
699/* ATAPI packet command flags */ 767/* ATAPI packet command flags */
700enum { 768enum {
@@ -710,6 +778,12 @@ enum {
710 PC_FLAG_TIMEDOUT = (1 << 7), 778 PC_FLAG_TIMEDOUT = (1 << 7),
711}; 779};
712 780
781/*
782 * With each packet command, we allocate a buffer of IDE_PC_BUFFER_SIZE bytes.
783 * This is used for several packet commands (not for READ/WRITE commands).
784 */
785#define IDE_PC_BUFFER_SIZE 256
786
713struct ide_atapi_pc { 787struct ide_atapi_pc {
714 /* actual packet bytes */ 788 /* actual packet bytes */
715 u8 c[12]; 789 u8 c[12];
@@ -739,7 +813,7 @@ struct ide_atapi_pc {
739 * those are more or less driver-specific and some of them are subject 813 * those are more or less driver-specific and some of them are subject
740 * to change/removal later. 814 * to change/removal later.
741 */ 815 */
742 u8 pc_buf[256]; 816 u8 pc_buf[IDE_PC_BUFFER_SIZE];
743 817
744 /* idetape only */ 818 /* idetape only */
745 struct idetape_bh *bh; 819 struct idetape_bh *bh;
@@ -757,37 +831,34 @@ struct ide_atapi_pc {
757 831
758#ifdef CONFIG_IDE_PROC_FS 832#ifdef CONFIG_IDE_PROC_FS
759/* 833/*
760 * configurable drive settings 834 * /proc/ide interface
761 */ 835 */
762 836
763#define TYPE_INT 0 837#define ide_devset_rw_field(_name, _field) \
764#define TYPE_BYTE 1 838ide_devset_get(_name, _field); \
765#define TYPE_SHORT 2 839ide_devset_set(_name, _field); \
840IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name)
841
842struct ide_proc_devset {
843 const char *name;
844 const struct ide_devset *setting;
845 int min, max;
846 int (*mulf)(ide_drive_t *);
847 int (*divf)(ide_drive_t *);
848};
766 849
767#define SETTING_READ (1 << 0) 850#define __IDE_PROC_DEVSET(_name, _min, _max, _mulf, _divf) { \
768#define SETTING_WRITE (1 << 1) 851 .name = __stringify(_name), \
769#define SETTING_RW (SETTING_READ | SETTING_WRITE) 852 .setting = &ide_devset_##_name, \
853 .min = _min, \
854 .max = _max, \
855 .mulf = _mulf, \
856 .divf = _divf, \
857}
770 858
771typedef int (ide_procset_t)(ide_drive_t *, int); 859#define IDE_PROC_DEVSET(_name, _min, _max) \
772typedef struct ide_settings_s { 860__IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL)
773 char *name;
774 int rw;
775 int data_type;
776 int min;
777 int max;
778 int mul_factor;
779 int div_factor;
780 void *data;
781 ide_procset_t *set;
782 int auto_remove;
783 struct ide_settings_s *next;
784} ide_settings_t;
785
786int ide_add_setting(ide_drive_t *, const char *, int, int, int, int, int, int, void *, ide_procset_t *set);
787 861
788/*
789 * /proc/ide interface
790 */
791typedef struct { 862typedef struct {
792 const char *name; 863 const char *name;
793 mode_t mode; 864 mode_t mode;
@@ -804,8 +875,6 @@ void ide_proc_unregister_port(ide_hwif_t *);
804void ide_proc_register_driver(ide_drive_t *, ide_driver_t *); 875void ide_proc_register_driver(ide_drive_t *, ide_driver_t *);
805void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *); 876void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *);
806 877
807void ide_add_generic_settings(ide_drive_t *);
808
809read_proc_t proc_ide_read_capacity; 878read_proc_t proc_ide_read_capacity;
810read_proc_t proc_ide_read_geometry; 879read_proc_t proc_ide_read_geometry;
811 880
@@ -833,7 +902,6 @@ static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; }
833static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; } 902static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; }
834static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } 903static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; }
835static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } 904static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; }
836static inline void ide_add_generic_settings(ide_drive_t *drive) { ; }
837#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; 905#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0;
838#endif 906#endif
839 907
@@ -879,7 +947,6 @@ enum {
879struct ide_driver_s { 947struct ide_driver_s {
880 const char *version; 948 const char *version;
881 u8 media; 949 u8 media;
882 unsigned supports_dsc_overlap : 1;
883 ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); 950 ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t);
884 int (*end_request)(ide_drive_t *, int, int); 951 int (*end_request)(ide_drive_t *, int, int);
885 ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); 952 ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8);
@@ -889,7 +956,8 @@ struct ide_driver_s {
889 void (*resume)(ide_drive_t *); 956 void (*resume)(ide_drive_t *);
890 void (*shutdown)(ide_drive_t *); 957 void (*shutdown)(ide_drive_t *);
891#ifdef CONFIG_IDE_PROC_FS 958#ifdef CONFIG_IDE_PROC_FS
892 ide_proc_entry_t *proc; 959 ide_proc_entry_t *proc;
960 const struct ide_proc_devset *settings;
893#endif 961#endif
894}; 962};
895 963
@@ -898,7 +966,17 @@ struct ide_driver_s {
898int ide_device_get(ide_drive_t *); 966int ide_device_get(ide_drive_t *);
899void ide_device_put(ide_drive_t *); 967void ide_device_put(ide_drive_t *);
900 968
901int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long); 969struct ide_ioctl_devset {
970 unsigned int get_ioctl;
971 unsigned int set_ioctl;
972 const struct ide_devset *setting;
973};
974
975int ide_setting_ioctl(ide_drive_t *, struct block_device *, unsigned int,
976 unsigned long, const struct ide_ioctl_devset *);
977
978int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *,
979 unsigned, unsigned long);
902 980
903extern int ide_vlb_clk; 981extern int ide_vlb_clk;
904extern int ide_pci_clk; 982extern int ide_pci_clk;
@@ -920,14 +998,19 @@ ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8);
920 998
921ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); 999ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat);
922 1000
923extern void ide_fix_driveid(struct hd_driveid *); 1001void ide_fix_driveid(u16 *);
924 1002
925extern void ide_fixstring(u8 *, const int, const int); 1003extern void ide_fixstring(u8 *, const int, const int);
926 1004
1005int ide_busy_sleep(ide_hwif_t *, unsigned long, int);
1006
927int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); 1007int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long);
928 1008
929extern ide_startstop_t ide_do_reset (ide_drive_t *); 1009extern ide_startstop_t ide_do_reset (ide_drive_t *);
930 1010
1011extern int ide_devset_execute(ide_drive_t *drive,
1012 const struct ide_devset *setting, int arg);
1013
931extern void ide_do_drive_cmd(ide_drive_t *, struct request *); 1014extern void ide_do_drive_cmd(ide_drive_t *, struct request *);
932 1015
933extern void ide_end_drive_cmd(ide_drive_t *, u8, u8); 1016extern void ide_end_drive_cmd(ide_drive_t *, u8, u8);
@@ -1051,6 +1134,8 @@ void ide_tf_read(ide_drive_t *, ide_task_t *);
1051void ide_input_data(ide_drive_t *, struct request *, void *, unsigned int); 1134void ide_input_data(ide_drive_t *, struct request *, void *, unsigned int);
1052void ide_output_data(ide_drive_t *, struct request *, void *, unsigned int); 1135void ide_output_data(ide_drive_t *, struct request *, void *, unsigned int);
1053 1136
1137int ide_io_buffers(ide_drive_t *, struct ide_atapi_pc *, unsigned int, int);
1138
1054extern void SELECT_DRIVE(ide_drive_t *); 1139extern void SELECT_DRIVE(ide_drive_t *);
1055void SELECT_MASK(ide_drive_t *, int); 1140void SELECT_MASK(ide_drive_t *, int);
1056 1141
@@ -1061,11 +1146,36 @@ extern int drive_is_ready(ide_drive_t *);
1061 1146
1062void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); 1147void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8);
1063 1148
1149int ide_check_atapi_device(ide_drive_t *, const char *);
1150
1151void ide_init_pc(struct ide_atapi_pc *);
1152
1153/*
1154 * Special requests for ide-tape block device strategy routine.
1155 *
1156 * In order to service a character device command, we add special requests to
1157 * the tail of our block device request queue and wait for their completion.
1158 */
1159enum {
1160 REQ_IDETAPE_PC1 = (1 << 0), /* packet command (first stage) */
1161 REQ_IDETAPE_PC2 = (1 << 1), /* packet command (second stage) */
1162 REQ_IDETAPE_READ = (1 << 2),
1163 REQ_IDETAPE_WRITE = (1 << 3),
1164};
1165
1166void ide_queue_pc_head(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *,
1167 struct request *);
1168int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *);
1169
1170int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *);
1171int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
1172int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
1173
1064ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, 1174ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc,
1065 ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, 1175 ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry,
1066 void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), 1176 void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *),
1067 void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *), 1177 void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *),
1068 void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int, 1178 int (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int,
1069 int)); 1179 int));
1070ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *, 1180ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *,
1071 ide_handler_t *, unsigned int, ide_expiry_t *); 1181 ide_handler_t *, unsigned int, ide_expiry_t *);
@@ -1080,8 +1190,6 @@ int ide_raw_taskfile(ide_drive_t *, ide_task_t *, u8 *, u16);
1080int ide_no_data_taskfile(ide_drive_t *, ide_task_t *); 1190int ide_no_data_taskfile(ide_drive_t *, ide_task_t *);
1081 1191
1082int ide_taskfile_ioctl(ide_drive_t *, unsigned int, unsigned long); 1192int ide_taskfile_ioctl(ide_drive_t *, unsigned int, unsigned long);
1083int ide_cmd_ioctl(ide_drive_t *, unsigned int, unsigned long);
1084int ide_task_ioctl(ide_drive_t *, unsigned int, unsigned long);
1085 1193
1086extern int ide_driveid_update(ide_drive_t *); 1194extern int ide_driveid_update(ide_drive_t *);
1087extern int ide_config_drive_speed(ide_drive_t *, u8); 1195extern int ide_config_drive_speed(ide_drive_t *, u8);
@@ -1092,7 +1200,6 @@ extern int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout);
1092 1200
1093extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); 1201extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
1094 1202
1095extern int ide_spin_wait_hwgroup(ide_drive_t *);
1096extern void ide_timer_expiry(unsigned long); 1203extern void ide_timer_expiry(unsigned long);
1097extern irqreturn_t ide_intr(int irq, void *dev_id); 1204extern irqreturn_t ide_intr(int irq, void *dev_id);
1098extern void do_ide_request(struct request_queue *); 1205extern void do_ide_request(struct request_queue *);
@@ -1229,6 +1336,14 @@ int ide_pci_init_two(struct pci_dev *, struct pci_dev *,
1229 const struct ide_port_info *, void *); 1336 const struct ide_port_info *, void *);
1230void ide_pci_remove(struct pci_dev *); 1337void ide_pci_remove(struct pci_dev *);
1231 1338
1339#ifdef CONFIG_PM
1340int ide_pci_suspend(struct pci_dev *, pm_message_t);
1341int ide_pci_resume(struct pci_dev *);
1342#else
1343#define ide_pci_suspend NULL
1344#define ide_pci_resume NULL
1345#endif
1346
1232void ide_map_sg(ide_drive_t *, struct request *); 1347void ide_map_sg(ide_drive_t *, struct request *);
1233void ide_init_sg_cmd(ide_drive_t *, struct request *); 1348void ide_init_sg_cmd(ide_drive_t *, struct request *);
1234 1349
@@ -1240,7 +1355,7 @@ struct drive_list_entry {
1240 const char *id_firmware; 1355 const char *id_firmware;
1241}; 1356};
1242 1357
1243int ide_in_drive_list(struct hd_driveid *, const struct drive_list_entry *); 1358int ide_in_drive_list(u16 *, const struct drive_list_entry *);
1244 1359
1245#ifdef CONFIG_BLK_DEV_IDEDMA 1360#ifdef CONFIG_BLK_DEV_IDEDMA
1246int __ide_dma_bad_drive(ide_drive_t *); 1361int __ide_dma_bad_drive(ide_drive_t *);
@@ -1347,24 +1462,6 @@ const char *ide_xfer_verbose(u8 mode);
1347extern void ide_toggle_bounce(ide_drive_t *drive, int on); 1462extern void ide_toggle_bounce(ide_drive_t *drive, int on);
1348extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate); 1463extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate);
1349 1464
1350static inline int ide_dev_has_iordy(struct hd_driveid *id)
1351{
1352 return ((id->field_valid & 2) && (id->capability & 8)) ? 1 : 0;
1353}
1354
1355static inline int ide_dev_is_sata(struct hd_driveid *id)
1356{
1357 /*
1358 * See if word 93 is 0 AND drive is at least ATA-5 compatible
1359 * verifying that word 80 by casting it to a signed type --
1360 * this trick allows us to filter out the reserved values of
1361 * 0x0000 and 0xffff along with the earlier ATA revisions...
1362 */
1363 if (id->hw_config == 0 && (short)id->major_rev_num >= 0x0020)
1364 return 1;
1365 return 0;
1366}
1367
1368u64 ide_get_lba_addr(struct ide_taskfile *, int); 1465u64 ide_get_lba_addr(struct ide_taskfile *, int);
1369u8 ide_dump_status(ide_drive_t *, const char *, u8); 1466u8 ide_dump_status(ide_drive_t *, const char *, u8);
1370 1467
@@ -1436,13 +1533,6 @@ extern struct mutex ide_cfg_mtx;
1436extern struct bus_type ide_bus_type; 1533extern struct bus_type ide_bus_type;
1437extern struct class *ide_port_class; 1534extern struct class *ide_port_class;
1438 1535
1439/* check if CACHE FLUSH (EXT) command is supported (bits defined in ATA-6) */
1440#define ide_id_has_flush_cache(id) ((id)->cfs_enable_2 & 0x3000)
1441
1442/* some Maxtor disks have bit 13 defined incorrectly so check bit 10 too */
1443#define ide_id_has_flush_cache_ext(id) \
1444 (((id)->cfs_enable_2 & 0x2400) == 0x2400)
1445
1446static inline void ide_dump_identify(u8 *id) 1536static inline void ide_dump_identify(u8 *id)
1447{ 1537{
1448 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 2, id, 512, 0); 1538 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 2, id, 512, 0);
@@ -1453,10 +1543,10 @@ static inline int hwif_to_node(ide_hwif_t *hwif)
1453 return hwif->dev ? dev_to_node(hwif->dev) : -1; 1543 return hwif->dev ? dev_to_node(hwif->dev) : -1;
1454} 1544}
1455 1545
1456static inline ide_drive_t *ide_get_paired_drive(ide_drive_t *drive) 1546static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive)
1457{ 1547{
1458 ide_hwif_t *hwif = HWIF(drive); 1548 ide_drive_t *peer = &drive->hwif->drives[(drive->dn ^ 1) & 1];
1459 1549
1460 return &hwif->drives[(drive->dn ^ 1) & 1]; 1550 return peer->present ? peer : NULL;
1461} 1551}
1462#endif /* _IDE_H */ 1552#endif /* _IDE_H */
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 350033e8f4e1..ee9bcc6f32b6 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -108,6 +108,9 @@ extern struct resource iomem_resource;
108 108
109extern int request_resource(struct resource *root, struct resource *new); 109extern int request_resource(struct resource *root, struct resource *new);
110extern int release_resource(struct resource *new); 110extern int release_resource(struct resource *new);
111extern void reserve_region_with_split(struct resource *root,
112 resource_size_t start, resource_size_t end,
113 const char *name);
111extern int insert_resource(struct resource *parent, struct resource *new); 114extern int insert_resource(struct resource *parent, struct resource *new);
112extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); 115extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new);
113extern int allocate_resource(struct resource *root, struct resource *new, 116extern int allocate_resource(struct resource *root, struct resource *new,
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8ccb462ea42c..8d9411bc60f6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq,
62#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ 62#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */
63#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ 63#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */
64#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ 64#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
65#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
65 66
66#ifdef CONFIG_IRQ_PER_CPU 67#ifdef CONFIG_IRQ_PER_CPU
67# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) 68# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 3dd209007098..66c3499478b5 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -850,7 +850,8 @@ struct journal_s
850 */ 850 */
851 struct block_device *j_dev; 851 struct block_device *j_dev;
852 int j_blocksize; 852 int j_blocksize;
853 unsigned long long j_blk_offset; 853 unsigned long long j_blk_offset;
854 char j_devname[BDEVNAME_SIZE+24];
854 855
855 /* 856 /*
856 * Device which holds the client fs. For internal journal this will be 857 * Device which holds the client fs. For internal journal this will be
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index fac3337547eb..9f2a3751873a 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -23,12 +23,19 @@
23 __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ 23 __attribute__((__section__(SHARED_ALIGNED_SECTION))) \
24 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ 24 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
25 ____cacheline_aligned_in_smp 25 ____cacheline_aligned_in_smp
26
27#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
28 __attribute__((__section__(".data.percpu.page_aligned"))) \
29 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
26#else 30#else
27#define DEFINE_PER_CPU(type, name) \ 31#define DEFINE_PER_CPU(type, name) \
28 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name 32 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
29 33
30#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ 34#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
31 DEFINE_PER_CPU(type, name) 35 DEFINE_PER_CPU(type, name)
36
37#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
38 DEFINE_PER_CPU(type, name)
32#endif 39#endif
33 40
34#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) 41#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 208388835357..9007ccdfc112 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
35void percpu_counter_destroy(struct percpu_counter *fbc); 35void percpu_counter_destroy(struct percpu_counter *fbc);
36void percpu_counter_set(struct percpu_counter *fbc, s64 amount); 36void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
37void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); 37void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
38s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); 38s64 __percpu_counter_sum(struct percpu_counter *fbc);
39 39
40static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) 40static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
41{ 41{
@@ -44,19 +44,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
44 44
45static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) 45static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
46{ 46{
47 s64 ret = __percpu_counter_sum(fbc, 0); 47 s64 ret = __percpu_counter_sum(fbc);
48 return ret < 0 ? 0 : ret; 48 return ret < 0 ? 0 : ret;
49} 49}
50 50
51static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc)
52{
53 return __percpu_counter_sum(fbc, 1);
54}
55
56
57static inline s64 percpu_counter_sum(struct percpu_counter *fbc) 51static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
58{ 52{
59 return __percpu_counter_sum(fbc, 0); 53 return __percpu_counter_sum(fbc);
60} 54}
61 55
62static inline s64 percpu_counter_read(struct percpu_counter *fbc) 56static inline s64 percpu_counter_read(struct percpu_counter *fbc)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0314074fa232..60c49e324390 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -89,7 +89,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
89 set_balance_irq_affinity(irq, cpumask); 89 set_balance_irq_affinity(irq, cpumask);
90 90
91#ifdef CONFIG_GENERIC_PENDING_IRQ 91#ifdef CONFIG_GENERIC_PENDING_IRQ
92 set_pending_irq(irq, cpumask); 92 if (desc->status & IRQ_MOVE_PCNTXT) {
93 unsigned long flags;
94
95 spin_lock_irqsave(&desc->lock, flags);
96 desc->chip->set_affinity(irq, cpumask);
97 spin_unlock_irqrestore(&desc->lock, flags);
98 } else
99 set_pending_irq(irq, cpumask);
93#else 100#else
94 desc->affinity = cpumask; 101 desc->affinity = cpumask;
95 desc->chip->set_affinity(irq, cpumask); 102 desc->chip->set_affinity(irq, cpumask);
diff --git a/kernel/resource.c b/kernel/resource.c
index 03d796c1b2e9..414d6fc9131e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -516,6 +516,74 @@ int adjust_resource(struct resource *res, resource_size_t start, resource_size_t
516 return result; 516 return result;
517} 517}
518 518
519static void __init __reserve_region_with_split(struct resource *root,
520 resource_size_t start, resource_size_t end,
521 const char *name)
522{
523 struct resource *parent = root;
524 struct resource *conflict;
525 struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
526
527 if (!res)
528 return;
529
530 res->name = name;
531 res->start = start;
532 res->end = end;
533 res->flags = IORESOURCE_BUSY;
534
535 for (;;) {
536 conflict = __request_resource(parent, res);
537 if (!conflict)
538 break;
539 if (conflict != parent) {
540 parent = conflict;
541 if (!(conflict->flags & IORESOURCE_BUSY))
542 continue;
543 }
544
545 /* Uhhuh, that didn't work out.. */
546 kfree(res);
547 res = NULL;
548 break;
549 }
550
551 if (!res) {
552 printk(KERN_DEBUG " __reserve_region_with_split: (%s) [%llx, %llx], res: (%s) [%llx, %llx]\n",
553 conflict->name, conflict->start, conflict->end,
554 name, start, end);
555
556 /* failed, split and try again */
557
558 /* conflict coverred whole area */
559 if (conflict->start <= start && conflict->end >= end)
560 return;
561
562 if (conflict->start > start)
563 __reserve_region_with_split(root, start, conflict->start-1, name);
564 if (!(conflict->flags & IORESOURCE_BUSY)) {
565 resource_size_t common_start, common_end;
566
567 common_start = max(conflict->start, start);
568 common_end = min(conflict->end, end);
569 if (common_start < common_end)
570 __reserve_region_with_split(root, common_start, common_end, name);
571 }
572 if (conflict->end < end)
573 __reserve_region_with_split(root, conflict->end+1, end, name);
574 }
575
576}
577
578void reserve_region_with_split(struct resource *root,
579 resource_size_t start, resource_size_t end,
580 const char *name)
581{
582 write_lock(&resource_lock);
583 __reserve_region_with_split(root, start, end, name);
584 write_unlock(&resource_lock);
585}
586
519EXPORT_SYMBOL(adjust_resource); 587EXPORT_SYMBOL(adjust_resource);
520 588
521/** 589/**
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 4a8ba4bf5f6f..a8663890a88c 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
52 * Add up all the per-cpu counts, return the result. This is a more accurate 52 * Add up all the per-cpu counts, return the result. This is a more accurate
53 * but much slower version of percpu_counter_read_positive() 53 * but much slower version of percpu_counter_read_positive()
54 */ 54 */
55s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) 55s64 __percpu_counter_sum(struct percpu_counter *fbc)
56{ 56{
57 s64 ret; 57 s64 ret;
58 int cpu; 58 int cpu;
@@ -62,11 +62,9 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
62 for_each_online_cpu(cpu) { 62 for_each_online_cpu(cpu) {
63 s32 *pcount = per_cpu_ptr(fbc->counters, cpu); 63 s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
64 ret += *pcount; 64 ret += *pcount;
65 if (set) 65 *pcount = 0;
66 *pcount = 0;
67 } 66 }
68 if (set) 67 fbc->count = ret;
69 fbc->count = ret;
70 68
71 spin_unlock(&fbc->lock); 69 spin_unlock(&fbc->lock);
72 return ret; 70 return ret;