-rw-r--r--  Documentation/DocBook/kgdb.tmpl | 8
-rw-r--r--  Documentation/filesystems/Locking | 2
-rw-r--r--  Documentation/filesystems/vfs.txt | 4
-rw-r--r--  Documentation/kbuild/kconfig-language.txt | 24
-rw-r--r--  Documentation/kernel-parameters.txt | 5
-rw-r--r--  Documentation/scheduler/sched-design.txt | 165
-rw-r--r--  arch/m68k/kernel/traps.c | 17
-rw-r--r--  arch/m68k/mac/config.c | 24
-rw-r--r--  arch/powerpc/kvm/booke_guest.c | 6
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 20
-rw-r--r--  arch/powerpc/lib/Makefile | 1
-rw-r--r--  arch/powerpc/lib/devres.c | 42
-rw-r--r--  arch/powerpc/platforms/pseries/scanlog.c | 19
-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/boot/compressed/relocs.c | 2
-rw-r--r--  arch/x86/kernel/acpi/Makefile | 2
-rw-r--r--  arch/x86/kernel/acpi/realmode/Makefile | 5
-rw-r--r--  arch/x86/kernel/kvmclock.c | 4
-rw-r--r--  arch/x86/kernel/mpparse.c | 7
-rw-r--r--  arch/x86/kernel/reboot.c | 1
-rw-r--r--  arch/x86/kernel/setup.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 4
-rw-r--r--  arch/x86/kvm/i8254.c | 2
-rw-r--r--  arch/x86/kvm/mmu.c | 89
-rw-r--r--  arch/x86/kvm/mmu.h | 37
-rw-r--r--  arch/x86/kvm/svm.c | 10
-rw-r--r--  arch/x86/kvm/vmx.c | 375
-rw-r--r--  arch/x86/kvm/vmx.h | 38
-rw-r--r--  arch/x86/kvm/x86.c | 22
-rw-r--r--  arch/x86/kvm/x86_emulate.c | 1
-rw-r--r--  arch/x86/mm/discontig_32.c | 26
-rw-r--r--  arch/x86/mm/pgtable_32.c | 7
-rw-r--r--  arch/x86/pci/Makefile_32 | 12
-rw-r--r--  arch/x86/pci/acpi.c | 41
-rw-r--r--  arch/x86/pci/common.c | 58
-rw-r--r--  arch/x86/pci/fixup.c | 2
-rw-r--r--  arch/x86/pci/init.c | 4
-rw-r--r--  arch/x86/pci/pci.h | 3
-rw-r--r--  arch/x86/vdso/vdso32-setup.c | 2
-rw-r--r--  arch/x86/video/fbdev.c | 2
-rw-r--r--  drivers/ata/Kconfig | 13
-rw-r--r--  drivers/ata/Makefile | 1
-rw-r--r--  drivers/ata/ahci.c | 4
-rw-r--r--  drivers/ata/ata_generic.c | 6
-rw-r--r--  drivers/ata/ata_piix.c | 25
-rw-r--r--  drivers/ata/libata-core.c | 1
-rw-r--r--  drivers/ata/libata-eh.c | 2
-rw-r--r--  drivers/ata/libata-sff.c | 6
-rw-r--r--  drivers/ata/pata_acpi.c | 6
-rw-r--r--  drivers/ata/pata_sch.c | 206
-rw-r--r--  drivers/ata/sata_inic162x.c | 646
-rw-r--r--  drivers/ata/sata_mv.c | 690
-rw-r--r--  drivers/base/sys.c | 3
-rw-r--r--  drivers/char/serial167.c | 2
-rw-r--r--  drivers/edac/edac_core.h | 2
-rw-r--r--  drivers/edac/edac_device.c | 6
-rw-r--r--  drivers/edac/edac_mc.c | 6
-rw-r--r--  drivers/edac/edac_pci.c | 6
-rw-r--r--  drivers/ide/ide-probe.c | 12
-rw-r--r--  drivers/ide/legacy/falconide.c | 2
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.c | 13
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.h | 4
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c | 6
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c | 13
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_hca.c | 7
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 2
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 47
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 3
-rw-r--r--  drivers/input/serio/hp_sdc.c | 1
-rw-r--r--  drivers/macintosh/adb.c | 30
-rw-r--r--  drivers/macintosh/therm_pm72.c | 31
-rw-r--r--  drivers/macintosh/windfarm_smu_sat.c | 10
-rw-r--r--  drivers/misc/kgdbts.c | 75
-rw-r--r--  drivers/net/irda/nsc-ircc.c | 6
-rw-r--r--  drivers/net/irda/smsc-ircc2.c | 5
-rw-r--r--  drivers/net/mlx4/mr.c | 2
-rw-r--r--  drivers/net/niu.c | 11
-rw-r--r--  drivers/net/wan/lapbether.c | 1
-rw-r--r--  drivers/net/wireless/iwlwifi/Kconfig | 3
-rw-r--r--  drivers/pci/probe.c | 33
-rw-r--r--  drivers/scsi/Kconfig | 1
-rw-r--r--  fs/affs/affs.h | 4
-rw-r--r--  fs/affs/file.c | 25
-rw-r--r--  fs/affs/inode.c | 34
-rw-r--r--  fs/affs/namei.c | 6
-rw-r--r--  fs/affs/super.c | 18
-rw-r--r--  fs/inode.c | 5
-rw-r--r--  fs/locks.c | 17
-rw-r--r--  fs/proc/task_nommu.c | 1
-rw-r--r--  include/asm-alpha/types.h | 6
-rw-r--r--  include/asm-m68k/machw.h | 30
-rw-r--r--  include/asm-mips/types.h | 2
-rw-r--r--  include/asm-powerpc/io.h | 8
-rw-r--r--  include/asm-powerpc/kvm_host.h | 1
-rw-r--r--  include/asm-powerpc/kvm_ppc.h | 5
-rw-r--r--  include/asm-x86/bootparam.h | 8
-rw-r--r--  include/asm-x86/kvm_host.h | 10
-rw-r--r--  include/asm-x86/pgtable_32.h | 9
-rw-r--r--  include/asm-x86/pgtable_64.h | 6
-rw-r--r--  include/linux/fs.h | 5
-rw-r--r--  include/linux/io.h | 1
-rw-r--r--  include/linux/kgdb.h | 4
-rw-r--r--  include/linux/libata.h | 16
-rw-r--r--  include/linux/pci.h | 5
-rw-r--r--  include/linux/sched.h | 38
-rw-r--r--  include/linux/sysfs.h | 6
-rw-r--r--  include/net/ip.h | 2
-rw-r--r--  include/net/xfrm.h | 48
-rw-r--r--  init/Kconfig | 20
-rw-r--r--  init/main.c | 1
-rw-r--r--  kernel/Makefile | 2
-rw-r--r--  kernel/futex.c | 176
-rw-r--r--  kernel/kgdb.c | 8
-rw-r--r--  kernel/module.c | 44
-rw-r--r--  kernel/sched.c | 323
-rw-r--r--  kernel/sched_clock.c | 236
-rw-r--r--  kernel/sched_debug.c | 7
-rw-r--r--  kernel/sched_fair.c | 39
-rw-r--r--  kernel/sched_idletask.c | 2
-rw-r--r--  kernel/sched_rt.c | 9
-rw-r--r--  lib/Kconfig.kgdb | 16
-rw-r--r--  lib/devres.c | 2
-rw-r--r--  mm/memory.c | 5
-rw-r--r--  net/atm/br2684.c | 4
-rw-r--r--  net/bridge/br_if.c | 12
-rw-r--r--  net/core/skbuff.c | 4
-rw-r--r--  net/dccp/feat.c | 2
-rw-r--r--  net/decnet/dn_route.c | 12
-rw-r--r--  net/ipv4/route.c | 18
-rw-r--r--  net/ipv4/tcp_input.c | 15
-rw-r--r--  net/ipv6/route.c | 6
-rw-r--r--  net/mac80211/main.c | 7
-rw-r--r--  net/sched/act_simple.c | 45
-rw-r--r--  net/sched/sch_htb.c | 8
-rw-r--r--  scripts/kconfig/lkc.h | 6
-rw-r--r--  scripts/kconfig/mconf.c | 3
-rw-r--r--  scripts/mod/file2alias.c | 35
-rw-r--r--  sound/drivers/pcsp/pcsp.c | 4
-rw-r--r--  sound/pci/Kconfig | 5
-rw-r--r--  sound/pci/ac97/ac97_patch.c | 9
-rw-r--r--  sound/pci/hda/patch_realtek.c | 5
-rw-r--r--  sound/pci/hda/patch_sigmatel.c | 2
-rw-r--r--  sound/soc/s3c24xx/s3c24xx-i2s.c | 2
-rw-r--r--  sound/soc/s3c24xx/s3c24xx-pcm.c | 2
-rw-r--r--  virt/kvm/kvm_main.c | 1
146 files changed, 2870 insertions, 1597 deletions
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 97618bed4d65..028a8444d95e 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -72,7 +72,7 @@
 kgdb is a source level debugger for linux kernel. It is used along
 with gdb to debug a linux kernel. The expectation is that gdb can
 be used to "break in" to the kernel to inspect memory, variables
-and look through a cal stack information similar to what an
+and look through call stack information similar to what an
 application developer would use gdb for. It is possible to place
 breakpoints in kernel code and perform some limited execution
 stepping.
@@ -93,8 +93,10 @@
 <chapter id="CompilingAKernel">
 <title>Compiling a kernel</title>
 <para>
-To enable <symbol>CONFIG_KGDB</symbol>, look under the "Kernel debugging"
-and then select "KGDB: kernel debugging with remote gdb".
+To enable <symbol>CONFIG_KGDB</symbol> you should first turn on
+"Prompt for development and/or incomplete code/drivers"
+(CONFIG_EXPERIMENTAL) in "General setup", then under the
+"Kernel debugging" select "KGDB: kernel debugging with remote gdb".
 </para>
 <para>
 Next you should choose one of more I/O drivers to interconnect debugging
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index c2992bc54f2f..8b22d7d8b991 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -92,7 +92,6 @@ prototypes:
 	void (*destroy_inode)(struct inode *);
 	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
-	void (*put_inode) (struct inode *);
 	void (*drop_inode) (struct inode *);
 	void (*delete_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
@@ -115,7 +114,6 @@ alloc_inode: no no no
 destroy_inode: no
 dirty_inode: no (must not sleep)
 write_inode: no
-put_inode: no
 drop_inode: no !!!inode_lock!!!
 delete_inode: no
 put_super: yes yes no
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 81e5be6e6e35..b7522c6cbae3 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -205,7 +205,6 @@ struct super_operations {
 
 	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
-	void (*put_inode) (struct inode *);
 	void (*drop_inode) (struct inode *);
 	void (*delete_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
@@ -246,9 +245,6 @@ or bottom half).
 	inode to disc.  The second parameter indicates whether the write
 	should be synchronous or not, not all filesystems check this flag.
 
-  put_inode: called when the VFS inode is removed from the inode
-	cache.
-
   drop_inode: called when the last access to the inode is dropped,
 	with the inode_lock spinlock held.
 
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index 00b950d1c193..c412c245848f 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -377,27 +377,3 @@ config FOO
 
 limits FOO to module (=m) or disabled (=n).
 
-
-Build limited by a third config symbol which may be =y or =m
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A common idiom that we see (and sometimes have problems with) is this:
-
-When option C in B (module or subsystem) uses interfaces from A (module
-or subsystem), and both A and B are tristate (could be =y or =m if they
-were independent of each other, but they aren't), then we need to limit
-C such that it cannot be built statically if A is built as a loadable
-module.  (C already depends on B, so there is no dependency issue to
-take care of here.)
-
-If A is linked statically into the kernel image, C can be built
-statically or as loadable module(s).  However, if A is built as loadable
-module(s), then C must be restricted to loadable module(s) also.  This
-can be expressed in kconfig language as:
-
-config C
-	depends on A = y || A = B
-
-or for real examples, use this command in a kernel tree:
-
-$ find . -name Kconfig\* | xargs grep -ns "depends on.*=.*||.*=" | grep -v orig
-
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a3c35446e755..cdd5b934f43e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1094,9 +1094,6 @@ and is between 256 and 4096 characters. It is defined in the file
 	mac5380=	[HW,SCSI] Format:
 			<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
 
-	mac53c9x=	[HW,SCSI] Format:
-			<num_esps>,<disconnect>,<nosync>,<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
-
 	machvec=	[IA64] Force the use of a particular machine-vector
 			(machvec) in a generic kernel.
 			Example: machvec=hpzx1_swiotlb
@@ -1525,6 +1522,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			This is normally done in pci_enable_device(),
 			so this option is a temporary workaround
 			for broken drivers that don't call it.
+		skip_isa_align	[X86] do not align io start addr, so can
+				handle more pci cards
 		firmware	[ARM] Do not re-enumerate the bus but instead
 			just use the configuration from the
 			bootloader. This is currently used on
diff --git a/Documentation/scheduler/sched-design.txt b/Documentation/scheduler/sched-design.txt
deleted file mode 100644
index 1605bf0cba8b..000000000000
--- a/Documentation/scheduler/sched-design.txt
+++ /dev/null
@@ -1,165 +0,0 @@
-Goals, Design and Implementation of the
-new ultra-scalable O(1) scheduler
-
-
-This is an edited version of an email Ingo Molnar sent to
-lkml on 4 Jan 2002.  It describes the goals, design, and
-implementation of Ingo's new ultra-scalable O(1) scheduler.
-Last Updated: 18 April 2002.
-
-
-Goal
-====
-
-The main goal of the new scheduler is to keep all the good things we know
-and love about the current Linux scheduler:
-
- - good interactive performance even during high load: if the user
-   types or clicks then the system must react instantly and must execute
-   the user tasks smoothly, even during considerable background load.
-
- - good scheduling/wakeup performance with 1-2 runnable processes.
-
- - fairness: no process should stay without any timeslice for any
-   unreasonable amount of time. No process should get an unjustly high
-   amount of CPU time.
-
- - priorities: less important tasks can be started with lower priority,
-   more important tasks with higher priority.
-
- - SMP efficiency: no CPU should stay idle if there is work to do.
-
- - SMP affinity: processes which run on one CPU should stay affine to
-   that CPU. Processes should not bounce between CPUs too frequently.
-
- - plus additional scheduler features: RT scheduling, CPU binding.
-
-and the goal is also to add a few new things:
-
- - fully O(1) scheduling. Are you tired of the recalculation loop
-   blowing the L1 cache away every now and then? Do you think the goodness
-   loop is taking a bit too long to finish if there are lots of runnable
-   processes? This new scheduler takes no prisoners: wakeup(), schedule(),
-   the timer interrupt are all O(1) algorithms. There is no recalculation
-   loop. There is no goodness loop either.
-
- - 'perfect' SMP scalability. With the new scheduler there is no 'big'
-   runqueue_lock anymore - it's all per-CPU runqueues and locks - two
-   tasks on two separate CPUs can wake up, schedule and context-switch
-   completely in parallel, without any interlocking. All
-   scheduling-relevant data is structured for maximum scalability.
-
- - better SMP affinity. The old scheduler has a particular weakness that
-   causes the random bouncing of tasks between CPUs if/when higher
-   priority/interactive tasks, this was observed and reported by many
-   people. The reason is that the timeslice recalculation loop first needs
-   every currently running task to consume its timeslice. But when this
-   happens on eg. an 8-way system, then this property starves an
-   increasing number of CPUs from executing any process. Once the last
-   task that has a timeslice left has finished using up that timeslice,
-   the recalculation loop is triggered and other CPUs can start executing
-   tasks again - after having idled around for a number of timer ticks.
-   The more CPUs, the worse this effect.
-
-   Furthermore, this same effect causes the bouncing effect as well:
-   whenever there is such a 'timeslice squeeze' of the global runqueue,
-   idle processors start executing tasks which are not affine to that CPU.
-   (because the affine tasks have finished off their timeslices already.)
-
-   The new scheduler solves this problem by distributing timeslices on a
-   per-CPU basis, without having any global synchronization or
-   recalculation.
-
- - batch scheduling. A significant proportion of computing-intensive tasks
-   benefit from batch-scheduling, where timeslices are long and processes
-   are roundrobin scheduled. The new scheduler does such batch-scheduling
-   of the lowest priority tasks - so nice +19 jobs will get
-   'batch-scheduled' automatically. With this scheduler, nice +19 jobs are
-   in essence SCHED_IDLE, from an interactiveness point of view.
-
- - handle extreme loads more smoothly, without breakdown and scheduling
-   storms.
-
- - O(1) RT scheduling. For those RT folks who are paranoid about the
-   O(nr_running) property of the goodness loop and the recalculation loop.
-
- - run fork()ed children before the parent. Andrea has pointed out the
-   advantages of this a few months ago, but patches for this feature
-   do not work with the old scheduler as well as they should,
-   because idle processes often steal the new child before the fork()ing
-   CPU gets to execute it.
-
-
-Design
-======
-
-The core of the new scheduler contains the following mechanisms:
-
- - *two* priority-ordered 'priority arrays' per CPU. There is an 'active'
-   array and an 'expired' array. The active array contains all tasks that
-   are affine to this CPU and have timeslices left. The expired array
-   contains all tasks which have used up their timeslices - but this array
-   is kept sorted as well. The active and expired array is not accessed
-   directly, it's accessed through two pointers in the per-CPU runqueue
-   structure. If all active tasks are used up then we 'switch' the two
-   pointers and from now on the ready-to-go (former-) expired array is the
-   active array - and the empty active array serves as the new collector
-   for expired tasks.
-
- - there is a 64-bit bitmap cache for array indices. Finding the highest
-   priority task is thus a matter of two x86 BSFL bit-search instructions.
-
-the split-array solution enables us to have an arbitrary number of active
-and expired tasks, and the recalculation of timeslices can be done
-immediately when the timeslice expires. Because the arrays are always
-access through the pointers in the runqueue, switching the two arrays can
-be done very quickly.
-
-this is a hybride priority-list approach coupled with roundrobin
-scheduling and the array-switch method of distributing timeslices.
-
- - there is a per-task 'load estimator'.
-
-one of the toughest things to get right is good interactive feel during
-heavy system load. While playing with various scheduler variants i found
-that the best interactive feel is achieved not by 'boosting' interactive
-tasks, but by 'punishing' tasks that want to use more CPU time than there
-is available. This method is also much easier to do in an O(1) fashion.
-
-to establish the actual 'load' the task contributes to the system, a
-complex-looking but pretty accurate method is used: there is a 4-entry
-'history' ringbuffer of the task's activities during the last 4 seconds.
-This ringbuffer is operated without much overhead. The entries tell the
-scheduler a pretty accurate load-history of the task: has it used up more
-CPU time or less during the past N seconds. [the size '4' and the interval
-of 4x 1 seconds was found by lots of experimentation - this part is
-flexible and can be changed in both directions.]
-
-the penalty a task gets for generating more load than the CPU can handle
-is a priority decrease - there is a maximum amount to this penalty
-relative to their static priority, so even fully CPU-bound tasks will
-observe each other's priorities, and will share the CPU accordingly.
-
-the SMP load-balancer can be extended/switched with additional parallel
-computing and cache hierarchy concepts: NUMA scheduling, multi-core CPUs
-can be supported easily by changing the load-balancer. Right now it's
-tuned for my SMP systems.
-
-i skipped the prev->mm == next->mm advantage - no workload i know of shows
-any sensitivity to this. It can be added back by sacrificing O(1)
-schedule() [the current and one-lower priority list can be searched for a
-that->mm == current->mm condition], but costs a fair number of cycles
-during a number of important workloads, so i wanted to avoid this as much
-as possible.
-
-- the SMP idle-task startup code was still racy and the new scheduler
-triggered this. So i streamlined the idle-setup code a bit. We do not call
-into schedule() before all processors have started up fully and all idle
-threads are in place.
-
-- the patch also cleans up a number of aspects of sched.c - moves code
-into other areas of the kernel where it's appropriate, and simplifies
-certain code paths and data constructs. As a result, the new scheduler's
-code is smaller than the old one.
-
-	Ingo
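
Note: the dual-array design the removed document describes reduces to a short
pick-next path. The following is a self-contained, user-space sketch of that
idea; the names only mirror the text above and are not the kernel's real
sched.c code:

    #include <stdint.h>
    #include <stddef.h>

    #define MAX_PRIO 140	/* 100 RT levels + 40 nice levels */

    struct task { struct task *next; int prio; };

    struct prio_array {
    	uint64_t bitmap[3];		/* one bit per non-empty priority list */
    	struct task *queue[MAX_PRIO];
    };

    struct runqueue {
    	struct prio_array *active, *expired;
    	struct prio_array arrays[2];
    };

    /* the '64-bit bitmap cache': find the first set bit, BSFL-style */
    static int sched_find_first_bit(const uint64_t *b)
    {
    	for (int w = 0; w < 3; w++)
    		if (b[w])
    			return w * 64 + __builtin_ctzll(b[w]);
    	return MAX_PRIO;
    }

    static struct task *pick_next(struct runqueue *rq)
    {
    	int idx = sched_find_first_bit(rq->active->bitmap);

    	if (idx == MAX_PRIO) {
    		/* active array drained: O(1) pointer switch, no
    		 * recalculation loop */
    		struct prio_array *tmp = rq->active;
    		rq->active = rq->expired;
    		rq->expired = tmp;
    		idx = sched_find_first_bit(rq->active->bitmap);
    		if (idx == MAX_PRIO)
    			return NULL;	/* runqueue empty */
    	}
    	return rq->active->queue[idx];	/* highest-priority runnable task */
    }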
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index fd4858e2dd63..75b8340b254b 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -468,15 +468,26 @@ static inline void access_error040(struct frame *fp)
 	 * (if do_page_fault didn't fix the mapping,
 	 * the writeback won't do good)
 	 */
+disable_wb:
 #ifdef DEBUG
 	printk(".. disabling wb2\n");
 #endif
 	if (fp->un.fmt7.wb2a == fp->un.fmt7.faddr)
 		fp->un.fmt7.wb2s &= ~WBV_040;
+	if (fp->un.fmt7.wb3a == fp->un.fmt7.faddr)
+		fp->un.fmt7.wb3s &= ~WBV_040;
 	}
-	} else if (send_fault_sig(&fp->ptregs) > 0) {
-		printk("68040 access error, ssw=%x\n", ssw);
-		trap_c(fp);
+	} else {
+		/* In case of a bus error we either kill the process or expect
+		 * the kernel to catch the fault, which then is also responsible
+		 * for cleaning up the mess.
+		 */
+		current->thread.signo = SIGBUS;
+		current->thread.faddr = fp->un.fmt7.faddr;
+		if (send_fault_sig(&fp->ptregs) >= 0)
+			printk("68040 bus error (ssw=%x, faddr=%lx)\n", ssw,
+			       fp->un.fmt7.faddr);
+		goto disable_wb;
 	}
 
 	do_040writebacks(fp);
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 735a49b4b936..ad3e3bacae39 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -48,9 +48,6 @@
 struct mac_booter_data mac_bi_data;
 int mac_bisize = sizeof mac_bi_data;
 
-struct mac_hw_present mac_hw_present;
-EXPORT_SYMBOL(mac_hw_present);
-
 /* New m68k bootinfo stuff and videobase */
 
 extern int m68k_num_memory;
@@ -817,27 +814,6 @@ void __init mac_identify(void)
 		m68k_ramdisk.addr, m68k_ramdisk.size);
 #endif
 
-	/*
-	 * TODO: set the various fields in macintosh_config->hw_present here!
-	 */
-	switch (macintosh_config->scsi_type) {
-	case MAC_SCSI_OLD:
-		MACHW_SET(MAC_SCSI_80);
-		break;
-	case MAC_SCSI_QUADRA:
-	case MAC_SCSI_QUADRA2:
-	case MAC_SCSI_QUADRA3:
-		MACHW_SET(MAC_SCSI_96);
-		if ((macintosh_config->ident == MAC_MODEL_Q900) ||
-		    (macintosh_config->ident == MAC_MODEL_Q950))
-			MACHW_SET(MAC_SCSI_96_2);
-		break;
-	default:
-		printk(KERN_WARNING "config.c: wtf: unknown scsi, using 53c80\n");
-		MACHW_SET(MAC_SCSI_80);
-		break;
-	}
-
 	iop_init();
 	via_init();
 	oss_init();
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
index 6d9884a6884a..712d89a28c46 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
49 { "inst_emu", VCPU_STAT(emulated_inst_exits) }, 49 { "inst_emu", VCPU_STAT(emulated_inst_exits) },
50 { "dec", VCPU_STAT(dec_exits) }, 50 { "dec", VCPU_STAT(dec_exits) },
51 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 51 { "ext_intr", VCPU_STAT(ext_intr_exits) },
52 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
52 { NULL } 53 { NULL }
53}; 54};
54 55
@@ -338,6 +339,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
338 } 339 }
339 break; 340 break;
340 341
342 case BOOKE_INTERRUPT_FP_UNAVAIL:
343 kvmppc_queue_exception(vcpu, exit_nr);
344 r = RESUME_GUEST;
345 break;
346
341 case BOOKE_INTERRUPT_DATA_STORAGE: 347 case BOOKE_INTERRUPT_DATA_STORAGE:
342 vcpu->arch.dear = vcpu->arch.fault_dear; 348 vcpu->arch.dear = vcpu->arch.fault_dear;
343 vcpu->arch.esr = vcpu->arch.fault_esr; 349 vcpu->arch.esr = vcpu->arch.fault_esr;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index bad40bd2d3ac..777e0f34e0ea 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -36,13 +36,12 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-	/* XXX implement me */
-	return 0;
+	return !!(v->arch.pending_exceptions);
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return 1;
+	return !(v->arch.msr & MSR_WE);
 }
 
 
@@ -214,6 +213,11 @@ static void kvmppc_decrementer_func(unsigned long data)
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
 
 	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+
+	if (waitqueue_active(&vcpu->wq)) {
+		wake_up_interruptible(&vcpu->wq);
+		vcpu->stat.halt_wakeup++;
+	}
 }
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -339,6 +343,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	int r;
 	sigset_t sigsaved;
 
+	vcpu_load(vcpu);
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
@@ -363,12 +369,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
+	vcpu_put(vcpu);
+
 	return r;
 }
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
 	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+
+	if (waitqueue_active(&vcpu->wq)) {
+		wake_up_interruptible(&vcpu->wq);
+		vcpu->stat.halt_wakeup++;
+	}
+
 	return 0;
 }
 
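
These hooks feed the generic halt path: a vcpu that set MSR_WE sleeps on
vcpu->wq until kvm_arch_vcpu_runnable() turns true, and the wakers above bump
the new halt_wakeup counter. Roughly, simplified from the generic
kvm_vcpu_block() of this era (not part of this patch):

    void kvm_vcpu_block(struct kvm_vcpu *vcpu)
    {
    	DEFINE_WAIT(wait);

    	for (;;) {
    		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
    		/* a queued decrementer/external exception makes us runnable */
    		if (kvm_arch_vcpu_runnable(vcpu) || signal_pending(current))
    			break;
    		schedule();
    	}
    	finish_wait(&vcpu->wq, &wait);
    }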
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 4bb023f4c869..f1d2cdc5331b 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_SMP) += locks.o
 endif
 
 obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
+obj-$(CONFIG_HAS_IOMEM)	+= devres.o
diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c
new file mode 100644
index 000000000000..292115d98ea9
--- /dev/null
+++ b/arch/powerpc/lib/devres.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/device.h>	/* devres_*(), devm_ioremap_release() */
+#include <linux/io.h>		/* ioremap_flags() */
+#include <linux/module.h>	/* EXPORT_SYMBOL() */
+
+/**
+ * devm_ioremap_prot - Managed ioremap_flags()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ * @flags: Page flags
+ *
+ * Managed ioremap_prot().  Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
+				 size_t size, unsigned long flags)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = ioremap_flags(offset, size, flags);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_ioremap_prot);
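
A sketch of the intended use from a driver probe path; the foo_* names,
register offset and value are illustrative, only devm_ioremap_prot() itself
comes from the file above:

    static int foo_probe(struct device *dev, resource_size_t base, size_t len)
    {
    	void __iomem *regs;

    	/* the mapping is torn down automatically on driver detach */
    	regs = devm_ioremap_prot(dev, base, len,
    				 _PAGE_NO_CACHE | _PAGE_GUARDED);
    	if (!regs)
    		return -ENOMEM;

    	out_be32(regs + 0x4, 0x1);	/* e.g. enable the block */
    	return 0;
    }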
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index bec3803f0618..417eca79df69 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -55,11 +55,6 @@ static ssize_t scanlog_read(struct file *file, char __user *buf,
 	dp = PDE(inode);
 	data = (unsigned int *)dp->data;
 
-	if (!data) {
-		printk(KERN_ERR "scanlog: read failed no data\n");
-		return -EIO;
-	}
-
 	if (count > RTAS_DATA_BUF_SIZE)
 		count = RTAS_DATA_BUF_SIZE;
 
@@ -146,11 +141,6 @@ static int scanlog_open(struct inode * inode, struct file * file)
 	struct proc_dir_entry *dp = PDE(inode);
 	unsigned int *data = (unsigned int *)dp->data;
 
-	if (!data) {
-		printk(KERN_ERR "scanlog: open failed no data\n");
-		return -EIO;
-	}
-
 	if (data[0] != 0) {
 		/* This imperfect test stops a second copy of the
 		 * data (or a reset while data is being copied)
@@ -168,10 +158,6 @@ static int scanlog_release(struct inode * inode, struct file * file)
 	struct proc_dir_entry *dp = PDE(inode);
 	unsigned int *data = (unsigned int *)dp->data;
 
-	if (!data) {
-		printk(KERN_ERR "scanlog: release failed no data\n");
-		return -EIO;
-	}
 	data[0] = 0;
 
 	return 0;
@@ -200,12 +186,11 @@ static int __init scanlog_init(void)
 	if (!data)
 		goto err;
 
-	ent = proc_create("ppc64/rtas/scan-log-dump", S_IRUSR, NULL,
-			  &scanlog_fops);
+	ent = proc_create_data("ppc64/rtas/scan-log-dump", S_IRUSR, NULL,
+			       &scanlog_fops, data);
 	if (!ent)
 		goto err;
 
-	ent->data = data;
 	proc_ppc64_scan_log_dump = ent;
 
 	return 0;
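
Besides dropping dead checks, the conversion closes a startup race: with the
old two-step pattern the /proc entry was live before ->data was assigned,
which is what the deleted !data tests papered over. Schematically (a
hypothetical "foo" entry, not from this patch):

    static int __init foo_init(void)
    {
    	struct proc_dir_entry *ent;

    	/* old, racy pattern:
    	 *	ent = proc_create("foo", S_IRUSR, NULL, &foo_fops);
    	 *	ent->data = foo_data;	// an open() may run before this
    	 */
    	ent = proc_create_data("foo", S_IRUSR, NULL, &foo_fops, foo_data);
    	if (!ent)	/* entry is published with its data already set */
    		return -ENOMEM;
    	return 0;
    }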
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c3f880902d66..bbcafaa160c0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -18,6 +18,7 @@ config X86_64
 ### Arch settings
 config X86
 	def_bool y
+	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
@@ -1661,6 +1662,7 @@ config GEODE_MFGPT_TIMER
 
 config OLPC
 	bool "One Laptop Per Child support"
+	depends on MGEODE_LX
 	default n
 	help
 	  Add support for detecting the unique features of the OLPC
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index d01ea42187e6..edaadea90aaf 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -191,7 +191,7 @@ static void read_ehdr(FILE *fp)
191 die("Cannot read ELF header: %s\n", 191 die("Cannot read ELF header: %s\n",
192 strerror(errno)); 192 strerror(errno));
193 } 193 }
194 if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) { 194 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) {
195 die("No ELF magic\n"); 195 die("No ELF magic\n");
196 } 196 }
197 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { 197 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) {
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 7335959b6aff..fd5ca97a2ad5 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -10,5 +10,5 @@ endif
 $(obj)/wakeup_rm.o:    $(obj)/realmode/wakeup.bin
 
 $(obj)/realmode/wakeup.bin: FORCE
-	$(Q)$(MAKE) $(build)=$(obj)/realmode $@
+	$(Q)$(MAKE) $(build)=$(obj)/realmode
 
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 092900854acc..1c31cc0e9def 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -6,7 +6,8 @@
 # for more details.
 #
 
-targets		:= wakeup.bin wakeup.elf
+always		:= wakeup.bin
+targets		:= wakeup.elf wakeup.lds
 
 wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o
 
@@ -48,7 +49,7 @@ LDFLAGS_wakeup.elf := -T
 
 CPPFLAGS_wakeup.lds += -P -C
 
-$(obj)/wakeup.elf: $(src)/wakeup.lds $(WAKEUP_OBJS) FORCE
+$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE
 	$(call if_changed,ld)
 
 OBJCOPYFLAGS_wakeup.bin	:= -O binary
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ddee04043aeb..4bc1be5d5472 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -133,6 +133,7 @@ static int kvm_register_clock(void)
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static void kvm_setup_secondary_clock(void)
 {
 	/*
@@ -143,6 +144,7 @@ static void kvm_setup_secondary_clock(void)
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
+#endif
 
 /*
  * After the clock is registered, the host will keep writing to the
@@ -177,7 +179,9 @@ void __init kvmclock_init(void)
 	pv_time_ops.get_wallclock = kvm_get_wallclock;
 	pv_time_ops.set_wallclock = kvm_set_wallclock;
 	pv_time_ops.sched_clock = kvm_clock_read;
+#ifdef CONFIG_X86_LOCAL_APIC
 	pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+#endif
 	machine_ops.shutdown = kvm_shutdown;
 #ifdef CONFIG_KEXEC
 	machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3e2c54dc8b29..404683b94e79 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -794,6 +794,11 @@ void __init find_smp_config(void)
    ACPI-based MP Configuration
    -------------------------------------------------------------------------- */
 
+/*
+ * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
+ */
+int es7000_plat;
+
 #ifdef CONFIG_ACPI
 
 #ifdef CONFIG_X86_IO_APIC
@@ -909,8 +914,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	MP_intsrc_info(&intsrc);
 }
 
-int es7000_plat;
-
 void __init mp_config_acpi_legacy_irqs(void)
 {
 	struct mpc_config_intsrc intsrc;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 07c6d42ab5ff..f6be7d5f82f8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -149,7 +149,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
-			DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
 		},
 	},
 	{	/* Handle problems with rebooting on Dell Optiplex 745's DFF*/
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c0c68c18a788..cc6f5eb20b24 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -95,7 +95,7 @@ void __init setup_per_cpu_areas(void)
 
 	/* Copy section for each CPU (we discard the original) */
 	size = PERCPU_ENOUGH_ROOM;
-	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
+	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
 	for_each_possible_cpu(i) {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 84241a256dc8..6b087ab6cd8f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -299,7 +299,7 @@ static void __cpuinit smp_callin(void)
 /*
  * Activate a secondary processor.
  */
-void __cpuinit start_secondary(void *unused)
+static void __cpuinit start_secondary(void *unused)
 {
 	/*
 	 * Don't put *anything* before cpu_init(), SMP booting is too
@@ -1306,7 +1306,7 @@ static void remove_siblinginfo(int cpu)
 	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
-int additional_cpus __initdata = -1;
+static int additional_cpus __initdata = -1;
 
 static __init int setup_additional_cpus(char *s)
 {
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4c943eabacc3..3324d90038e4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -288,6 +288,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 	 * mode 1 is one shot, mode 2 is period, otherwise del timer */
 	switch (ps->channels[0].mode) {
 	case 1:
+        /* FIXME: enhance mode 4 precision */
+	case 4:
 		create_pit_timer(&ps->pit_timer, val, 0);
 		break;
 	case 2:
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2ad6f5481671..36c5406b1813 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -79,36 +79,6 @@ static int dbg = 1;
 	}
 #endif
 
-#define PT64_PT_BITS 9
-#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
-#define PT32_PT_BITS 10
-#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
-
-#define PT_WRITABLE_SHIFT 1
-
-#define PT_PRESENT_MASK (1ULL << 0)
-#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-#define PT_USER_MASK (1ULL << 2)
-#define PT_PWT_MASK (1ULL << 3)
-#define PT_PCD_MASK (1ULL << 4)
-#define PT_ACCESSED_MASK (1ULL << 5)
-#define PT_DIRTY_MASK (1ULL << 6)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
-#define PT_PAT_MASK (1ULL << 7)
-#define PT_GLOBAL_MASK (1ULL << 8)
-#define PT64_NX_SHIFT 63
-#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
-
-#define PT_PAT_SHIFT 7
-#define PT_DIR_PAT_SHIFT 12
-#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
-
-#define PT32_DIR_PSE36_SIZE 4
-#define PT32_DIR_PSE36_SHIFT 13
-#define PT32_DIR_PSE36_MASK \
-	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
-
-
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
@@ -154,10 +124,6 @@
 #define PFERR_USER_MASK (1U << 2)
 #define PFERR_FETCH_MASK (1U << 4)
 
-#define PT64_ROOT_LEVEL 4
-#define PT32_ROOT_LEVEL 2
-#define PT32E_ROOT_LEVEL 3
-
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
@@ -186,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache;
 
 static u64 __read_mostly shadow_trap_nonpresent_pte;
 static u64 __read_mostly shadow_notrap_nonpresent_pte;
+static u64 __read_mostly shadow_base_present_pte;
+static u64 __read_mostly shadow_nx_mask;
+static u64 __read_mostly shadow_x_mask;	/* mutual exclusive with nx_mask */
+static u64 __read_mostly shadow_user_mask;
+static u64 __read_mostly shadow_accessed_mask;
+static u64 __read_mostly shadow_dirty_mask;
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -194,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
 
+void kvm_mmu_set_base_ptes(u64 base_pte)
+{
+	shadow_base_present_pte = base_pte;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
+
+void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
+		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+{
+	shadow_user_mask = user_mask;
+	shadow_accessed_mask = accessed_mask;
+	shadow_dirty_mask = dirty_mask;
+	shadow_nx_mask = nx_mask;
+	shadow_x_mask = x_mask;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.cr0 & X86_CR0_WP;
@@ -232,7 +221,7 @@ static int is_writeble_pte(unsigned long pte)
 
 static int is_dirty_pte(unsigned long pte)
 {
-	return pte & PT_DIRTY_MASK;
+	return pte & shadow_dirty_mask;
}
 
 static int is_rmap_pte(u64 pte)
@@ -387,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 
 	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
 	*write_count += 1;
-	WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
 }
 
 static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -547,7 +535,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 		return;
 	sp = page_header(__pa(spte));
 	pfn = spte_to_pfn(*spte);
-	if (*spte & PT_ACCESSED_MASK)
+	if (*spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
 	if (is_writeble_pte(*spte))
 		kvm_release_pfn_dirty(pfn);
@@ -1073,17 +1061,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	 * whether the guest actually used the pte (in order to detect
 	 * demand paging).
 	 */
-	spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+	spte = shadow_base_present_pte | shadow_dirty_mask;
 	if (!speculative)
 		pte_access |= PT_ACCESSED_MASK;
 	if (!dirty)
 		pte_access &= ~ACC_WRITE_MASK;
-	if (!(pte_access & ACC_EXEC_MASK))
-		spte |= PT64_NX_MASK;
-
-	spte |= PT_PRESENT_MASK;
+	if (pte_access & ACC_EXEC_MASK)
+		spte |= shadow_x_mask;
+	else
+		spte |= shadow_nx_mask;
 	if (pte_access & ACC_USER_MASK)
-		spte |= PT_USER_MASK;
+		spte |= shadow_user_mask;
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
 
@@ -1188,8 +1176,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 				return -ENOMEM;
 			}
 
-			table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
-				| PT_WRITABLE_MASK | PT_USER_MASK;
+			table[index] = __pa(new_table->spt)
+				| PT_PRESENT_MASK | PT_WRITABLE_MASK
+				| shadow_user_mask | shadow_x_mask;
 		}
 		table_addr = table[index] & PT64_BASE_ADDR_MASK;
 	}
@@ -1244,7 +1233,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
 	spin_lock(&vcpu->kvm->mmu_lock);
-#ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
@@ -1256,7 +1244,6 @@
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
 	}
-#endif
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -1282,7 +1269,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 
-#ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
@@ -1297,7 +1283,6 @@
 		vcpu->arch.mmu.root_hpa = root;
 		return;
 	}
-#endif
 	metaphysical = !is_paging(vcpu);
 	if (tdp_enabled)
 		metaphysical = 1;
@@ -1377,7 +1362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 largepage, gfn, pfn, TDP_ROOT_LEVEL);
+			 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -1484,7 +1469,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->page_fault = tdp_page_fault;
 	context->free = nonpaging_free;
 	context->prefetch_page = nonpaging_prefetch_page;
-	context->shadow_root_level = TDP_ROOT_LEVEL;
+	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 	context->root_hpa = INVALID_PAGE;
 
 	if (!is_paging(vcpu)) {
@@ -1633,7 +1618,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
 {
 	u64 *spte = vcpu->arch.last_pte_updated;
 
-	return !!(spte && (*spte & PT_ACCESSED_MASK));
+	return !!(spte && (*spte & shadow_accessed_mask));
 }
 
 static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
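
The new setters let each backend pick its own shadow-PTE vocabulary instead of
hard-wiring the x86 bits. The call sites are outside this hunk; as an
assumption based on the rest of this series (the VMX_EPT_* constants should
come from the vmx.h changes listed in the diffstat), the two configurations
look roughly like:

    static void example_mmu_setup(int use_ept)
    {
    	if (!use_ept) {
    		/* shadow paging (x86.c): keep the native x86 PTE bits */
    		kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
    		kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
    				      PT_DIRTY_MASK, PT64_NX_MASK, 0ull);
    	} else {
    		/* EPT (vmx.c): no user/accessed/dirty/NX bits, but an
    		 * explicit executable bit in x_mask */
    		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
    				      VMX_EPT_WRITABLE_MASK |
    				      VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
    		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
    				      VMX_EPT_EXECUTABLE_MASK);
    	}
    }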
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e64e9f56a65e..1730757bbc7a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -3,11 +3,38 @@
 
 #include <linux/kvm_host.h>
 
-#ifdef CONFIG_X86_64
-#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
-#else
-#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
-#endif
+#define PT64_PT_BITS 9
+#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
+#define PT32_PT_BITS 10
+#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
+
+#define PT_WRITABLE_SHIFT 1
+
+#define PT_PRESENT_MASK (1ULL << 0)
+#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+#define PT_USER_MASK (1ULL << 2)
+#define PT_PWT_MASK (1ULL << 3)
+#define PT_PCD_MASK (1ULL << 4)
+#define PT_ACCESSED_MASK (1ULL << 5)
+#define PT_DIRTY_MASK (1ULL << 6)
+#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_PAT_MASK (1ULL << 7)
+#define PT_GLOBAL_MASK (1ULL << 8)
+#define PT64_NX_SHIFT 63
+#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
+
+#define PT_PAT_SHIFT 7
+#define PT_DIR_PAT_SHIFT 12
+#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
+
+#define PT32_DIR_PSE36_SIZE 4
+#define PT32_DIR_PSE36_SHIFT 13
+#define PT32_DIR_PSE36_MASK \
+	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
+
+#define PT64_ROOT_LEVEL 4
+#define PT32_ROOT_LEVEL 2
+#define PT32E_ROOT_LEVEL 3
 
 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 89e0be2c10d0..ab22615eee89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void)
 	return false;
 }
 
+static int get_npt_level(void)
+{
+#ifdef CONFIG_X86_64
+	return PT64_ROOT_LEVEL;
+#else
+	return PT32E_ROOT_LEVEL;
+#endif
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.inject_pending_vectors = do_interrupt_requests,
 
 	.set_tss_addr = svm_set_tss_addr,
+	.get_tdp_level = get_npt_level,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8e5d6645b90d..bfe4db11989c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -42,6 +42,9 @@ module_param(enable_vpid, bool, 0);
42static int flexpriority_enabled = 1; 42static int flexpriority_enabled = 1;
43module_param(flexpriority_enabled, bool, 0); 43module_param(flexpriority_enabled, bool, 0);
44 44
45static int enable_ept = 1;
46module_param(enable_ept, bool, 0);
47
45struct vmcs { 48struct vmcs {
46 u32 revision_id; 49 u32 revision_id;
47 u32 abort; 50 u32 abort;
@@ -84,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
84 return container_of(vcpu, struct vcpu_vmx, vcpu); 87 return container_of(vcpu, struct vcpu_vmx, vcpu);
85} 88}
86 89
87static int init_rmode_tss(struct kvm *kvm); 90static int init_rmode(struct kvm *kvm);
88 91
89static DEFINE_PER_CPU(struct vmcs *, vmxarea); 92static DEFINE_PER_CPU(struct vmcs *, vmxarea);
90static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 93static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -107,6 +110,11 @@ static struct vmcs_config {
107 u32 vmentry_ctrl; 110 u32 vmentry_ctrl;
108} vmcs_config; 111} vmcs_config;
109 112
113struct vmx_capability {
114 u32 ept;
115 u32 vpid;
116} vmx_capability;
117
110#define VMX_SEGMENT_FIELD(seg) \ 118#define VMX_SEGMENT_FIELD(seg) \
111 [VCPU_SREG_##seg] = { \ 119 [VCPU_SREG_##seg] = { \
112 .selector = GUEST_##seg##_SELECTOR, \ 120 .selector = GUEST_##seg##_SELECTOR, \
@@ -214,6 +222,32 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
214 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); 222 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
215} 223}
216 224
225static inline int cpu_has_vmx_invept_individual_addr(void)
226{
227 return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT));
228}
229
230static inline int cpu_has_vmx_invept_context(void)
231{
232 return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT));
233}
234
235static inline int cpu_has_vmx_invept_global(void)
236{
237 return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT));
238}
239
240static inline int cpu_has_vmx_ept(void)
241{
242 return (vmcs_config.cpu_based_2nd_exec_ctrl &
243 SECONDARY_EXEC_ENABLE_EPT);
244}
245
246static inline int vm_need_ept(void)
247{
248 return (cpu_has_vmx_ept() && enable_ept);
249}
250
217static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 251static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
218{ 252{
219 return ((cpu_has_vmx_virtualize_apic_accesses()) && 253 return ((cpu_has_vmx_virtualize_apic_accesses()) &&
@@ -250,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
250 : : "a"(&operand), "c"(ext) : "cc", "memory"); 284 : : "a"(&operand), "c"(ext) : "cc", "memory");
251} 285}
252 286
287static inline void __invept(int ext, u64 eptp, gpa_t gpa)
288{
289 struct {
290 u64 eptp, gpa;
291 } operand = {eptp, gpa};
292
293 asm volatile (ASM_VMX_INVEPT
294 /* CF==1 or ZF==1 --> rc = -1 */
295 "; ja 1f ; ud2 ; 1:\n"
296 : : "a" (&operand), "c" (ext) : "cc", "memory");
297}
298
253static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) 299static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
254{ 300{
255 int i; 301 int i;
@@ -301,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
301 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); 347 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
302} 348}
303 349
350static inline void ept_sync_global(void)
351{
352 if (cpu_has_vmx_invept_global())
353 __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
354}
355
356static inline void ept_sync_context(u64 eptp)
357{
358 if (vm_need_ept()) {
359 if (cpu_has_vmx_invept_context())
360 __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
361 else
362 ept_sync_global();
363 }
364}
365
366static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
367{
368 if (vm_need_ept()) {
369 if (cpu_has_vmx_invept_individual_addr())
370 __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
371 eptp, gpa);
372 else
373 ept_sync_context(eptp);
374 }
375}
376
304static unsigned long vmcs_readl(unsigned long field) 377static unsigned long vmcs_readl(unsigned long field)
305{ 378{
306 unsigned long value; 379 unsigned long value;
@@ -388,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
388 eb |= 1u << 1; 461 eb |= 1u << 1;
389 if (vcpu->arch.rmode.active) 462 if (vcpu->arch.rmode.active)
390 eb = ~0; 463 eb = ~0;
464 if (vm_need_ept())
465 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
391 vmcs_write32(EXCEPTION_BITMAP, eb); 466 vmcs_write32(EXCEPTION_BITMAP, eb);
392} 467}
393 468
@@ -985,7 +1060,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
985static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) 1060static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
986{ 1061{
987 u32 vmx_msr_low, vmx_msr_high; 1062 u32 vmx_msr_low, vmx_msr_high;
988 u32 min, opt; 1063 u32 min, opt, min2, opt2;
989 u32 _pin_based_exec_control = 0; 1064 u32 _pin_based_exec_control = 0;
990 u32 _cpu_based_exec_control = 0; 1065 u32 _cpu_based_exec_control = 0;
991 u32 _cpu_based_2nd_exec_control = 0; 1066 u32 _cpu_based_2nd_exec_control = 0;
@@ -1003,6 +1078,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1003 CPU_BASED_CR8_LOAD_EXITING | 1078 CPU_BASED_CR8_LOAD_EXITING |
1004 CPU_BASED_CR8_STORE_EXITING | 1079 CPU_BASED_CR8_STORE_EXITING |
1005#endif 1080#endif
1081 CPU_BASED_CR3_LOAD_EXITING |
1082 CPU_BASED_CR3_STORE_EXITING |
1006 CPU_BASED_USE_IO_BITMAPS | 1083 CPU_BASED_USE_IO_BITMAPS |
1007 CPU_BASED_MOV_DR_EXITING | 1084 CPU_BASED_MOV_DR_EXITING |
1008 CPU_BASED_USE_TSC_OFFSETING; 1085 CPU_BASED_USE_TSC_OFFSETING;
@@ -1018,11 +1095,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1018 ~CPU_BASED_CR8_STORE_EXITING; 1095 ~CPU_BASED_CR8_STORE_EXITING;
1019#endif 1096#endif
1020 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 1097 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
1021 min = 0; 1098 min2 = 0;
1022 opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 1099 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
1023 SECONDARY_EXEC_WBINVD_EXITING | 1100 SECONDARY_EXEC_WBINVD_EXITING |
1024 SECONDARY_EXEC_ENABLE_VPID; 1101 SECONDARY_EXEC_ENABLE_VPID |
1025 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2, 1102 SECONDARY_EXEC_ENABLE_EPT;
1103 if (adjust_vmx_controls(min2, opt2,
1104 MSR_IA32_VMX_PROCBASED_CTLS2,
1026 &_cpu_based_2nd_exec_control) < 0) 1105 &_cpu_based_2nd_exec_control) < 0)
1027 return -EIO; 1106 return -EIO;
1028 } 1107 }
@@ -1031,6 +1110,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1031 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) 1110 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
1032 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; 1111 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
1033#endif 1112#endif
1113 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
1114 /* CR3 accesses don't need to cause VM exits when EPT is enabled */
1115 min &= ~(CPU_BASED_CR3_LOAD_EXITING |
1116 CPU_BASED_CR3_STORE_EXITING);
1117 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
1118 &_cpu_based_exec_control) < 0)
1119 return -EIO;
1120 rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
1121 vmx_capability.ept, vmx_capability.vpid);
1122 }
1034 1123
1035 min = 0; 1124 min = 0;
1036#ifdef CONFIG_X86_64 1125#ifdef CONFIG_X86_64
@@ -1256,7 +1345,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1256 fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); 1345 fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
1257 1346
1258 kvm_mmu_reset_context(vcpu); 1347 kvm_mmu_reset_context(vcpu);
1259 init_rmode_tss(vcpu->kvm); 1348 init_rmode(vcpu->kvm);
1260} 1349}
1261 1350
1262#ifdef CONFIG_X86_64 1351#ifdef CONFIG_X86_64
@@ -1304,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1304 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; 1393 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
1305} 1394}
1306 1395
1396static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
1397{
1398 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
1399 if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
1400 printk(KERN_ERR "EPT: Failed to load pdptrs!\n");
1401 return;
1402 }
1403 vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
1404 vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
1405 vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
1406 vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
1407 }
1408}
1409
1410static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
1411
1412static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1413 unsigned long cr0,
1414 struct kvm_vcpu *vcpu)
1415{
1416 if (!(cr0 & X86_CR0_PG)) {
1417 /* From paging/starting to nonpaging */
1418 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
1419 vmcs_config.cpu_based_exec_ctrl |
1420 (CPU_BASED_CR3_LOAD_EXITING |
1421 CPU_BASED_CR3_STORE_EXITING));
1422 vcpu->arch.cr0 = cr0;
1423 vmx_set_cr4(vcpu, vcpu->arch.cr4);
1424 *hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
1425 *hw_cr0 &= ~X86_CR0_WP;
1426 } else if (!is_paging(vcpu)) {
1427 /* From nonpaging to paging */
1428 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
1429 vmcs_config.cpu_based_exec_ctrl &
1430 ~(CPU_BASED_CR3_LOAD_EXITING |
1431 CPU_BASED_CR3_STORE_EXITING));
1432 vcpu->arch.cr0 = cr0;
1433 vmx_set_cr4(vcpu, vcpu->arch.cr4);
1434 if (!(vcpu->arch.cr0 & X86_CR0_WP))
1435 *hw_cr0 &= ~X86_CR0_WP;
1436 }
1437}
1438
1439static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
1440 struct kvm_vcpu *vcpu)
1441{
1442 if (!is_paging(vcpu)) {
1443 *hw_cr4 &= ~X86_CR4_PAE;
1444 *hw_cr4 |= X86_CR4_PSE;
1445 } else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
1446 *hw_cr4 &= ~X86_CR4_PAE;
1447}
1448
1307static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1449static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1308{ 1450{
1451 unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
1452 KVM_VM_CR0_ALWAYS_ON;
1453
1309 vmx_fpu_deactivate(vcpu); 1454 vmx_fpu_deactivate(vcpu);
1310 1455
1311 if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) 1456 if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
@@ -1323,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1323 } 1468 }
1324#endif 1469#endif
1325 1470
1471 if (vm_need_ept())
1472 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
1473
1326 vmcs_writel(CR0_READ_SHADOW, cr0); 1474 vmcs_writel(CR0_READ_SHADOW, cr0);
1327 vmcs_writel(GUEST_CR0, 1475 vmcs_writel(GUEST_CR0, hw_cr0);
1328 (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
1329 vcpu->arch.cr0 = cr0; 1476 vcpu->arch.cr0 = cr0;
1330 1477
1331 if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) 1478 if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
1332 vmx_fpu_activate(vcpu); 1479 vmx_fpu_activate(vcpu);
1333} 1480}
1334 1481
1482static u64 construct_eptp(unsigned long root_hpa)
1483{
1484 u64 eptp;
1485
1486 /* TODO: write the value read from the MSR */
1487 eptp = VMX_EPT_DEFAULT_MT |
1488 VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
1489 eptp |= (root_hpa & PAGE_MASK);
1490
1491 return eptp;
1492}
1493
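
construct_eptp() packs three fields whose positions follow from the VMX_EPT_* constants this patch adds to vmx.h: the memory type in bits 2:0 (6 = write-back), the guest address width in bits 5:3 (3 encodes a 4-level walk, which is also why get_ept_level() below returns VMX_EPT_DEFAULT_GAW + 1), and the page-aligned address of the root table. A standalone sketch:

#include <stdint.h>
#include <stdio.h>

static uint64_t make_eptp(uint64_t root_hpa)
{
	uint64_t eptp = 6ull;		/* VMX_EPT_DEFAULT_MT: write-back */

	eptp |= 3ull << 3;		/* VMX_EPT_DEFAULT_GAW: 4 levels */
	eptp |= root_hpa & ~0xfffull;	/* PFN of the EPT root page */
	return eptp;
}

int main(void)
{
	printf("eptp = %#llx\n", (unsigned long long)make_eptp(0x12345000));
	return 0;
}
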
1335static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 1494static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1336{ 1495{
1496 unsigned long guest_cr3;
1497 u64 eptp;
1498
1499 guest_cr3 = cr3;
1500 if (vm_need_ept()) {
1501 eptp = construct_eptp(cr3);
1502 vmcs_write64(EPT_POINTER, eptp);
1503 ept_sync_context(eptp);
1504 ept_load_pdptrs(vcpu);
1505 guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
1506 VMX_EPT_IDENTITY_PAGETABLE_ADDR;
1507 }
1508
1337 vmx_flush_tlb(vcpu); 1509 vmx_flush_tlb(vcpu);
1338 vmcs_writel(GUEST_CR3, cr3); 1510 vmcs_writel(GUEST_CR3, guest_cr3);
1339 if (vcpu->arch.cr0 & X86_CR0_PE) 1511 if (vcpu->arch.cr0 & X86_CR0_PE)
1340 vmx_fpu_deactivate(vcpu); 1512 vmx_fpu_deactivate(vcpu);
1341} 1513}
1342 1514
1343static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1515static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1344{ 1516{
1345 vmcs_writel(CR4_READ_SHADOW, cr4); 1517 unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
1346 vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ? 1518 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
1347 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON)); 1519
1348 vcpu->arch.cr4 = cr4; 1520 vcpu->arch.cr4 = cr4;
1521 if (vm_need_ept())
1522 ept_update_paging_mode_cr4(&hw_cr4, vcpu);
1523
1524 vmcs_writel(CR4_READ_SHADOW, cr4);
1525 vmcs_writel(GUEST_CR4, hw_cr4);
1349} 1526}
1350 1527
1351static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) 1528static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1530,6 +1707,41 @@ out:
1530 return ret; 1707 return ret;
1531} 1708}
1532 1709
1710static int init_rmode_identity_map(struct kvm *kvm)
1711{
1712 int i, r, ret;
1713 pfn_t identity_map_pfn;
1714 u32 tmp;
1715
1716 if (!vm_need_ept())
1717 return 1;
1718 if (unlikely(!kvm->arch.ept_identity_pagetable)) {
1719 printk(KERN_ERR "EPT: identity-mapping pagetable "
1720 "haven't been allocated!\n");
1721 return 0;
1722 }
1723 if (likely(kvm->arch.ept_identity_pagetable_done))
1724 return 1;
1725 ret = 0;
1726 identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT;
1727 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
1728 if (r < 0)
1729 goto out;
1730 /* Set up identity-mapping pagetable for EPT in real mode */
1731 for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
1732 tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
1733 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
1734 r = kvm_write_guest_page(kvm, identity_map_pfn,
1735 &tmp, i * sizeof(tmp), sizeof(tmp));
1736 if (r < 0)
1737 goto out;
1738 }
1739 kvm->arch.ept_identity_pagetable_done = true;
1740 ret = 1;
1741out:
1742 return ret;
1743}
1744
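
init_rmode_identity_map() fills one 4KB page with 1024 PSE (4MB-page) directory entries, so entry i maps the 4MB region at i << 22 onto itself; that gives real-mode guest code a ready-made identity mapping to run under EPT. A userspace sketch of the same table, with the flag value spelled out from the x86 _PAGE_* bits used above:

#include <stdint.h>
#include <stdio.h>

#define PT32_ENT_PER_PAGE 1024
/* _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_PSE */
#define PDE_FLAGS 0x0e7u

int main(void)
{
	static uint32_t pd[PT32_ENT_PER_PAGE];
	int i;

	for (i = 0; i < PT32_ENT_PER_PAGE; i++)
		pd[i] = ((uint32_t)i << 22) | PDE_FLAGS;

	printf("pd[1] maps %#x -> %#x\n", 1 << 22, pd[1] & ~0x3fffffu);
	return 0;
}
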
1533static void seg_setup(int seg) 1745static void seg_setup(int seg)
1534{ 1746{
1535 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 1747 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1564,6 +1776,31 @@ out:
1564 return r; 1776 return r;
1565} 1777}
1566 1778
1779static int alloc_identity_pagetable(struct kvm *kvm)
1780{
1781 struct kvm_userspace_memory_region kvm_userspace_mem;
1782 int r = 0;
1783
1784 down_write(&kvm->slots_lock);
1785 if (kvm->arch.ept_identity_pagetable)
1786 goto out;
1787 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
1788 kvm_userspace_mem.flags = 0;
1789 kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
1790 kvm_userspace_mem.memory_size = PAGE_SIZE;
1791 r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
1792 if (r)
1793 goto out;
1794
1795 down_read(&current->mm->mmap_sem);
1796 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
1797 VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT);
1798 up_read(&current->mm->mmap_sem);
1799out:
1800 up_write(&kvm->slots_lock);
1801 return r;
1802}
1803
1567static void allocate_vpid(struct vcpu_vmx *vmx) 1804static void allocate_vpid(struct vcpu_vmx *vmx)
1568{ 1805{
1569 int vpid; 1806 int vpid;
@@ -1638,6 +1875,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1638 CPU_BASED_CR8_LOAD_EXITING; 1875 CPU_BASED_CR8_LOAD_EXITING;
1639#endif 1876#endif
1640 } 1877 }
1878 if (!vm_need_ept())
1879 exec_control |= CPU_BASED_CR3_STORE_EXITING |
1880 CPU_BASED_CR3_LOAD_EXITING;
1641 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); 1881 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
1642 1882
1643 if (cpu_has_secondary_exec_ctrls()) { 1883 if (cpu_has_secondary_exec_ctrls()) {
@@ -1647,6 +1887,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1647 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 1887 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
1648 if (vmx->vpid == 0) 1888 if (vmx->vpid == 0)
1649 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; 1889 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
1890 if (!vm_need_ept())
1891 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
1650 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 1892 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
1651 } 1893 }
1652 1894
@@ -1722,6 +1964,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1722 return 0; 1964 return 0;
1723} 1965}
1724 1966
1967static int init_rmode(struct kvm *kvm)
1968{
1969 if (!init_rmode_tss(kvm))
1970 return 0;
1971 if (!init_rmode_identity_map(kvm))
1972 return 0;
1973 return 1;
1974}
1975
1725static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) 1976static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1726{ 1977{
1727 struct vcpu_vmx *vmx = to_vmx(vcpu); 1978 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1729,7 +1980,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1729 int ret; 1980 int ret;
1730 1981
1731 down_read(&vcpu->kvm->slots_lock); 1982 down_read(&vcpu->kvm->slots_lock);
1732 if (!init_rmode_tss(vmx->vcpu.kvm)) { 1983 if (!init_rmode(vmx->vcpu.kvm)) {
1733 ret = -ENOMEM; 1984 ret = -ENOMEM;
1734 goto out; 1985 goto out;
1735 } 1986 }
@@ -1994,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1994 if (intr_info & INTR_INFO_DELIVER_CODE_MASK) 2245 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
1995 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 2246 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
1996 if (is_page_fault(intr_info)) { 2247 if (is_page_fault(intr_info)) {
2248 /* EPT won't cause a page fault directly */
2249 if (vm_need_ept())
2250 BUG();
1997 cr2 = vmcs_readl(EXIT_QUALIFICATION); 2251 cr2 = vmcs_readl(EXIT_QUALIFICATION);
1998 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, 2252 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
1999 (u32)((u64)cr2 >> 32), handler); 2253 (u32)((u64)cr2 >> 32), handler);
@@ -2323,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2323 return kvm_task_switch(vcpu, tss_selector, reason); 2577 return kvm_task_switch(vcpu, tss_selector, reason);
2324} 2578}
2325 2579
2580static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2581{
2582 u64 exit_qualification;
2583 enum emulation_result er;
2584 gpa_t gpa;
2585 unsigned long hva;
2586 int gla_validity;
2587 int r;
2588
2589 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
2590
2591 if (exit_qualification & (1 << 6)) {
2592 printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
2593 return -ENOTSUPP;
2594 }
2595
2596 gla_validity = (exit_qualification >> 7) & 0x3;
2597 if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
2598 printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
2599 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
2600 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
2601 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
2602 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
2603 (long unsigned int)exit_qualification);
2604 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
2605 kvm_run->hw.hardware_exit_reason = 0;
2606 return -ENOTSUPP;
2607 }
2608
2609 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
2610 hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
2611 if (!kvm_is_error_hva(hva)) {
2612 r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
2613 if (r < 0) {
2614 printk(KERN_ERR "EPT: Not enough memory!\n");
2615 return -ENOMEM;
2616 }
2617 return 1;
2618 } else {
2619 /* must be MMIO */
2620 er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
2621
2622 if (er == EMULATE_FAIL) {
2623 printk(KERN_ERR
2624 "EPT: Fail to handle EPT violation vmexit!er is %d\n",
2625 er);
2626 printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
2627 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
2628 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
2629 printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
2630 (long unsigned int)exit_qualification);
2631 return -ENOTSUPP;
2632 } else if (er == EMULATE_DO_MMIO)
2633 return 0;
2634 }
2635 return 1;
2636}
2637
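
handle_ept_violation() relies on two exit-qualification fields: bit 6 flags a guest-physical address wider than the configured GAW, and bits 8:7 report guest-linear-address validity; architecturally, bits 2:0 also encode the access type (data read, data write, instruction fetch), though the handler doesn't need them. A sketch of the decode, with field positions taken from the checks above:

#include <stdint.h>
#include <stdio.h>

static void decode_ept_qual(uint64_t q)
{
	printf("read=%d write=%d fetch=%d\n",
	       (int)(q & 1), (int)((q >> 1) & 1), (int)((q >> 2) & 1));
	printf("gpa exceeds gaw=%d, gla validity=%d\n",
	       (int)((q >> 6) & 1), (int)((q >> 7) & 3));
}

int main(void)
{
	decode_ept_qual(0x182);	/* e.g. a write fault, GLA validity 3 */
	return 0;
}
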
2326/* 2638/*
2327 * The exit handlers return 1 if the exit was handled fully and guest execution 2639 * The exit handlers return 1 if the exit was handled fully and guest execution
2328 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 2640 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -2346,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
2346 [EXIT_REASON_APIC_ACCESS] = handle_apic_access, 2658 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
2347 [EXIT_REASON_WBINVD] = handle_wbinvd, 2659 [EXIT_REASON_WBINVD] = handle_wbinvd,
2348 [EXIT_REASON_TASK_SWITCH] = handle_task_switch, 2660 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
2661 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
2349}; 2662};
2350 2663
2351static const int kvm_vmx_max_exit_handlers = 2664static const int kvm_vmx_max_exit_handlers =
@@ -2364,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2364 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), 2677 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
2365 (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); 2678 (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
2366 2679
2680 /* CR3 accesses don't cause a VM exit in paging mode, so we
2681 * need to sync with the guest's real CR3. */
2682 if (vm_need_ept() && is_paging(vcpu)) {
2683 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
2684 ept_load_pdptrs(vcpu);
2685 }
2686
2367 if (unlikely(vmx->fail)) { 2687 if (unlikely(vmx->fail)) {
2368 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2688 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2369 kvm_run->fail_entry.hardware_entry_failure_reason 2689 kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2372,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
2372 } 2692 }
2373 2693
2374 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 2694 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
2375 exit_reason != EXIT_REASON_EXCEPTION_NMI) 2695 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
2696 exit_reason != EXIT_REASON_EPT_VIOLATION))
2376 printk(KERN_WARNING "%s: unexpected, valid vectoring info and " 2697 printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
2377 "exit reason is 0x%x\n", __func__, exit_reason); 2698 "exit reason is 0x%x\n", __func__, exit_reason);
2378 if (exit_reason < kvm_vmx_max_exit_handlers 2699 if (exit_reason < kvm_vmx_max_exit_handlers
@@ -2674,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2674 return ERR_PTR(-ENOMEM); 2995 return ERR_PTR(-ENOMEM);
2675 2996
2676 allocate_vpid(vmx); 2997 allocate_vpid(vmx);
2998 if (id == 0 && vm_need_ept()) {
2999 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3000 VMX_EPT_WRITABLE_MASK |
3001 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
3002 kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
3003 VMX_EPT_FAKE_DIRTY_MASK, 0ull,
3004 VMX_EPT_EXECUTABLE_MASK);
3005 kvm_enable_tdp();
3006 }
2677 3007
2678 err = kvm_vcpu_init(&vmx->vcpu, kvm, id); 3008 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
2679 if (err) 3009 if (err)
@@ -2706,6 +3036,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2706 if (alloc_apic_access_page(kvm) != 0) 3036 if (alloc_apic_access_page(kvm) != 0)
2707 goto free_vmcs; 3037 goto free_vmcs;
2708 3038
3039 if (vm_need_ept())
3040 if (alloc_identity_pagetable(kvm) != 0)
3041 goto free_vmcs;
3042
2709 return &vmx->vcpu; 3043 return &vmx->vcpu;
2710 3044
2711free_vmcs: 3045free_vmcs:
@@ -2735,6 +3069,11 @@ static void __init vmx_check_processor_compat(void *rtn)
2735 } 3069 }
2736} 3070}
2737 3071
3072static int get_ept_level(void)
3073{
3074 return VMX_EPT_DEFAULT_GAW + 1;
3075}
3076
2738static struct kvm_x86_ops vmx_x86_ops = { 3077static struct kvm_x86_ops vmx_x86_ops = {
2739 .cpu_has_kvm_support = cpu_has_kvm_support, 3078 .cpu_has_kvm_support = cpu_has_kvm_support,
2740 .disabled_by_bios = vmx_disabled_by_bios, 3079 .disabled_by_bios = vmx_disabled_by_bios,
@@ -2791,6 +3130,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
2791 .inject_pending_vectors = do_interrupt_requests, 3130 .inject_pending_vectors = do_interrupt_requests,
2792 3131
2793 .set_tss_addr = vmx_set_tss_addr, 3132 .set_tss_addr = vmx_set_tss_addr,
3133 .get_tdp_level = get_ept_level,
2794}; 3134};
2795 3135
2796static int __init vmx_init(void) 3136static int __init vmx_init(void)
@@ -2843,9 +3183,14 @@ static int __init vmx_init(void)
2843 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); 3183 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
2844 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); 3184 vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
2845 3185
3186 if (cpu_has_vmx_ept())
3187 bypass_guest_pf = 0;
3188
2846 if (bypass_guest_pf) 3189 if (bypass_guest_pf)
2847 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); 3190 kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
2848 3191
3192 ept_sync_global();
3193
2849 return 0; 3194 return 0;
2850 3195
2851out2: 3196out2:
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 5dff4606b988..79d94c610dfe 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -35,6 +35,8 @@
35#define CPU_BASED_MWAIT_EXITING 0x00000400 35#define CPU_BASED_MWAIT_EXITING 0x00000400
36#define CPU_BASED_RDPMC_EXITING 0x00000800 36#define CPU_BASED_RDPMC_EXITING 0x00000800
37#define CPU_BASED_RDTSC_EXITING 0x00001000 37#define CPU_BASED_RDTSC_EXITING 0x00001000
38#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
39#define CPU_BASED_CR3_STORE_EXITING 0x00010000
38#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 40#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
39#define CPU_BASED_CR8_STORE_EXITING 0x00100000 41#define CPU_BASED_CR8_STORE_EXITING 0x00100000
40#define CPU_BASED_TPR_SHADOW 0x00200000 42#define CPU_BASED_TPR_SHADOW 0x00200000
@@ -49,6 +51,7 @@
49 * Definitions of Secondary Processor-Based VM-Execution Controls. 51 * Definitions of Secondary Processor-Based VM-Execution Controls.
50 */ 52 */
51#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 53#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
54#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
52#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 55#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
53#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 56#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
54 57
@@ -100,10 +103,22 @@ enum vmcs_field {
100 VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, 103 VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
101 APIC_ACCESS_ADDR = 0x00002014, 104 APIC_ACCESS_ADDR = 0x00002014,
102 APIC_ACCESS_ADDR_HIGH = 0x00002015, 105 APIC_ACCESS_ADDR_HIGH = 0x00002015,
106 EPT_POINTER = 0x0000201a,
107 EPT_POINTER_HIGH = 0x0000201b,
108 GUEST_PHYSICAL_ADDRESS = 0x00002400,
109 GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
103 VMCS_LINK_POINTER = 0x00002800, 110 VMCS_LINK_POINTER = 0x00002800,
104 VMCS_LINK_POINTER_HIGH = 0x00002801, 111 VMCS_LINK_POINTER_HIGH = 0x00002801,
105 GUEST_IA32_DEBUGCTL = 0x00002802, 112 GUEST_IA32_DEBUGCTL = 0x00002802,
106 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, 113 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
114 GUEST_PDPTR0 = 0x0000280a,
115 GUEST_PDPTR0_HIGH = 0x0000280b,
116 GUEST_PDPTR1 = 0x0000280c,
117 GUEST_PDPTR1_HIGH = 0x0000280d,
118 GUEST_PDPTR2 = 0x0000280e,
119 GUEST_PDPTR2_HIGH = 0x0000280f,
120 GUEST_PDPTR3 = 0x00002810,
121 GUEST_PDPTR3_HIGH = 0x00002811,
107 PIN_BASED_VM_EXEC_CONTROL = 0x00004000, 122 PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
108 CPU_BASED_VM_EXEC_CONTROL = 0x00004002, 123 CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
109 EXCEPTION_BITMAP = 0x00004004, 124 EXCEPTION_BITMAP = 0x00004004,
@@ -226,6 +241,8 @@ enum vmcs_field {
226#define EXIT_REASON_MWAIT_INSTRUCTION 36 241#define EXIT_REASON_MWAIT_INSTRUCTION 36
227#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 242#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
228#define EXIT_REASON_APIC_ACCESS 44 243#define EXIT_REASON_APIC_ACCESS 44
244#define EXIT_REASON_EPT_VIOLATION 48
245#define EXIT_REASON_EPT_MISCONFIG 49
229#define EXIT_REASON_WBINVD 54 246#define EXIT_REASON_WBINVD 54
230 247
231/* 248/*
@@ -316,15 +333,36 @@ enum vmcs_field {
316#define MSR_IA32_VMX_CR4_FIXED1 0x489 333#define MSR_IA32_VMX_CR4_FIXED1 0x489
317#define MSR_IA32_VMX_VMCS_ENUM 0x48a 334#define MSR_IA32_VMX_VMCS_ENUM 0x48a
318#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b 335#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b
336#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c
319 337
320#define MSR_IA32_FEATURE_CONTROL 0x3a 338#define MSR_IA32_FEATURE_CONTROL 0x3a
321#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 339#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
322#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 340#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
323 341
324#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 342#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
343#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10
325 344
326#define VMX_NR_VPIDS (1 << 16) 345#define VMX_NR_VPIDS (1 << 16)
327#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 346#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
328#define VMX_VPID_EXTENT_ALL_CONTEXT 2 347#define VMX_VPID_EXTENT_ALL_CONTEXT 2
329 348
349#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
350#define VMX_EPT_EXTENT_CONTEXT 1
351#define VMX_EPT_EXTENT_GLOBAL 2
352#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
353#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
354#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
355#define VMX_EPT_DEFAULT_GAW 3
356#define VMX_EPT_MAX_GAW 0x4
357#define VMX_EPT_MT_EPTE_SHIFT 3
358#define VMX_EPT_GAW_EPTP_SHIFT 3
359#define VMX_EPT_DEFAULT_MT 0x6ull
360#define VMX_EPT_READABLE_MASK 0x1ull
361#define VMX_EPT_WRITABLE_MASK 0x2ull
362#define VMX_EPT_EXECUTABLE_MASK 0x4ull
363#define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62)
364#define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63)
365
366#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
367
330#endif 368#endif
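
These masks are what vmx_create_vcpu() hands to kvm_mmu_set_base_ptes() and kvm_mmu_set_mask_ptes(): RWX permissions in bits 2:0, the memory type starting at bit 3, and accessed/dirty emulated in the software-available bits 62/63 (the FAKE_ naming reflects that this hardware generation has no EPT A/D tracking). A sketch of a leaf EPT PTE composed from them:

#include <stdint.h>
#include <stdio.h>

#define EPT_R		0x1ull			/* VMX_EPT_READABLE_MASK */
#define EPT_W		0x2ull			/* VMX_EPT_WRITABLE_MASK */
#define EPT_X		0x4ull			/* VMX_EPT_EXECUTABLE_MASK */
#define EPT_MT_WB	(0x6ull << 3)		/* default MT at EPTE shift 3 */

static uint64_t make_epte(uint64_t hpa)
{
	return (hpa & ~0xfffull) | EPT_R | EPT_W | EPT_X | EPT_MT_WB;
}

int main(void)
{
	printf("epte = %#llx\n", (unsigned long long)make_epte(0xabcde000));
	return 0;
}
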
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0ce556372a4d..21338bdb28ff 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque)
2417 2417
2418 kvm_x86_ops = ops; 2418 kvm_x86_ops = ops;
2419 kvm_mmu_set_nonpresent_ptes(0ull, 0ull); 2419 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
2420 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
2421 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
2422 PT_DIRTY_MASK, PT64_NX_MASK, 0);
2420 return 0; 2423 return 0;
2421 2424
2422out: 2425out:
@@ -3019,6 +3022,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3019 3022
3020 kvm_x86_ops->decache_regs(vcpu); 3023 kvm_x86_ops->decache_regs(vcpu);
3021 3024
3025 vcpu->arch.exception.pending = false;
3026
3022 vcpu_put(vcpu); 3027 vcpu_put(vcpu);
3023 3028
3024 return 0; 3029 return 0;
@@ -3481,7 +3486,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3481 } 3486 }
3482 3487
3483 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { 3488 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3484 cseg_desc.type &= ~(1 << 8); //clear the B flag 3489 cseg_desc.type &= ~(1 << 1); //clear the B flag
3485 save_guest_segment_descriptor(vcpu, tr_seg.selector, 3490 save_guest_segment_descriptor(vcpu, tr_seg.selector,
3486 &cseg_desc); 3491 &cseg_desc);
3487 } 3492 }
@@ -3507,7 +3512,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3507 } 3512 }
3508 3513
3509 if (reason != TASK_SWITCH_IRET) { 3514 if (reason != TASK_SWITCH_IRET) {
3510 nseg_desc.type |= (1 << 8); 3515 nseg_desc.type |= (1 << 1);
3511 save_guest_segment_descriptor(vcpu, tss_selector, 3516 save_guest_segment_descriptor(vcpu, tss_selector,
3512 &nseg_desc); 3517 &nseg_desc);
3513 } 3518 }
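
The (1 << 8) to (1 << 1) change is a genuine fix: in a segment descriptor the TSS Busy flag is bit 1 of the 4-bit type field (type 9 = available 32-bit TSS, 11 = busy), so bit 8 was never part of the field at all. A sketch of the transitions the task-switch code performs:

#include <stdio.h>

int main(void)
{
	unsigned int type = 9;		/* available 32-bit TSS */

	type |= 1 << 1;			/* dispatch: mark busy -> 11 */
	printf("busy TSS type = %u\n", type);
	type &= ~(1u << 1);		/* IRET/JMP task switch: clear B */
	printf("available TSS type = %u\n", type);
	return 0;
}
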
@@ -3698,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu)
3698{ 3703{
3699 unsigned after_mxcsr_mask; 3704 unsigned after_mxcsr_mask;
3700 3705
3706 /*
3707 * Touch the FPU the first time in a non-atomic context: if
3708 * this is the first FPU instruction, the exception handler
3709 * will fire before the instruction returns and will have to
3710 * allocate RAM with GFP_KERNEL.
3711 */
3712 if (!used_math())
3713 fx_save(&vcpu->arch.host_fx_image);
3714
3701 /* Initialize guest FPU by resetting ours and saving into guest's */ 3715 /* Initialize guest FPU by resetting ours and saving into guest's */
3702 preempt_disable(); 3716 preempt_disable();
3703 fx_save(&vcpu->arch.host_fx_image); 3717 fx_save(&vcpu->arch.host_fx_image);
3704 fpu_init(); 3718 fx_finit();
3705 fx_save(&vcpu->arch.guest_fx_image); 3719 fx_save(&vcpu->arch.guest_fx_image);
3706 fx_restore(&vcpu->arch.host_fx_image); 3720 fx_restore(&vcpu->arch.host_fx_image);
3707 preempt_enable(); 3721 preempt_enable();
@@ -3906,6 +3920,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
3906 kvm_free_physmem(kvm); 3920 kvm_free_physmem(kvm);
3907 if (kvm->arch.apic_access_page) 3921 if (kvm->arch.apic_access_page)
3908 put_page(kvm->arch.apic_access_page); 3922 put_page(kvm->arch.apic_access_page);
3923 if (kvm->arch.ept_identity_pagetable)
3924 put_page(kvm->arch.ept_identity_pagetable);
3909 kfree(kvm); 3925 kfree(kvm);
3910} 3926}
3911 3927
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 2ca08386f993..f2a696d6a243 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1761,6 +1761,7 @@ twobyte_insn:
1761 case 6: /* lmsw */ 1761 case 6: /* lmsw */
1762 realmode_lmsw(ctxt->vcpu, (u16)c->src.val, 1762 realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
1763 &ctxt->eflags); 1763 &ctxt->eflags);
1764 c->dst.type = OP_NONE;
1764 break; 1765 break;
1765 case 7: /* invlpg*/ 1766 case 7: /* invlpg*/
1766 emulate_invlpg(ctxt->vcpu, memop); 1767 emulate_invlpg(ctxt->vcpu, memop);
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 18378850e25a..914ccf983687 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -476,29 +476,3 @@ int memory_add_physaddr_to_nid(u64 addr)
476 476
477EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); 477EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
478#endif 478#endif
479
480#ifndef CONFIG_HAVE_ARCH_PARSE_SRAT
481/*
482 * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA
483 *
484 * These stub functions are needed to compile 32-bit NUMA when SRAT is
485 * not set. There are functions in srat_64.c for parsing this table
486 * and it may be possible to make them common functions.
487 */
488void acpi_numa_slit_init (struct acpi_table_slit *slit)
489{
490 printk(KERN_INFO "ACPI: No support for parsing SLIT table\n");
491}
492
493void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa)
494{
495}
496
497void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma)
498{
499}
500
501void acpi_numa_arch_fixup(void)
502{
503}
504#endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 9ee007be9142..369cf065b6a4 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -172,10 +172,3 @@ void reserve_top_address(unsigned long reserve)
172 __FIXADDR_TOP = -reserve - PAGE_SIZE; 172 __FIXADDR_TOP = -reserve - PAGE_SIZE;
173 __VMALLOC_RESERVE += reserve; 173 __VMALLOC_RESERVE += reserve;
174} 174}
175
176int pmd_bad(pmd_t pmd)
177{
178 WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
179
180 return pmd_bad_v1(pmd);
181}
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32
index 7fa519868d70..89ec35d00efd 100644
--- a/arch/x86/pci/Makefile_32
+++ b/arch/x86/pci/Makefile_32
@@ -6,11 +6,19 @@ obj-$(CONFIG_PCI_DIRECT) += direct.o
6obj-$(CONFIG_PCI_OLPC) += olpc.o 6obj-$(CONFIG_PCI_OLPC) += olpc.o
7 7
8pci-y := fixup.o 8pci-y := fixup.o
9
10# Do not change the ordering here. There is a nasty init function
11# ordering dependency which breaks when you move acpi.o below
12# legacy/irq.o
9pci-$(CONFIG_ACPI) += acpi.o 13pci-$(CONFIG_ACPI) += acpi.o
10pci-y += legacy.o irq.o 14pci-y += legacy.o irq.o
11 15
12pci-$(CONFIG_X86_VISWS) += visws.o fixup.o 16# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are
13pci-$(CONFIG_X86_NUMAQ) += numa.o irq.o 17# therefor correct. This needs a proper fix by distangling the code.
18pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
19pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o
20
21# Necessary for NUMAQ as well
14pci-$(CONFIG_NUMA) += mp_bus_to_node.o 22pci-$(CONFIG_NUMA) += mp_bus_to_node.o
15 23
16obj-y += $(pci-y) common.o early.o 24obj-y += $(pci-y) common.o early.o
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 1a9c0c6a1a18..d95de2f199cd 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -6,45 +6,6 @@
6#include <asm/numa.h> 6#include <asm/numa.h>
7#include "pci.h" 7#include "pci.h"
8 8
9static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
10{
11 pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
12 printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);
13 return 0;
14}
15
16static struct dmi_system_id acpi_pciprobe_dmi_table[] __devinitdata = {
17/*
18 * Systems where PCI IO resource ISA alignment can be skipped
19 * when the ISA enable bit in the bridge control is not set
20 */
21 {
22 .callback = can_skip_ioresource_align,
23 .ident = "IBM System x3800",
24 .matches = {
25 DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
26 DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
27 },
28 },
29 {
30 .callback = can_skip_ioresource_align,
31 .ident = "IBM System x3850",
32 .matches = {
33 DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
34 DMI_MATCH(DMI_PRODUCT_NAME, "x3850"),
35 },
36 },
37 {
38 .callback = can_skip_ioresource_align,
39 .ident = "IBM System x3950",
40 .matches = {
41 DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
42 DMI_MATCH(DMI_PRODUCT_NAME, "x3950"),
43 },
44 },
45 {}
46};
47
48struct pci_root_info { 9struct pci_root_info {
49 char *name; 10 char *name;
50 unsigned int res_num; 11 unsigned int res_num;
@@ -196,8 +157,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
196 int pxm; 157 int pxm;
197#endif 158#endif
198 159
199 dmi_check_system(acpi_pciprobe_dmi_table);
200
201 if (domain && !pci_domains_supported) { 160 if (domain && !pci_domains_supported) {
202 printk(KERN_WARNING "PCI: Multiple domains not supported " 161 printk(KERN_WARNING "PCI: Multiple domains not supported "
203 "(dom %d, bus %d)\n", domain, busnum); 162 "(dom %d, bus %d)\n", domain, busnum);
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 2a4d751818b7..bfa72a9475b3 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -90,6 +90,50 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
90 rom_r->start = rom_r->end = rom_r->flags = 0; 90 rom_r->start = rom_r->end = rom_r->flags = 0;
91} 91}
92 92
93static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
94{
95 pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
96 printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);
97 return 0;
98}
99
100static struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitdata = {
101/*
102 * Systems where PCI IO resource ISA alignment can be skipped
103 * when the ISA enable bit in the bridge control is not set
104 */
105 {
106 .callback = can_skip_ioresource_align,
107 .ident = "IBM System x3800",
108 .matches = {
109 DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
110 DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
111 },
112 },
113 {
114 .callback = can_skip_ioresource_align,
115 .ident = "IBM System x3850",
116 .matches = {
117 DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
118 DMI_MATCH(DMI_PRODUCT_NAME, "x3850"),
119 },
120 },
121 {
122 .callback = can_skip_ioresource_align,
123 .ident = "IBM System x3950",
124 .matches = {
125 DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
126 DMI_MATCH(DMI_PRODUCT_NAME, "x3950"),
127 },
128 },
129 {}
130};
131
132void __init dmi_check_skip_isa_align(void)
133{
134 dmi_check_system(can_skip_pciprobe_dmi_table);
135}
136
93/* 137/*
94 * Called after each bus is probed, but before its children 138 * Called after each bus is probed, but before its children
95 * are examined. 139 * are examined.
@@ -318,13 +362,16 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
318 {} 362 {}
319}; 363};
320 364
365void __init dmi_check_pciprobe(void)
366{
367 dmi_check_system(pciprobe_dmi_table);
368}
369
321struct pci_bus * __devinit pcibios_scan_root(int busnum) 370struct pci_bus * __devinit pcibios_scan_root(int busnum)
322{ 371{
323 struct pci_bus *bus = NULL; 372 struct pci_bus *bus = NULL;
324 struct pci_sysdata *sd; 373 struct pci_sysdata *sd;
325 374
326 dmi_check_system(pciprobe_dmi_table);
327
328 while ((bus = pci_find_next_bus(bus)) != NULL) { 375 while ((bus = pci_find_next_bus(bus)) != NULL) {
329 if (bus->number == busnum) { 376 if (bus->number == busnum) {
330 /* Already scanned */ 377 /* Already scanned */
@@ -462,6 +509,9 @@ char * __devinit pcibios_setup(char *str)
462 } else if (!strcmp(str, "routeirq")) { 509 } else if (!strcmp(str, "routeirq")) {
463 pci_routeirq = 1; 510 pci_routeirq = 1;
464 return NULL; 511 return NULL;
512 } else if (!strcmp(str, "skip_isa_align")) {
513 pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
514 return NULL;
465 } 515 }
466 return str; 516 return str;
467} 517}
@@ -489,7 +539,7 @@ void pcibios_disable_device (struct pci_dev *dev)
489 pcibios_disable_irq(dev); 539 pcibios_disable_irq(dev);
490} 540}
491 541
492struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) 542struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
493{ 543{
494 struct pci_bus *bus = NULL; 544 struct pci_bus *bus = NULL;
495 struct pci_sysdata *sd; 545 struct pci_sysdata *sd;
@@ -512,7 +562,7 @@ struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
512 return bus; 562 return bus;
513} 563}
514 564
515struct pci_bus *pci_scan_bus_with_sysdata(int busno) 565struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno)
516{ 566{
517 return pci_scan_bus_on_node(busno, &pci_root_ops, -1); 567 return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
518} 568}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index b60b2abd480c..ff3a6a336342 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -502,7 +502,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
502 */ 502 */
503static void fam10h_pci_cfg_space_size(struct pci_dev *dev) 503static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
504{ 504{
505 dev->cfg_size = pci_cfg_space_size_ext(dev, 0); 505 dev->cfg_size = pci_cfg_space_size_ext(dev);
506} 506}
507 507
508DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); 508DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index dd30c6076b5d..e70b9c57b88e 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -33,6 +33,10 @@ static __init int pci_access_init(void)
33 printk(KERN_ERR 33 printk(KERN_ERR
34 "PCI: Fatal: No config space access function found\n"); 34 "PCI: Fatal: No config space access function found\n");
35 35
36 dmi_check_pciprobe();
37
38 dmi_check_skip_isa_align();
39
36 return 0; 40 return 0;
37} 41}
38arch_initcall(pci_access_init); 42arch_initcall(pci_access_init);
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index c58805a92db5..f3972b12c60a 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -38,6 +38,9 @@ enum pci_bf_sort_state {
38 pci_dmi_bf, 38 pci_dmi_bf,
39}; 39};
40 40
41extern void __init dmi_check_pciprobe(void);
42extern void __init dmi_check_skip_isa_align(void);
43
41/* pci-i386.c */ 44/* pci-i386.c */
42 45
43extern unsigned int pcibios_max_latency; 46extern unsigned int pcibios_max_latency;
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 4dceeb1fc5e0..cf058fecfcee 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -162,7 +162,7 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
162 Elf32_Shdr *shdr; 162 Elf32_Shdr *shdr;
163 int i; 163 int i;
164 164
165 BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || 165 BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
166 !elf_check_arch_ia32(ehdr) || 166 !elf_check_arch_ia32(ehdr) ||
167 ehdr->e_type != ET_DYN); 167 ehdr->e_type != ET_DYN);
168 168
diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c
index 4db42bff8c60..69527688f794 100644
--- a/arch/x86/video/fbdev.c
+++ b/arch/x86/video/fbdev.c
@@ -1,5 +1,4 @@
1/* 1/*
2 *
3 * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com> 2 * Copyright (C) 2007 Antonino Daplas <adaplas@gmail.com>
4 * 3 *
5 * This file is subject to the terms and conditions of the GNU General Public 4 * This file is subject to the terms and conditions of the GNU General Public
@@ -29,3 +28,4 @@ int fb_is_primary_device(struct fb_info *info)
29 return retval; 28 return retval;
30} 29}
31EXPORT_SYMBOL(fb_is_primary_device); 30EXPORT_SYMBOL(fb_is_primary_device);
31MODULE_LICENSE("GPL");
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 1c11df9a5f32..9bf2986a2788 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -205,8 +205,8 @@ config SATA_VITESSE
205 If unsure, say N. 205 If unsure, say N.
206 206
207config SATA_INIC162X 207config SATA_INIC162X
208 tristate "Initio 162x SATA support (HIGHLY EXPERIMENTAL)" 208 tristate "Initio 162x SATA support"
209 depends on PCI && EXPERIMENTAL 209 depends on PCI
210 help 210 help
211 This option enables support for Initio 162x Serial ATA. 211 This option enables support for Initio 162x Serial ATA.
212 212
@@ -697,6 +697,15 @@ config PATA_SCC
697 697
698 If unsure, say N. 698 If unsure, say N.
699 699
700config PATA_SCH
701 tristate "Intel SCH PATA support"
702 depends on PCI
703 help
704 This option enables support for Intel SCH PATA on the Intel
705 SCH (US15W, US15L, UL11L) series host controllers.
706
707 If unsure, say N.
708
700config PATA_BF54X 709config PATA_BF54X
701 tristate "Blackfin 54x ATAPI support" 710 tristate "Blackfin 54x ATAPI support"
702 depends on BF542 || BF548 || BF549 711 depends on BF542 || BF548 || BF549
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index b693d829383a..674965fa326d 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_PATA_SIS) += pata_sis.o
67obj-$(CONFIG_PATA_TRIFLEX) += pata_triflex.o 67obj-$(CONFIG_PATA_TRIFLEX) += pata_triflex.o
68obj-$(CONFIG_PATA_IXP4XX_CF) += pata_ixp4xx_cf.o 68obj-$(CONFIG_PATA_IXP4XX_CF) += pata_ixp4xx_cf.o
69obj-$(CONFIG_PATA_SCC) += pata_scc.o 69obj-$(CONFIG_PATA_SCC) += pata_scc.o
70obj-$(CONFIG_PATA_SCH) += pata_sch.o
70obj-$(CONFIG_PATA_BF54X) += pata_bf54x.o 71obj-$(CONFIG_PATA_BF54X) += pata_bf54x.o
71obj-$(CONFIG_PATA_PLATFORM) += pata_platform.o 72obj-$(CONFIG_PATA_PLATFORM) += pata_platform.o
72obj-$(CONFIG_PATA_OF_PLATFORM) += pata_of_platform.o 73obj-$(CONFIG_PATA_OF_PLATFORM) += pata_of_platform.o
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 8cace9aa9c03..97f83fb2ee2e 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1267,9 +1267,7 @@ static int ahci_check_ready(struct ata_link *link)
1267 void __iomem *port_mmio = ahci_port_base(link->ap); 1267 void __iomem *port_mmio = ahci_port_base(link->ap);
1268 u8 status = readl(port_mmio + PORT_TFDATA) & 0xFF; 1268 u8 status = readl(port_mmio + PORT_TFDATA) & 0xFF;
1269 1269
1270 if (!(status & ATA_BUSY)) 1270 return ata_check_ready(status);
1271 return 1;
1272 return 0;
1273} 1271}
1274 1272
1275static int ahci_softreset(struct ata_link *link, unsigned int *class, 1273static int ahci_softreset(struct ata_link *link, unsigned int *class,
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index 47aeccd52fa9..75a406f5e694 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c
@@ -152,6 +152,12 @@ static int ata_generic_init_one(struct pci_dev *dev, const struct pci_device_id
152 if (dev->vendor == PCI_VENDOR_ID_AL) 152 if (dev->vendor == PCI_VENDOR_ID_AL)
153 ata_pci_bmdma_clear_simplex(dev); 153 ata_pci_bmdma_clear_simplex(dev);
154 154
155 if (dev->vendor == PCI_VENDOR_ID_ATI) {
156 int rc = pcim_enable_device(dev);
157 if (rc < 0)
158 return rc;
159 pcim_pin_device(dev);
160 }
155 return ata_pci_sff_init_one(dev, ppi, &generic_sht, NULL); 161 return ata_pci_sff_init_one(dev, ppi, &generic_sht, NULL);
156} 162}
157 163
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index ea2c7649d399..a9027b8fbdd5 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -1348,6 +1348,8 @@ static void __devinit piix_init_sidpr(struct ata_host *host)
1348{ 1348{
1349 struct pci_dev *pdev = to_pci_dev(host->dev); 1349 struct pci_dev *pdev = to_pci_dev(host->dev);
1350 struct piix_host_priv *hpriv = host->private_data; 1350 struct piix_host_priv *hpriv = host->private_data;
1351 struct ata_device *dev0 = &host->ports[0]->link.device[0];
1352 u32 scontrol;
1351 int i; 1353 int i;
1352 1354
1353 /* check for availability */ 1355 /* check for availability */
@@ -1366,6 +1368,29 @@ static void __devinit piix_init_sidpr(struct ata_host *host)
1366 return; 1368 return;
1367 1369
1368 hpriv->sidpr = pcim_iomap_table(pdev)[PIIX_SIDPR_BAR]; 1370 hpriv->sidpr = pcim_iomap_table(pdev)[PIIX_SIDPR_BAR];
1371
1372 /* SCR access via SIDPR doesn't work on some configurations.
1373 * Give it a test drive by inhibiting power save modes, which
1374 * we'll do anyway.
1375 */
1376 scontrol = piix_sidpr_read(dev0, SCR_CONTROL);
1377
1378 /* if IPM is already 3, SCR access is probably working. Don't
1379 * un-inhibit power save modes as BIOS might have inhibited
1380 * them for a reason.
1381 */
1382 if ((scontrol & 0xf00) != 0x300) {
1383 scontrol |= 0x300;
1384 piix_sidpr_write(dev0, SCR_CONTROL, scontrol);
1385 scontrol = piix_sidpr_read(dev0, SCR_CONTROL);
1386
1387 if ((scontrol & 0xf00) != 0x300) {
1388 dev_printk(KERN_INFO, host->dev, "SCR access via "
1389 "SIDPR is available but doesn't work\n");
1390 return;
1391 }
1392 }
1393
1369 host->ports[0]->ops = &piix_sidpr_sata_ops; 1394 host->ports[0]->ops = &piix_sidpr_sata_ops;
1370 host->ports[1]->ops = &piix_sidpr_sata_ops; 1395 host->ports[1]->ops = &piix_sidpr_sata_ops;
1371} 1396}
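
The read-back test works because bits 11:8 of SControl form the IPM field, which selects the interface power-management transitions the host may enter; 0x3 there forbids both PARTIAL and SLUMBER. If the written value doesn't stick, SIDPR register access is evidently broken. A sketch of the field test:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t scontrol = 0x000;	/* as read back via SIDPR */

	if ((scontrol & 0xf00) != 0x300)
		scontrol |= 0x300;	/* inhibit PARTIAL and SLUMBER */
	printf("ipm field = %#x\n", (scontrol >> 8) & 0xf);
	return 0;
}
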
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 3bc488538204..927b692d723c 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6292,6 +6292,7 @@ EXPORT_SYMBOL_GPL(ata_eh_freeze_port);
6292EXPORT_SYMBOL_GPL(ata_eh_thaw_port); 6292EXPORT_SYMBOL_GPL(ata_eh_thaw_port);
6293EXPORT_SYMBOL_GPL(ata_eh_qc_complete); 6293EXPORT_SYMBOL_GPL(ata_eh_qc_complete);
6294EXPORT_SYMBOL_GPL(ata_eh_qc_retry); 6294EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
6295EXPORT_SYMBOL_GPL(ata_eh_analyze_ncq_error);
6295EXPORT_SYMBOL_GPL(ata_do_eh); 6296EXPORT_SYMBOL_GPL(ata_do_eh);
6296EXPORT_SYMBOL_GPL(ata_std_error_handler); 6297EXPORT_SYMBOL_GPL(ata_std_error_handler);
6297 6298
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 61dcd0026c64..62e033146bed 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1357,7 +1357,7 @@ static void ata_eh_analyze_serror(struct ata_link *link)
1357 * LOCKING: 1357 * LOCKING:
1358 * Kernel thread context (may sleep). 1358 * Kernel thread context (may sleep).
1359 */ 1359 */
1360static void ata_eh_analyze_ncq_error(struct ata_link *link) 1360void ata_eh_analyze_ncq_error(struct ata_link *link)
1361{ 1361{
1362 struct ata_port *ap = link->ap; 1362 struct ata_port *ap = link->ap;
1363 struct ata_eh_context *ehc = &link->eh_context; 1363 struct ata_eh_context *ehc = &link->eh_context;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 2ec65a8fda79..3c2d2289f85e 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -314,11 +314,7 @@ static int ata_sff_check_ready(struct ata_link *link)
314{ 314{
315 u8 status = link->ap->ops->sff_check_status(link->ap); 315 u8 status = link->ap->ops->sff_check_status(link->ap);
316 316
317 if (!(status & ATA_BUSY)) 317 return ata_check_ready(status);
318 return 1;
319 if (status == 0xff)
320 return -ENODEV;
321 return 0;
322} 318}
323 319
324/** 320/**
diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c
index c5f91e629945..fbe605711554 100644
--- a/drivers/ata/pata_acpi.c
+++ b/drivers/ata/pata_acpi.c
@@ -259,6 +259,12 @@ static int pacpi_init_one (struct pci_dev *pdev, const struct pci_device_id *id)
259 .port_ops = &pacpi_ops, 259 .port_ops = &pacpi_ops,
260 }; 260 };
261 const struct ata_port_info *ppi[] = { &info, NULL }; 261 const struct ata_port_info *ppi[] = { &info, NULL };
262 if (pdev->vendor == PCI_VENDOR_ID_ATI) {
263 int rc = pcim_enable_device(pdev);
264 if (rc < 0)
265 return rc;
266 pcim_pin_device(pdev);
267 }
262 return ata_pci_sff_init_one(pdev, ppi, &pacpi_sht, NULL); 268 return ata_pci_sff_init_one(pdev, ppi, &pacpi_sht, NULL);
263} 269}
264 270
diff --git a/drivers/ata/pata_sch.c b/drivers/ata/pata_sch.c
new file mode 100644
index 000000000000..c8cc027789fe
--- /dev/null
+++ b/drivers/ata/pata_sch.c
@@ -0,0 +1,206 @@
1/*
2 * pata_sch.c - Intel SCH PATA controllers
3 *
4 * Copyright (c) 2008 Alek Du <alek.du@intel.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 *
19 */
20
21/*
22 * Supports:
23 * Intel SCH (AF82US15W, AF82US15L, AF82UL11L) chipsets -- see spec at:
24 * http://download.intel.com/design/chipsets/embedded/datashts/319537.pdf
25 */
26
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/pci.h>
30#include <linux/init.h>
31#include <linux/blkdev.h>
32#include <linux/delay.h>
33#include <linux/device.h>
34#include <scsi/scsi_host.h>
35#include <linux/libata.h>
36#include <linux/dmi.h>
37
38#define DRV_NAME "pata_sch"
39#define DRV_VERSION "0.2"
40
41/* see SCH datasheet page 351 */
42enum {
43 D0TIM = 0x80, /* Device 0 Timing Register */
44 D1TIM = 0x84, /* Device 1 Timing Register */
45 PM = 0x07, /* PIO Mode Bit Mask */
46 MDM = (0x03 << 8), /* Multi-word DMA Mode Bit Mask */
47 UDM = (0x07 << 16), /* Ultra DMA Mode Bit Mask */
48 PPE = (1 << 30), /* Prefetch/Post Enable */
49 USD = (1 << 31), /* Use Synchronous DMA */
50};
51
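
Given those masks, each D0TIM/D1TIM dword packs the PIO mode into bits 2:0, the MWDMA mode into bits 9:8 and the UDMA mode into bits 18:16, with PPE and USD as enables; sch_set_piomode() and sch_set_dmamode() below perform exactly this read-modify-write on PCI config space. A sketch of a fully composed timing value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t tim = 0;

	tim |= 4;			/* PIO4 in the PM field */
	tim |= (5u << 16) | (1u << 31);	/* UDMA5 with USD set */
	tim |= 1u << 30;		/* PPE for an ATA disk */
	printf("D0TIM = %#x\n", tim);
	return 0;
}
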
52static int sch_init_one(struct pci_dev *pdev,
53 const struct pci_device_id *ent);
54static void sch_set_piomode(struct ata_port *ap, struct ata_device *adev);
55static void sch_set_dmamode(struct ata_port *ap, struct ata_device *adev);
56
57static const struct pci_device_id sch_pci_tbl[] = {
58 /* Intel SCH PATA Controller */
59 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SCH_IDE), 0 },
60 { } /* terminate list */
61};
62
63static struct pci_driver sch_pci_driver = {
64 .name = DRV_NAME,
65 .id_table = sch_pci_tbl,
66 .probe = sch_init_one,
67 .remove = ata_pci_remove_one,
68#ifdef CONFIG_PM
69 .suspend = ata_pci_device_suspend,
70 .resume = ata_pci_device_resume,
71#endif
72};
73
74static struct scsi_host_template sch_sht = {
75 ATA_BMDMA_SHT(DRV_NAME),
76};
77
78static struct ata_port_operations sch_pata_ops = {
79 .inherits = &ata_bmdma_port_ops,
80 .cable_detect = ata_cable_unknown,
81 .set_piomode = sch_set_piomode,
82 .set_dmamode = sch_set_dmamode,
83};
84
85static struct ata_port_info sch_port_info = {
86 .flags = 0,
87 .pio_mask = ATA_PIO4, /* pio0-4 */
88 .mwdma_mask = ATA_MWDMA2, /* mwdma0-2 */
89 .udma_mask = ATA_UDMA5, /* udma0-5 */
90 .port_ops = &sch_pata_ops,
91};
92
93MODULE_AUTHOR("Alek Du <alek.du@intel.com>");
94MODULE_DESCRIPTION("SCSI low-level driver for Intel SCH PATA controllers");
95MODULE_LICENSE("GPL");
96MODULE_DEVICE_TABLE(pci, sch_pci_tbl);
97MODULE_VERSION(DRV_VERSION);
98
99/**
100 * sch_set_piomode - Initialize host controller PATA PIO timings
101 * @ap: Port whose timings we are configuring
102 * @adev: ATA device
103 *
104 * Set PIO mode for device, in host controller PCI config space.
105 *
106 * LOCKING:
107 * None (inherited from caller).
108 */
109
110static void sch_set_piomode(struct ata_port *ap, struct ata_device *adev)
111{
112 unsigned int pio = adev->pio_mode - XFER_PIO_0;
113 struct pci_dev *dev = to_pci_dev(ap->host->dev);
114 unsigned int port = adev->devno ? D1TIM : D0TIM;
115 unsigned int data;
116
117 pci_read_config_dword(dev, port, &data);
118 /* see SCH datasheet page 351 */
119 /* set PIO mode */
120 data &= ~(PM | PPE);
121 data |= pio;
122 /* enable PPE for block device */
123 if (adev->class == ATA_DEV_ATA)
124 data |= PPE;
125 pci_write_config_dword(dev, port, data);
126}
127
128/**
129 * sch_set_dmamode - Initialize host controller PATA DMA timings
130 * @ap: Port whose timings we are configuring
131 * @adev: ATA device
132 *
133 * Set MW/UDMA mode for device, in host controller PCI config space.
134 *
135 * LOCKING:
136 * None (inherited from caller).
137 */
138
139static void sch_set_dmamode(struct ata_port *ap, struct ata_device *adev)
140{
141 unsigned int dma_mode = adev->dma_mode;
142 struct pci_dev *dev = to_pci_dev(ap->host->dev);
143 unsigned int port = adev->devno ? D1TIM : D0TIM;
144 unsigned int data;
145
146 pci_read_config_dword(dev, port, &data);
147 /* see SCH datasheet page 351 */
148 if (dma_mode >= XFER_UDMA_0) {
149 /* enable Synchronous DMA mode */
150 data |= USD;
151 data &= ~UDM;
152 data |= (dma_mode - XFER_UDMA_0) << 16;
153 } else { /* must be MWDMA mode, since we masked SWDMA already */
154 data &= ~(USD | MDM);
155 data |= (dma_mode - XFER_MW_DMA_0) << 8;
156 }
157 pci_write_config_dword(dev, port, data);
158}
159
160/**
161 * sch_init_one - Register SCH ATA PCI device with kernel services
162 * @pdev: PCI device to register
163 * @ent: Entry in sch_pci_tbl matching with @pdev
164 *
165 * LOCKING:
166 * Inherited from PCI layer (may sleep).
167 *
168 * RETURNS:
169 * Zero on success, or -ERRNO value.
170 */
171
172static int __devinit sch_init_one(struct pci_dev *pdev,
173 const struct pci_device_id *ent)
174{
175 static int printed_version;
176 const struct ata_port_info *ppi[] = { &sch_port_info, NULL };
177 struct ata_host *host;
178 int rc;
179
180 if (!printed_version++)
181 dev_printk(KERN_DEBUG, &pdev->dev,
182 "version " DRV_VERSION "\n");
183
184 /* enable device and prepare host */
185 rc = pcim_enable_device(pdev);
186 if (rc)
187 return rc;
188 rc = ata_pci_sff_prepare_host(pdev, ppi, &host);
189 if (rc)
190 return rc;
191 pci_set_master(pdev);
192 return ata_pci_sff_activate_host(host, ata_sff_interrupt, &sch_sht);
193}
194
195static int __init sch_init(void)
196{
197 return pci_register_driver(&sch_pci_driver);
198}
199
200static void __exit sch_exit(void)
201{
202 pci_unregister_driver(&sch_pci_driver);
203}
204
205module_init(sch_init);
206module_exit(sch_exit);
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index d27bb9a2568f..3ead02fe379e 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -10,13 +10,33 @@
10 * right. Documentation is available at initio's website but it only 10 * right. Documentation is available at initio's website but it only
11 * documents registers (not programming model). 11 * documents registers (not programming model).
12 * 12 *
13 * - ATA disks work. 13 * This driver has an interesting history. The first version was
14 * - Hotplug works. 14 * written from the documentation and a 2.4 IDE driver posted by a
15 * - ATAPI read works but burning doesn't. This thing is really 15 * Taiwanese company; it didn't use any IDMA features and couldn't
16 * peculiar about ATAPI and I couldn't figure out how ATAPI PIO and 16 * handle LBA48. The resulting driver couldn't handle LBA48 devices
17 * ATAPI DMA WRITE should be programmed. If you've got a clue, be 17 * either, making it pretty useless.
18 * my guest. 18 *
19 * - Both STR and STD work. 19 * After a while, Initio picked the driver up, renamed it to
20 * sata_initio162x, updated it to use IDMA for ATA DMA commands and
21 * posted it on their website. It used IDMA only for ATA_PROT_DMA,
22 * and attaching both devices while issuing IDMA and non-IDMA commands
23 * simultaneously broke it due to a PIRQ masking interaction, but it
24 * did show how to use the IDMA (ADMA plus some Initio-specific
25 * twists) engine.
26 *
27 * Then, I picked up their changes again and here's the usable driver,
28 * which uses IDMA for everything. Everything works now, including
29 * LBA48, CD/DVD burning, suspend/resume and hotplug. There are some
30 * issues, though: the result TF is not reported properly, NCQ isn't
31 * supported yet, and CD/DVD writing works via a DMA-assisted PIO
32 * protocol (which, for native SATA devices, shouldn't cause any
33 * noticeable difference).
34 *
35 * Anyway, here is finally a working driver for the inic162x. Enjoy!
36 *
37 * Initio: if you want to improve the driver regarding result TF
38 * access and other areas, please feel free to contact me. I'll be
39 * happy to assist.
20 */ 40 */
21 41
22#include <linux/kernel.h> 42#include <linux/kernel.h>
@@ -28,13 +48,19 @@
28#include <scsi/scsi_device.h> 48#include <scsi/scsi_device.h>
29 49
30#define DRV_NAME "sata_inic162x" 50#define DRV_NAME "sata_inic162x"
31#define DRV_VERSION "0.3" 51#define DRV_VERSION "0.4"
32 52
33enum { 53enum {
34 MMIO_BAR = 5, 54 MMIO_BAR_PCI = 5,
55 MMIO_BAR_CARDBUS = 1,
35 56
36 NR_PORTS = 2, 57 NR_PORTS = 2,
37 58
59 IDMA_CPB_TBL_SIZE = 4 * 32,
60
61 INIC_DMA_BOUNDARY = 0xffffff,
62
63 HOST_ACTRL = 0x08,
38 HOST_CTL = 0x7c, 64 HOST_CTL = 0x7c,
39 HOST_STAT = 0x7e, 65 HOST_STAT = 0x7e,
40 HOST_IRQ_STAT = 0xbc, 66 HOST_IRQ_STAT = 0xbc,
@@ -43,22 +69,37 @@ enum {
43 PORT_SIZE = 0x40, 69 PORT_SIZE = 0x40,
44 70
45 /* registers for ATA TF operation */ 71 /* registers for ATA TF operation */
46 PORT_TF = 0x00, 72 PORT_TF_DATA = 0x00,
47 PORT_ALT_STAT = 0x08, 73 PORT_TF_FEATURE = 0x01,
74 PORT_TF_NSECT = 0x02,
75 PORT_TF_LBAL = 0x03,
76 PORT_TF_LBAM = 0x04,
77 PORT_TF_LBAH = 0x05,
78 PORT_TF_DEVICE = 0x06,
79 PORT_TF_COMMAND = 0x07,
80 PORT_TF_ALT_STAT = 0x08,
48 PORT_IRQ_STAT = 0x09, 81 PORT_IRQ_STAT = 0x09,
49 PORT_IRQ_MASK = 0x0a, 82 PORT_IRQ_MASK = 0x0a,
50 PORT_PRD_CTL = 0x0b, 83 PORT_PRD_CTL = 0x0b,
51 PORT_PRD_ADDR = 0x0c, 84 PORT_PRD_ADDR = 0x0c,
52 PORT_PRD_XFERLEN = 0x10, 85 PORT_PRD_XFERLEN = 0x10,
86 PORT_CPB_CPBLAR = 0x18,
87 PORT_CPB_PTQFIFO = 0x1c,
53 88
54 /* IDMA register */ 89 /* IDMA register */
55 PORT_IDMA_CTL = 0x14, 90 PORT_IDMA_CTL = 0x14,
91 PORT_IDMA_STAT = 0x16,
92
93 PORT_RPQ_FIFO = 0x1e,
94 PORT_RPQ_CNT = 0x1f,
56 95
57 PORT_SCR = 0x20, 96 PORT_SCR = 0x20,
58 97
59 /* HOST_CTL bits */ 98 /* HOST_CTL bits */
60 HCTL_IRQOFF = (1 << 8), /* global IRQ off */ 99 HCTL_IRQOFF = (1 << 8), /* global IRQ off */
61 HCTL_PWRDWN = (1 << 13), /* power down PHYs */ 100 HCTL_FTHD0 = (1 << 10), /* fifo threshold 0 */
101 HCTL_FTHD1 = (1 << 11), /* fifo threshold 1 */
102 HCTL_PWRDWN = (1 << 12), /* power down PHYs */
62 HCTL_SOFTRST = (1 << 13), /* global reset (no phy reset) */ 103 HCTL_SOFTRST = (1 << 13), /* global reset (no phy reset) */
63 HCTL_RPGSEL = (1 << 15), /* register page select */ 104 HCTL_RPGSEL = (1 << 15), /* register page select */
64 105
@@ -81,9 +122,7 @@ enum {
81 PIRQ_PENDING = (1 << 7), /* port IRQ pending (STAT only) */ 122 PIRQ_PENDING = (1 << 7), /* port IRQ pending (STAT only) */
82 123
83 PIRQ_ERR = PIRQ_OFFLINE | PIRQ_ONLINE | PIRQ_FATAL, 124 PIRQ_ERR = PIRQ_OFFLINE | PIRQ_ONLINE | PIRQ_FATAL,
84 125 PIRQ_MASK_DEFAULT = PIRQ_REPLY | PIRQ_ATA,
85 PIRQ_MASK_DMA_READ = PIRQ_REPLY | PIRQ_ATA,
86 PIRQ_MASK_OTHER = PIRQ_REPLY | PIRQ_COMPLETE,
87 PIRQ_MASK_FREEZE = 0xff, 126 PIRQ_MASK_FREEZE = 0xff,
88 127
89 /* PORT_PRD_CTL bits */ 128 /* PORT_PRD_CTL bits */
@@ -96,20 +135,104 @@ enum {
96 IDMA_CTL_RST_IDMA = (1 << 5), /* reset IDMA machinery */ 135 IDMA_CTL_RST_IDMA = (1 << 5), /* reset IDMA machinery */
97 IDMA_CTL_GO = (1 << 7), /* IDMA mode go */ 136 IDMA_CTL_GO = (1 << 7), /* IDMA mode go */
98 IDMA_CTL_ATA_NIEN = (1 << 8), /* ATA IRQ disable */ 137 IDMA_CTL_ATA_NIEN = (1 << 8), /* ATA IRQ disable */
138
139 /* PORT_IDMA_STAT bits */
140 IDMA_STAT_PERR = (1 << 0), /* PCI ERROR MODE */
141 IDMA_STAT_CPBERR = (1 << 1), /* ADMA CPB error */
142 IDMA_STAT_LGCY = (1 << 3), /* ADMA legacy */
143 IDMA_STAT_UIRQ = (1 << 4), /* ADMA unsolicited irq */
144 IDMA_STAT_STPD = (1 << 5), /* ADMA stopped */
145 IDMA_STAT_PSD = (1 << 6), /* ADMA pause */
146 IDMA_STAT_DONE = (1 << 7), /* ADMA done */
147
148 IDMA_STAT_ERR = IDMA_STAT_PERR | IDMA_STAT_CPBERR,
149
150 /* CPB Control Flags */
151 CPB_CTL_VALID = (1 << 0), /* CPB valid */
152 CPB_CTL_QUEUED = (1 << 1), /* queued command */
153 CPB_CTL_DATA = (1 << 2), /* data, rsvd in datasheet */
154 CPB_CTL_IEN = (1 << 3), /* PCI interrupt enable */
155 CPB_CTL_DEVDIR = (1 << 4), /* device direction control */
156
157 /* CPB Response Flags */
158 CPB_RESP_DONE = (1 << 0), /* ATA command complete */
159 CPB_RESP_REL = (1 << 1), /* ATA release */
160 CPB_RESP_IGNORED = (1 << 2), /* CPB ignored */
161 CPB_RESP_ATA_ERR = (1 << 3), /* ATA command error */
162 CPB_RESP_SPURIOUS = (1 << 4), /* ATA spurious interrupt error */
163 CPB_RESP_UNDERFLOW = (1 << 5), /* APRD deficiency length error */
164 CPB_RESP_OVERFLOW = (1 << 6), /* APRD excess length error */
165 CPB_RESP_CPB_ERR = (1 << 7), /* CPB error flag */
166
167 /* PRD Control Flags */
168 PRD_DRAIN = (1 << 1), /* ignore data excess */
169 PRD_CDB = (1 << 2), /* atapi packet command pointer */
170 PRD_DIRECT_INTR = (1 << 3), /* direct interrupt */
171 PRD_DMA = (1 << 4), /* data transfer method */
172 PRD_WRITE = (1 << 5), /* data dir, rsvd in datasheet */
173 PRD_IOM = (1 << 6), /* io/memory transfer */
174 PRD_END = (1 << 7), /* APRD chain end */
99}; 175};
100 176
177/* Command Parameter Block */
178struct inic_cpb {
179 u8 resp_flags; /* Response Flags */
180 u8 error; /* ATA Error */
181 u8 status; /* ATA Status */
182 u8 ctl_flags; /* Control Flags */
183 __le32 len; /* Total Transfer Length */
184 __le32 prd; /* First PRD pointer */
185 u8 rsvd[4];
186 /* 16 bytes */
187 u8 feature; /* ATA Feature */
188 u8 hob_feature; /* ATA Ex. Feature */
189 u8 device; /* ATA Device/Head */
190 u8 mirctl; /* Mirror Control */
191 u8 nsect; /* ATA Sector Count */
192 u8 hob_nsect; /* ATA Ex. Sector Count */
193 u8 lbal; /* ATA Sector Number */
194 u8 hob_lbal; /* ATA Ex. Sector Number */
195 u8 lbam; /* ATA Cylinder Low */
196 u8 hob_lbam; /* ATA Ex. Cylinder Low */
197 u8 lbah; /* ATA Cylinder High */
198 u8 hob_lbah; /* ATA Ex. Cylinder High */
199 u8 command; /* ATA Command */
200 u8 ctl; /* ATA Control */
201 u8 slave_error; /* Slave ATA Error */
202 u8 slave_status; /* Slave ATA Status */
203 /* 32 bytes */
204} __packed;
205
206/* Physical Region Descriptor */
207struct inic_prd {
208 __le32 mad; /* Physical Memory Address */
209 __le16 len; /* Transfer Length */
210 u8 rsvd;
211 u8 flags; /* Control Flags */
212} __packed;
213
214struct inic_pkt {
215 struct inic_cpb cpb;
216 struct inic_prd prd[LIBATA_MAX_PRD + 1]; /* + 1 for cdb */
217 u8 cdb[ATAPI_CDB_LEN];
218} __packed;
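/*
 * The controller consumes these structures directly from memory, so
 * their sizes must match the layout comments above (32 bytes for the
 * CPB, 8 bytes per PRD).  A minimal compile-time sanity check one
 * could add (hypothetical helper, needs <linux/bug.h>):
 */
static inline void inic_pkt_layout_check(void)
{
	BUILD_BUG_ON(sizeof(struct inic_cpb) != 32);
	BUILD_BUG_ON(sizeof(struct inic_prd) != 8);
}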
219
101struct inic_host_priv { 220struct inic_host_priv {
102 u16 cached_hctl; 221 void __iomem *mmio_base;
222 u16 cached_hctl;
103}; 223};
104 224
105struct inic_port_priv { 225struct inic_port_priv {
106 u8 dfl_prdctl; 226 struct inic_pkt *pkt;
107 u8 cached_prdctl; 227 dma_addr_t pkt_dma;
108 u8 cached_pirq_mask; 228 u32 *cpb_tbl;
229 dma_addr_t cpb_tbl_dma;
109}; 230};
110 231
111static struct scsi_host_template inic_sht = { 232static struct scsi_host_template inic_sht = {
112 ATA_BMDMA_SHT(DRV_NAME), 233 ATA_BASE_SHT(DRV_NAME),
234 .sg_tablesize = LIBATA_MAX_PRD, /* maybe it can be larger? */
235 .dma_boundary = INIC_DMA_BOUNDARY,
113}; 236};
114 237
115static const int scr_map[] = { 238static const int scr_map[] = {
@@ -120,54 +243,34 @@ static const int scr_map[] = {
120 243
121static void __iomem *inic_port_base(struct ata_port *ap) 244static void __iomem *inic_port_base(struct ata_port *ap)
122{ 245{
123 return ap->host->iomap[MMIO_BAR] + ap->port_no * PORT_SIZE; 246 struct inic_host_priv *hpriv = ap->host->private_data;
124}
125
126static void __inic_set_pirq_mask(struct ata_port *ap, u8 mask)
127{
128 void __iomem *port_base = inic_port_base(ap);
129 struct inic_port_priv *pp = ap->private_data;
130 247
131 writeb(mask, port_base + PORT_IRQ_MASK); 248 return hpriv->mmio_base + ap->port_no * PORT_SIZE;
132 pp->cached_pirq_mask = mask;
133}
134
135static void inic_set_pirq_mask(struct ata_port *ap, u8 mask)
136{
137 struct inic_port_priv *pp = ap->private_data;
138
139 if (pp->cached_pirq_mask != mask)
140 __inic_set_pirq_mask(ap, mask);
141} 249}
142 250
143static void inic_reset_port(void __iomem *port_base) 251static void inic_reset_port(void __iomem *port_base)
144{ 252{
145 void __iomem *idma_ctl = port_base + PORT_IDMA_CTL; 253 void __iomem *idma_ctl = port_base + PORT_IDMA_CTL;
146 u16 ctl;
147 254
148 ctl = readw(idma_ctl); 255 /* stop IDMA engine */
149 ctl &= ~(IDMA_CTL_RST_IDMA | IDMA_CTL_ATA_NIEN | IDMA_CTL_GO); 256 readw(idma_ctl); /* flush */
257 msleep(1);
150 258
151 /* mask IRQ and assert reset */ 259 /* mask IRQ and assert reset */
152 writew(ctl | IDMA_CTL_RST_IDMA | IDMA_CTL_ATA_NIEN, idma_ctl); 260 writew(IDMA_CTL_RST_IDMA, idma_ctl);
153 readw(idma_ctl); /* flush */ 261 readw(idma_ctl); /* flush */
154
155 /* give it some time */
156 msleep(1); 262 msleep(1);
157 263
158 /* release reset */ 264 /* release reset */
159 writew(ctl | IDMA_CTL_ATA_NIEN, idma_ctl); 265 writew(0, idma_ctl);
160 266
161 /* clear irq */ 267 /* clear irq */
162 writeb(0xff, port_base + PORT_IRQ_STAT); 268 writeb(0xff, port_base + PORT_IRQ_STAT);
163
164 /* reenable ATA IRQ, turn off IDMA mode */
165 writew(ctl, idma_ctl);
166} 269}
167 270
168static int inic_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val) 271static int inic_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val)
169{ 272{
170 void __iomem *scr_addr = ap->ioaddr.scr_addr; 273 void __iomem *scr_addr = inic_port_base(ap) + PORT_SCR;
171 void __iomem *addr; 274 void __iomem *addr;
172 275
173 if (unlikely(sc_reg >= ARRAY_SIZE(scr_map))) 276 if (unlikely(sc_reg >= ARRAY_SIZE(scr_map)))
@@ -184,120 +287,126 @@ static int inic_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val)
184 287
185static int inic_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val) 288static int inic_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val)
186{ 289{
187 void __iomem *scr_addr = ap->ioaddr.scr_addr; 290 void __iomem *scr_addr = inic_port_base(ap) + PORT_SCR;
188 void __iomem *addr;
189 291
190 if (unlikely(sc_reg >= ARRAY_SIZE(scr_map))) 292 if (unlikely(sc_reg >= ARRAY_SIZE(scr_map)))
191 return -EINVAL; 293 return -EINVAL;
192 294
193 addr = scr_addr + scr_map[sc_reg] * 4;
194 writel(val, scr_addr + scr_map[sc_reg] * 4); 295 writel(val, scr_addr + scr_map[sc_reg] * 4);
195 return 0; 296 return 0;
196} 297}
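/*
 * With the scr_map[] indirection above, each SCR register lives at
 * PORT_SCR + scr_map[reg] * 4 inside the port's MMIO window.  As an
 * illustration, reading SStatus boils down to (hypothetical helper,
 * assuming the scr_map[] shown earlier):
 */
static inline u32 inic_read_sstatus(struct ata_port *ap)
{
	return readl(inic_port_base(ap) + PORT_SCR + scr_map[SCR_STATUS] * 4);
}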
197 298
198/* 299static void inic_stop_idma(struct ata_port *ap)
199 * In TF mode, inic162x is very similar to SFF device. TF registers
200 * function the same. DMA engine behaves similary using the same PRD
201 * format as BMDMA but different command register, interrupt and event
202 * notification methods are used. The following inic_bmdma_*()
203 * functions do the impedance matching.
204 */
205static void inic_bmdma_setup(struct ata_queued_cmd *qc)
206{ 300{
207 struct ata_port *ap = qc->ap;
208 struct inic_port_priv *pp = ap->private_data;
209 void __iomem *port_base = inic_port_base(ap); 301 void __iomem *port_base = inic_port_base(ap);
210 int rw = qc->tf.flags & ATA_TFLAG_WRITE;
211
212 /* make sure device sees PRD table writes */
213 wmb();
214
215 /* load transfer length */
216 writel(qc->nbytes, port_base + PORT_PRD_XFERLEN);
217
218 /* turn on DMA and specify data direction */
219 pp->cached_prdctl = pp->dfl_prdctl | PRD_CTL_DMAEN;
220 if (!rw)
221 pp->cached_prdctl |= PRD_CTL_WR;
222 writeb(pp->cached_prdctl, port_base + PORT_PRD_CTL);
223 302
224 /* issue r/w command */ 303 readb(port_base + PORT_RPQ_FIFO);
225 ap->ops->sff_exec_command(ap, &qc->tf); 304 readb(port_base + PORT_RPQ_CNT);
305 writew(0, port_base + PORT_IDMA_CTL);
226} 306}
227 307
228static void inic_bmdma_start(struct ata_queued_cmd *qc) 308static void inic_host_err_intr(struct ata_port *ap, u8 irq_stat, u16 idma_stat)
229{ 309{
230 struct ata_port *ap = qc->ap; 310 struct ata_eh_info *ehi = &ap->link.eh_info;
231 struct inic_port_priv *pp = ap->private_data; 311 struct inic_port_priv *pp = ap->private_data;
232 void __iomem *port_base = inic_port_base(ap); 312 struct inic_cpb *cpb = &pp->pkt->cpb;
313 bool freeze = false;
233 314
234 /* start host DMA transaction */ 315 ata_ehi_clear_desc(ehi);
235 pp->cached_prdctl |= PRD_CTL_START; 316 ata_ehi_push_desc(ehi, "irq_stat=0x%x idma_stat=0x%x",
236 writeb(pp->cached_prdctl, port_base + PORT_PRD_CTL); 317 irq_stat, idma_stat);
237}
238 318
239static void inic_bmdma_stop(struct ata_queued_cmd *qc) 319 inic_stop_idma(ap);
240{
241 struct ata_port *ap = qc->ap;
242 struct inic_port_priv *pp = ap->private_data;
243 void __iomem *port_base = inic_port_base(ap);
244 320
245 /* stop DMA engine */ 321 if (irq_stat & (PIRQ_OFFLINE | PIRQ_ONLINE)) {
246 writeb(pp->dfl_prdctl, port_base + PORT_PRD_CTL); 322 ata_ehi_push_desc(ehi, "hotplug");
247} 323 ata_ehi_hotplugged(ehi);
324 freeze = true;
325 }
248 326
249static u8 inic_bmdma_status(struct ata_port *ap) 327 if (idma_stat & IDMA_STAT_PERR) {
250{ 328 ata_ehi_push_desc(ehi, "PCI error");
251 /* event is already verified by the interrupt handler */ 329 freeze = true;
252 return ATA_DMA_INTR; 330 }
331
332 if (idma_stat & IDMA_STAT_CPBERR) {
333 ata_ehi_push_desc(ehi, "CPB error");
334
335 if (cpb->resp_flags & CPB_RESP_IGNORED) {
336 __ata_ehi_push_desc(ehi, " ignored");
337 ehi->err_mask |= AC_ERR_INVALID;
338 freeze = true;
339 }
340
341 if (cpb->resp_flags & CPB_RESP_ATA_ERR)
342 ehi->err_mask |= AC_ERR_DEV;
343
344 if (cpb->resp_flags & CPB_RESP_SPURIOUS) {
345 __ata_ehi_push_desc(ehi, " spurious-intr");
346 ehi->err_mask |= AC_ERR_HSM;
347 freeze = true;
348 }
349
350 if (cpb->resp_flags &
351 (CPB_RESP_UNDERFLOW | CPB_RESP_OVERFLOW)) {
352 __ata_ehi_push_desc(ehi, " data-over/underflow");
353 ehi->err_mask |= AC_ERR_HSM;
354 freeze = true;
355 }
356 }
357
358 if (freeze)
359 ata_port_freeze(ap);
360 else
361 ata_port_abort(ap);
253} 362}
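/*
 * Note the freeze/abort split at the end of inic_host_err_intr():
 * conditions that leave port state suspect (hotplug, PCI error,
 * ignored or spurious CPBs, length mismatches) freeze the port so EH
 * resets it, while a plain device error (CPB_RESP_ATA_ERR alone) only
 * aborts the outstanding commands.
 */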
254 363
255static void inic_host_intr(struct ata_port *ap) 364static void inic_host_intr(struct ata_port *ap)
256{ 365{
257 void __iomem *port_base = inic_port_base(ap); 366 void __iomem *port_base = inic_port_base(ap);
258 struct ata_eh_info *ehi = &ap->link.eh_info; 367 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, ap->link.active_tag);
259 u8 irq_stat; 368 u8 irq_stat;
369 u16 idma_stat;
260 370
261 /* fetch and clear irq */ 371 /* read and clear IRQ status */
262 irq_stat = readb(port_base + PORT_IRQ_STAT); 372 irq_stat = readb(port_base + PORT_IRQ_STAT);
263 writeb(irq_stat, port_base + PORT_IRQ_STAT); 373 writeb(irq_stat, port_base + PORT_IRQ_STAT);
374 idma_stat = readw(port_base + PORT_IDMA_STAT);
264 375
265 if (likely(!(irq_stat & PIRQ_ERR))) { 376 if (unlikely((irq_stat & PIRQ_ERR) || (idma_stat & IDMA_STAT_ERR)))
266 struct ata_queued_cmd *qc = 377 inic_host_err_intr(ap, irq_stat, idma_stat);
267 ata_qc_from_tag(ap, ap->link.active_tag);
268 378
269 if (unlikely(!qc || (qc->tf.flags & ATA_TFLAG_POLLING))) { 379 if (unlikely(!qc))
270 ap->ops->sff_check_status(ap); /* clear ATA interrupt */ 380 goto spurious;
271 return;
272 }
273 381
274 if (likely(ata_sff_host_intr(ap, qc))) 382 if (likely(idma_stat & IDMA_STAT_DONE)) {
275 return; 383 inic_stop_idma(ap);
276 384
277 ap->ops->sff_check_status(ap); /* clear ATA interrupt */ 385 /* Depending on circumstances, device error
278 ata_port_printk(ap, KERN_WARNING, "unhandled " 386 * isn't reported by IDMA, check it explicitly.
279 "interrupt, irq_stat=%x\n", irq_stat); 387 */
388 if (unlikely(readb(port_base + PORT_TF_COMMAND) &
389 (ATA_DF | ATA_ERR)))
390 qc->err_mask |= AC_ERR_DEV;
391
392 ata_qc_complete(qc);
280 return; 393 return;
281 } 394 }
282 395
283 /* error */ 396 spurious:
284 ata_ehi_push_desc(ehi, "irq_stat=0x%x", irq_stat); 397 ata_port_printk(ap, KERN_WARNING, "unhandled interrupt: "
285 398 "cmd=0x%x irq_stat=0x%x idma_stat=0x%x\n",
286 if (irq_stat & (PIRQ_OFFLINE | PIRQ_ONLINE)) { 399 qc ? qc->tf.command : 0xff, irq_stat, idma_stat);
287 ata_ehi_hotplugged(ehi);
288 ata_port_freeze(ap);
289 } else
290 ata_port_abort(ap);
291} 400}
292 401
293static irqreturn_t inic_interrupt(int irq, void *dev_instance) 402static irqreturn_t inic_interrupt(int irq, void *dev_instance)
294{ 403{
295 struct ata_host *host = dev_instance; 404 struct ata_host *host = dev_instance;
296 void __iomem *mmio_base = host->iomap[MMIO_BAR]; 405 struct inic_host_priv *hpriv = host->private_data;
297 u16 host_irq_stat; 406 u16 host_irq_stat;
298 int i, handled = 0; 407 int i, handled = 0;
299 408
300 host_irq_stat = readw(mmio_base + HOST_IRQ_STAT); 409 host_irq_stat = readw(hpriv->mmio_base + HOST_IRQ_STAT);
301 410
302 if (unlikely(!(host_irq_stat & HIRQ_GLOBAL))) 411 if (unlikely(!(host_irq_stat & HIRQ_GLOBAL)))
303 goto out; 412 goto out;
@@ -327,60 +436,173 @@ static irqreturn_t inic_interrupt(int irq, void *dev_instance)
327 return IRQ_RETVAL(handled); 436 return IRQ_RETVAL(handled);
328} 437}
329 438
439static int inic_check_atapi_dma(struct ata_queued_cmd *qc)
440{
441 /* For some reason ATAPI_PROT_DMA doesn't work for some
442 * commands including writes and other misc ops. Use PIO
443 * protocol instead, which BTW is driven by the DMA engine
444 * anyway, so it shouldn't make much difference for native
445 * SATA devices.
446 */
447 if (atapi_cmd_type(qc->cdb[0]) == READ)
448 return 0;
449 return 1;
450}
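/*
 * A non-zero return from ->check_atapi_dma tells libata not to use
 * DMA for that command.  With the policy above (illustrative, using
 * standard SCSI opcodes):
 *
 *	atapi_cmd_type(READ_10)  == READ   -> 0: ATAPI DMA allowed
 *	atapi_cmd_type(WRITE_10) == WRITE  -> 1: fall back to the
 *	                                     DMA-assisted PIO protocol
 */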
451
452static void inic_fill_sg(struct inic_prd *prd, struct ata_queued_cmd *qc)
453{
454 struct scatterlist *sg;
455 unsigned int si;
456 u8 flags = 0;
457
458 if (qc->tf.flags & ATA_TFLAG_WRITE)
459 flags |= PRD_WRITE;
460
461 if (ata_is_dma(qc->tf.protocol))
462 flags |= PRD_DMA;
463
464 for_each_sg(qc->sg, sg, qc->n_elem, si) {
465 prd->mad = cpu_to_le32(sg_dma_address(sg));
466 prd->len = cpu_to_le16(sg_dma_len(sg));
467 prd->flags = flags;
468 prd++;
469 }
470
471 WARN_ON(!si);
472 prd[-1].flags |= PRD_END;
473}
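/*
 * For a two-element sg list on an ATA DMA write, the loop above
 * yields:
 *
 *	prd[0] = { mad = sg0, len = len0, flags = PRD_WRITE | PRD_DMA }
 *	prd[1] = { mad = sg1, len = len1, flags = PRD_WRITE | PRD_DMA | PRD_END }
 *
 * i.e. the flags are uniform and only the last entry gains PRD_END to
 * terminate the APRD chain for the IDMA engine.
 */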
474
475static void inic_qc_prep(struct ata_queued_cmd *qc)
476{
477 struct inic_port_priv *pp = qc->ap->private_data;
478 struct inic_pkt *pkt = pp->pkt;
479 struct inic_cpb *cpb = &pkt->cpb;
480 struct inic_prd *prd = pkt->prd;
481 bool is_atapi = ata_is_atapi(qc->tf.protocol);
482 bool is_data = ata_is_data(qc->tf.protocol);
483 unsigned int cdb_len = 0;
484
485 VPRINTK("ENTER\n");
486
487 if (is_atapi)
488 cdb_len = qc->dev->cdb_len;
489
490 /* prepare packet, based on initio driver */
491 memset(pkt, 0, sizeof(struct inic_pkt));
492
493 cpb->ctl_flags = CPB_CTL_VALID | CPB_CTL_IEN;
494 if (is_atapi || is_data)
495 cpb->ctl_flags |= CPB_CTL_DATA;
496
497 cpb->len = cpu_to_le32(qc->nbytes + cdb_len);
498 cpb->prd = cpu_to_le32(pp->pkt_dma + offsetof(struct inic_pkt, prd));
499
500 cpb->device = qc->tf.device;
501 cpb->feature = qc->tf.feature;
502 cpb->nsect = qc->tf.nsect;
503 cpb->lbal = qc->tf.lbal;
504 cpb->lbam = qc->tf.lbam;
505 cpb->lbah = qc->tf.lbah;
506
507 if (qc->tf.flags & ATA_TFLAG_LBA48) {
508 cpb->hob_feature = qc->tf.hob_feature;
509 cpb->hob_nsect = qc->tf.hob_nsect;
510 cpb->hob_lbal = qc->tf.hob_lbal;
511 cpb->hob_lbam = qc->tf.hob_lbam;
512 cpb->hob_lbah = qc->tf.hob_lbah;
513 }
514
515 cpb->command = qc->tf.command;
516 /* don't load ctl - reason unknown; the initio driver does the same */
517
518 /* setup PRD for CDB */
519 if (is_atapi) {
520 memcpy(pkt->cdb, qc->cdb, ATAPI_CDB_LEN);
521 prd->mad = cpu_to_le32(pp->pkt_dma +
522 offsetof(struct inic_pkt, cdb));
523 prd->len = cpu_to_le16(cdb_len);
524 prd->flags = PRD_CDB | PRD_WRITE;
525 if (!is_data)
526 prd->flags |= PRD_END;
527 prd++;
528 }
529
530 /* setup sg table */
531 if (is_data)
532 inic_fill_sg(prd, qc);
533
534 pp->cpb_tbl[0] = pp->pkt_dma;
535}
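/*
 * Sketch of the DMA-visible layout inic_qc_prep() builds for an ATAPI
 * data command:
 *
 *	pkt->cpb	control flags, total length, taskfile image
 *	pkt->prd[0]	points at pkt->cdb, PRD_CDB | PRD_WRITE
 *	pkt->prd[1..n]	data sg entries, last one flagged PRD_END
 *	pkt->cdb	the SCSI CDB itself
 *
 * cpb_tbl[0] is then pointed at pkt_dma, so the controller fetches
 * the CPB when the PTQ FIFO is kicked in inic_qc_issue() below.
 */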
536
330static unsigned int inic_qc_issue(struct ata_queued_cmd *qc) 537static unsigned int inic_qc_issue(struct ata_queued_cmd *qc)
331{ 538{
332 struct ata_port *ap = qc->ap; 539 struct ata_port *ap = qc->ap;
540 void __iomem *port_base = inic_port_base(ap);
333 541
334 /* ATA IRQ doesn't wait for DMA transfer completion and vice 542 /* fire up the ADMA engine */
335 * versa. Mask IRQ selectively to detect command completion. 543 writew(HCTL_FTHD0, port_base + HOST_CTL);
336 * Without it, ATA DMA read command can cause data corruption. 544 writew(IDMA_CTL_GO, port_base + PORT_IDMA_CTL);
337 * 545 writeb(0, port_base + PORT_CPB_PTQFIFO);
338 * Something similar might be needed for ATAPI writes. I 546
339 * tried a lot of combinations but couldn't find the solution. 547 return 0;
340 */ 548}
341 if (qc->tf.protocol == ATA_PROT_DMA && 549
342 !(qc->tf.flags & ATA_TFLAG_WRITE)) 550static void inic_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
343 inic_set_pirq_mask(ap, PIRQ_MASK_DMA_READ); 551{
344 else 552 void __iomem *port_base = inic_port_base(ap);
345 inic_set_pirq_mask(ap, PIRQ_MASK_OTHER); 553
554 tf->feature = readb(port_base + PORT_TF_FEATURE);
555 tf->nsect = readb(port_base + PORT_TF_NSECT);
556 tf->lbal = readb(port_base + PORT_TF_LBAL);
557 tf->lbam = readb(port_base + PORT_TF_LBAM);
558 tf->lbah = readb(port_base + PORT_TF_LBAH);
559 tf->device = readb(port_base + PORT_TF_DEVICE);
560 tf->command = readb(port_base + PORT_TF_COMMAND);
561}
346 562
347 /* Issuing a command to yet uninitialized port locks up the 563static bool inic_qc_fill_rtf(struct ata_queued_cmd *qc)
348 * controller. Most of the time, this happens for the first 564{
349 * command after reset which are ATA and ATAPI IDENTIFYs. 565 struct ata_taskfile *rtf = &qc->result_tf;
350 * Fast fail if stat is 0x7f or 0xff for those commands. 566 struct ata_taskfile tf;
567
568 /* FIXME: Except for status and error, result TF access
569 * doesn't work. I tried reading from BAR0/2, CPB and BAR5.
570 * None works regardless of which command interface is used.
571 * For now return true iff status indicates device error.
572 * This means that we're reporting a bogus sector for RW
573 * failures. Eeekk....
351 */ 574 */
352 if (unlikely(qc->tf.command == ATA_CMD_ID_ATA || 575 inic_tf_read(qc->ap, &tf);
353 qc->tf.command == ATA_CMD_ID_ATAPI)) {
354 u8 stat = ap->ops->sff_check_status(ap);
355 if (stat == 0x7f || stat == 0xff)
356 return AC_ERR_HSM;
357 }
358 576
359 return ata_sff_qc_issue(qc); 577 if (!(tf.command & ATA_ERR))
578 return false;
579
580 rtf->command = tf.command;
581 rtf->feature = tf.feature;
582 return true;
360} 583}
361 584
362static void inic_freeze(struct ata_port *ap) 585static void inic_freeze(struct ata_port *ap)
363{ 586{
364 void __iomem *port_base = inic_port_base(ap); 587 void __iomem *port_base = inic_port_base(ap);
365 588
366 __inic_set_pirq_mask(ap, PIRQ_MASK_FREEZE); 589 writeb(PIRQ_MASK_FREEZE, port_base + PORT_IRQ_MASK);
367
368 ap->ops->sff_check_status(ap);
369 writeb(0xff, port_base + PORT_IRQ_STAT); 590 writeb(0xff, port_base + PORT_IRQ_STAT);
370
371 readb(port_base + PORT_IRQ_STAT); /* flush */
372} 591}
373 592
374static void inic_thaw(struct ata_port *ap) 593static void inic_thaw(struct ata_port *ap)
375{ 594{
376 void __iomem *port_base = inic_port_base(ap); 595 void __iomem *port_base = inic_port_base(ap);
377 596
378 ap->ops->sff_check_status(ap);
379 writeb(0xff, port_base + PORT_IRQ_STAT); 597 writeb(0xff, port_base + PORT_IRQ_STAT);
598 writeb(PIRQ_MASK_DEFAULT, port_base + PORT_IRQ_MASK);
599}
380 600
381 __inic_set_pirq_mask(ap, PIRQ_MASK_OTHER); 601static int inic_check_ready(struct ata_link *link)
602{
603 void __iomem *port_base = inic_port_base(link->ap);
382 604
383 readb(port_base + PORT_IRQ_STAT); /* flush */ 605 return ata_check_ready(readb(port_base + PORT_TF_COMMAND));
384} 606}
385 607
386/* 608/*
@@ -394,17 +616,15 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
394 void __iomem *port_base = inic_port_base(ap); 616 void __iomem *port_base = inic_port_base(ap);
395 void __iomem *idma_ctl = port_base + PORT_IDMA_CTL; 617 void __iomem *idma_ctl = port_base + PORT_IDMA_CTL;
396 const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); 618 const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
397 u16 val;
398 int rc; 619 int rc;
399 620
400 /* hammer it into sane state */ 621 /* hammer it into sane state */
401 inic_reset_port(port_base); 622 inic_reset_port(port_base);
402 623
403 val = readw(idma_ctl); 624 writew(IDMA_CTL_RST_ATA, idma_ctl);
404 writew(val | IDMA_CTL_RST_ATA, idma_ctl);
405 readw(idma_ctl); /* flush */ 625 readw(idma_ctl); /* flush */
406 msleep(1); 626 msleep(1);
407 writew(val & ~IDMA_CTL_RST_ATA, idma_ctl); 627 writew(0, idma_ctl);
408 628
409 rc = sata_link_resume(link, timing, deadline); 629 rc = sata_link_resume(link, timing, deadline);
410 if (rc) { 630 if (rc) {
@@ -418,7 +638,7 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
418 struct ata_taskfile tf; 638 struct ata_taskfile tf;
419 639
420 /* wait for link to become ready */ 640 /* wait for link to become ready */
421 rc = ata_sff_wait_after_reset(link, 1, deadline); 641 rc = ata_wait_after_reset(link, deadline, inic_check_ready);
422 /* link occupied, -ENODEV too is an error */ 642 /* link occupied, -ENODEV too is an error */
423 if (rc) { 643 if (rc) {
424 ata_link_printk(link, KERN_WARNING, "device not ready " 644 ata_link_printk(link, KERN_WARNING, "device not ready "
@@ -426,7 +646,7 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
426 return rc; 646 return rc;
427 } 647 }
428 648
429 ata_sff_tf_read(ap, &tf); 649 inic_tf_read(ap, &tf);
430 *class = ata_dev_classify(&tf); 650 *class = ata_dev_classify(&tf);
431 } 651 }
432 652
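/*
 * ata_wait_after_reset() polls the supplied callback until the
 * deadline expires; the contract inic_check_ready() fulfils via
 * ata_check_ready(status) is roughly:
 *
 *	BSY clear      -> 1        link ready
 *	status == 0xff -> -ENODEV  nothing attached
 *	otherwise      -> 0        keep polling
 */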
@@ -436,18 +656,8 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
436static void inic_error_handler(struct ata_port *ap) 656static void inic_error_handler(struct ata_port *ap)
437{ 657{
438 void __iomem *port_base = inic_port_base(ap); 658 void __iomem *port_base = inic_port_base(ap);
439 struct inic_port_priv *pp = ap->private_data;
440 unsigned long flags;
441 659
442 /* reset PIO HSM and stop DMA engine */
443 inic_reset_port(port_base); 660 inic_reset_port(port_base);
444
445 spin_lock_irqsave(ap->lock, flags);
446 ap->hsm_task_state = HSM_ST_IDLE;
447 writeb(pp->dfl_prdctl, port_base + PORT_PRD_CTL);
448 spin_unlock_irqrestore(ap->lock, flags);
449
450 /* PIO and DMA engines have been stopped, perform recovery */
451 ata_std_error_handler(ap); 661 ata_std_error_handler(ap);
452} 662}
453 663
@@ -458,26 +668,18 @@ static void inic_post_internal_cmd(struct ata_queued_cmd *qc)
458 inic_reset_port(inic_port_base(qc->ap)); 668 inic_reset_port(inic_port_base(qc->ap));
459} 669}
460 670
461static void inic_dev_config(struct ata_device *dev)
462{
463 /* inic can only handle upto LBA28 max sectors */
464 if (dev->max_sectors > ATA_MAX_SECTORS)
465 dev->max_sectors = ATA_MAX_SECTORS;
466
467 if (dev->n_sectors >= 1 << 28) {
468 ata_dev_printk(dev, KERN_ERR,
469 "ERROR: This driver doesn't support LBA48 yet and may cause\n"
470 " data corruption on such devices. Disabling.\n");
471 ata_dev_disable(dev);
472 }
473}
474
475static void init_port(struct ata_port *ap) 671static void init_port(struct ata_port *ap)
476{ 672{
477 void __iomem *port_base = inic_port_base(ap); 673 void __iomem *port_base = inic_port_base(ap);
674 struct inic_port_priv *pp = ap->private_data;
478 675
479 /* Setup PRD address */ 676 /* clear packet and CPB table */
677 memset(pp->pkt, 0, sizeof(struct inic_pkt));
678 memset(pp->cpb_tbl, 0, IDMA_CPB_TBL_SIZE);
679
680 /* setup PRD and CPB lookup table addresses */
480 writel(ap->prd_dma, port_base + PORT_PRD_ADDR); 681 writel(ap->prd_dma, port_base + PORT_PRD_ADDR);
682 writel(pp->cpb_tbl_dma, port_base + PORT_CPB_CPBLAR);
481} 683}
482 684
483static int inic_port_resume(struct ata_port *ap) 685static int inic_port_resume(struct ata_port *ap)
@@ -488,28 +690,30 @@ static int inic_port_resume(struct ata_port *ap)
488 690
489static int inic_port_start(struct ata_port *ap) 691static int inic_port_start(struct ata_port *ap)
490{ 692{
491 void __iomem *port_base = inic_port_base(ap); 693 struct device *dev = ap->host->dev;
492 struct inic_port_priv *pp; 694 struct inic_port_priv *pp;
493 u8 tmp;
494 int rc; 695 int rc;
495 696
496 /* alloc and initialize private data */ 697 /* alloc and initialize private data */
497 pp = devm_kzalloc(ap->host->dev, sizeof(*pp), GFP_KERNEL); 698 pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
498 if (!pp) 699 if (!pp)
499 return -ENOMEM; 700 return -ENOMEM;
500 ap->private_data = pp; 701 ap->private_data = pp;
501 702
502 /* default PRD_CTL value, DMAEN, WR and START off */
503 tmp = readb(port_base + PORT_PRD_CTL);
504 tmp &= ~(PRD_CTL_DMAEN | PRD_CTL_WR | PRD_CTL_START);
505 pp->dfl_prdctl = tmp;
506
507 /* Alloc resources */ 703 /* Alloc resources */
508 rc = ata_port_start(ap); 704 rc = ata_port_start(ap);
509 if (rc) { 705 if (rc)
510 kfree(pp);
511 return rc; 706 return rc;
512 } 707
708 pp->pkt = dmam_alloc_coherent(dev, sizeof(struct inic_pkt),
709 &pp->pkt_dma, GFP_KERNEL);
710 if (!pp->pkt)
711 return -ENOMEM;
712
713 pp->cpb_tbl = dmam_alloc_coherent(dev, IDMA_CPB_TBL_SIZE,
714 &pp->cpb_tbl_dma, GFP_KERNEL);
715 if (!pp->cpb_tbl)
716 return -ENOMEM;
513 717
514 init_port(ap); 718 init_port(ap);
515 719
@@ -517,21 +721,18 @@ static int inic_port_start(struct ata_port *ap)
517} 721}
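/*
 * Lifetime note: dmam_alloc_coherent() is the managed variant, so pkt
 * and cpb_tbl are released automatically when the device goes away
 * and the -ENOMEM paths above need no manual cleanup.  The CPB table
 * is tiny: IDMA_CPB_TBL_SIZE = 4 * 32 suggests one 4-byte entry for
 * each of 32 possible CPB slots, of which this driver currently uses
 * only slot 0 (no NCQ support yet).
 */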
518 722
519static struct ata_port_operations inic_port_ops = { 723static struct ata_port_operations inic_port_ops = {
520 .inherits = &ata_sff_port_ops, 724 .inherits = &sata_port_ops,
521 725
522 .bmdma_setup = inic_bmdma_setup, 726 .check_atapi_dma = inic_check_atapi_dma,
523 .bmdma_start = inic_bmdma_start, 727 .qc_prep = inic_qc_prep,
524 .bmdma_stop = inic_bmdma_stop,
525 .bmdma_status = inic_bmdma_status,
526 .qc_issue = inic_qc_issue, 728 .qc_issue = inic_qc_issue,
729 .qc_fill_rtf = inic_qc_fill_rtf,
527 730
528 .freeze = inic_freeze, 731 .freeze = inic_freeze,
529 .thaw = inic_thaw, 732 .thaw = inic_thaw,
530 .softreset = ATA_OP_NULL, /* softreset is broken */
531 .hardreset = inic_hardreset, 733 .hardreset = inic_hardreset,
532 .error_handler = inic_error_handler, 734 .error_handler = inic_error_handler,
533 .post_internal_cmd = inic_post_internal_cmd, 735 .post_internal_cmd = inic_post_internal_cmd,
534 .dev_config = inic_dev_config,
535 736
536 .scr_read = inic_scr_read, 737 .scr_read = inic_scr_read,
537 .scr_write = inic_scr_write, 738 .scr_write = inic_scr_write,
@@ -541,12 +742,6 @@ static struct ata_port_operations inic_port_ops = {
541}; 742};
542 743
543static struct ata_port_info inic_port_info = { 744static struct ata_port_info inic_port_info = {
544 /* For some reason, ATAPI_PROT_PIO is broken on this
545 * controller, and no, PIO_POLLING doesn't fix it. It somehow
546 * manages to report the wrong ireason and ignoring ireason
547 * results in machine lock up. Tell libata to always prefer
548 * DMA.
549 */
550 .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA, 745 .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
551 .pio_mask = 0x1f, /* pio0-4 */ 746 .pio_mask = 0x1f, /* pio0-4 */
552 .mwdma_mask = 0x07, /* mwdma0-2 */ 747 .mwdma_mask = 0x07, /* mwdma0-2 */
@@ -599,7 +794,6 @@ static int inic_pci_device_resume(struct pci_dev *pdev)
599{ 794{
600 struct ata_host *host = dev_get_drvdata(&pdev->dev); 795 struct ata_host *host = dev_get_drvdata(&pdev->dev);
601 struct inic_host_priv *hpriv = host->private_data; 796 struct inic_host_priv *hpriv = host->private_data;
602 void __iomem *mmio_base = host->iomap[MMIO_BAR];
603 int rc; 797 int rc;
604 798
605 rc = ata_pci_device_do_resume(pdev); 799 rc = ata_pci_device_do_resume(pdev);
@@ -607,7 +801,7 @@ static int inic_pci_device_resume(struct pci_dev *pdev)
607 return rc; 801 return rc;
608 802
609 if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) { 803 if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) {
610 rc = init_controller(mmio_base, hpriv->cached_hctl); 804 rc = init_controller(hpriv->mmio_base, hpriv->cached_hctl);
611 if (rc) 805 if (rc)
612 return rc; 806 return rc;
613 } 807 }
@@ -625,6 +819,7 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
625 struct ata_host *host; 819 struct ata_host *host;
626 struct inic_host_priv *hpriv; 820 struct inic_host_priv *hpriv;
627 void __iomem * const *iomap; 821 void __iomem * const *iomap;
822 int mmio_bar;
628 int i, rc; 823 int i, rc;
629 824
630 if (!printed_version++) 825 if (!printed_version++)
@@ -638,38 +833,31 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
638 833
639 host->private_data = hpriv; 834 host->private_data = hpriv;
640 835
641 /* acquire resources and fill host */ 836 /* Acquire resources and fill host. Note that PCI and cardbus
837 * use different BARs.
838 */
642 rc = pcim_enable_device(pdev); 839 rc = pcim_enable_device(pdev);
643 if (rc) 840 if (rc)
644 return rc; 841 return rc;
645 842
646 rc = pcim_iomap_regions(pdev, 0x3f, DRV_NAME); 843 if (pci_resource_flags(pdev, MMIO_BAR_PCI) & IORESOURCE_MEM)
844 mmio_bar = MMIO_BAR_PCI;
845 else
846 mmio_bar = MMIO_BAR_CARDBUS;
847
848 rc = pcim_iomap_regions(pdev, 1 << mmio_bar, DRV_NAME);
647 if (rc) 849 if (rc)
648 return rc; 850 return rc;
649 host->iomap = iomap = pcim_iomap_table(pdev); 851 host->iomap = iomap = pcim_iomap_table(pdev);
852 hpriv->mmio_base = iomap[mmio_bar];
853 hpriv->cached_hctl = readw(hpriv->mmio_base + HOST_CTL);
650 854
651 for (i = 0; i < NR_PORTS; i++) { 855 for (i = 0; i < NR_PORTS; i++) {
652 struct ata_port *ap = host->ports[i]; 856 struct ata_port *ap = host->ports[i];
653 struct ata_ioports *port = &ap->ioaddr;
654 unsigned int offset = i * PORT_SIZE;
655
656 port->cmd_addr = iomap[2 * i];
657 port->altstatus_addr =
658 port->ctl_addr = (void __iomem *)
659 ((unsigned long)iomap[2 * i + 1] | ATA_PCI_CTL_OFS);
660 port->scr_addr = iomap[MMIO_BAR] + offset + PORT_SCR;
661
662 ata_sff_std_ports(port);
663
664 ata_port_pbar_desc(ap, MMIO_BAR, -1, "mmio");
665 ata_port_pbar_desc(ap, MMIO_BAR, offset, "port");
666 ata_port_desc(ap, "cmd 0x%llx ctl 0x%llx",
667 (unsigned long long)pci_resource_start(pdev, 2 * i),
668 (unsigned long long)pci_resource_start(pdev, (2 * i + 1)) |
669 ATA_PCI_CTL_OFS);
670 }
671 857
672 hpriv->cached_hctl = readw(iomap[MMIO_BAR] + HOST_CTL); 858 ata_port_pbar_desc(ap, mmio_bar, -1, "mmio");
859 ata_port_pbar_desc(ap, mmio_bar, i * PORT_SIZE, "port");
860 }
673 861
674 /* Set dma_mask. This device doesn't support 64-bit addressing. */ 862 /* Set dma_mask. This device doesn't support 64-bit addressing. */
675 rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK); 863 rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
@@ -698,7 +886,7 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
698 return rc; 886 return rc;
699 } 887 }
700 888
701 rc = init_controller(iomap[MMIO_BAR], hpriv->cached_hctl); 889 rc = init_controller(hpriv->mmio_base, hpriv->cached_hctl);
702 if (rc) { 890 if (rc) {
703 dev_printk(KERN_ERR, &pdev->dev, 891 dev_printk(KERN_ERR, &pdev->dev,
704 "failed to initialize controller\n"); 892 "failed to initialize controller\n");
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 842b1a15b78c..bb73b2222627 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -65,6 +65,7 @@
65#include <linux/platform_device.h> 65#include <linux/platform_device.h>
66#include <linux/ata_platform.h> 66#include <linux/ata_platform.h>
67#include <linux/mbus.h> 67#include <linux/mbus.h>
68#include <linux/bitops.h>
68#include <scsi/scsi_host.h> 69#include <scsi/scsi_host.h>
69#include <scsi/scsi_cmnd.h> 70#include <scsi/scsi_cmnd.h>
70#include <scsi/scsi_device.h> 71#include <scsi/scsi_device.h>
@@ -91,9 +92,9 @@ enum {
91 MV_IRQ_COAL_TIME_THRESHOLD = (MV_IRQ_COAL_REG_BASE + 0xd0), 92 MV_IRQ_COAL_TIME_THRESHOLD = (MV_IRQ_COAL_REG_BASE + 0xd0),
92 93
93 MV_SATAHC0_REG_BASE = 0x20000, 94 MV_SATAHC0_REG_BASE = 0x20000,
94 MV_FLASH_CTL = 0x1046c, 95 MV_FLASH_CTL_OFS = 0x1046c,
95 MV_GPIO_PORT_CTL = 0x104f0, 96 MV_GPIO_PORT_CTL_OFS = 0x104f0,
96 MV_RESET_CFG = 0x180d8, 97 MV_RESET_CFG_OFS = 0x180d8,
97 98
98 MV_PCI_REG_SZ = MV_MAJOR_REG_AREA_SZ, 99 MV_PCI_REG_SZ = MV_MAJOR_REG_AREA_SZ,
99 MV_SATAHC_REG_SZ = MV_MAJOR_REG_AREA_SZ, 100 MV_SATAHC_REG_SZ = MV_MAJOR_REG_AREA_SZ,
@@ -147,18 +148,21 @@ enum {
147 /* PCI interface registers */ 148 /* PCI interface registers */
148 149
149 PCI_COMMAND_OFS = 0xc00, 150 PCI_COMMAND_OFS = 0xc00,
151 PCI_COMMAND_MRDTRIG = (1 << 7), /* PCI Master Read Trigger */
150 152
151 PCI_MAIN_CMD_STS_OFS = 0xd30, 153 PCI_MAIN_CMD_STS_OFS = 0xd30,
152 STOP_PCI_MASTER = (1 << 2), 154 STOP_PCI_MASTER = (1 << 2),
153 PCI_MASTER_EMPTY = (1 << 3), 155 PCI_MASTER_EMPTY = (1 << 3),
154 GLOB_SFT_RST = (1 << 4), 156 GLOB_SFT_RST = (1 << 4),
155 157
156 MV_PCI_MODE = 0xd00, 158 MV_PCI_MODE_OFS = 0xd00,
159 MV_PCI_MODE_MASK = 0x30,
160
157 MV_PCI_EXP_ROM_BAR_CTL = 0xd2c, 161 MV_PCI_EXP_ROM_BAR_CTL = 0xd2c,
158 MV_PCI_DISC_TIMER = 0xd04, 162 MV_PCI_DISC_TIMER = 0xd04,
159 MV_PCI_MSI_TRIGGER = 0xc38, 163 MV_PCI_MSI_TRIGGER = 0xc38,
160 MV_PCI_SERR_MASK = 0xc28, 164 MV_PCI_SERR_MASK = 0xc28,
161 MV_PCI_XBAR_TMOUT = 0x1d04, 165 MV_PCI_XBAR_TMOUT_OFS = 0x1d04,
162 MV_PCI_ERR_LOW_ADDRESS = 0x1d40, 166 MV_PCI_ERR_LOW_ADDRESS = 0x1d40,
163 MV_PCI_ERR_HIGH_ADDRESS = 0x1d44, 167 MV_PCI_ERR_HIGH_ADDRESS = 0x1d44,
164 MV_PCI_ERR_ATTRIBUTE = 0x1d48, 168 MV_PCI_ERR_ATTRIBUTE = 0x1d48,
@@ -225,16 +229,18 @@ enum {
225 PHY_MODE4 = 0x314, 229 PHY_MODE4 = 0x314,
226 PHY_MODE2 = 0x330, 230 PHY_MODE2 = 0x330,
227 SATA_IFCTL_OFS = 0x344, 231 SATA_IFCTL_OFS = 0x344,
232 SATA_TESTCTL_OFS = 0x348,
228 SATA_IFSTAT_OFS = 0x34c, 233 SATA_IFSTAT_OFS = 0x34c,
229 VENDOR_UNIQUE_FIS_OFS = 0x35c, 234 VENDOR_UNIQUE_FIS_OFS = 0x35c,
230 235
231 FIS_CFG_OFS = 0x360, 236 FISCFG_OFS = 0x360,
232 FIS_CFG_SINGLE_SYNC = (1 << 16), /* SYNC on DMA activation */ 237 FISCFG_WAIT_DEV_ERR = (1 << 8), /* wait for host on DevErr */
238 FISCFG_SINGLE_SYNC = (1 << 16), /* SYNC on DMA activation */
233 239
234 MV5_PHY_MODE = 0x74, 240 MV5_PHY_MODE = 0x74,
235 MV5_LT_MODE = 0x30, 241 MV5_LTMODE_OFS = 0x30,
236 MV5_PHY_CTL = 0x0C, 242 MV5_PHY_CTL_OFS = 0x0C,
237 SATA_INTERFACE_CFG = 0x050, 243 SATA_INTERFACE_CFG_OFS = 0x050,
238 244
239 MV_M2_PREAMP_MASK = 0x7e0, 245 MV_M2_PREAMP_MASK = 0x7e0,
240 246
@@ -332,10 +338,16 @@ enum {
332 EDMA_CMD_OFS = 0x28, /* EDMA command register */ 338 EDMA_CMD_OFS = 0x28, /* EDMA command register */
333 EDMA_EN = (1 << 0), /* enable EDMA */ 339 EDMA_EN = (1 << 0), /* enable EDMA */
334 EDMA_DS = (1 << 1), /* disable EDMA; self-negated */ 340 EDMA_DS = (1 << 1), /* disable EDMA; self-negated */
335 ATA_RST = (1 << 2), /* reset trans/link/phy */ 341 EDMA_RESET = (1 << 2), /* reset eng/trans/link/phy */
342
343 EDMA_STATUS_OFS = 0x30, /* EDMA engine status */
344 EDMA_STATUS_CACHE_EMPTY = (1 << 6), /* GenIIe command cache empty */
345 EDMA_STATUS_IDLE = (1 << 7), /* GenIIe EDMA enabled/idle */
336 346
337 EDMA_IORDY_TMOUT = 0x34, 347 EDMA_IORDY_TMOUT_OFS = 0x34,
338 EDMA_ARB_CFG = 0x38, 348 EDMA_ARB_CFG_OFS = 0x38,
349
350 EDMA_HALTCOND_OFS = 0x60, /* GenIIe halt conditions */
339 351
340 GEN_II_NCQ_MAX_SECTORS = 256, /* max sects/io on Gen2 w/NCQ */ 352 GEN_II_NCQ_MAX_SECTORS = 256, /* max sects/io on Gen2 w/NCQ */
341 353
@@ -350,15 +362,19 @@ enum {
350 MV_HP_GEN_II = (1 << 7), /* Generation II: 60xx */ 362 MV_HP_GEN_II = (1 << 7), /* Generation II: 60xx */
351 MV_HP_GEN_IIE = (1 << 8), /* Generation IIE: 6042/7042 */ 363 MV_HP_GEN_IIE = (1 << 8), /* Generation IIE: 6042/7042 */
352 MV_HP_PCIE = (1 << 9), /* PCIe bus/regs: 7042 */ 364 MV_HP_PCIE = (1 << 9), /* PCIe bus/regs: 7042 */
365 MV_HP_CUT_THROUGH = (1 << 10), /* can use EDMA cut-through */
353 366
354 /* Port private flags (pp_flags) */ 367 /* Port private flags (pp_flags) */
355 MV_PP_FLAG_EDMA_EN = (1 << 0), /* is EDMA engine enabled? */ 368 MV_PP_FLAG_EDMA_EN = (1 << 0), /* is EDMA engine enabled? */
356 MV_PP_FLAG_NCQ_EN = (1 << 1), /* is EDMA set up for NCQ? */ 369 MV_PP_FLAG_NCQ_EN = (1 << 1), /* is EDMA set up for NCQ? */
370 MV_PP_FLAG_FBS_EN = (1 << 2), /* is EDMA set up for FBS? */
371 MV_PP_FLAG_DELAYED_EH = (1 << 3), /* delayed dev err handling */
357}; 372};
358 373
359#define IS_GEN_I(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_I) 374#define IS_GEN_I(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_I)
360#define IS_GEN_II(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_II) 375#define IS_GEN_II(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_II)
361#define IS_GEN_IIE(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_IIE) 376#define IS_GEN_IIE(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_IIE)
377#define IS_PCIE(hpriv) ((hpriv)->hp_flags & MV_HP_PCIE)
362#define HAS_PCI(host) (!((host)->ports[0]->flags & MV_FLAG_SOC)) 378#define HAS_PCI(host) (!((host)->ports[0]->flags & MV_FLAG_SOC))
363 379
364#define WINDOW_CTRL(i) (0x20030 + ((i) << 4)) 380#define WINDOW_CTRL(i) (0x20030 + ((i) << 4))
@@ -433,6 +449,7 @@ struct mv_port_priv {
433 unsigned int resp_idx; 449 unsigned int resp_idx;
434 450
435 u32 pp_flags; 451 u32 pp_flags;
452 unsigned int delayed_eh_pmp_map;
436}; 453};
437 454
438struct mv_port_signal { 455struct mv_port_signal {
@@ -479,6 +496,7 @@ static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val);
479static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val); 496static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val);
480static int mv_port_start(struct ata_port *ap); 497static int mv_port_start(struct ata_port *ap);
481static void mv_port_stop(struct ata_port *ap); 498static void mv_port_stop(struct ata_port *ap);
499static int mv_qc_defer(struct ata_queued_cmd *qc);
482static void mv_qc_prep(struct ata_queued_cmd *qc); 500static void mv_qc_prep(struct ata_queued_cmd *qc);
483static void mv_qc_prep_iie(struct ata_queued_cmd *qc); 501static void mv_qc_prep_iie(struct ata_queued_cmd *qc);
484static unsigned int mv_qc_issue(struct ata_queued_cmd *qc); 502static unsigned int mv_qc_issue(struct ata_queued_cmd *qc);
@@ -527,6 +545,9 @@ static int mv_pmp_hardreset(struct ata_link *link, unsigned int *class,
527 unsigned long deadline); 545 unsigned long deadline);
528static int mv_softreset(struct ata_link *link, unsigned int *class, 546static int mv_softreset(struct ata_link *link, unsigned int *class,
529 unsigned long deadline); 547 unsigned long deadline);
548static void mv_pmp_error_handler(struct ata_port *ap);
549static void mv_process_crpb_entries(struct ata_port *ap,
550 struct mv_port_priv *pp);
530 551
531/* .sg_tablesize is (MV_MAX_SG_CT / 2) in the structures below 552/* .sg_tablesize is (MV_MAX_SG_CT / 2) in the structures below
532 * because we have to allow room for worst case splitting of 553 * because we have to allow room for worst case splitting of
@@ -548,6 +569,7 @@ static struct scsi_host_template mv6_sht = {
548static struct ata_port_operations mv5_ops = { 569static struct ata_port_operations mv5_ops = {
549 .inherits = &ata_sff_port_ops, 570 .inherits = &ata_sff_port_ops,
550 571
572 .qc_defer = mv_qc_defer,
551 .qc_prep = mv_qc_prep, 573 .qc_prep = mv_qc_prep,
552 .qc_issue = mv_qc_issue, 574 .qc_issue = mv_qc_issue,
553 575
@@ -566,7 +588,6 @@ static struct ata_port_operations mv5_ops = {
566 588
567static struct ata_port_operations mv6_ops = { 589static struct ata_port_operations mv6_ops = {
568 .inherits = &mv5_ops, 590 .inherits = &mv5_ops,
569 .qc_defer = sata_pmp_qc_defer_cmd_switch,
570 .dev_config = mv6_dev_config, 591 .dev_config = mv6_dev_config,
571 .scr_read = mv_scr_read, 592 .scr_read = mv_scr_read,
572 .scr_write = mv_scr_write, 593 .scr_write = mv_scr_write,
@@ -574,12 +595,11 @@ static struct ata_port_operations mv6_ops = {
574 .pmp_hardreset = mv_pmp_hardreset, 595 .pmp_hardreset = mv_pmp_hardreset,
575 .pmp_softreset = mv_softreset, 596 .pmp_softreset = mv_softreset,
576 .softreset = mv_softreset, 597 .softreset = mv_softreset,
577 .error_handler = sata_pmp_error_handler, 598 .error_handler = mv_pmp_error_handler,
578}; 599};
579 600
580static struct ata_port_operations mv_iie_ops = { 601static struct ata_port_operations mv_iie_ops = {
581 .inherits = &mv6_ops, 602 .inherits = &mv6_ops,
582 .qc_defer = ata_std_qc_defer, /* FIS-based switching */
583 .dev_config = ATA_OP_NULL, 603 .dev_config = ATA_OP_NULL,
584 .qc_prep = mv_qc_prep_iie, 604 .qc_prep = mv_qc_prep_iie,
585}; 605};
@@ -875,6 +895,29 @@ static void mv_start_dma(struct ata_port *ap, void __iomem *port_mmio,
875 } 895 }
876} 896}
877 897
898static void mv_wait_for_edma_empty_idle(struct ata_port *ap)
899{
900 void __iomem *port_mmio = mv_ap_base(ap);
901 const u32 empty_idle = (EDMA_STATUS_CACHE_EMPTY | EDMA_STATUS_IDLE);
902 const int per_loop = 5, timeout = (15 * 1000 / per_loop);
903 int i;
904
905 /*
906 * Wait for the EDMA engine to finish transactions in progress.
907 * No idea what a good "timeout" value might be, but measurements
908 * indicate that it often requires hundreds of microseconds
909 * with two drives in-use. So we use the 15msec value above
910 * as a rough guess at what even more drives might require.
911 */
912 for (i = 0; i < timeout; ++i) {
913 u32 edma_stat = readl(port_mmio + EDMA_STATUS_OFS);
914 if ((edma_stat & empty_idle) == empty_idle)
915 break;
916 udelay(per_loop);
917 }
918 /* ata_port_printk(ap, KERN_INFO, "%s: %u+ usecs\n", __func__, i); */
919}
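/*
 * Worked out: with per_loop = 5 us and timeout = 15 * 1000 / 5 = 3000
 * iterations, the loop above busy-waits at most ~15 ms, matching the
 * "rough guess" in the comment; per the measurement note, typical
 * exits happen after a few hundred microseconds.
 */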
920
878/** 921/**
879 * mv_stop_edma_engine - Disable eDMA engine 922 * mv_stop_edma_engine - Disable eDMA engine
880 * @port_mmio: io base address 923 * @port_mmio: io base address
@@ -907,6 +950,7 @@ static int mv_stop_edma(struct ata_port *ap)
907 if (!(pp->pp_flags & MV_PP_FLAG_EDMA_EN)) 950 if (!(pp->pp_flags & MV_PP_FLAG_EDMA_EN))
908 return 0; 951 return 0;
909 pp->pp_flags &= ~MV_PP_FLAG_EDMA_EN; 952 pp->pp_flags &= ~MV_PP_FLAG_EDMA_EN;
953 mv_wait_for_edma_empty_idle(ap);
910 if (mv_stop_edma_engine(port_mmio)) { 954 if (mv_stop_edma_engine(port_mmio)) {
911 ata_port_printk(ap, KERN_ERR, "Unable to stop eDMA\n"); 955 ata_port_printk(ap, KERN_ERR, "Unable to stop eDMA\n");
912 return -EIO; 956 return -EIO;
@@ -1057,26 +1101,95 @@ static void mv6_dev_config(struct ata_device *adev)
1057 } 1101 }
1058} 1102}
1059 1103
1060static void mv_config_fbs(void __iomem *port_mmio, int enable_fbs) 1104static int mv_qc_defer(struct ata_queued_cmd *qc)
1061{ 1105{
1062 u32 old_fcfg, new_fcfg, old_ltmode, new_ltmode; 1106 struct ata_link *link = qc->dev->link;
1107 struct ata_port *ap = link->ap;
1108 struct mv_port_priv *pp = ap->private_data;
1109
1110 /*
1111 * Don't allow new commands if we're in a delayed EH state
1112 * for NCQ and/or FIS-based switching.
1113 */
1114 if (pp->pp_flags & MV_PP_FLAG_DELAYED_EH)
1115 return ATA_DEFER_PORT;
1063 /* 1116 /*
1064 * Various bit settings required for operation 1117 * If the port is completely idle, then allow the new qc.
1065 * in FIS-based switching (fbs) mode on GenIIe:
1066 */ 1118 */
1067 old_fcfg = readl(port_mmio + FIS_CFG_OFS); 1119 if (ap->nr_active_links == 0)
1068 old_ltmode = readl(port_mmio + LTMODE_OFS); 1120 return 0;
1069 if (enable_fbs) { 1121
1070 new_fcfg = old_fcfg | FIS_CFG_SINGLE_SYNC; 1122 if (pp->pp_flags & MV_PP_FLAG_EDMA_EN) {
1071 new_ltmode = old_ltmode | LTMODE_BIT8; 1123 /*
1072 } else { /* disable fbs */ 1124 * The port is operating in host queuing mode (EDMA).
1073 new_fcfg = old_fcfg & ~FIS_CFG_SINGLE_SYNC; 1125 * It can accommodate a new qc if the qc protocol
1074 new_ltmode = old_ltmode & ~LTMODE_BIT8; 1126 * is compatible with the current host queue mode.
1075 } 1127 */
1076 if (new_fcfg != old_fcfg) 1128 if (pp->pp_flags & MV_PP_FLAG_NCQ_EN) {
1077 writelfl(new_fcfg, port_mmio + FIS_CFG_OFS); 1129 /*
1130 * The host queue (EDMA) is in NCQ mode.
1131 * If the new qc is also an NCQ command,
1132 * then allow the new qc.
1133 */
1134 if (qc->tf.protocol == ATA_PROT_NCQ)
1135 return 0;
1136 } else {
1137 /*
1138 * The host queue (EDMA) is in non-NCQ, DMA mode.
1139 * If the new qc is also a non-NCQ, DMA command,
1140 * then allow the new qc.
1141 */
1142 if (qc->tf.protocol == ATA_PROT_DMA)
1143 return 0;
1144 }
1145 }
1146 return ATA_DEFER_PORT;
1147}
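/*
 * Summary of the decision above:
 *
 *	delayed EH pending              -> ATA_DEFER_PORT
 *	port completely idle            -> issue
 *	EDMA in NCQ mode, NCQ command   -> issue
 *	EDMA in DMA mode, DMA command   -> issue
 *	anything else (mode mismatch)   -> ATA_DEFER_PORT
 *
 * This replaces sata_pmp_qc_defer_cmd_switch(), which deferred when
 * switching links; with FBS the EDMA can interleave links itself.
 */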
1148
1149static void mv_config_fbs(void __iomem *port_mmio, int want_ncq, int want_fbs)
1150{
1151 u32 new_fiscfg, old_fiscfg;
1152 u32 new_ltmode, old_ltmode;
1153 u32 new_haltcond, old_haltcond;
1154
1155 old_fiscfg = readl(port_mmio + FISCFG_OFS);
1156 old_ltmode = readl(port_mmio + LTMODE_OFS);
1157 old_haltcond = readl(port_mmio + EDMA_HALTCOND_OFS);
1158
1159 new_fiscfg = old_fiscfg & ~(FISCFG_SINGLE_SYNC | FISCFG_WAIT_DEV_ERR);
1160 new_ltmode = old_ltmode & ~LTMODE_BIT8;
1161 new_haltcond = old_haltcond | EDMA_ERR_DEV;
1162
1163 if (want_fbs) {
1164 new_fiscfg = old_fiscfg | FISCFG_SINGLE_SYNC;
1165 new_ltmode = old_ltmode | LTMODE_BIT8;
1166 if (want_ncq)
1167 new_haltcond &= ~EDMA_ERR_DEV;
1168 else
1169 new_fiscfg |= FISCFG_WAIT_DEV_ERR;
1170 }
1171
1172 if (new_fiscfg != old_fiscfg)
1173 writelfl(new_fiscfg, port_mmio + FISCFG_OFS);
1078 if (new_ltmode != old_ltmode) 1174 if (new_ltmode != old_ltmode)
1079 writelfl(new_ltmode, port_mmio + LTMODE_OFS); 1175 writelfl(new_ltmode, port_mmio + LTMODE_OFS);
1176 if (new_haltcond != old_haltcond)
1177 writelfl(new_haltcond, port_mmio + EDMA_HALTCOND_OFS);
1178}
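/*
 * How the three registers written above combine:
 *
 *	mode		SINGLE_SYNC	LTMODE_BIT8	WAIT_DEV_ERR	halt on DevErr
 *	FBS off		off		off		off		yes
 *	FBS + NCQ	on		on		off		no
 *	FBS + non-NCQ	on		on		on		yes
 *
 * For FBS+NCQ the EDMA keeps running on a device error so the other
 * links can drain; see mv_handle_fbs_ncq_dev_err() further down.
 */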
1179
1180static void mv_60x1_errata_sata25(struct ata_port *ap, int want_ncq)
1181{
1182 struct mv_host_priv *hpriv = ap->host->private_data;
1183 u32 old, new;
1184
1185 /* workaround for 88SX60x1 FEr SATA#25 (part 1) */
1186 old = readl(hpriv->base + MV_GPIO_PORT_CTL_OFS);
1187 if (want_ncq)
1188 new = old | (1 << 22);
1189 else
1190 new = old & ~(1 << 22);
1191 if (new != old)
1192 writel(new, hpriv->base + MV_GPIO_PORT_CTL_OFS);
1080} 1193}
1081 1194
1082static void mv_edma_cfg(struct ata_port *ap, int want_ncq) 1195static void mv_edma_cfg(struct ata_port *ap, int want_ncq)
@@ -1088,25 +1201,40 @@ static void mv_edma_cfg(struct ata_port *ap, int want_ncq)
1088 1201
1089 /* set up non-NCQ EDMA configuration */ 1202 /* set up non-NCQ EDMA configuration */
1090 cfg = EDMA_CFG_Q_DEPTH; /* always 0x1f for *all* chips */ 1203 cfg = EDMA_CFG_Q_DEPTH; /* always 0x1f for *all* chips */
1204 pp->pp_flags &= ~MV_PP_FLAG_FBS_EN;
1091 1205
1092 if (IS_GEN_I(hpriv)) 1206 if (IS_GEN_I(hpriv))
1093 cfg |= (1 << 8); /* enab config burst size mask */ 1207 cfg |= (1 << 8); /* enab config burst size mask */
1094 1208
1095 else if (IS_GEN_II(hpriv)) 1209 else if (IS_GEN_II(hpriv)) {
1096 cfg |= EDMA_CFG_RD_BRST_EXT | EDMA_CFG_WR_BUFF_LEN; 1210 cfg |= EDMA_CFG_RD_BRST_EXT | EDMA_CFG_WR_BUFF_LEN;
1211 mv_60x1_errata_sata25(ap, want_ncq);
1097 1212
1098 else if (IS_GEN_IIE(hpriv)) { 1213 } else if (IS_GEN_IIE(hpriv)) {
1099 cfg |= (1 << 23); /* do not mask PM field in rx'd FIS */ 1214 int want_fbs = sata_pmp_attached(ap);
1100 cfg |= (1 << 22); /* enab 4-entry host queue cache */ 1215 /*
1101 cfg |= (1 << 18); /* enab early completion */ 1216 * Possible future enhancement:
1102 cfg |= (1 << 17); /* enab cut-through (dis stor&forwrd) */ 1217 *
1218 * The chip can use FBS with non-NCQ, if we allow it,
1219 * But first we need to have the error handling in place
1220 * for this mode (datasheet section 7.3.15.4.2.3).
1221 * So disallow non-NCQ FBS for now.
1222 */
1223 want_fbs &= want_ncq;
1224
1225 mv_config_fbs(port_mmio, want_ncq, want_fbs);
1103 1226
1104 if (want_ncq && sata_pmp_attached(ap)) { 1227 if (want_fbs) {
1228 pp->pp_flags |= MV_PP_FLAG_FBS_EN;
1105 cfg |= EDMA_CFG_EDMA_FBS; /* FIS-based switching */ 1229 cfg |= EDMA_CFG_EDMA_FBS; /* FIS-based switching */
1106 mv_config_fbs(port_mmio, 1);
1107 } else {
1108 mv_config_fbs(port_mmio, 0);
1109 } 1230 }
1231
1232 cfg |= (1 << 23); /* do not mask PM field in rx'd FIS */
1233 cfg |= (1 << 22); /* enab 4-entry host queue cache */
1234 if (HAS_PCI(ap->host))
1235 cfg |= (1 << 18); /* enab early completion */
1236 if (hpriv->hp_flags & MV_HP_CUT_THROUGH)
1237 cfg |= (1 << 17); /* enab cut-thru (dis stor&forwrd) */
1110 } 1238 }
1111 1239
1112 if (want_ncq) { 1240 if (want_ncq) {
@@ -1483,25 +1611,186 @@ static struct ata_queued_cmd *mv_get_active_qc(struct ata_port *ap)
1483 return qc; 1611 return qc;
1484} 1612}
1485 1613
1486static void mv_unexpected_intr(struct ata_port *ap) 1614static void mv_pmp_error_handler(struct ata_port *ap)
1487{ 1615{
1616 unsigned int pmp, pmp_map;
1488 struct mv_port_priv *pp = ap->private_data; 1617 struct mv_port_priv *pp = ap->private_data;
1489 struct ata_eh_info *ehi = &ap->link.eh_info;
1490 char *when = "";
1491 1618
1619 if (pp->pp_flags & MV_PP_FLAG_DELAYED_EH) {
1620 /*
1621 * Perform NCQ error analysis on failed PMPs
1622 * before we freeze the port entirely.
1623 *
1624 * The failed PMPs are marked earlier by mv_pmp_eh_prep().
1625 */
1626 pmp_map = pp->delayed_eh_pmp_map;
1627 pp->pp_flags &= ~MV_PP_FLAG_DELAYED_EH;
1628 for (pmp = 0; pmp_map != 0; pmp++) {
1629 unsigned int this_pmp = (1 << pmp);
1630 if (pmp_map & this_pmp) {
1631 struct ata_link *link = &ap->pmp_link[pmp];
1632 pmp_map &= ~this_pmp;
1633 ata_eh_analyze_ncq_error(link);
1634 }
1635 }
1636 ata_port_freeze(ap);
1637 }
1638 sata_pmp_error_handler(ap);
1639}
1640
1641static unsigned int mv_get_err_pmp_map(struct ata_port *ap)
1642{
1643 void __iomem *port_mmio = mv_ap_base(ap);
1644
1645 return readl(port_mmio + SATA_TESTCTL_OFS) >> 16;
1646}
1647
1648static void mv_pmp_eh_prep(struct ata_port *ap, unsigned int pmp_map)
1649{
1650 struct ata_eh_info *ehi;
1651 unsigned int pmp;
1652
1653 /*
1654 * Initialize EH info for PMPs which saw device errors
1655 */
1656 ehi = &ap->link.eh_info;
1657 for (pmp = 0; pmp_map != 0; pmp++) {
1658 unsigned int this_pmp = (1 << pmp);
1659 if (pmp_map & this_pmp) {
1660 struct ata_link *link = &ap->pmp_link[pmp];
1661
1662 pmp_map &= ~this_pmp;
1663 ehi = &link->eh_info;
1664 ata_ehi_clear_desc(ehi);
1665 ata_ehi_push_desc(ehi, "dev err");
1666 ehi->err_mask |= AC_ERR_DEV;
1667 ehi->action |= ATA_EH_RESET;
1668 ata_link_abort(link);
1669 }
1670 }
1671}
1672
1673static int mv_handle_fbs_ncq_dev_err(struct ata_port *ap)
1674{
1675 struct mv_port_priv *pp = ap->private_data;
1676 int failed_links;
1677 unsigned int old_map, new_map;
1678
1679 /*
1680 * Device error during FBS+NCQ operation:
1681 *
1682 * Set a port flag to prevent further I/O being enqueued.
1683 * Leave the EDMA running to drain outstanding commands from this port.
1684 * Perform the post-mortem/EH only when all responses are complete.
1685 * Follow recovery sequence from 6042/7042 datasheet (7.3.15.4.2.2).
1686 */
1687 if (!(pp->pp_flags & MV_PP_FLAG_DELAYED_EH)) {
1688 pp->pp_flags |= MV_PP_FLAG_DELAYED_EH;
1689 pp->delayed_eh_pmp_map = 0;
1690 }
1691 old_map = pp->delayed_eh_pmp_map;
1692 new_map = old_map | mv_get_err_pmp_map(ap);
1693
1694 if (old_map != new_map) {
1695 pp->delayed_eh_pmp_map = new_map;
1696 mv_pmp_eh_prep(ap, new_map & ~old_map);
1697 }
1698 failed_links = hweight16(new_map);
1699
1700 ata_port_printk(ap, KERN_INFO, "%s: pmp_map=%04x qc_map=%04x "
1701 "failed_links=%d nr_active_links=%d\n",
1702 __func__, pp->delayed_eh_pmp_map,
1703 ap->qc_active, failed_links,
1704 ap->nr_active_links);
1705
1706 if (ap->nr_active_links <= failed_links) {
1707 mv_process_crpb_entries(ap, pp);
1708 mv_stop_edma(ap);
1709 mv_eh_freeze(ap);
1710 ata_port_printk(ap, KERN_INFO, "%s: done\n", __func__);
1711 return 1; /* handled */
1712 }
1713 ata_port_printk(ap, KERN_INFO, "%s: waiting\n", __func__);
1714 return 1; /* handled */
1715}
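/*
 * Putting it together, the FBS+NCQ device-error sequence implemented
 * above is:
 *
 *	1. set MV_PP_FLAG_DELAYED_EH so mv_qc_defer() blocks new commands
 *	2. mv_pmp_eh_prep() aborts the links flagged in SATA_TESTCTL
 *	3. leave EDMA running until nr_active_links <= failed_links
 *	4. drain the CRPB responses, stop EDMA and freeze the port;
 *	   mv_pmp_error_handler() then runs ata_eh_analyze_ncq_error()
 *	   per failed PMP before handing off to sata_pmp_error_handler()
 */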
1716
1717static int mv_handle_fbs_non_ncq_dev_err(struct ata_port *ap)
1718{
1492 /* 1719 /*
1493 * We got a device interrupt from something that 1720 * Possible future enhancement:
1494 * was supposed to be using EDMA or polling. 1721 *
1722 * FBS+non-NCQ operation is not yet implemented.
1723 * See related notes in mv_edma_cfg().
1724 *
1725 * Device error during FBS+non-NCQ operation:
1726 *
1727 * We need to snapshot the shadow registers for each failed command.
1728 * Follow recovery sequence from 6042/7042 datasheet (7.3.15.4.2.3).
1495 */ 1729 */
1730 return 0; /* not handled */
1731}
1732
1733static int mv_handle_dev_err(struct ata_port *ap, u32 edma_err_cause)
1734{
1735 struct mv_port_priv *pp = ap->private_data;
1736
1737 if (!(pp->pp_flags & MV_PP_FLAG_EDMA_EN))
1738 return 0; /* EDMA was not active: not handled */
1739 if (!(pp->pp_flags & MV_PP_FLAG_FBS_EN))
1740 return 0; /* FBS was not active: not handled */
1741
1742 if (!(edma_err_cause & EDMA_ERR_DEV))
1743 return 0; /* non DEV error: not handled */
1744 edma_err_cause &= ~EDMA_ERR_IRQ_TRANSIENT;
1745 if (edma_err_cause & ~(EDMA_ERR_DEV | EDMA_ERR_SELF_DIS))
1746 return 0; /* other problems: not handled */
1747
1748 if (pp->pp_flags & MV_PP_FLAG_NCQ_EN) {
1749 /*
1750 * EDMA should NOT have self-disabled for this case.
1751 * If it did, then something is wrong elsewhere,
1752 * and we cannot handle it here.
1753 */
1754 if (edma_err_cause & EDMA_ERR_SELF_DIS) {
1755 ata_port_printk(ap, KERN_WARNING,
1756 "%s: err_cause=0x%x pp_flags=0x%x\n",
1757 __func__, edma_err_cause, pp->pp_flags);
1758 return 0; /* not handled */
1759 }
1760 return mv_handle_fbs_ncq_dev_err(ap);
1761 } else {
1762 /*
1763 * EDMA should have self-disabled for this case.
1764 * If it did not, then something is wrong elsewhere,
1765 * and we cannot handle it here.
1766 */
1767 if (!(edma_err_cause & EDMA_ERR_SELF_DIS)) {
1768 ata_port_printk(ap, KERN_WARNING,
1769 "%s: err_cause=0x%x pp_flags=0x%x\n",
1770 __func__, edma_err_cause, pp->pp_flags);
1771 return 0; /* not handled */
1772 }
1773 return mv_handle_fbs_non_ncq_dev_err(ap);
1774 }
1775 return 0; /* not handled */
1776}
1777
1778static void mv_unexpected_intr(struct ata_port *ap, int edma_was_enabled)
1779{
1780 struct ata_eh_info *ehi = &ap->link.eh_info;
1781 char *when = "idle";
1782
1496 ata_ehi_clear_desc(ehi); 1783 ata_ehi_clear_desc(ehi);
1497 if (pp->pp_flags & MV_PP_FLAG_EDMA_EN) { 1784 if (!ap || (ap->flags & ATA_FLAG_DISABLED)) {
1498 when = " while EDMA enabled"; 1785 when = "disabled";
1786 } else if (edma_was_enabled) {
1787 when = "EDMA enabled";
1499 } else { 1788 } else {
1500 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, ap->link.active_tag); 1789 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, ap->link.active_tag);
1501 if (qc && (qc->tf.flags & ATA_TFLAG_POLLING)) 1790 if (qc && (qc->tf.flags & ATA_TFLAG_POLLING))
1502 when = " while polling"; 1791 when = "polling";
1503 } 1792 }
1504 ata_ehi_push_desc(ehi, "unexpected device interrupt%s", when); 1793 ata_ehi_push_desc(ehi, "unexpected device interrupt while %s", when);
1505 ehi->err_mask |= AC_ERR_OTHER; 1794 ehi->err_mask |= AC_ERR_OTHER;
1506 ehi->action |= ATA_EH_RESET; 1795 ehi->action |= ATA_EH_RESET;
1507 ata_port_freeze(ap); 1796 ata_port_freeze(ap);
@@ -1519,7 +1808,7 @@ static void mv_unexpected_intr(struct ata_port *ap)
1519 * LOCKING: 1808 * LOCKING:
1520 * Inherited from caller. 1809 * Inherited from caller.
1521 */ 1810 */
1522static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc) 1811static void mv_err_intr(struct ata_port *ap)
1523{ 1812{
1524 void __iomem *port_mmio = mv_ap_base(ap); 1813 void __iomem *port_mmio = mv_ap_base(ap);
1525 u32 edma_err_cause, eh_freeze_mask, serr = 0; 1814 u32 edma_err_cause, eh_freeze_mask, serr = 0;
@@ -1527,24 +1816,42 @@ static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
1527 struct mv_host_priv *hpriv = ap->host->private_data; 1816 struct mv_host_priv *hpriv = ap->host->private_data;
1528 unsigned int action = 0, err_mask = 0; 1817 unsigned int action = 0, err_mask = 0;
1529 struct ata_eh_info *ehi = &ap->link.eh_info; 1818 struct ata_eh_info *ehi = &ap->link.eh_info;
1530 1819 struct ata_queued_cmd *qc;
1531 ata_ehi_clear_desc(ehi); 1820 int abort = 0;
1532 1821
1533 /* 1822 /*
1534 * Read and clear the err_cause bits. This won't actually 1823 * Read and clear the SError and err_cause bits.
1535 * clear for some errors (eg. SError), but we will be doing
1536 * a hard reset in those cases regardless, which *will* clear it.
1537 */ 1824 */
1825 sata_scr_read(&ap->link, SCR_ERROR, &serr);
1826 sata_scr_write_flush(&ap->link, SCR_ERROR, serr);
1827
1538 edma_err_cause = readl(port_mmio + EDMA_ERR_IRQ_CAUSE_OFS); 1828 edma_err_cause = readl(port_mmio + EDMA_ERR_IRQ_CAUSE_OFS);
1539 writelfl(~edma_err_cause, port_mmio + EDMA_ERR_IRQ_CAUSE_OFS); 1829 writelfl(~edma_err_cause, port_mmio + EDMA_ERR_IRQ_CAUSE_OFS);
1540 1830
1541 ata_ehi_push_desc(ehi, "edma_err_cause=%08x", edma_err_cause); 1831 ata_port_printk(ap, KERN_INFO, "%s: err_cause=%08x pp_flags=0x%x\n",
1832 __func__, edma_err_cause, pp->pp_flags);
1833
1834 if (edma_err_cause & EDMA_ERR_DEV) {
1835 /*
1836 * Device errors during FIS-based switching operation
1837 * require special handling.
1838 */
1839 if (mv_handle_dev_err(ap, edma_err_cause))
1840 return;
1841 }
1542 1842
1843 qc = mv_get_active_qc(ap);
1844 ata_ehi_clear_desc(ehi);
1845 ata_ehi_push_desc(ehi, "edma_err_cause=%08x pp_flags=%08x",
1846 edma_err_cause, pp->pp_flags);
1543 /* 1847 /*
1544 * All generations share these EDMA error cause bits: 1848 * All generations share these EDMA error cause bits:
1545 */ 1849 */
1546 if (edma_err_cause & EDMA_ERR_DEV) 1850 if (edma_err_cause & EDMA_ERR_DEV) {
1547 err_mask |= AC_ERR_DEV; 1851 err_mask |= AC_ERR_DEV;
1852 action |= ATA_EH_RESET;
1853 ata_ehi_push_desc(ehi, "dev error");
1854 }
1548 if (edma_err_cause & (EDMA_ERR_D_PAR | EDMA_ERR_PRD_PAR | 1855 if (edma_err_cause & (EDMA_ERR_D_PAR | EDMA_ERR_PRD_PAR |
1549 EDMA_ERR_CRQB_PAR | EDMA_ERR_CRPB_PAR | 1856 EDMA_ERR_CRQB_PAR | EDMA_ERR_CRPB_PAR |
1550 EDMA_ERR_INTRL_PAR)) { 1857 EDMA_ERR_INTRL_PAR)) {
@@ -1576,13 +1883,6 @@ static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
1576 ata_ehi_push_desc(ehi, "EDMA self-disable"); 1883 ata_ehi_push_desc(ehi, "EDMA self-disable");
1577 } 1884 }
1578 if (edma_err_cause & EDMA_ERR_SERR) { 1885 if (edma_err_cause & EDMA_ERR_SERR) {
1579 /*
1580 * Ensure that we read our own SCR, not a pmp link SCR:
1581 */
1582 ap->ops->scr_read(ap, SCR_ERROR, &serr);
1583 /*
1584 * Don't clear SError here; leave it for libata-eh:
1585 */
1586 ata_ehi_push_desc(ehi, "SError=%08x", serr); 1886 ata_ehi_push_desc(ehi, "SError=%08x", serr);
1587 err_mask |= AC_ERR_ATA_BUS; 1887 err_mask |= AC_ERR_ATA_BUS;
1588 action |= ATA_EH_RESET; 1888 action |= ATA_EH_RESET;
@@ -1602,10 +1902,29 @@ static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
1602 else 1902 else
1603 ehi->err_mask |= err_mask; 1903 ehi->err_mask |= err_mask;
1604 1904
1605 if (edma_err_cause & eh_freeze_mask) 1905 if (err_mask == AC_ERR_DEV) {
1906 /*
1907 * Cannot do ata_port_freeze() here,
1908 * because it would kill PIO access,
1909 * which is needed for further diagnosis.
1910 */
1911 mv_eh_freeze(ap);
1912 abort = 1;
1913 } else if (edma_err_cause & eh_freeze_mask) {
1914 /*
1915 * Note to self: ata_port_freeze() calls ata_port_abort()
1916 */
1606 ata_port_freeze(ap); 1917 ata_port_freeze(ap);
1607 else 1918 } else {
1608 ata_port_abort(ap); 1919 abort = 1;
1920 }
1921
1922 if (abort) {
1923 if (qc)
1924 ata_link_abort(qc->dev->link);
1925 else
1926 ata_port_abort(ap);
1927 }
1609} 1928}
1610 1929
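
A standalone sketch (illustrative names) of the freeze-versus-abort
policy introduced in mv_err_intr() above: a pure device error gets the
driver's own soft freeze so PIO stays usable for diagnosis, freeze-class
causes freeze the whole port, and everything else just aborts.

#include <stdint.h>

enum eh_action { EH_SOFT_FREEZE_THEN_ABORT, EH_PORT_FREEZE, EH_ABORT };

static enum eh_action pick_eh_action(uint32_t err_mask, uint32_t err_dev,
				     uint32_t err_cause, uint32_t freeze_mask)
{
	if (err_mask == err_dev)
		return EH_SOFT_FREEZE_THEN_ABORT; /* keep PIO for diagnosis */
	if (err_cause & freeze_mask)
		return EH_PORT_FREEZE;		  /* ata_port_freeze() path */
	return EH_ABORT;			  /* link/port abort path */
}
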
1611static void mv_process_crpb_response(struct ata_port *ap, 1930static void mv_process_crpb_response(struct ata_port *ap,
@@ -1632,8 +1951,9 @@ static void mv_process_crpb_response(struct ata_port *ap,
1632 } 1951 }
1633 } 1952 }
1634 ata_status = edma_status >> CRPB_FLAG_STATUS_SHIFT; 1953 ata_status = edma_status >> CRPB_FLAG_STATUS_SHIFT;
1635 qc->err_mask |= ac_err_mask(ata_status); 1954 if (!ac_err_mask(ata_status))
1636 ata_qc_complete(qc); 1955 ata_qc_complete(qc);
1956 /* else: leave it for mv_err_intr() */
1637 } else { 1957 } else {
1638 ata_port_printk(ap, KERN_ERR, "%s: no qc for tag=%d\n", 1958 ata_port_printk(ap, KERN_ERR, "%s: no qc for tag=%d\n",
1639 __func__, tag); 1959 __func__, tag);
@@ -1677,6 +1997,44 @@ static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp
1677 port_mmio + EDMA_RSP_Q_OUT_PTR_OFS); 1997 port_mmio + EDMA_RSP_Q_OUT_PTR_OFS);
1678} 1998}
1679 1999
2000static void mv_port_intr(struct ata_port *ap, u32 port_cause)
2001{
2002 struct mv_port_priv *pp;
2003 int edma_was_enabled;
2004
2005 if (!ap || (ap->flags & ATA_FLAG_DISABLED)) {
2006 mv_unexpected_intr(ap, 0);
2007 return;
2008 }
2009 /*
2010 * Grab a snapshot of the EDMA_EN flag setting,
2011 * so that we have a consistent view for this port,
2012 * even if one of the routines we call changes it.
2013 */
2014 pp = ap->private_data;
2015 edma_was_enabled = (pp->pp_flags & MV_PP_FLAG_EDMA_EN);
2016 /*
2017 * Process completed CRPB response(s) before other events.
2018 */
2019 if (edma_was_enabled && (port_cause & DONE_IRQ)) {
2020 mv_process_crpb_entries(ap, pp);
2021 if (pp->pp_flags & MV_PP_FLAG_DELAYED_EH)
2022 mv_handle_fbs_ncq_dev_err(ap);
2023 }
2024 /*
2025 * Handle chip-reported errors, or continue on to handle PIO.
2026 */
2027 if (unlikely(port_cause & ERR_IRQ)) {
2028 mv_err_intr(ap);
2029 } else if (!edma_was_enabled) {
2030 struct ata_queued_cmd *qc = mv_get_active_qc(ap);
2031 if (qc)
2032 ata_sff_host_intr(ap, qc);
2033 else
2034 mv_unexpected_intr(ap, edma_was_enabled);
2035 }
2036}
2037
1680/** 2038/**
1681 * mv_host_intr - Handle all interrupts on the given host controller 2039 * mv_host_intr - Handle all interrupts on the given host controller
1682 * @host: host specific structure 2040 * @host: host specific structure
@@ -1688,66 +2046,58 @@ static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp
1688static int mv_host_intr(struct ata_host *host, u32 main_irq_cause) 2046static int mv_host_intr(struct ata_host *host, u32 main_irq_cause)
1689{ 2047{
1690 struct mv_host_priv *hpriv = host->private_data; 2048 struct mv_host_priv *hpriv = host->private_data;
1691 void __iomem *mmio = hpriv->base, *hc_mmio = NULL; 2049 void __iomem *mmio = hpriv->base, *hc_mmio;
1692 u32 hc_irq_cause = 0;
1693 unsigned int handled = 0, port; 2050 unsigned int handled = 0, port;
1694 2051
1695 for (port = 0; port < hpriv->n_ports; port++) { 2052 for (port = 0; port < hpriv->n_ports; port++) {
1696 struct ata_port *ap = host->ports[port]; 2053 struct ata_port *ap = host->ports[port];
1697 struct mv_port_priv *pp; 2054 unsigned int p, shift, hardport, port_cause;
1698 unsigned int shift, hardport, port_cause; 2055
1699 /*
1700 * When we move to the second hc, flag our cached
1701 * copies of hc_mmio (and hc_irq_cause) as invalid again.
1702 */
1703 if (port == MV_PORTS_PER_HC)
1704 hc_mmio = NULL;
1705 /*
1706 * Do nothing if port is not interrupting or is disabled:
1707 */
1708 MV_PORT_TO_SHIFT_AND_HARDPORT(port, shift, hardport); 2056 MV_PORT_TO_SHIFT_AND_HARDPORT(port, shift, hardport);
1709 port_cause = (main_irq_cause >> shift) & (DONE_IRQ | ERR_IRQ);
1710 if (!port_cause || !ap || (ap->flags & ATA_FLAG_DISABLED))
1711 continue;
1712 /* 2057 /*
1713 * Each hc within the host has its own hc_irq_cause register. 2058 * Each hc within the host has its own hc_irq_cause register,
1714 * We defer reading it until we know we need it, right now: 2059 * where the interrupting ports' bits get ack'd.
1715 *
1716 * FIXME later: we don't really need to read this register
1717 * (some logic changes required below if we go that way),
1718 * because it doesn't tell us anything new. But we do need
1719 * to write to it, outside the top of this loop,
1720 * to reset the interrupt triggers for next time.
1721 */ 2060 */
1722 if (!hc_mmio) { 2061 if (hardport == 0) { /* first port on this hc ? */
2062 u32 hc_cause = (main_irq_cause >> shift) & HC0_IRQ_PEND;
2063 u32 port_mask, ack_irqs;
2064 /*
2065 * Skip this entire hc if nothing pending for any ports
2066 */
2067 if (!hc_cause) {
2068 port += MV_PORTS_PER_HC - 1;
2069 continue;
2070 }
2071 /*
2072 * We don't need/want to read the hc_irq_cause register,
2073 * because doing so hurts performance, and
2074 * main_irq_cause already gives us everything we need.
2075 *
2076 * But we do have to *write* to the hc_irq_cause to ack
2077 * the ports that we are handling this time through.
2078 *
2079 * This requires that we create a bitmap for those
2080 * ports which interrupted us, and use that bitmap
2081 * to ack (only) those ports via hc_irq_cause.
2082 */
2083 ack_irqs = 0;
2084 for (p = 0; p < MV_PORTS_PER_HC; ++p) {
2085 if ((port + p) >= hpriv->n_ports)
2086 break;
2087 port_mask = (DONE_IRQ | ERR_IRQ) << (p * 2);
2088 if (hc_cause & port_mask)
2089 ack_irqs |= (DMA_IRQ | DEV_IRQ) << p;
2090 }
1723 hc_mmio = mv_hc_base_from_port(mmio, port); 2091 hc_mmio = mv_hc_base_from_port(mmio, port);
1724 hc_irq_cause = readl(hc_mmio + HC_IRQ_CAUSE_OFS); 2092 writelfl(~ack_irqs, hc_mmio + HC_IRQ_CAUSE_OFS);
1725 writelfl(~hc_irq_cause, hc_mmio + HC_IRQ_CAUSE_OFS);
1726 handled = 1; 2093 handled = 1;
1727 } 2094 }
1728 /* 2095 /*
1729 * Process completed CRPB response(s) before other events. 2096 * Handle interrupts signalled for this port:
1730 */
1731 pp = ap->private_data;
1732 if (hc_irq_cause & (DMA_IRQ << hardport)) {
1733 if (pp->pp_flags & MV_PP_FLAG_EDMA_EN)
1734 mv_process_crpb_entries(ap, pp);
1735 }
1736 /*
1737 * Handle chip-reported errors, or continue on to handle PIO.
1738 */ 2097 */
1739 if (unlikely(port_cause & ERR_IRQ)) { 2098 port_cause = (main_irq_cause >> shift) & (DONE_IRQ | ERR_IRQ);
1740 mv_err_intr(ap, mv_get_active_qc(ap)); 2099 if (port_cause)
1741 } else if (hc_irq_cause & (DEV_IRQ << hardport)) { 2100 mv_port_intr(ap, port_cause);
1742 if (!(pp->pp_flags & MV_PP_FLAG_EDMA_EN)) {
1743 struct ata_queued_cmd *qc = mv_get_active_qc(ap);
1744 if (qc) {
1745 ata_sff_host_intr(ap, qc);
1746 continue;
1747 }
1748 }
1749 mv_unexpected_intr(ap);
1750 }
1751 } 2101 }
1752 return handled; 2102 return handled;
1753} 2103}
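
A standalone sketch of the ack-bitmap construction above. Per the
hunk's comments, each port contributes two cause bits
(DONE_IRQ | ERR_IRQ) at bit position p * 2 within the hc's slice of
main_irq_cause, and is ack'd via (DMA_IRQ | DEV_IRQ) << p in
hc_irq_cause; the bit values below are placeholders, not the chip's.

#include <stdint.h>

#define DONE_IRQ (1u << 0)	/* placeholders for the real definitions */
#define ERR_IRQ  (1u << 1)
#define DMA_IRQ  (1u << 0)
#define DEV_IRQ  (1u << 8)

static uint32_t build_ack_irqs(uint32_t hc_cause, unsigned int nports)
{
	uint32_t ack_irqs = 0;
	unsigned int p;

	for (p = 0; p < nports; p++) {
		uint32_t port_mask = (DONE_IRQ | ERR_IRQ) << (p * 2);

		if (hc_cause & port_mask)	/* this port interrupted us */
			ack_irqs |= (DMA_IRQ | DEV_IRQ) << p;
	}
	return ack_irqs;	/* write ~ack_irqs to hc_irq_cause to ack */
}
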
@@ -1894,7 +2244,7 @@ static void mv5_reset_bus(struct ata_host *host, void __iomem *mmio)
1894 2244
1895static void mv5_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio) 2245static void mv5_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio)
1896{ 2246{
1897 writel(0x0fcfffff, mmio + MV_FLASH_CTL); 2247 writel(0x0fcfffff, mmio + MV_FLASH_CTL_OFS);
1898} 2248}
1899 2249
1900static void mv5_read_preamp(struct mv_host_priv *hpriv, int idx, 2250static void mv5_read_preamp(struct mv_host_priv *hpriv, int idx,
@@ -1913,7 +2263,7 @@ static void mv5_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio)
1913{ 2263{
1914 u32 tmp; 2264 u32 tmp;
1915 2265
1916 writel(0, mmio + MV_GPIO_PORT_CTL); 2266 writel(0, mmio + MV_GPIO_PORT_CTL_OFS);
1917 2267
1918 /* FIXME: handle MV_HP_ERRATA_50XXB2 errata */ 2268 /* FIXME: handle MV_HP_ERRATA_50XXB2 errata */
1919 2269
@@ -1931,14 +2281,14 @@ static void mv5_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
1931 int fix_apm_sq = (hpriv->hp_flags & MV_HP_ERRATA_50XXB0); 2281 int fix_apm_sq = (hpriv->hp_flags & MV_HP_ERRATA_50XXB0);
1932 2282
1933 if (fix_apm_sq) { 2283 if (fix_apm_sq) {
1934 tmp = readl(phy_mmio + MV5_LT_MODE); 2284 tmp = readl(phy_mmio + MV5_LTMODE_OFS);
1935 tmp |= (1 << 19); 2285 tmp |= (1 << 19);
1936 writel(tmp, phy_mmio + MV5_LT_MODE); 2286 writel(tmp, phy_mmio + MV5_LTMODE_OFS);
1937 2287
1938 tmp = readl(phy_mmio + MV5_PHY_CTL); 2288 tmp = readl(phy_mmio + MV5_PHY_CTL_OFS);
1939 tmp &= ~0x3; 2289 tmp &= ~0x3;
1940 tmp |= 0x1; 2290 tmp |= 0x1;
1941 writel(tmp, phy_mmio + MV5_PHY_CTL); 2291 writel(tmp, phy_mmio + MV5_PHY_CTL_OFS);
1942 } 2292 }
1943 2293
1944 tmp = readl(phy_mmio + MV5_PHY_MODE); 2294 tmp = readl(phy_mmio + MV5_PHY_MODE);
@@ -1956,11 +2306,6 @@ static void mv5_reset_hc_port(struct mv_host_priv *hpriv, void __iomem *mmio,
1956{ 2306{
1957 void __iomem *port_mmio = mv_port_base(mmio, port); 2307 void __iomem *port_mmio = mv_port_base(mmio, port);
1958 2308
1959 /*
1960 * The datasheet warns against setting ATA_RST when EDMA is active
1961 * (but doesn't say what the problem might be). So we first try
1962 * to disable the EDMA engine before doing the ATA_RST operation.
1963 */
1964 mv_reset_channel(hpriv, mmio, port); 2309 mv_reset_channel(hpriv, mmio, port);
1965 2310
1966 ZERO(0x028); /* command */ 2311 ZERO(0x028); /* command */
@@ -1975,7 +2320,7 @@ static void mv5_reset_hc_port(struct mv_host_priv *hpriv, void __iomem *mmio,
1975 ZERO(0x024); /* respq outp */ 2320 ZERO(0x024); /* respq outp */
1976 ZERO(0x020); /* respq inp */ 2321 ZERO(0x020); /* respq inp */
1977 ZERO(0x02c); /* test control */ 2322 ZERO(0x02c); /* test control */
1978 writel(0xbc, port_mmio + EDMA_IORDY_TMOUT); 2323 writel(0xbc, port_mmio + EDMA_IORDY_TMOUT_OFS);
1979} 2324}
1980#undef ZERO 2325#undef ZERO
1981 2326
@@ -2021,13 +2366,13 @@ static void mv_reset_pci_bus(struct ata_host *host, void __iomem *mmio)
2021 struct mv_host_priv *hpriv = host->private_data; 2366 struct mv_host_priv *hpriv = host->private_data;
2022 u32 tmp; 2367 u32 tmp;
2023 2368
2024 tmp = readl(mmio + MV_PCI_MODE); 2369 tmp = readl(mmio + MV_PCI_MODE_OFS);
2025 tmp &= 0xff00ffff; 2370 tmp &= 0xff00ffff;
2026 writel(tmp, mmio + MV_PCI_MODE); 2371 writel(tmp, mmio + MV_PCI_MODE_OFS);
2027 2372
2028 ZERO(MV_PCI_DISC_TIMER); 2373 ZERO(MV_PCI_DISC_TIMER);
2029 ZERO(MV_PCI_MSI_TRIGGER); 2374 ZERO(MV_PCI_MSI_TRIGGER);
2030 writel(0x000100ff, mmio + MV_PCI_XBAR_TMOUT); 2375 writel(0x000100ff, mmio + MV_PCI_XBAR_TMOUT_OFS);
2031 ZERO(PCI_HC_MAIN_IRQ_MASK_OFS); 2376 ZERO(PCI_HC_MAIN_IRQ_MASK_OFS);
2032 ZERO(MV_PCI_SERR_MASK); 2377 ZERO(MV_PCI_SERR_MASK);
2033 ZERO(hpriv->irq_cause_ofs); 2378 ZERO(hpriv->irq_cause_ofs);
@@ -2045,10 +2390,10 @@ static void mv6_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio)
2045 2390
2046 mv5_reset_flash(hpriv, mmio); 2391 mv5_reset_flash(hpriv, mmio);
2047 2392
2048 tmp = readl(mmio + MV_GPIO_PORT_CTL); 2393 tmp = readl(mmio + MV_GPIO_PORT_CTL_OFS);
2049 tmp &= 0x3; 2394 tmp &= 0x3;
2050 tmp |= (1 << 5) | (1 << 6); 2395 tmp |= (1 << 5) | (1 << 6);
2051 writel(tmp, mmio + MV_GPIO_PORT_CTL); 2396 writel(tmp, mmio + MV_GPIO_PORT_CTL_OFS);
2052} 2397}
2053 2398
2054/** 2399/**
@@ -2121,7 +2466,7 @@ static void mv6_read_preamp(struct mv_host_priv *hpriv, int idx,
2121 void __iomem *port_mmio; 2466 void __iomem *port_mmio;
2122 u32 tmp; 2467 u32 tmp;
2123 2468
2124 tmp = readl(mmio + MV_RESET_CFG); 2469 tmp = readl(mmio + MV_RESET_CFG_OFS);
2125 if ((tmp & (1 << 0)) == 0) { 2470 if ((tmp & (1 << 0)) == 0) {
2126 hpriv->signal[idx].amps = 0x7 << 8; 2471 hpriv->signal[idx].amps = 0x7 << 8;
2127 hpriv->signal[idx].pre = 0x1 << 5; 2472 hpriv->signal[idx].pre = 0x1 << 5;
@@ -2137,7 +2482,7 @@ static void mv6_read_preamp(struct mv_host_priv *hpriv, int idx,
2137 2482
2138static void mv6_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio) 2483static void mv6_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio)
2139{ 2484{
2140 writel(0x00000060, mmio + MV_GPIO_PORT_CTL); 2485 writel(0x00000060, mmio + MV_GPIO_PORT_CTL_OFS);
2141} 2486}
2142 2487
2143static void mv6_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio, 2488static void mv6_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
@@ -2235,11 +2580,6 @@ static void mv_soc_reset_hc_port(struct mv_host_priv *hpriv,
2235{ 2580{
2236 void __iomem *port_mmio = mv_port_base(mmio, port); 2581 void __iomem *port_mmio = mv_port_base(mmio, port);
2237 2582
2238 /*
2239 * The datasheet warns against setting ATA_RST when EDMA is active
2240 * (but doesn't say what the problem might be). So we first try
2241 * to disable the EDMA engine before doing the ATA_RST operation.
2242 */
2243 mv_reset_channel(hpriv, mmio, port); 2583 mv_reset_channel(hpriv, mmio, port);
2244 2584
2245 ZERO(0x028); /* command */ 2585 ZERO(0x028); /* command */
@@ -2254,7 +2594,7 @@ static void mv_soc_reset_hc_port(struct mv_host_priv *hpriv,
2254 ZERO(0x024); /* respq outp */ 2594 ZERO(0x024); /* respq outp */
2255 ZERO(0x020); /* respq inp */ 2595 ZERO(0x020); /* respq inp */
2256 ZERO(0x02c); /* test control */ 2596 ZERO(0x02c); /* test control */
2257 writel(0xbc, port_mmio + EDMA_IORDY_TMOUT); 2597 writel(0xbc, port_mmio + EDMA_IORDY_TMOUT_OFS);
2258} 2598}
2259 2599
2260#undef ZERO 2600#undef ZERO
@@ -2297,38 +2637,39 @@ static void mv_soc_reset_bus(struct ata_host *host, void __iomem *mmio)
2297 return; 2637 return;
2298} 2638}
2299 2639
2300static void mv_setup_ifctl(void __iomem *port_mmio, int want_gen2i) 2640static void mv_setup_ifcfg(void __iomem *port_mmio, int want_gen2i)
2301{ 2641{
2302 u32 ifctl = readl(port_mmio + SATA_INTERFACE_CFG); 2642 u32 ifcfg = readl(port_mmio + SATA_INTERFACE_CFG_OFS);
2303 2643
2304 ifctl = (ifctl & 0xf7f) | 0x9b1000; /* from chip spec */ 2644 ifcfg = (ifcfg & 0xf7f) | 0x9b1000; /* from chip spec */
2305 if (want_gen2i) 2645 if (want_gen2i)
2306 ifctl |= (1 << 7); /* enable gen2i speed */ 2646 ifcfg |= (1 << 7); /* enable gen2i speed */
2307 writelfl(ifctl, port_mmio + SATA_INTERFACE_CFG); 2647 writelfl(ifcfg, port_mmio + SATA_INTERFACE_CFG_OFS);
2308} 2648}
2309 2649
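
A sketch of the register arithmetic inside mv_setup_ifcfg() above; the
0xf7f mask and 0x9b1000 value are copied from the hunk, everything else
is illustrative.

#include <stdint.h>

static uint32_t ifcfg_value(uint32_t ifcfg, int want_gen2i)
{
	ifcfg = (ifcfg & 0xf7f) | 0x9b1000;	/* per chip spec */
	if (want_gen2i)
		ifcfg |= (1u << 7);		/* enable gen2i link speed */
	return ifcfg;				/* written back with writelfl() */
}
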
2310/*
2311 * Caller must ensure that EDMA is not active,
2312 * by first doing mv_stop_edma() where needed.
2313 */
2314static void mv_reset_channel(struct mv_host_priv *hpriv, void __iomem *mmio, 2650static void mv_reset_channel(struct mv_host_priv *hpriv, void __iomem *mmio,
2315 unsigned int port_no) 2651 unsigned int port_no)
2316{ 2652{
2317 void __iomem *port_mmio = mv_port_base(mmio, port_no); 2653 void __iomem *port_mmio = mv_port_base(mmio, port_no);
2318 2654
2655 /*
2656 * The datasheet warns against setting EDMA_RESET when EDMA is active
2657 * (but doesn't say what the problem might be). So we first try
2658 * to disable the EDMA engine before doing the EDMA_RESET operation.
2659 */
2319 mv_stop_edma_engine(port_mmio); 2660 mv_stop_edma_engine(port_mmio);
2320 writelfl(ATA_RST, port_mmio + EDMA_CMD_OFS); 2661 writelfl(EDMA_RESET, port_mmio + EDMA_CMD_OFS);
2321 2662
2322 if (!IS_GEN_I(hpriv)) { 2663 if (!IS_GEN_I(hpriv)) {
2323 /* Enable 3.0gb/s link speed */ 2664 /* Enable 3.0gb/s link speed: this survives EDMA_RESET */
2324 mv_setup_ifctl(port_mmio, 1); 2665 mv_setup_ifcfg(port_mmio, 1);
2325 } 2666 }
2326 /* 2667 /*
2327 * Strobing ATA_RST here causes a hard reset of the SATA transport, 2668 * Strobing EDMA_RESET here causes a hard reset of the SATA transport,
2328 * link, and physical layers. It resets all SATA interface registers 2669 * link, and physical layers. It resets all SATA interface registers
2329 * (except for SATA_INTERFACE_CFG), and issues a COMRESET to the dev. 2670 * (except for SATA_INTERFACE_CFG), and issues a COMRESET to the dev.
2330 */ 2671 */
2331 writelfl(ATA_RST, port_mmio + EDMA_CMD_OFS); 2672 writelfl(EDMA_RESET, port_mmio + EDMA_CMD_OFS);
2332 udelay(25); /* allow reset propagation */ 2673 udelay(25); /* allow reset propagation */
2333 writelfl(0, port_mmio + EDMA_CMD_OFS); 2674 writelfl(0, port_mmio + EDMA_CMD_OFS);
2334 2675
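
A sketch of the reset strobe sequence above, with trivial stand-ins for
writelfl()/udelay() so it is self-contained; only the shape of the
sequence is taken from the hunk.

#include <stdint.h>

static void mmio_write32(volatile uint32_t *reg, uint32_t val)
{
	*reg = val;		/* stand-in for writelfl() */
}

static void delay_us(unsigned int us)
{
	(void)us;		/* stand-in for udelay() */
}

static void strobe_channel_reset(volatile uint32_t *edma_cmd,
				 uint32_t reset_bit)
{
	mmio_write32(edma_cmd, reset_bit);	/* assert EDMA_RESET */
	delay_us(25);				/* allow reset propagation */
	mmio_write32(edma_cmd, 0);		/* release the reset */
}
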
@@ -2392,7 +2733,7 @@ static int mv_hardreset(struct ata_link *link, unsigned int *class,
2392 sata_scr_read(link, SCR_STATUS, &sstatus); 2733 sata_scr_read(link, SCR_STATUS, &sstatus);
2393 if (!IS_GEN_I(hpriv) && ++attempts >= 5 && sstatus == 0x121) { 2734 if (!IS_GEN_I(hpriv) && ++attempts >= 5 && sstatus == 0x121) {
2394 /* Force 1.5gb/s link speed and try again */ 2735 /* Force 1.5gb/s link speed and try again */
2395 mv_setup_ifctl(mv_ap_base(ap), 0); 2736 mv_setup_ifcfg(mv_ap_base(ap), 0);
2396 if (time_after(jiffies + HZ, deadline)) 2737 if (time_after(jiffies + HZ, deadline))
2397 extra = HZ; /* only extend it once, max */ 2738 extra = HZ; /* only extend it once, max */
2398 } 2739 }
@@ -2493,6 +2834,34 @@ static void mv_port_init(struct ata_ioports *port, void __iomem *port_mmio)
2493 readl(port_mmio + EDMA_ERR_IRQ_MASK_OFS)); 2834 readl(port_mmio + EDMA_ERR_IRQ_MASK_OFS));
2494} 2835}
2495 2836
2837static unsigned int mv_in_pcix_mode(struct ata_host *host)
2838{
2839 struct mv_host_priv *hpriv = host->private_data;
2840 void __iomem *mmio = hpriv->base;
2841 u32 reg;
2842
2843 if (!HAS_PCI(host) || IS_PCIE(hpriv))
2844 return 0; /* not PCI-X capable */
2845 reg = readl(mmio + MV_PCI_MODE_OFS);
2846 if ((reg & MV_PCI_MODE_MASK) == 0)
2847 return 0; /* conventional PCI mode */
2848 return 1; /* chip is in PCI-X mode */
2849}
2850
2851static int mv_pci_cut_through_okay(struct ata_host *host)
2852{
2853 struct mv_host_priv *hpriv = host->private_data;
2854 void __iomem *mmio = hpriv->base;
2855 u32 reg;
2856
2857 if (!mv_in_pcix_mode(host)) {
2858 reg = readl(mmio + PCI_COMMAND_OFS);
2859 if (reg & PCI_COMMAND_MRDTRIG)
2860 return 0; /* not okay */
2861 }
2862 return 1; /* okay */
2863}
2864
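
A sketch of the two checks above (register names and mask values are
placeholders): PCI-X mode is inferred from the PCI mode register, and
cut-through is vetoed only when the chip sits on conventional PCI with
memory-read triggering enabled in its PCI command register.

#include <stdint.h>

static int in_pcix_mode(uint32_t pci_mode_reg, uint32_t mode_mask)
{
	return (pci_mode_reg & mode_mask) != 0;
}

static int cut_through_okay(uint32_t pci_mode_reg, uint32_t mode_mask,
			    uint32_t pci_command, uint32_t mrdtrig_bit)
{
	if (!in_pcix_mode(pci_mode_reg, mode_mask) &&
	    (pci_command & mrdtrig_bit))
		return 0;	/* conventional PCI + MRDTRIG: not okay */
	return 1;		/* okay */
}
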
2496static int mv_chip_id(struct ata_host *host, unsigned int board_idx) 2865static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
2497{ 2866{
2498 struct pci_dev *pdev = to_pci_dev(host->dev); 2867 struct pci_dev *pdev = to_pci_dev(host->dev);
@@ -2560,7 +2929,7 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
2560 break; 2929 break;
2561 2930
2562 case chip_7042: 2931 case chip_7042:
2563 hp_flags |= MV_HP_PCIE; 2932 hp_flags |= MV_HP_PCIE | MV_HP_CUT_THROUGH;
2564 if (pdev->vendor == PCI_VENDOR_ID_TTI && 2933 if (pdev->vendor == PCI_VENDOR_ID_TTI &&
2565 (pdev->device == 0x2300 || pdev->device == 0x2310)) 2934 (pdev->device == 0x2300 || pdev->device == 0x2310))
2566 { 2935 {
@@ -2590,9 +2959,12 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
2590 " and avoid the final two gigabytes on" 2959 " and avoid the final two gigabytes on"
2591 " all RocketRAID BIOS initialized drives.\n"); 2960 " all RocketRAID BIOS initialized drives.\n");
2592 } 2961 }
2962 /* drop through */
2593 case chip_6042: 2963 case chip_6042:
2594 hpriv->ops = &mv6xxx_ops; 2964 hpriv->ops = &mv6xxx_ops;
2595 hp_flags |= MV_HP_GEN_IIE; 2965 hp_flags |= MV_HP_GEN_IIE;
2966 if (board_idx == chip_6042 && mv_pci_cut_through_okay(host))
2967 hp_flags |= MV_HP_CUT_THROUGH;
2596 2968
2597 switch (pdev->revision) { 2969 switch (pdev->revision) {
2598 case 0x0: 2970 case 0x0:
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index 4fbb56bcb1ee..358bb0be3c08 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c
@@ -175,8 +175,7 @@ int sysdev_driver_register(struct sysdev_class *cls, struct sysdev_driver *drv)
175 } 175 }
176 176
177 /* Check whether this driver has already been added to a class. */ 177 /* Check whether this driver has already been added to a class. */
178 if ((drv->entry.next != drv->entry.prev) || 178 if (drv->entry.next && !list_empty(&drv->entry)) {
179 (drv->entry.next != NULL)) {
180 printk(KERN_WARNING "sysdev: class %s: driver (%p) has already" 179 printk(KERN_WARNING "sysdev: class %s: driver (%p) has already"
181 " been registered to a class, something is wrong, but " 180 " been registered to a class, something is wrong, but "
182 "will forge on!\n", cls->name, drv); 181 "will forge on!\n", cls->name, drv);
diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c
index fd2db07a50fc..3b23270eaa65 100644
--- a/drivers/char/serial167.c
+++ b/drivers/char/serial167.c
@@ -1073,7 +1073,7 @@ static int cy_put_char(struct tty_struct *tty, unsigned char ch)
1073 return 0; 1073 return 0;
1074 1074
1075 if (!info->xmit_buf) 1075 if (!info->xmit_buf)
1076 return; 1076 return 0;
1077 1077
1078 local_irq_save(flags); 1078 local_irq_save(flags);
1079 if (info->xmit_cnt >= PAGE_SIZE - 1) { 1079 if (info->xmit_cnt >= PAGE_SIZE - 1) {
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index a9aa845dbe74..b27b13c5eb5a 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -97,7 +97,7 @@ extern int edac_debug_level;
97#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \ 97#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
98 PCI_DEVICE_ID_ ## vend ## _ ## dev 98 PCI_DEVICE_ID_ ## vend ## _ ## dev
99 99
100#define dev_name(dev) (dev)->dev_name 100#define edac_dev_name(dev) (dev)->dev_name
101 101
102/* memory devices */ 102/* memory devices */
103enum dev_type { 103enum dev_type {
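
Context for the rename above, sketched from memory, so treat the exact
driver-core signature as an assumption: a generic dev_name() accessor
for struct device was being introduced in the core, and a driver-local
macro of the same name would collide with it. Prefixing EDAC's accessor
sidesteps the clash:

struct edac_ctl_like { const char *dev_name; };	/* EDAC-private field */
#define edac_dev_name(d) ((d)->dev_name)	/* EDAC's accessor */
/* const char *dev_name(const struct device *dev);  -- driver core's */
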
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 63372fa7ecfe..5fcd3d89c75d 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -333,7 +333,7 @@ static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev)
333fail0: 333fail0:
334 edac_printk(KERN_WARNING, EDAC_MC, 334 edac_printk(KERN_WARNING, EDAC_MC,
335 "%s (%s) %s %s already assigned %d\n", 335 "%s (%s) %s %s already assigned %d\n",
336 rover->dev->bus_id, dev_name(rover), 336 rover->dev->bus_id, edac_dev_name(rover),
337 rover->mod_name, rover->ctl_name, rover->dev_idx); 337 rover->mod_name, rover->ctl_name, rover->dev_idx);
338 return 1; 338 return 1;
339 339
@@ -538,7 +538,7 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev)
538 "'%s': DEV '%s' (%s)\n", 538 "'%s': DEV '%s' (%s)\n",
539 edac_dev->mod_name, 539 edac_dev->mod_name,
540 edac_dev->ctl_name, 540 edac_dev->ctl_name,
541 dev_name(edac_dev), 541 edac_dev_name(edac_dev),
542 edac_op_state_to_string(edac_dev->op_state)); 542 edac_op_state_to_string(edac_dev->op_state));
543 543
544 mutex_unlock(&device_ctls_mutex); 544 mutex_unlock(&device_ctls_mutex);
@@ -599,7 +599,7 @@ struct edac_device_ctl_info *edac_device_del_device(struct device *dev)
599 edac_printk(KERN_INFO, EDAC_MC, 599 edac_printk(KERN_INFO, EDAC_MC,
600 "Removed device %d for %s %s: DEV %s\n", 600 "Removed device %d for %s %s: DEV %s\n",
601 edac_dev->dev_idx, 601 edac_dev->dev_idx,
602 edac_dev->mod_name, edac_dev->ctl_name, dev_name(edac_dev)); 602 edac_dev->mod_name, edac_dev->ctl_name, edac_dev_name(edac_dev));
603 603
604 return edac_dev; 604 return edac_dev;
605} 605}
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index a4cf1645f588..d110392d48f4 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -402,7 +402,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
402fail0: 402fail0:
403 edac_printk(KERN_WARNING, EDAC_MC, 403 edac_printk(KERN_WARNING, EDAC_MC,
404 "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, 404 "%s (%s) %s %s already assigned %d\n", p->dev->bus_id,
405 dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); 405 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
406 return 1; 406 return 1;
407 407
408fail1: 408fail1:
@@ -517,7 +517,7 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
517 517
518 /* Report action taken */ 518 /* Report action taken */
519 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" 519 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
520 " DEV %s\n", mci->mod_name, mci->ctl_name, dev_name(mci)); 520 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
521 521
522 mutex_unlock(&mem_ctls_mutex); 522 mutex_unlock(&mem_ctls_mutex);
523 return 0; 523 return 0;
@@ -565,7 +565,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
565 565
566 edac_printk(KERN_INFO, EDAC_MC, 566 edac_printk(KERN_INFO, EDAC_MC,
567 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, 567 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
568 mci->mod_name, mci->ctl_name, dev_name(mci)); 568 mci->mod_name, mci->ctl_name, edac_dev_name(mci));
569 569
570 return mci; 570 return mci;
571} 571}
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 9b24340b52e1..22ec9d5d4312 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -150,7 +150,7 @@ static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci)
150fail0: 150fail0:
151 edac_printk(KERN_WARNING, EDAC_PCI, 151 edac_printk(KERN_WARNING, EDAC_PCI,
152 "%s (%s) %s %s already assigned %d\n", 152 "%s (%s) %s %s already assigned %d\n",
153 rover->dev->bus_id, dev_name(rover), 153 rover->dev->bus_id, edac_dev_name(rover),
154 rover->mod_name, rover->ctl_name, rover->pci_idx); 154 rover->mod_name, rover->ctl_name, rover->pci_idx);
155 return 1; 155 return 1;
156 156
@@ -360,7 +360,7 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
360 " DEV '%s' (%s)\n", 360 " DEV '%s' (%s)\n",
361 pci->mod_name, 361 pci->mod_name,
362 pci->ctl_name, 362 pci->ctl_name,
363 dev_name(pci), edac_op_state_to_string(pci->op_state)); 363 edac_dev_name(pci), edac_op_state_to_string(pci->op_state));
364 364
365 mutex_unlock(&edac_pci_ctls_mutex); 365 mutex_unlock(&edac_pci_ctls_mutex);
366 return 0; 366 return 0;
@@ -415,7 +415,7 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
415 415
416 edac_printk(KERN_INFO, EDAC_PCI, 416 edac_printk(KERN_INFO, EDAC_PCI,
417 "Removed device %d for %s %s: DEV %s\n", 417 "Removed device %d for %s %s: DEV %s\n",
418 pci->pci_idx, pci->mod_name, pci->ctl_name, dev_name(pci)); 418 pci->pci_idx, pci->mod_name, pci->ctl_name, edac_dev_name(pci));
419 419
420 return pci; 420 return pci;
421} 421}
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 591deda3f86a..34b0d4f26b58 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1355,12 +1355,6 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
1355 if (hwif->chipset != ide_dtc2278 || hwif->channel == 0) 1355 if (hwif->chipset != ide_dtc2278 || hwif->channel == 0)
1356 hwif->port_ops = d->port_ops; 1356 hwif->port_ops = d->port_ops;
1357 1357
1358 if ((d->host_flags & IDE_HFLAG_SERIALIZE) ||
1359 ((d->host_flags & IDE_HFLAG_SERIALIZE_DMA) && hwif->dma_base)) {
1360 if (hwif->mate)
1361 hwif->mate->serialized = hwif->serialized = 1;
1362 }
1363
1364 hwif->swdma_mask = d->swdma_mask; 1358 hwif->swdma_mask = d->swdma_mask;
1365 hwif->mwdma_mask = d->mwdma_mask; 1359 hwif->mwdma_mask = d->mwdma_mask;
1366 hwif->ultra_mask = d->udma_mask; 1360 hwif->ultra_mask = d->udma_mask;
@@ -1382,6 +1376,12 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
1382 hwif->dma_ops = d->dma_ops; 1376 hwif->dma_ops = d->dma_ops;
1383 } 1377 }
1384 1378
1379 if ((d->host_flags & IDE_HFLAG_SERIALIZE) ||
1380 ((d->host_flags & IDE_HFLAG_SERIALIZE_DMA) && hwif->dma_base)) {
1381 if (hwif->mate)
1382 hwif->mate->serialized = hwif->serialized = 1;
1383 }
1384
1385 if (d->host_flags & IDE_HFLAG_RQSIZE_256) 1385 if (d->host_flags & IDE_HFLAG_RQSIZE_256)
1386 hwif->rqsize = 256; 1386 hwif->rqsize = 256;
1387 1387
diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c
index 83555ca513b5..9e449a0c623f 100644
--- a/drivers/ide/legacy/falconide.c
+++ b/drivers/ide/legacy/falconide.c
@@ -61,7 +61,7 @@ static void falconide_output_data(ide_drive_t *drive, struct request *rq,
61 unsigned long data_addr = drive->hwif->io_ports.data_addr; 61 unsigned long data_addr = drive->hwif->io_ports.data_addr;
62 62
63 if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS) 63 if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS)
64 return outsw(data_adr, buf, (len + 1) / 2); 64 return outsw(data_addr, buf, (len + 1) / 2);
65 65
66 outsw_swapw(data_addr, buf, (len + 1) / 2); 66 outsw_swapw(data_addr, buf, (len + 1) / 2);
67} 67}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index ed2ee4ba4b7c..5fd8506a8657 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -359,9 +359,10 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
359 cq->sw_wptr++; 359 cq->sw_wptr++;
360} 360}
361 361
362void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) 362int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
363{ 363{
364 u32 ptr; 364 u32 ptr;
365 int flushed = 0;
365 366
366 PDBG("%s wq %p cq %p\n", __func__, wq, cq); 367 PDBG("%s wq %p cq %p\n", __func__, wq, cq);
367 368
@@ -369,8 +370,11 @@ void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
369 PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, 370 PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
370 wq->rq_rptr, wq->rq_wptr, count); 371 wq->rq_rptr, wq->rq_wptr, count);
371 ptr = wq->rq_rptr + count; 372 ptr = wq->rq_rptr + count;
372 while (ptr++ != wq->rq_wptr) 373 while (ptr++ != wq->rq_wptr) {
373 insert_recv_cqe(wq, cq); 374 insert_recv_cqe(wq, cq);
375 flushed++;
376 }
377 return flushed;
374} 378}
375 379
376static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, 380static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
@@ -394,9 +398,10 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
394 cq->sw_wptr++; 398 cq->sw_wptr++;
395} 399}
396 400
397void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) 401int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
398{ 402{
399 __u32 ptr; 403 __u32 ptr;
404 int flushed = 0;
400 struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); 405 struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
401 406
402 ptr = wq->sq_rptr + count; 407 ptr = wq->sq_rptr + count;
@@ -405,7 +410,9 @@ void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
405 insert_sq_cqe(wq, cq, sqp); 410 insert_sq_cqe(wq, cq, sqp);
406 sqp++; 411 sqp++;
407 ptr++; 412 ptr++;
413 flushed++;
408 } 414 }
415 return flushed;
409} 416}
410 417
411/* 418/*
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 2bcff7f5046e..69ab08ebc680 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -173,8 +173,8 @@ u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
173void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); 173void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
174int __init cxio_hal_init(void); 174int __init cxio_hal_init(void);
175void __exit cxio_hal_exit(void); 175void __exit cxio_hal_exit(void);
176void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); 176int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
177void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); 177int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
178void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); 178void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
179void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); 179void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
180void cxio_flush_hw_cq(struct t3_cq *cq); 180void cxio_flush_hw_cq(struct t3_cq *cq);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index d44a6df9ad8c..c325c44807e8 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -67,10 +67,10 @@ int peer2peer = 0;
67module_param(peer2peer, int, 0644); 67module_param(peer2peer, int, 0644);
68MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); 68MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
69 69
70static int ep_timeout_secs = 10; 70static int ep_timeout_secs = 60;
71module_param(ep_timeout_secs, int, 0644); 71module_param(ep_timeout_secs, int, 0644);
72MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " 72MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
73 "in seconds (default=10)"); 73 "in seconds (default=60)");
74 74
75static int mpa_rev = 1; 75static int mpa_rev = 1;
76module_param(mpa_rev, int, 0644); 76module_param(mpa_rev, int, 0644);
@@ -1650,8 +1650,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1650 release = 1; 1650 release = 1;
1651 break; 1651 break;
1652 case ABORTING: 1652 case ABORTING:
1653 break;
1654 case DEAD: 1653 case DEAD:
1654 break;
1655 default: 1655 default:
1656 BUG_ON(1); 1656 BUG_ON(1);
1657 break; 1657 break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 9b4be889c58e..79dbe5beae52 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -655,6 +655,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
655{ 655{
656 struct iwch_cq *rchp, *schp; 656 struct iwch_cq *rchp, *schp;
657 int count; 657 int count;
658 int flushed;
658 659
659 rchp = get_chp(qhp->rhp, qhp->attr.rcq); 660 rchp = get_chp(qhp->rhp, qhp->attr.rcq);
660 schp = get_chp(qhp->rhp, qhp->attr.scq); 661 schp = get_chp(qhp->rhp, qhp->attr.scq);
@@ -669,20 +670,22 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
669 spin_lock(&qhp->lock); 670 spin_lock(&qhp->lock);
670 cxio_flush_hw_cq(&rchp->cq); 671 cxio_flush_hw_cq(&rchp->cq);
671 cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); 672 cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
672 cxio_flush_rq(&qhp->wq, &rchp->cq, count); 673 flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
673 spin_unlock(&qhp->lock); 674 spin_unlock(&qhp->lock);
674 spin_unlock_irqrestore(&rchp->lock, *flag); 675 spin_unlock_irqrestore(&rchp->lock, *flag);
675 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); 676 if (flushed)
677 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
676 678
677 /* locking hierarchy: cq lock first, then qp lock. */ 679 /* locking hierarchy: cq lock first, then qp lock. */
678 spin_lock_irqsave(&schp->lock, *flag); 680 spin_lock_irqsave(&schp->lock, *flag);
679 spin_lock(&qhp->lock); 681 spin_lock(&qhp->lock);
680 cxio_flush_hw_cq(&schp->cq); 682 cxio_flush_hw_cq(&schp->cq);
681 cxio_count_scqes(&schp->cq, &qhp->wq, &count); 683 cxio_count_scqes(&schp->cq, &qhp->wq, &count);
682 cxio_flush_sq(&qhp->wq, &schp->cq, count); 684 flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
683 spin_unlock(&qhp->lock); 685 spin_unlock(&qhp->lock);
684 spin_unlock_irqrestore(&schp->lock, *flag); 686 spin_unlock_irqrestore(&schp->lock, *flag);
685 (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); 687 if (flushed)
688 (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
686 689
687 /* deref */ 690 /* deref */
688 if (atomic_dec_and_test(&qhp->refcnt)) 691 if (atomic_dec_and_test(&qhp->refcnt))
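
A sketch of the flush-counting idea above (illustrative types): the
flush helpers now report how many completion entries they synthesized,
so __flush_qp() can skip the completion upcall when there is nothing
new for the consumer to reap.

static int flush_range(unsigned int rptr, unsigned int wptr,
		       void (*insert_cqe)(void *), void *cq)
{
	int flushed = 0;
	unsigned int ptr = rptr;

	while (ptr++ != wptr) {		/* walk the outstanding WQEs */
		insert_cqe(cq);		/* synthesize a flush CQE */
		flushed++;
	}
	return flushed;			/* caller upcalls only if > 0 */
}
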
@@ -880,7 +883,6 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
880 ep = qhp->ep; 883 ep = qhp->ep;
881 get_ep(&ep->com); 884 get_ep(&ep->com);
882 } 885 }
883 flush_qp(qhp, &flag);
884 break; 886 break;
885 case IWCH_QP_STATE_TERMINATE: 887 case IWCH_QP_STATE_TERMINATE:
886 qhp->attr.state = IWCH_QP_STATE_TERMINATE; 888 qhp->attr.state = IWCH_QP_STATE_TERMINATE;
@@ -911,6 +913,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
911 } 913 }
912 switch (attrs->next_state) { 914 switch (attrs->next_state) {
913 case IWCH_QP_STATE_IDLE: 915 case IWCH_QP_STATE_IDLE:
916 flush_qp(qhp, &flag);
914 qhp->attr.state = IWCH_QP_STATE_IDLE; 917 qhp->attr.state = IWCH_QP_STATE_IDLE;
915 qhp->attr.llp_stream_handle = NULL; 918 qhp->attr.llp_stream_handle = NULL;
916 put_ep(&qhp->ep->com); 919 put_ep(&qhp->ep->com);
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 2515cbde7e65..bc3b37d2070f 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -101,7 +101,6 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
101 props->max_ee = limit_uint(rblock->max_rd_ee_context); 101 props->max_ee = limit_uint(rblock->max_rd_ee_context);
102 props->max_rdd = limit_uint(rblock->max_rd_domain); 102 props->max_rdd = limit_uint(rblock->max_rd_domain);
103 props->max_fmr = limit_uint(rblock->max_mr); 103 props->max_fmr = limit_uint(rblock->max_mr);
104 props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay);
105 props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); 104 props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp);
106 props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); 105 props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context);
107 props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); 106 props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
@@ -115,7 +114,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
115 } 114 }
116 115
117 props->max_pkeys = 16; 116 props->max_pkeys = 16;
118 props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay); 117 props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255);
119 props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); 118 props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp);
120 props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); 119 props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp);
121 props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); 120 props->max_mcast_grp = limit_uint(rblock->max_mcast_grp);
@@ -136,7 +135,7 @@ query_device1:
136 return ret; 135 return ret;
137} 136}
138 137
139static int map_mtu(struct ehca_shca *shca, u32 fw_mtu) 138static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
140{ 139{
141 switch (fw_mtu) { 140 switch (fw_mtu) {
142 case 0x1: 141 case 0x1:
@@ -156,7 +155,7 @@ static int map_mtu(struct ehca_shca *shca, u32 fw_mtu)
156 } 155 }
157} 156}
158 157
159static int map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) 158static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
160{ 159{
161 switch (vl_cap) { 160 switch (vl_cap) {
162 case 0x1: 161 case 0x1:
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 2f199c5c4a72..4521319b1406 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -246,7 +246,7 @@ err_mtt:
246 if (context) 246 if (context)
247 ib_umem_release(cq->umem); 247 ib_umem_release(cq->umem);
248 else 248 else
249 mlx4_ib_free_cq_buf(dev, &cq->buf, entries); 249 mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
250 250
251err_db: 251err_db:
252 if (!context) 252 if (!context)
@@ -434,7 +434,7 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq)
434 mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); 434 mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
435 ib_umem_release(mcq->umem); 435 ib_umem_release(mcq->umem);
436 } else { 436 } else {
437 mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1); 437 mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
438 mlx4_db_free(dev->dev, &mcq->db); 438 mlx4_db_free(dev->dev, &mcq->db);
439 } 439 }
440 440
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 9044f8803532..ca126fc2b853 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -334,6 +334,7 @@ struct ipoib_dev_priv {
334#endif 334#endif
335 int hca_caps; 335 int hca_caps;
336 struct ipoib_ethtool_st ethtool; 336 struct ipoib_ethtool_st ethtool;
337 struct timer_list poll_timer;
337}; 338};
338 339
339struct ipoib_ah { 340struct ipoib_ah {
@@ -404,6 +405,7 @@ extern struct workqueue_struct *ipoib_workqueue;
404 405
405int ipoib_poll(struct napi_struct *napi, int budget); 406int ipoib_poll(struct napi_struct *napi, int budget);
406void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); 407void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
408void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
407 409
408struct ipoib_ah *ipoib_create_ah(struct net_device *dev, 410struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
409 struct ib_pd *pd, struct ib_ah_attr *attr); 411 struct ib_pd *pd, struct ib_ah_attr *attr);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 97b815c1a3fc..f429bce24c20 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -461,6 +461,26 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
461 netif_rx_schedule(dev, &priv->napi); 461 netif_rx_schedule(dev, &priv->napi);
462} 462}
463 463
464static void drain_tx_cq(struct net_device *dev)
465{
466 struct ipoib_dev_priv *priv = netdev_priv(dev);
467 unsigned long flags;
468
469 spin_lock_irqsave(&priv->tx_lock, flags);
470 while (poll_tx(priv))
471 ; /* nothing */
472
473 if (netif_queue_stopped(dev))
474 mod_timer(&priv->poll_timer, jiffies + 1);
475
476 spin_unlock_irqrestore(&priv->tx_lock, flags);
477}
478
479void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
480{
481 drain_tx_cq((struct net_device *)dev_ptr);
482}
483
464static inline int post_send(struct ipoib_dev_priv *priv, 484static inline int post_send(struct ipoib_dev_priv *priv,
465 unsigned int wr_id, 485 unsigned int wr_id,
466 struct ib_ah *address, u32 qpn, 486 struct ib_ah *address, u32 qpn,
@@ -555,12 +575,22 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
555 else 575 else
556 priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; 576 priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
557 577
578 if (++priv->tx_outstanding == ipoib_sendq_size) {
579 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
580 if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
581 ipoib_warn(priv, "request notify on send CQ failed\n");
582 netif_stop_queue(dev);
583 }
584
558 if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), 585 if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
559 address->ah, qpn, tx_req, phead, hlen))) { 586 address->ah, qpn, tx_req, phead, hlen))) {
560 ipoib_warn(priv, "post_send failed\n"); 587 ipoib_warn(priv, "post_send failed\n");
561 ++dev->stats.tx_errors; 588 ++dev->stats.tx_errors;
589 --priv->tx_outstanding;
562 ipoib_dma_unmap_tx(priv->ca, tx_req); 590 ipoib_dma_unmap_tx(priv->ca, tx_req);
563 dev_kfree_skb_any(skb); 591 dev_kfree_skb_any(skb);
592 if (netif_queue_stopped(dev))
593 netif_wake_queue(dev);
564 } else { 594 } else {
565 dev->trans_start = jiffies; 595 dev->trans_start = jiffies;
566 596
@@ -568,14 +598,11 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
568 ++priv->tx_head; 598 ++priv->tx_head;
569 skb_orphan(skb); 599 skb_orphan(skb);
570 600
571 if (++priv->tx_outstanding == ipoib_sendq_size) {
572 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
573 netif_stop_queue(dev);
574 }
575 } 601 }
576 602
577 if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) 603 if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
578 poll_tx(priv); 604 while (poll_tx(priv))
605 ; /* nothing */
579} 606}
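
A sketch of the reordered queue-stop logic above (callbacks are
illustrative): the queue is stopped and send-CQ notification armed
before the work request is posted, so a completion racing with this
path still triggers a wakeup (via the completion handler or the drain
timer); on a failed post the counter is rolled back and the queue woken.

static int send_like(unsigned int *tx_outstanding, unsigned int sendq_size,
		     void (*arm_cq_notify)(void), void (*stop_queue)(void),
		     void (*wake_queue)(void), int (*post)(void))
{
	if (++(*tx_outstanding) == sendq_size) {
		arm_cq_notify();	/* completions will now notify us */
		stop_queue();		/* no new sends until drained */
	}
	if (post()) {
		--(*tx_outstanding);	/* roll back on failure */
		wake_queue();		/* in case we stopped just above */
		return -1;
	}
	return 0;
}
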
580 607
581static void __ipoib_reap_ah(struct net_device *dev) 608static void __ipoib_reap_ah(struct net_device *dev)
@@ -609,6 +636,11 @@ void ipoib_reap_ah(struct work_struct *work)
609 round_jiffies_relative(HZ)); 636 round_jiffies_relative(HZ));
610} 637}
611 638
639static void ipoib_ib_tx_timer_func(unsigned long ctx)
640{
641 drain_tx_cq((struct net_device *)ctx);
642}
643
612int ipoib_ib_dev_open(struct net_device *dev) 644int ipoib_ib_dev_open(struct net_device *dev)
613{ 645{
614 struct ipoib_dev_priv *priv = netdev_priv(dev); 646 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -645,6 +677,10 @@ int ipoib_ib_dev_open(struct net_device *dev)
645 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, 677 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
646 round_jiffies_relative(HZ)); 678 round_jiffies_relative(HZ));
647 679
680 init_timer(&priv->poll_timer);
681 priv->poll_timer.function = ipoib_ib_tx_timer_func;
682 priv->poll_timer.data = (unsigned long)dev;
683
648 set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); 684 set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
649 685
650 return 0; 686 return 0;
@@ -810,6 +846,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
810 ipoib_dbg(priv, "All sends and receives done.\n"); 846 ipoib_dbg(priv, "All sends and receives done.\n");
811 847
812timeout: 848timeout:
849 del_timer_sync(&priv->poll_timer);
813 qp_attr.qp_state = IB_QPS_RESET; 850 qp_attr.qp_state = IB_QPS_RESET;
814 if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) 851 if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
815 ipoib_warn(priv, "Failed to modify QP to RESET state\n"); 852 ipoib_warn(priv, "Failed to modify QP to RESET state\n");
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index c1e7ece1fd44..8766d29ce3b7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -187,7 +187,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
187 goto out_free_mr; 187 goto out_free_mr;
188 } 188 }
189 189
190 priv->send_cq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0); 190 priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
191 dev, ipoib_sendq_size, 0);
191 if (IS_ERR(priv->send_cq)) { 192 if (IS_ERR(priv->send_cq)) {
192 printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); 193 printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
193 goto out_free_recv_cq; 194 goto out_free_recv_cq;
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index 02b3ad8c0826..edfedd9a166c 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -69,6 +69,7 @@
69#include <linux/time.h> 69#include <linux/time.h>
70#include <linux/slab.h> 70#include <linux/slab.h>
71#include <linux/hil.h> 71#include <linux/hil.h>
72#include <linux/semaphore.h>
72#include <asm/io.h> 73#include <asm/io.h>
73#include <asm/system.h> 74#include <asm/system.h>
74 75
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index 20978205cd02..b8b9e44f7f4e 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -37,7 +37,7 @@
37#include <linux/device.h> 37#include <linux/device.h>
38#include <linux/kthread.h> 38#include <linux/kthread.h>
39#include <linux/platform_device.h> 39#include <linux/platform_device.h>
40#include <linux/semaphore.h> 40#include <linux/mutex.h>
41 41
42#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#ifdef CONFIG_PPC 43#ifdef CONFIG_PPC
@@ -102,7 +102,7 @@ static struct adb_handler {
102} adb_handler[16]; 102} adb_handler[16];
103 103
104/* 104/*
105 * The adb_handler_sem mutex protects all accesses to the original_address 105 * The adb_handler_mutex protects all accesses to the original_address
106 * and handler_id fields of adb_handler[i] for all i, and changes to the 106 * and handler_id fields of adb_handler[i] for all i, and changes to the
107 * handler field. 107 * handler field.
108 * Accesses to the handler field are protected by the adb_handler_lock 108 * Accesses to the handler field are protected by the adb_handler_lock
@@ -110,7 +110,7 @@ static struct adb_handler {
110 * time adb_unregister returns, we know that the old handler isn't being 110 * time adb_unregister returns, we know that the old handler isn't being
111 * called. 111 * called.
112 */ 112 */
113static DECLARE_MUTEX(adb_handler_sem); 113static DEFINE_MUTEX(adb_handler_mutex);
114static DEFINE_RWLOCK(adb_handler_lock); 114static DEFINE_RWLOCK(adb_handler_lock);
115 115
116#if 0 116#if 0
@@ -355,7 +355,7 @@ do_adb_reset_bus(void)
355 msleep(500); 355 msleep(500);
356 } 356 }
357 357
358 down(&adb_handler_sem); 358 mutex_lock(&adb_handler_mutex);
359 write_lock_irq(&adb_handler_lock); 359 write_lock_irq(&adb_handler_lock);
 	memset(adb_handler, 0, sizeof(adb_handler));
 	write_unlock_irq(&adb_handler_lock);
@@ -376,7 +376,7 @@ do_adb_reset_bus(void)
 		if (adb_controller->autopoll)
 			adb_controller->autopoll(autopoll_devs);
 	}
-	up(&adb_handler_sem);
+	mutex_unlock(&adb_handler_mutex);
 
 	blocking_notifier_call_chain(&adb_client_list,
 		ADB_MSG_POST_RESET, NULL);
@@ -454,7 +454,7 @@ adb_register(int default_id, int handler_id, struct adb_ids *ids,
 {
 	int i;
 
-	down(&adb_handler_sem);
+	mutex_lock(&adb_handler_mutex);
 	ids->nids = 0;
 	for (i = 1; i < 16; i++) {
 		if ((adb_handler[i].original_address == default_id) &&
@@ -472,7 +472,7 @@ adb_register(int default_id, int handler_id, struct adb_ids *ids,
 			ids->id[ids->nids++] = i;
 		}
 	}
-	up(&adb_handler_sem);
+	mutex_unlock(&adb_handler_mutex);
 	return ids->nids;
 }
 
@@ -481,7 +481,7 @@ adb_unregister(int index)
 {
 	int ret = -ENODEV;
 
-	down(&adb_handler_sem);
+	mutex_lock(&adb_handler_mutex);
 	write_lock_irq(&adb_handler_lock);
 	if (adb_handler[index].handler) {
 		while(adb_handler[index].busy) {
@@ -493,7 +493,7 @@ adb_unregister(int index)
 		adb_handler[index].handler = NULL;
 	}
 	write_unlock_irq(&adb_handler_lock);
-	up(&adb_handler_sem);
+	mutex_unlock(&adb_handler_mutex);
 	return ret;
 }
 
@@ -557,19 +557,19 @@ adb_try_handler_change(int address, int new_id)
 {
 	int ret;
 
-	down(&adb_handler_sem);
+	mutex_lock(&adb_handler_mutex);
 	ret = try_handler_change(address, new_id);
-	up(&adb_handler_sem);
+	mutex_unlock(&adb_handler_mutex);
 	return ret;
 }
 
 int
 adb_get_infos(int address, int *original_address, int *handler_id)
 {
-	down(&adb_handler_sem);
+	mutex_lock(&adb_handler_mutex);
 	*original_address = adb_handler[address].original_address;
 	*handler_id = adb_handler[address].handler_id;
-	up(&adb_handler_sem);
+	mutex_unlock(&adb_handler_mutex);
 
 	return (*original_address != 0);
 }
@@ -628,10 +628,10 @@ do_adb_query(struct adb_request *req)
 	case ADB_QUERY_GETDEVINFO:
 		if (req->nbytes < 3)
 			break;
-		down(&adb_handler_sem);
+		mutex_lock(&adb_handler_mutex);
 		req->reply[0] = adb_handler[req->data[2]].original_address;
 		req->reply[1] = adb_handler[req->data[2]].handler_id;
-		up(&adb_handler_sem);
+		mutex_unlock(&adb_handler_mutex);
 		req->complete = 1;
 		req->reply_len = 2;
 		adb_write_done(req);
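
The adb.c hunks above are a textbook semaphore-to-mutex conversion: a semaphore that only ever guards a critical section becomes a struct mutex, which is cheaper and gains lockdep checking. A minimal sketch of the pattern, with hypothetical names (example_mutex, example_op) rather than anything from this patch:

	#include <linux/mutex.h>

	/* was: static DECLARE_MUTEX(example_sem); */
	static DEFINE_MUTEX(example_mutex);

	static int example_op(void)
	{
		mutex_lock(&example_mutex);	/* was: down(&example_sem); */
		/* ... critical section, may sleep ... */
		mutex_unlock(&example_mutex);	/* was: up(&example_sem); */
		return 0;
	}
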
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index 1e0a69a5e815..ddfb426a9abd 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -122,6 +122,7 @@
 #include <linux/kmod.h>
 #include <linux/i2c.h>
 #include <linux/kthread.h>
+#include <linux/mutex.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/io.h>
@@ -169,7 +170,7 @@ static int rackmac;
 static s32 dimm_output_clamp;
 static int fcu_rpm_shift;
 static int fcu_tickle_ticks;
-static DECLARE_MUTEX(driver_lock);
+static DEFINE_MUTEX(driver_lock);
 
 /*
  * We have 3 types of CPU PID control. One is "split" old style control
@@ -729,9 +730,9 @@ static void fetch_cpu_pumps_minmax(void)
 static ssize_t show_##name(struct device *dev, struct device_attribute *attr, char *buf) \
 { \
 	ssize_t r; \
-	down(&driver_lock); \
+	mutex_lock(&driver_lock); \
 	r = sprintf(buf, "%d.%03d", FIX32TOPRINT(data)); \
-	up(&driver_lock); \
+	mutex_unlock(&driver_lock); \
 	return r; \
 }
 #define BUILD_SHOW_FUNC_INT(name, data) \
@@ -1803,11 +1804,11 @@ static int main_control_loop(void *x)
 {
 	DBG("main_control_loop started\n");
 
-	down(&driver_lock);
+	mutex_lock(&driver_lock);
 
 	if (start_fcu() < 0) {
 		printk(KERN_ERR "kfand: failed to start FCU\n");
-		up(&driver_lock);
+		mutex_unlock(&driver_lock);
 		goto out;
 	}
 
@@ -1822,14 +1823,14 @@ static int main_control_loop(void *x)
 
 	fcu_tickle_ticks = FCU_TICKLE_TICKS;
 
-	up(&driver_lock);
+	mutex_unlock(&driver_lock);
 
 	while (state == state_attached) {
 		unsigned long elapsed, start;
 
 		start = jiffies;
 
-		down(&driver_lock);
+		mutex_lock(&driver_lock);
 
 		/* Tickle the FCU just in case */
 		if (--fcu_tickle_ticks < 0) {
@@ -1861,7 +1862,7 @@ static int main_control_loop(void *x)
 			do_monitor_slots(&slots_state);
 		else
 			do_monitor_drives(&drives_state);
-		up(&driver_lock);
+		mutex_unlock(&driver_lock);
 
 		if (critical_state == 1) {
 			printk(KERN_WARNING "Temperature control detected a critical condition\n");
@@ -2019,13 +2020,13 @@ static void detach_fcu(void)
  */
 static int therm_pm72_attach(struct i2c_adapter *adapter)
 {
-	down(&driver_lock);
+	mutex_lock(&driver_lock);
 
 	/* Check state */
 	if (state == state_detached)
 		state = state_attaching;
 	if (state != state_attaching) {
-		up(&driver_lock);
+		mutex_unlock(&driver_lock);
 		return 0;
 	}
 
@@ -2054,7 +2055,7 @@ static int therm_pm72_attach(struct i2c_adapter *adapter)
 		state = state_attached;
 		start_control_loops();
 	}
-	up(&driver_lock);
+	mutex_unlock(&driver_lock);
 
 	return 0;
 }
@@ -2065,16 +2066,16 @@ static int therm_pm72_attach(struct i2c_adapter *adapter)
  */
 static int therm_pm72_detach(struct i2c_adapter *adapter)
 {
-	down(&driver_lock);
+	mutex_lock(&driver_lock);
 
 	if (state != state_detached)
 		state = state_detaching;
 
 	/* Stop control loops if any */
 	DBG("stopping control loops\n");
-	up(&driver_lock);
+	mutex_unlock(&driver_lock);
 	stop_control_loops();
-	down(&driver_lock);
+	mutex_lock(&driver_lock);
 
 	if (u3_0 != NULL && !strcmp(adapter->name, "u3 0")) {
 		DBG("lost U3-0, disposing control loops\n");
@@ -2090,7 +2091,7 @@ static int therm_pm72_detach(struct i2c_adapter *adapter)
 	if (u3_0 == NULL && u3_1 == NULL)
 		state = state_detached;
 
-	up(&driver_lock);
+	mutex_unlock(&driver_lock);
 
 	return 0;
 }
diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
index 797918d0e59c..7f2be4baaeda 100644
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/wait.h>
 #include <linux/i2c.h>
-#include <linux/semaphore.h>
+#include <linux/mutex.h>
 #include <asm/prom.h>
 #include <asm/smu.h>
 #include <asm/pmac_low_i2c.h>
@@ -36,7 +36,7 @@
 struct wf_sat {
 	int			nr;
 	atomic_t		refcnt;
-	struct semaphore	mutex;
+	struct mutex		mutex;
 	unsigned long		last_read; /* jiffies when cache last updated */
 	u8			cache[16];
 	struct i2c_client	i2c;
@@ -163,7 +163,7 @@ static int wf_sat_get(struct wf_sensor *sr, s32 *value)
 	if (sat->i2c.adapter == NULL)
 		return -ENODEV;
 
-	down(&sat->mutex);
+	mutex_lock(&sat->mutex);
 	if (time_after(jiffies, (sat->last_read + MAX_AGE))) {
 		err = wf_sat_read_cache(sat);
 		if (err)
@@ -182,7 +182,7 @@ static int wf_sat_get(struct wf_sensor *sr, s32 *value)
 	err = 0;
 
  fail:
-	up(&sat->mutex);
+	mutex_unlock(&sat->mutex);
 	return err;
 }
 
@@ -233,7 +233,7 @@ static void wf_sat_create(struct i2c_adapter *adapter, struct device_node *dev)
 	sat->nr = -1;
 	sat->node = of_node_get(dev);
 	atomic_set(&sat->refcnt, 0);
-	init_MUTEX(&sat->mutex);
+	mutex_init(&sat->mutex);
 	sat->i2c.addr = (addr >> 1) & 0x7f;
 	sat->i2c.adapter = adapter;
 	sat->i2c.driver = &wf_sat_driver;
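
For a mutex embedded in a dynamically allocated object, as with wf_sat above, init_MUTEX() on a semaphore becomes mutex_init() on the struct mutex. A sketch of the same conversion, with an invented struct and helper:

	#include <linux/mutex.h>
	#include <linux/slab.h>

	struct example_dev {
		struct mutex lock;	/* was: struct semaphore mutex; */
		u8 cache[16];
	};

	static struct example_dev *example_dev_alloc(void)
	{
		struct example_dev *d = kzalloc(sizeof(*d), GFP_KERNEL);

		if (d)
			mutex_init(&d->lock);	/* was: init_MUTEX(&d->mutex); */
		return d;
	}
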
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index 30a1af857c7a..fa394104339c 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -47,6 +47,7 @@
  *			to test the HW NMI watchdog
  * F## = Break at do_fork for ## iterations
  * S## = Break at sys_open for ## iterations
+ * I## = Run the single step test ## iterations
  *
  * NOTE: that the do_fork and sys_open tests are mutually exclusive.
  *
@@ -375,7 +376,7 @@ static void emul_sstep_get(char *arg)
 		break;
 	case 1:
 		/* set breakpoint */
-		break_helper("Z0", 0, sstep_addr);
+		break_helper("Z0", NULL, sstep_addr);
 		break;
 	case 2:
 		/* Continue */
@@ -383,7 +384,7 @@ static void emul_sstep_get(char *arg)
 		break;
 	case 3:
 		/* Clear breakpoint */
-		break_helper("z0", 0, sstep_addr);
+		break_helper("z0", NULL, sstep_addr);
 		break;
 	default:
 		eprintk("kgdbts: ERROR failed sstep get emulation\n");
@@ -465,11 +466,11 @@ static struct test_struct sw_breakpoint_test[] = {
 	{ "?", "S0*" }, /* Clear break points */
 	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
 	{ "c", "T0*", }, /* Continue */
-	{ "g", "kgdbts_break_test", 0, check_and_rewind_pc },
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
 	{ "write", "OK", write_regs },
 	{ "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
 	{ "D", "OK" }, /* Detach */
-	{ "D", "OK", 0, got_break }, /* If the test worked we made it here */
+	{ "D", "OK", NULL, got_break }, /* On success we made it here */
 	{ "", "" },
 };
 
@@ -499,14 +500,14 @@ static struct test_struct singlestep_break_test[] = {
 	{ "?", "S0*" }, /* Clear break points */
 	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
 	{ "c", "T0*", }, /* Continue */
-	{ "g", "kgdbts_break_test", 0, check_and_rewind_pc },
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
 	{ "write", "OK", write_regs }, /* Write registers */
 	{ "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
 	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
-	{ "g", "kgdbts_break_test", 0, check_single_step },
+	{ "g", "kgdbts_break_test", NULL, check_single_step },
 	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
 	{ "c", "T0*", }, /* Continue */
-	{ "g", "kgdbts_break_test", 0, check_and_rewind_pc },
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
 	{ "write", "OK", write_regs }, /* Write registers */
 	{ "D", "OK" }, /* Remove all breakpoints and continues */
 	{ "", "" },
@@ -520,14 +521,14 @@ static struct test_struct do_fork_test[] = {
 	{ "?", "S0*" }, /* Clear break points */
 	{ "do_fork", "OK", sw_break, }, /* set sw breakpoint */
 	{ "c", "T0*", }, /* Continue */
-	{ "g", "do_fork", 0, check_and_rewind_pc }, /* check location */
+	{ "g", "do_fork", NULL, check_and_rewind_pc }, /* check location */
 	{ "write", "OK", write_regs }, /* Write registers */
 	{ "do_fork", "OK", sw_rem_break }, /*remove breakpoint */
 	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
-	{ "g", "do_fork", 0, check_single_step },
+	{ "g", "do_fork", NULL, check_single_step },
 	{ "do_fork", "OK", sw_break, }, /* set sw breakpoint */
 	{ "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
-	{ "D", "OK", 0, final_ack_set }, /* detach and unregister I/O */
+	{ "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */
 	{ "", "" },
 };
 
@@ -538,14 +539,14 @@ static struct test_struct sys_open_test[] = {
 	{ "?", "S0*" }, /* Clear break points */
 	{ "sys_open", "OK", sw_break, }, /* set sw breakpoint */
 	{ "c", "T0*", }, /* Continue */
-	{ "g", "sys_open", 0, check_and_rewind_pc }, /* check location */
+	{ "g", "sys_open", NULL, check_and_rewind_pc }, /* check location */
 	{ "write", "OK", write_regs }, /* Write registers */
 	{ "sys_open", "OK", sw_rem_break }, /*remove breakpoint */
 	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
-	{ "g", "sys_open", 0, check_single_step },
+	{ "g", "sys_open", NULL, check_single_step },
 	{ "sys_open", "OK", sw_break, }, /* set sw breakpoint */
 	{ "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
-	{ "D", "OK", 0, final_ack_set }, /* detach and unregister I/O */
+	{ "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */
 	{ "", "" },
 };
 
@@ -556,11 +557,11 @@ static struct test_struct hw_breakpoint_test[] = {
 	{ "?", "S0*" }, /* Clear break points */
 	{ "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */
 	{ "c", "T0*", }, /* Continue */
-	{ "g", "kgdbts_break_test", 0, check_and_rewind_pc },
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
 	{ "write", "OK", write_regs },
 	{ "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */
 	{ "D", "OK" }, /* Detach */
-	{ "D", "OK", 0, got_break }, /* If the test worked we made it here */
+	{ "D", "OK", NULL, got_break }, /* On success we made it here */
 	{ "", "" },
 };
 
@@ -570,12 +571,12 @@ static struct test_struct hw_write_break_test[] = {
 static struct test_struct hw_write_break_test[] = {
 	{ "?", "S0*" }, /* Clear break points */
 	{ "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */
-	{ "c", "T0*", 0, got_break }, /* Continue */
-	{ "g", "silent", 0, check_and_rewind_pc },
+	{ "c", "T0*", NULL, got_break }, /* Continue */
+	{ "g", "silent", NULL, check_and_rewind_pc },
 	{ "write", "OK", write_regs },
 	{ "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */
 	{ "D", "OK" }, /* Detach */
-	{ "D", "OK", 0, got_break }, /* If the test worked we made it here */
+	{ "D", "OK", NULL, got_break }, /* On success we made it here */
 	{ "", "" },
 };
 
@@ -585,12 +586,12 @@ static struct test_struct hw_write_break_test[] = {
 static struct test_struct hw_access_break_test[] = {
 	{ "?", "S0*" }, /* Clear break points */
 	{ "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */
-	{ "c", "T0*", 0, got_break }, /* Continue */
-	{ "g", "silent", 0, check_and_rewind_pc },
+	{ "c", "T0*", NULL, got_break }, /* Continue */
+	{ "g", "silent", NULL, check_and_rewind_pc },
 	{ "write", "OK", write_regs },
 	{ "hw_break_val", "OK", hw_rem_access_break }, /*remove breakpoint */
 	{ "D", "OK" }, /* Detach */
-	{ "D", "OK", 0, got_break }, /* If the test worked we made it here */
+	{ "D", "OK", NULL, got_break }, /* On success we made it here */
 	{ "", "" },
 };
 
@@ -599,9 +600,9 @@ static struct test_struct hw_access_break_test[] = {
  */
 static struct test_struct nmi_sleep_test[] = {
 	{ "?", "S0*" }, /* Clear break points */
-	{ "c", "T0*", 0, got_break }, /* Continue */
+	{ "c", "T0*", NULL, got_break }, /* Continue */
 	{ "D", "OK" }, /* Detach */
-	{ "D", "OK", 0, got_break }, /* If the test worked we made it here */
+	{ "D", "OK", NULL, got_break }, /* On success we made it here */
 	{ "", "" },
 };
 
@@ -874,18 +875,23 @@ static void kgdbts_run_tests(void)
 {
 	char *ptr;
 	int fork_test = 0;
-	int sys_open_test = 0;
+	int do_sys_open_test = 0;
+	int sstep_test = 1000;
 	int nmi_sleep = 0;
+	int i;
 
 	ptr = strstr(config, "F");
 	if (ptr)
-		fork_test = simple_strtol(ptr+1, NULL, 10);
+		fork_test = simple_strtol(ptr + 1, NULL, 10);
 	ptr = strstr(config, "S");
 	if (ptr)
-		sys_open_test = simple_strtol(ptr+1, NULL, 10);
+		do_sys_open_test = simple_strtol(ptr + 1, NULL, 10);
 	ptr = strstr(config, "N");
 	if (ptr)
 		nmi_sleep = simple_strtol(ptr+1, NULL, 10);
+	ptr = strstr(config, "I");
+	if (ptr)
+		sstep_test = simple_strtol(ptr+1, NULL, 10);
 
 	/* required internal KGDB tests */
 	v1printk("kgdbts:RUN plant and detach test\n");
@@ -894,8 +900,13 @@ static void kgdbts_run_tests(void)
 	run_breakpoint_test(0);
 	v1printk("kgdbts:RUN bad memory access test\n");
 	run_bad_read_test();
-	v1printk("kgdbts:RUN singlestep breakpoint test\n");
-	run_singlestep_break_test();
+	v1printk("kgdbts:RUN singlestep test %i iterations\n", sstep_test);
+	for (i = 0; i < sstep_test; i++) {
+		run_singlestep_break_test();
+		if (i % 100 == 0)
+			v1printk("kgdbts:RUN singlestep [%i/%i]\n",
+				 i, sstep_test);
+	}
 
 	/* ===Optional tests=== */
 
@@ -922,7 +933,7 @@ static void kgdbts_run_tests(void)
 		repeat_test = fork_test;
 		printk(KERN_INFO "kgdbts:RUN do_fork for %i breakpoints\n",
 		       repeat_test);
-		kthread_run(kgdbts_unreg_thread, 0, "kgdbts_unreg");
+		kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg");
 		run_do_fork_test();
 		return;
 	}
@@ -931,11 +942,11 @@ static void kgdbts_run_tests(void)
 	 * executed because a kernel thread will be spawned at the very
 	 * end to unregister the debug hooks.
 	 */
-	if (sys_open_test) {
-		repeat_test = sys_open_test;
+	if (do_sys_open_test) {
+		repeat_test = do_sys_open_test;
 		printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n",
 		       repeat_test);
-		kthread_run(kgdbts_unreg_thread, 0, "kgdbts_unreg");
+		kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg");
 		run_sys_open_test();
 		return;
 	}
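
The new I## option is decoded exactly like F## and S##: strstr() locates the flag character in the config string and simple_strtol() parses the count that follows it, with 1000 as the default iteration count. Roughly, as a free-standing helper (the helper name and example string are invented):

	#include <linux/kernel.h>
	#include <linux/string.h>

	static int parse_iterations(const char *config, char flag, int def)
	{
		char needle[2] = { flag, '\0' };
		char *ptr = strstr(config, needle);

		/* e.g. config = "V1I500", flag = 'I' -> returns 500 */
		return ptr ? simple_strtol(ptr + 1, NULL, 10) : def;
	}
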
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index a873d2b315ca..a7714da7c283 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -100,7 +100,9 @@ static int nsc_ircc_probe_39x(nsc_chip_t *chip, chipio_t *info);
 static int nsc_ircc_init_108(nsc_chip_t *chip, chipio_t *info);
 static int nsc_ircc_init_338(nsc_chip_t *chip, chipio_t *info);
 static int nsc_ircc_init_39x(nsc_chip_t *chip, chipio_t *info);
+#ifdef CONFIG_PNP
 static int nsc_ircc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id);
+#endif
 
 /* These are the known NSC chips */
 static nsc_chip_t chips[] = {
@@ -156,9 +158,11 @@ static const struct pnp_device_id nsc_ircc_pnp_table[] = {
 MODULE_DEVICE_TABLE(pnp, nsc_ircc_pnp_table);
 
 static struct pnp_driver nsc_ircc_pnp_driver = {
+#ifdef CONFIG_PNP
 	.name = "nsc-ircc",
 	.id_table = nsc_ircc_pnp_table,
 	.probe = nsc_ircc_pnp_probe,
+#endif
 };
 
 /* Some prototypes */
@@ -916,6 +920,7 @@ static int nsc_ircc_probe_39x(nsc_chip_t *chip, chipio_t *info)
 	return 0;
 }
 
+#ifdef CONFIG_PNP
 /* PNP probing */
 static int nsc_ircc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id)
 {
@@ -952,6 +957,7 @@ static int nsc_ircc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *i
 
 	return 0;
 }
+#endif
 
 /*
  * Function nsc_ircc_setup (info)
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index 1f26da761e9f..cfe0194fef71 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -376,6 +376,7 @@ MODULE_DEVICE_TABLE(pnp, smsc_ircc_pnp_table);
 
 static int pnp_driver_registered;
 
+#ifdef CONFIG_PNP
 static int __init smsc_ircc_pnp_probe(struct pnp_dev *dev,
 				      const struct pnp_device_id *dev_id)
 {
@@ -402,7 +403,9 @@ static struct pnp_driver smsc_ircc_pnp_driver = {
 	.id_table = smsc_ircc_pnp_table,
 	.probe = smsc_ircc_pnp_probe,
 };
-
+#else /* CONFIG_PNP */
+static struct pnp_driver smsc_ircc_pnp_driver;
+#endif
 
 /*******************************************************************************
  *
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index cb46446b2691..03a9abcce524 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -551,7 +551,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
 	u64 mtt_seg;
 	int err = -ENOMEM;
 
-	if (page_shift < 12 || page_shift >= 32)
+	if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
 		return -EINVAL;
 
 	/* All MTTs must fit in the same page */
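
The hard-coded lower bound of 12 is replaced by the device's own minimum: ffs() on the page-size capability mask returns the bit position of the lowest supported page size, so subtracting one yields the smallest valid page_shift. A hedged illustration (the mask value is made up):

	#include <linux/bitops.h>

	/* page_size_cap = 0xfffff000: bit 12 is the lowest set bit,
	 * ffs() returns 13, so the minimum accepted page_shift is 12.
	 */
	static int min_page_shift(u32 page_size_cap)
	{
		return ffs(page_size_cap) - 1;
	}
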
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 4009c4ce96b4..57cfd72ffdf7 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -1,6 +1,6 @@
 /* niu.c: Neptune ethernet driver.
  *
- * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2007, 2008 David S. Miller (davem@davemloft.net)
  */
 
 #include <linux/module.h>
@@ -33,8 +33,8 @@
 
 #define DRV_MODULE_NAME		"niu"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"0.8"
-#define DRV_MODULE_RELDATE	"April 24, 2008"
+#define DRV_MODULE_VERSION	"0.9"
+#define DRV_MODULE_RELDATE	"May 4, 2008"
 
 static char version[] __devinitdata =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -7264,8 +7264,11 @@ static int __devinit niu_get_and_validate_port(struct niu *np)
 			parent->num_ports = nr64(ESPC_NUM_PORTS_MACS) &
 				ESPC_NUM_PORTS_MACS_VAL;
 
+			/* All of the current probing methods fail on
+			 * Maramba on-board parts.
+			 */
 			if (!parent->num_ports)
-				return -ENODEV;
+				parent->num_ports = 4;
 		}
 	}
 }
diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index b5860b97a93e..24fd613466b7 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -459,6 +459,7 @@ static void __exit lapbeth_cleanup_driver(void)
 	list_for_each_safe(entry, tmp, &lapbeth_devices) {
 		lapbeth = list_entry(entry, struct lapbethdev, node);
 
+		dev_put(lapbeth->ethdev);
 		unregister_netdevice(lapbeth->axdev);
 	}
 	rtnl_unlock();
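
The added dev_put() balances the reference taken on the underlying Ethernet device when the lapbeth device was created; every dev_hold() on a net_device must be paired with a dev_put() before the device can go away. In sketch form (the field names are invented):

	/* at create time */
	dev_hold(ethdev);
	priv->ethdev = ethdev;

	/* at teardown, before the wrapper is unregistered */
	dev_put(priv->ethdev);
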
diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig
index d5b7a76fcaad..62fb89d82318 100644
--- a/drivers/net/wireless/iwlwifi/Kconfig
+++ b/drivers/net/wireless/iwlwifi/Kconfig
@@ -1,6 +1,5 @@
 config IWLWIFI
-	bool
-	default n
+	tristate
 
 config IWLCORE
 	tristate "Intel Wireless Wifi Core"
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 4a55bf380957..3706ce7972dd 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -842,13 +842,25 @@ static void set_pcie_port_type(struct pci_dev *pdev)
  * reading the dword at 0x100 which must either be 0 or a valid extended
  * capability header.
  */
-int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix)
+int pci_cfg_space_size_ext(struct pci_dev *dev)
 {
-	int pos;
 	u32 status;
 
-	if (!check_exp_pcix)
-		goto skip;
+	if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
+		goto fail;
+	if (status == 0xffffffff)
+		goto fail;
+
+	return PCI_CFG_SPACE_EXP_SIZE;
+
+ fail:
+	return PCI_CFG_SPACE_SIZE;
+}
+
+int pci_cfg_space_size(struct pci_dev *dev)
+{
+	int pos;
+	u32 status;
 
 	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 	if (!pos) {
@@ -861,23 +873,12 @@ int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix)
 		goto fail;
 	}
 
- skip:
-	if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
-		goto fail;
-	if (status == 0xffffffff)
-		goto fail;
-
-	return PCI_CFG_SPACE_EXP_SIZE;
+	return pci_cfg_space_size_ext(dev);
 
  fail:
 	return PCI_CFG_SPACE_SIZE;
 }
 
-int pci_cfg_space_size(struct pci_dev *dev)
-{
-	return pci_cfg_space_size_ext(dev, 1);
-}
-
 static void pci_release_bus_bridge_dev(struct device *dev)
 {
 	kfree(dev);
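
After the split, pci_cfg_space_size() keeps the PCIe/PCI-X capability checks and tail-calls pci_cfg_space_size_ext(), which does only the dword probe at offset 256; code that already knows the device exposes extended config space can call the _ext variant directly. A hedged sketch of a caller (the function name is invented, the device pointer is assumed valid):

	static void example_report_cfg_size(struct pci_dev *dev)
	{
		/* full check: capability scan first, then the 0x100 probe */
		int size = pci_cfg_space_size(dev);

		dev_info(&dev->dev, "config space: %d bytes\n", size);
	}
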
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 46d7e400c8be..81ccbd7f9e34 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1679,6 +1679,7 @@ config MAC_SCSI
 config SCSI_MAC_ESP
 	tristate "Macintosh NCR53c9[46] SCSI"
 	depends on MAC && SCSI
+	select SCSI_SPI_ATTRS
 	help
 	  This is the NCR 53c9x SCSI controller found on most of the 68040
 	  based Macintoshes.
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index d5bd497ab9cb..223b1917093e 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -48,7 +48,7 @@ struct affs_ext_key {
  * affs fs inode data in memory
  */
 struct affs_inode_info {
-	u32	 i_opencnt;
+	atomic_t i_opencnt;
 	struct semaphore i_link_lock;		/* Protects internal inode access. */
 	struct semaphore i_ext_lock;		/* Protects internal inode access. */
 #define i_hash_lock i_ext_lock
@@ -170,8 +170,6 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry,
 extern unsigned long		 affs_parent_ino(struct inode *dir);
 extern struct inode		*affs_new_inode(struct inode *dir);
 extern int			 affs_notify_change(struct dentry *dentry, struct iattr *attr);
-extern void			 affs_put_inode(struct inode *inode);
-extern void			 affs_drop_inode(struct inode *inode);
 extern void			 affs_delete_inode(struct inode *inode);
 extern void			 affs_clear_inode(struct inode *inode);
 extern struct inode		*affs_iget(struct super_block *sb,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 1a4f092f24ef..6eac7bdeec94 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -48,8 +48,9 @@ affs_file_open(struct inode *inode, struct file *filp)
 {
 	if (atomic_read(&filp->f_count) != 1)
 		return 0;
-	pr_debug("AFFS: open(%d)\n", AFFS_I(inode)->i_opencnt);
-	AFFS_I(inode)->i_opencnt++;
+	pr_debug("AFFS: open(%lu,%d)\n",
+		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
+	atomic_inc(&AFFS_I(inode)->i_opencnt);
 	return 0;
 }
 
@@ -58,10 +59,16 @@ affs_file_release(struct inode *inode, struct file *filp)
 {
 	if (atomic_read(&filp->f_count) != 0)
 		return 0;
-	pr_debug("AFFS: release(%d)\n", AFFS_I(inode)->i_opencnt);
-	AFFS_I(inode)->i_opencnt--;
-	if (!AFFS_I(inode)->i_opencnt)
+	pr_debug("AFFS: release(%lu, %d)\n",
+		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
+
+	if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
+		mutex_lock(&inode->i_mutex);
+		if (inode->i_size != AFFS_I(inode)->mmu_private)
+			affs_truncate(inode);
 		affs_free_prealloc(inode);
+		mutex_unlock(&inode->i_mutex);
+	}
 
 	return 0;
 }
@@ -180,7 +187,7 @@ affs_get_extblock(struct inode *inode, u32 ext)
 	/* inline the simplest case: same extended block as last time */
 	struct buffer_head *bh = AFFS_I(inode)->i_ext_bh;
 	if (ext == AFFS_I(inode)->i_ext_last)
-		atomic_inc(&bh->b_count);
+		get_bh(bh);
 	else
 		/* we have to do more (not inlined) */
 		bh = affs_get_extblock_slow(inode, ext);
@@ -306,7 +313,7 @@ store_ext:
 	affs_brelse(AFFS_I(inode)->i_ext_bh);
 	AFFS_I(inode)->i_ext_last = ext;
 	AFFS_I(inode)->i_ext_bh = bh;
-	atomic_inc(&bh->b_count);
+	get_bh(bh);
 
 	return bh;
 
@@ -324,7 +331,6 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
 
 	pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block);
 
-
 	BUG_ON(block > (sector_t)0x7fffffffUL);
 
 	if (block >= AFFS_I(inode)->i_blkcnt) {
@@ -827,6 +833,8 @@ affs_truncate(struct inode *inode)
 		res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata);
 		if (!res)
 			res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
+		else
+			inode->i_size = AFFS_I(inode)->mmu_private;
 		mark_inode_dirty(inode);
 		return;
 	} else if (inode->i_size == AFFS_I(inode)->mmu_private)
@@ -862,6 +870,7 @@ affs_truncate(struct inode *inode)
 			blk++;
 		} else
 			AFFS_HEAD(ext_bh)->first_data = 0;
+		AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(i);
 		size = AFFS_SB(sb)->s_hashsize;
 		if (size > blkcnt - blk + i)
 			size = blkcnt - blk + i;
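
The open/release pair above is the classic refcount idiom: atomic_inc() on each open, atomic_dec_and_test() on release so that exactly one closer, the one that drops the count to zero, runs the teardown. Reduced to a generic sketch with invented names:

	#include <asm/atomic.h>

	static atomic_t example_opencnt = ATOMIC_INIT(0);

	static void example_open(void)
	{
		atomic_inc(&example_opencnt);
	}

	static void example_release(void)
	{
		/* true only for the caller that reaches zero */
		if (atomic_dec_and_test(&example_opencnt)) {
			/* tear down: truncate, free preallocations, ... */
		}
	}
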
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 27fe6cbe43ae..a13b334a3910 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -58,7 +58,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
 	AFFS_I(inode)->i_extcnt = 1;
 	AFFS_I(inode)->i_ext_last = ~1;
 	AFFS_I(inode)->i_protect = prot;
-	AFFS_I(inode)->i_opencnt = 0;
+	atomic_set(&AFFS_I(inode)->i_opencnt, 0);
 	AFFS_I(inode)->i_blkcnt = 0;
 	AFFS_I(inode)->i_lc = NULL;
 	AFFS_I(inode)->i_lc_size = 0;
@@ -108,8 +108,6 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
 			inode->i_mode |= S_IFDIR;
 		} else
 			inode->i_mode = S_IRUGO | S_IXUGO | S_IWUSR | S_IFDIR;
-		if (tail->link_chain)
-			inode->i_nlink = 2;
 		/* Maybe it should be controlled by mount parameter? */
 		//inode->i_mode |= S_ISVTX;
 		inode->i_op = &affs_dir_inode_operations;
@@ -245,31 +243,12 @@ out:
 }
 
 void
-affs_put_inode(struct inode *inode)
-{
-	pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
-	affs_free_prealloc(inode);
-}
-
-void
-affs_drop_inode(struct inode *inode)
-{
-	mutex_lock(&inode->i_mutex);
-	if (inode->i_size != AFFS_I(inode)->mmu_private)
-		affs_truncate(inode);
-	mutex_unlock(&inode->i_mutex);
-
-	generic_drop_inode(inode);
-}
-
-void
 affs_delete_inode(struct inode *inode)
 {
 	pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
 	truncate_inode_pages(&inode->i_data, 0);
 	inode->i_size = 0;
-	if (S_ISREG(inode->i_mode))
-		affs_truncate(inode);
+	affs_truncate(inode);
 	clear_inode(inode);
 	affs_free_block(inode->i_sb, inode->i_ino);
 }
@@ -277,9 +256,12 @@ affs_delete_inode(struct inode *inode)
 void
 affs_clear_inode(struct inode *inode)
 {
-	unsigned long cache_page = (unsigned long) AFFS_I(inode)->i_lc;
+	unsigned long cache_page;
 
 	pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+
+	affs_free_prealloc(inode);
+	cache_page = (unsigned long)AFFS_I(inode)->i_lc;
 	if (cache_page) {
 		pr_debug("AFFS: freeing ext cache\n");
 		AFFS_I(inode)->i_lc = NULL;
@@ -316,7 +298,7 @@ affs_new_inode(struct inode *dir)
 	inode->i_ino = block;
 	inode->i_nlink = 1;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
-	AFFS_I(inode)->i_opencnt = 0;
+	atomic_set(&AFFS_I(inode)->i_opencnt, 0);
 	AFFS_I(inode)->i_blkcnt = 0;
 	AFFS_I(inode)->i_lc = NULL;
 	AFFS_I(inode)->i_lc_size = 0;
@@ -369,12 +351,12 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
 	switch (type) {
 	case ST_LINKFILE:
 	case ST_LINKDIR:
-		inode_bh = bh;
 		retval = -ENOSPC;
 		block = affs_alloc_block(dir, dir->i_ino);
 		if (!block)
 			goto err;
 		retval = -EIO;
+		inode_bh = bh;
 		bh = affs_getzeroblk(sb, block);
 		if (!bh)
 			goto err;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 2218f1ee71ce..cfcf1b6cf82b 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -234,7 +234,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 int
 affs_unlink(struct inode *dir, struct dentry *dentry)
 {
-	pr_debug("AFFS: unlink(dir=%d, \"%.*s\")\n", (u32)dir->i_ino,
+	pr_debug("AFFS: unlink(dir=%d, %lu \"%.*s\")\n", (u32)dir->i_ino,
+		 dentry->d_inode->i_ino,
 		 (int)dentry->d_name.len, dentry->d_name.name);
 
 	return affs_remove_header(dentry);
@@ -302,7 +303,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 int
 affs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	pr_debug("AFFS: rmdir(dir=%u, \"%.*s\")\n", (u32)dir->i_ino,
+	pr_debug("AFFS: rmdir(dir=%u, %lu \"%.*s\")\n", (u32)dir->i_ino,
+		 dentry->d_inode->i_ino,
 		 (int)dentry->d_name.len, dentry->d_name.name);
 
 	return affs_remove_header(dentry);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 01d25d532541..d214837d5e42 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -71,12 +71,18 @@ static struct kmem_cache * affs_inode_cachep;
 
 static struct inode *affs_alloc_inode(struct super_block *sb)
 {
-	struct affs_inode_info *ei;
-	ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
-	if (!ei)
+	struct affs_inode_info *i;
+
+	i = kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
+	if (!i)
 		return NULL;
-	ei->vfs_inode.i_version = 1;
-	return &ei->vfs_inode;
+
+	i->vfs_inode.i_version = 1;
+	i->i_lc = NULL;
+	i->i_ext_bh = NULL;
+	i->i_pa_cnt = 0;
+
+	return &i->vfs_inode;
 }
 
 static void affs_destroy_inode(struct inode *inode)
@@ -114,8 +120,6 @@ static const struct super_operations affs_sops = {
 	.alloc_inode	= affs_alloc_inode,
 	.destroy_inode	= affs_destroy_inode,
 	.write_inode	= affs_write_inode,
-	.put_inode	= affs_put_inode,
-	.drop_inode	= affs_drop_inode,
 	.delete_inode	= affs_delete_inode,
 	.clear_inode	= affs_clear_inode,
 	.put_super	= affs_put_super,
diff --git a/fs/inode.c b/fs/inode.c
index bf6478130424..c36d9480335c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1149,13 +1149,8 @@ static inline void iput_final(struct inode *inode)
 void iput(struct inode *inode)
 {
 	if (inode) {
-		const struct super_operations *op = inode->i_sb->s_op;
-
 		BUG_ON(inode->i_state == I_CLEAR);
 
-		if (op && op->put_inode)
-			op->put_inode(inode);
-
 		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
 			iput_final(inode);
 	}
diff --git a/fs/locks.c b/fs/locks.c
index 663c069b59b3..0ac6b92cb0b6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1753,6 +1753,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	struct file_lock *file_lock = locks_alloc_lock();
 	struct flock flock;
 	struct inode *inode;
+	struct file *f;
 	int error;
 
 	if (file_lock == NULL)
@@ -1825,7 +1826,15 @@ again:
 	 * Attempt to detect a close/fcntl race and recover by
 	 * releasing the lock that was just acquired.
 	 */
-	if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
+	/*
+	 * we need that spin_lock here - it prevents reordering between
+	 * update of inode->i_flock and check for it done in close().
+	 * rcu_read_lock() wouldn't do.
+	 */
+	spin_lock(&current->files->file_lock);
+	f = fcheck(fd);
+	spin_unlock(&current->files->file_lock);
+	if (!error && f != filp && flock.l_type != F_UNLCK) {
 		flock.l_type = F_UNLCK;
 		goto again;
 	}
@@ -1881,6 +1890,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	struct file_lock *file_lock = locks_alloc_lock();
 	struct flock64 flock;
 	struct inode *inode;
+	struct file *f;
 	int error;
 
 	if (file_lock == NULL)
@@ -1953,7 +1963,10 @@ again:
 	 * Attempt to detect a close/fcntl race and recover by
 	 * releasing the lock that was just acquired.
 	 */
-	if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
+	spin_lock(&current->files->file_lock);
+	f = fcheck(fd);
+	spin_unlock(&current->files->file_lock);
+	if (!error && f != filp && flock.l_type != F_UNLCK) {
 		flock.l_type = F_UNLCK;
 		goto again;
 	}
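
The point of the new spin_lock section is that fcheck() must not be reordered against close()'s update of the file table, so the descriptor lookup is sampled under files->file_lock and only the sampled value is compared. The guarded pattern, isolated (fd and filp as in the surrounding code):

	struct file *f;

	spin_lock(&current->files->file_lock);
	f = fcheck(fd);		/* racy if read without the lock */
	spin_unlock(&current->files->file_lock);
	if (f != filp) {
		/* fd was closed or reused: release the lock just taken */
	}
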
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 4b733f108455..4b4f9cc2f186 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -1,6 +1,7 @@
 
 #include <linux/mm.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/mount.h>
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
diff --git a/include/asm-alpha/types.h b/include/asm-alpha/types.h
index a9e34ca4d463..c1541353ccef 100644
--- a/include/asm-alpha/types.h
+++ b/include/asm-alpha/types.h
@@ -23,5 +23,11 @@ typedef unsigned int umode_t;
 
 #define BITS_PER_LONG 64
 
+#ifndef __ASSEMBLY__
+
+typedef u64 dma_addr_t;
+typedef u64 dma64_addr_t;
+
+#endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ALPHA_TYPES_H */
diff --git a/include/asm-m68k/machw.h b/include/asm-m68k/machw.h
index d2e0e25d5c90..35624998291c 100644
--- a/include/asm-m68k/machw.h
+++ b/include/asm-m68k/machw.h
@@ -66,36 +66,6 @@ struct MAC_SCC
 # define mac_scc ((*(volatile struct SCC*)MAC_SCC_BAS))
 #endif
 
-/* hardware stuff */
-
-#define MACHW_DECLARE(name)	unsigned name : 1
-#define MACHW_SET(name)		(mac_hw_present.name = 1)
-#define MACHW_PRESENT(name)	(mac_hw_present.name)
-
-struct mac_hw_present {
-	/* video hardware */
-	/* sound hardware */
-	/* disk storage interfaces */
-	MACHW_DECLARE(MAC_SCSI_80);	/* Directly mapped NCR5380 */
-	MACHW_DECLARE(MAC_SCSI_96);	/* 53c9[46] */
-	MACHW_DECLARE(MAC_SCSI_96_2);	/* 2nd 53c9[46] Q900 and Q950 */
-	MACHW_DECLARE(IDE);		/* IDE Interface */
-	/* other I/O hardware */
-	MACHW_DECLARE(SCC);		/* Serial Communications Contr. */
-	/* DMA */
-	MACHW_DECLARE(SCSI_DMA);	/* DMA for the NCR5380 */
-	/* real time clocks */
-	MACHW_DECLARE(RTC_CLK);		/* clock chip */
-	/* supporting hardware */
-	MACHW_DECLARE(VIA1);		/* Versatile Interface Ad. 1 */
-	MACHW_DECLARE(VIA2);		/* Versatile Interface Ad. 2 */
-	MACHW_DECLARE(RBV);		/* Versatile Interface Ad. 2+ */
-	/* NUBUS */
-	MACHW_DECLARE(NUBUS);		/* NUBUS */
-};
-
-extern struct mac_hw_present mac_hw_present;
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* linux/machw.h */
diff --git a/include/asm-mips/types.h b/include/asm-mips/types.h
index 7a2ee4f40131..bcbb8d675af5 100644
--- a/include/asm-mips/types.h
+++ b/include/asm-mips/types.h
@@ -19,8 +19,6 @@
 
 typedef unsigned short umode_t;
 
-#endif
-
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index afae0697e8ce..e0062d73db1c 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -2,7 +2,7 @@
 #define _ASM_POWERPC_IO_H
 #ifdef __KERNEL__
 
-/* 
+/*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -18,6 +18,9 @@ extern int check_legacy_ioport(unsigned long base_port);
 #define _PNPWRP 0xa79
 #define PNPBIOS_BASE 0xf000
 
+#include <linux/device.h>
+#include <linux/io.h>
+
 #include <linux/compiler.h>
 #include <asm/page.h>
 #include <asm/byteorder.h>
@@ -744,6 +747,9 @@ static inline void * bus_to_virt(unsigned long address)
 
 #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set)
 
+void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
+				size_t size, unsigned long flags);
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_POWERPC_IO_H */
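
devm_ioremap_prot() follows the usual devres pattern: the mapping is tied to the device's lifetime and released automatically on driver detach, so probe paths need no explicit iounmap on error. A hedged usage sketch (the address, size, and flag values are placeholders):

	static int example_probe(struct device *dev)
	{
		void __iomem *regs;

		regs = devm_ioremap_prot(dev, 0xf0000000, 0x1000,
					 _PAGE_NO_CACHE);
		if (!regs)
			return -ENOMEM;

		/* use regs; devres unmaps it when the device detaches */
		return 0;
	}
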
diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h
index 04ffbb8e0a35..81a69d711017 100644
--- a/include/asm-powerpc/kvm_host.h
+++ b/include/asm-powerpc/kvm_host.h
@@ -59,6 +59,7 @@ struct kvm_vcpu_stat {
 	u32 emulated_inst_exits;
 	u32 dec_exits;
 	u32 ext_intr_exits;
+	u32 halt_wakeup;
 };
 
 struct tlbe {
diff --git a/include/asm-powerpc/kvm_ppc.h b/include/asm-powerpc/kvm_ppc.h
index 7ac820308a7e..b35a7e3ef978 100644
--- a/include/asm-powerpc/kvm_ppc.h
+++ b/include/asm-powerpc/kvm_ppc.h
@@ -77,12 +77,17 @@ static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
 	clear_bit(priority, &vcpu->arch.pending_exceptions);
 }
 
+/* Helper function for "full" MSR writes. No need to call this if only EE is
+ * changing. */
 static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 {
 	if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
 		kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
 
 	vcpu->arch.msr = new_msr;
+
+	if (vcpu->arch.msr & MSR_WE)
+		kvm_vcpu_block(vcpu);
 }
 
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index e8659909e5f6..f62f4733606b 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -14,10 +14,10 @@
 
 /* extensible setup data list node */
 struct setup_data {
-	u64 next;
-	u32 type;
-	u32 len;
-	u8 data[0];
+	__u64 next;
+	__u32 type;
+	__u32 len;
+	__u8 data[0];
 };
 
 struct setup_header {
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 9d963cd6533c..1d8cd01fa514 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -314,6 +314,9 @@ struct kvm_arch{
 	struct page *apic_access_page;
 
 	gpa_t wall_clock;
+
+	struct page *ept_identity_pagetable;
+	bool ept_identity_pagetable_done;
 };
 
 struct kvm_vm_stat {
@@ -422,6 +425,7 @@ struct kvm_x86_ops {
 			       struct kvm_run *run);
 
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
+	int (*get_tdp_level)(void);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -433,6 +437,9 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
+void kvm_mmu_set_base_ptes(u64 base_pte);
+void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
+		u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -620,7 +627,7 @@ static inline void fx_restore(struct i387_fxsave_struct *image)
 	asm("fxrstor (%0)":: "r" (image));
 }
 
-static inline void fpu_init(void)
+static inline void fx_finit(void)
 {
 	asm("finit");
 }
@@ -644,6 +651,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 #define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
 #define ASM_VMX_VMXOFF            ".byte 0x0f, 0x01, 0xc4"
 #define ASM_VMX_VMXON_RAX         ".byte 0xf3, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_INVEPT            ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
 #define ASM_VMX_INVVPID           ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
 
 #define MSR_IA32_TIME_STAMP_COUNTER	0x010
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 577ab79c4c27..d7f0403bbecb 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -88,14 +88,7 @@ extern unsigned long pg0[];
 /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
 #define pmd_none(x)	(!(unsigned long)pmd_val((x)))
 #define pmd_present(x)	(pmd_val((x)) & _PAGE_PRESENT)
-
-extern int pmd_bad(pmd_t pmd);
-
-#define pmd_bad_v1(x) \
-	(_KERNPG_TABLE != (pmd_val((x)) & ~(PAGE_MASK | _PAGE_USER)))
-#define pmd_bad_v2(x) \
-	(_KERNPG_TABLE != (pmd_val((x)) & ~(PAGE_MASK | _PAGE_USER | \
-					    _PAGE_PSE | _PAGE_NX)))
+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
 
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
 
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index a3bbf8766c1d..efe83dcbd412 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -158,14 +158,12 @@ static inline unsigned long pgd_bad(pgd_t pgd)
158 158
159static inline unsigned long pud_bad(pud_t pud) 159static inline unsigned long pud_bad(pud_t pud)
160{ 160{
161 return pud_val(pud) & 161 return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
162 ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX);
163} 162}
164 163
165static inline unsigned long pmd_bad(pmd_t pmd) 164static inline unsigned long pmd_bad(pmd_t pmd)
166{ 165{
167 return pmd_val(pmd) & 166 return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
168 ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX);
169} 167}
170 168
171#define pte_none(x) (!pte_val((x))) 169#define pte_none(x) (!pte_val((x)))
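
Both pgtable hunks above collapse the checks to a single rule: any flag bit outside the page-frame mask, the _KERNPG_TABLE template and _PAGE_USER now marks the entry bad, so PSE and NX bits no longer get a pass. A toy model of the consolidated 32-bit pmd_bad() macro, with made-up flag values standing in for the real x86 constants:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-ins for the real x86 values, illustrative only. */
    #define PAGE_MASK      (~0xfffULL)
    #define _PAGE_PRESENT  0x001ULL
    #define _PAGE_RW       0x002ULL
    #define _PAGE_USER     0x004ULL
    #define _PAGE_ACCESSED 0x020ULL
    #define _PAGE_DIRTY    0x040ULL
    #define _PAGE_PSE      0x080ULL
    #define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

    /* Mirrors the consolidated check: the flag bits must be exactly
     * _KERNPG_TABLE, optionally plus _PAGE_USER. */
    static int pmd_bad(uint64_t pmd)
    {
            return (pmd & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE;
    }

    int main(void)
    {
            uint64_t good = 0x1000 | _KERNPG_TABLE | _PAGE_USER;
            uint64_t bad  = good | _PAGE_PSE;   /* PSE is no longer tolerated */
            printf("good: %d, bad: %d\n", pmd_bad(good), pmd_bad(bad));
            return 0;
    }
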
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a1ba005d08e7..7e0fa9e64479 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1289,17 +1289,12 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
1289extern ssize_t vfs_writev(struct file *, const struct iovec __user *, 1289extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
1290 unsigned long, loff_t *); 1290 unsigned long, loff_t *);
1291 1291
1292/*
1293 * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
1294 * without the big kernel lock held in all filesystems.
1295 */
1296struct super_operations { 1292struct super_operations {
1297 struct inode *(*alloc_inode)(struct super_block *sb); 1293 struct inode *(*alloc_inode)(struct super_block *sb);
1298 void (*destroy_inode)(struct inode *); 1294 void (*destroy_inode)(struct inode *);
1299 1295
1300 void (*dirty_inode) (struct inode *); 1296 void (*dirty_inode) (struct inode *);
1301 int (*write_inode) (struct inode *, int); 1297 int (*write_inode) (struct inode *, int);
1302 void (*put_inode) (struct inode *);
1303 void (*drop_inode) (struct inode *); 1298 void (*drop_inode) (struct inode *);
1304 void (*delete_inode) (struct inode *); 1299 void (*delete_inode) (struct inode *);
1305 void (*put_super) (struct super_block *); 1300 void (*put_super) (struct super_block *);
diff --git a/include/linux/io.h b/include/linux/io.h
index 3a03a3604cce..6c7f0ba0d5fa 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -65,5 +65,6 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
65void devm_iounmap(struct device *dev, void __iomem *addr); 65void devm_iounmap(struct device *dev, void __iomem *addr);
66int check_signature(const volatile void __iomem *io_addr, 66int check_signature(const volatile void __iomem *io_addr,
67 const unsigned char *signature, int length); 67 const unsigned char *signature, int length);
68void devm_ioremap_release(struct device *dev, void *res);
68 69
69#endif /* _LINUX_IO_H */ 70#endif /* _LINUX_IO_H */
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 9757b1a6d9dc..6adcc297e354 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -261,10 +261,12 @@ struct kgdb_io {
261 261
262extern struct kgdb_arch arch_kgdb_ops; 262extern struct kgdb_arch arch_kgdb_ops;
263 263
264extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
265
264extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops); 266extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
265extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops); 267extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
266 268
267extern int kgdb_hex2long(char **ptr, long *long_val); 269extern int kgdb_hex2long(char **ptr, unsigned long *long_val);
268extern int kgdb_mem2hex(char *mem, char *buf, int count); 270extern int kgdb_mem2hex(char *mem, char *buf, int count);
269extern int kgdb_hex2mem(char *buf, char *mem, int count); 271extern int kgdb_hex2mem(char *buf, char *mem, int count);
270 272
diff --git a/include/linux/libata.h b/include/linux/libata.h
index d1dfe872ee30..7e206da1fbfb 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1039,6 +1039,7 @@ extern void ata_eh_thaw_port(struct ata_port *ap);
1039 1039
1040extern void ata_eh_qc_complete(struct ata_queued_cmd *qc); 1040extern void ata_eh_qc_complete(struct ata_queued_cmd *qc);
1041extern void ata_eh_qc_retry(struct ata_queued_cmd *qc); 1041extern void ata_eh_qc_retry(struct ata_queued_cmd *qc);
1042extern void ata_eh_analyze_ncq_error(struct ata_link *link);
1042 1043
1043extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 1044extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
1044 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 1045 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
@@ -1381,6 +1382,21 @@ static inline struct ata_port *ata_shost_to_port(struct Scsi_Host *host)
1381 return *(struct ata_port **)&host->hostdata[0]; 1382 return *(struct ata_port **)&host->hostdata[0];
1382} 1383}
1383 1384
1385static inline int ata_check_ready(u8 status)
1386{
1387 /* Some controllers report 0x77 or 0x7f during intermediate
1388 * not-ready stages.
1389 */
1390 if (status == 0x77 || status == 0x7f)
1391 return 0;
1392
1393 /* 0xff indicates either no device or device not ready */
1394 if (status == 0xff)
1395 return -ENODEV;
1396
1397 return !(status & ATA_BUSY);
1398}
1399
1384 1400
1385/************************************************************************** 1401/**************************************************************************
1386 * PMP - drivers/ata/libata-pmp.c 1402 * PMP - drivers/ata/libata-pmp.c
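
The new ata_check_ready() gives drivers one place for the 0x77/0x7f/0xff status quirks instead of open-coding them per controller. A hedged sketch of how a driver-side ready poll might consume it; read_status() is a placeholder for the driver's actual register access:

    #include <errno.h>
    #include <stdio.h>

    #define ATA_BUSY 0x80   /* BSY bit of the ATA status register */

    /* Same logic as the new ata_check_ready(): 1 = ready, 0 = keep
     * polling, -ENODEV = nothing there. */
    static int ata_check_ready(unsigned char status)
    {
            if (status == 0x77 || status == 0x7f)  /* bogus intermediate values */
                    return 0;
            if (status == 0xff)                    /* no device / device not ready */
                    return -ENODEV;
            return !(status & ATA_BUSY);
    }

    /* Placeholder for a driver's status-register read. */
    static unsigned char read_status(void) { return 0x50; /* DRDY|DSC */ }

    int main(void)
    {
            int rc = ata_check_ready(read_status());
            printf("ready check: %d\n", rc);
            return 0;
    }
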
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 96acd0dae241..509159bcd4e7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -44,6 +44,7 @@
44#include <linux/mod_devicetable.h> 44#include <linux/mod_devicetable.h>
45 45
46#include <linux/types.h> 46#include <linux/types.h>
47#include <linux/init.h>
47#include <linux/ioport.h> 48#include <linux/ioport.h>
48#include <linux/list.h> 49#include <linux/list.h>
49#include <linux/compiler.h> 50#include <linux/compiler.h>
@@ -474,7 +475,7 @@ extern struct pci_bus *pci_find_bus(int domain, int busnr);
474void pci_bus_add_devices(struct pci_bus *bus); 475void pci_bus_add_devices(struct pci_bus *bus);
475struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, 476struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus,
476 struct pci_ops *ops, void *sysdata); 477 struct pci_ops *ops, void *sysdata);
477static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, 478static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
478 void *sysdata) 479 void *sysdata)
479{ 480{
480 struct pci_bus *root_bus; 481 struct pci_bus *root_bus;
@@ -666,7 +667,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
666 667
667void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), 668void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *),
668 void *userdata); 669 void *userdata);
669int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix); 670int pci_cfg_space_size_ext(struct pci_dev *dev);
670int pci_cfg_space_size(struct pci_dev *dev); 671int pci_cfg_space_size(struct pci_dev *dev);
671unsigned char pci_bus_max_busnr(struct pci_bus *bus); 672unsigned char pci_bus_max_busnr(struct pci_bus *bus);
672 673
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 03c238088aee..0c35b0343a76 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -158,6 +158,8 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
158} 158}
159#endif 159#endif
160 160
161extern unsigned long long time_sync_thresh;
162
161/* 163/*
162 * Task state bitmask. NOTE! These bits are also 164 * Task state bitmask. NOTE! These bits are also
163 * encoded in fs/proc/array.c: get_task_state(). 165 * encoded in fs/proc/array.c: get_task_state().
@@ -1551,6 +1553,35 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
1551 1553
1552extern unsigned long long sched_clock(void); 1554extern unsigned long long sched_clock(void);
1553 1555
1556#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
1557static inline void sched_clock_init(void)
1558{
1559}
1560
1561static inline u64 sched_clock_cpu(int cpu)
1562{
1563 return sched_clock();
1564}
1565
1566static inline void sched_clock_tick(void)
1567{
1568}
1569
1570static inline void sched_clock_idle_sleep_event(void)
1571{
1572}
1573
1574static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
1575{
1576}
1577#else
1578extern void sched_clock_init(void);
1579extern u64 sched_clock_cpu(int cpu);
1580extern void sched_clock_tick(void);
1581extern void sched_clock_idle_sleep_event(void);
1582extern void sched_clock_idle_wakeup_event(u64 delta_ns);
1583#endif
1584
1554/* 1585/*
1555 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu 1586 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
1556 * clock constructed from sched_clock(): 1587 * clock constructed from sched_clock():
@@ -1977,6 +2008,11 @@ static inline void clear_tsk_need_resched(struct task_struct *tsk)
1977 clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); 2008 clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1978} 2009}
1979 2010
2011static inline int test_tsk_need_resched(struct task_struct *tsk)
2012{
2013 return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
2014}
2015
1980static inline int signal_pending(struct task_struct *p) 2016static inline int signal_pending(struct task_struct *p)
1981{ 2017{
1982 return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); 2018 return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
@@ -1991,7 +2027,7 @@ static inline int fatal_signal_pending(struct task_struct *p)
1991 2027
1992static inline int need_resched(void) 2028static inline int need_resched(void)
1993{ 2029{
1994 return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 2030 return unlikely(test_tsk_need_resched(current));
1995} 2031}
1996 2032
1997/* 2033/*
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 27bad59dae79..7858eac40aa7 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -196,12 +196,6 @@ static inline int sysfs_update_group(struct kobject *kobj,
196 return 0; 196 return 0;
197} 197}
198 198
199static inline int sysfs_update_group(struct kobject *kobj,
200 const struct attribute_group *grp)
201{
202 return 0;
203}
204
205static inline void sysfs_remove_group(struct kobject *kobj, 199static inline void sysfs_remove_group(struct kobject *kobj,
206 const struct attribute_group *grp) 200 const struct attribute_group *grp)
207{ 201{
diff --git a/include/net/ip.h b/include/net/ip.h
index 6d7bcd5e62d4..3b40bc2234be 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -210,7 +210,7 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
210{ 210{
211 return (inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || 211 return (inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO ||
212 (inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT && 212 (inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT &&
213 !(dst_metric(dst, RTAX_LOCK)&(1<<RTAX_MTU)))); 213 !(dst_metric_locked(dst, RTAX_MTU))));
214} 214}
215 215
216extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); 216extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
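
The helper replaces the open-coded test of the RTAX_LOCK bitmap; both forms ask whether the administrator locked the MTU metric on this route. A standalone model of the equivalence, with toy metric indices rather than the real ABI values:

    #include <stdio.h>

    enum { RTAX_MTU = 2, RTAX_LOCK = 10 };   /* toy indices, not the real ABI */

    static unsigned metrics[16];

    static unsigned dst_metric(int which) { return metrics[which]; }

    /* What dst_metric_locked(dst, RTAX_MTU) boils down to: test the
     * per-metric bit in the RTAX_LOCK bitmap. */
    static int dst_metric_locked(int which)
    {
            return dst_metric(RTAX_LOCK) & (1u << which);
    }

    int main(void)
    {
            metrics[RTAX_LOCK] |= 1u << RTAX_MTU;    /* admin locked the MTU */
            printf("mtu locked: %d\n", !!dst_metric_locked(RTAX_MTU));
            return 0;
    }
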
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d1350bcccb03..2933d7474a79 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -648,14 +648,46 @@ extern void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
648extern void xfrm_audit_state_icvfail(struct xfrm_state *x, 648extern void xfrm_audit_state_icvfail(struct xfrm_state *x,
649 struct sk_buff *skb, u8 proto); 649 struct sk_buff *skb, u8 proto);
650#else 650#else
651#define xfrm_audit_policy_add(x, r, a, se, s) do { ; } while (0) 651
652#define xfrm_audit_policy_delete(x, r, a, se, s) do { ; } while (0) 652static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
653#define xfrm_audit_state_add(x, r, a, se, s) do { ; } while (0) 653 u32 auid, u32 ses, u32 secid)
654#define xfrm_audit_state_delete(x, r, a, se, s) do { ; } while (0) 654{
655#define xfrm_audit_state_replay_overflow(x, s) do { ; } while (0) 655}
656#define xfrm_audit_state_notfound_simple(s, f) do { ; } while (0) 656
657#define xfrm_audit_state_notfound(s, f, sp, sq) do { ; } while (0) 657static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
658#define xfrm_audit_state_icvfail(x, s, p) do { ; } while (0) 658 u32 auid, u32 ses, u32 secid)
659{
660}
661
662static inline void xfrm_audit_state_add(struct xfrm_state *x, int result,
663 u32 auid, u32 ses, u32 secid)
664{
665}
666
667static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result,
668 u32 auid, u32 ses, u32 secid)
669{
670}
671
672static inline void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
673 struct sk_buff *skb)
674{
675}
676
677static inline void xfrm_audit_state_notfound_simple(struct sk_buff *skb,
678 u16 family)
679{
680}
681
682static inline void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
683 __be32 net_spi, __be32 net_seq)
684{
685}
686
687static inline void xfrm_audit_state_icvfail(struct xfrm_state *x,
688 struct sk_buff *skb, u8 proto)
689{
690}
659#endif /* CONFIG_AUDITSYSCALL */ 691#endif /* CONFIG_AUDITSYSCALL */
660 692
661static inline void xfrm_pol_hold(struct xfrm_policy *policy) 693static inline void xfrm_pol_hold(struct xfrm_policy *policy)
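
Turning the no-op audit macros into empty static inline functions still compiles to nothing, but every call site is now type-checked and its arguments are evaluated consistently whether or not CONFIG_AUDITSYSCALL is set. A minimal illustration of what the macro form lets slip through; the names here are illustrative:

    #include <stdio.h>

    struct xfrm_state;   /* opaque, as in the header */

    /* Macro stub: arguments are discarded unchecked, so even nonsense
     * like passing an int where a struct pointer is expected compiles. */
    #define audit_add_macro(x, result) do { ; } while (0)

    /* Inline stub: the compiler still type-checks every call site. */
    static inline void audit_add_inline(struct xfrm_state *x, int result) { }

    int main(void)
    {
            audit_add_macro(42, "oops");                  /* silently accepted */
            audit_add_inline((struct xfrm_state *)0, 1);  /* checked */
            printf("both stubs compiled to nothing\n");
            return 0;
    }
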
diff --git a/init/Kconfig b/init/Kconfig
index 6a44defac3ec..4c33316743f5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -316,9 +316,16 @@ config CPUSETS
316 316
317 Say N if unsure. 317 Say N if unsure.
318 318
319#
320# Architectures with an unreliable sched_clock() should select this:
321#
322config HAVE_UNSTABLE_SCHED_CLOCK
323 bool
324
319config GROUP_SCHED 325config GROUP_SCHED
320 bool "Group CPU scheduler" 326 bool "Group CPU scheduler"
321 default y 327 depends on EXPERIMENTAL
328 default n
322 help 329 help
323 This feature lets CPU scheduler recognize task groups and control CPU 330 This feature lets CPU scheduler recognize task groups and control CPU
324 bandwidth allocation to such task groups. 331 bandwidth allocation to such task groups.
@@ -326,7 +333,7 @@ config GROUP_SCHED
326config FAIR_GROUP_SCHED 333config FAIR_GROUP_SCHED
327 bool "Group scheduling for SCHED_OTHER" 334 bool "Group scheduling for SCHED_OTHER"
328 depends on GROUP_SCHED 335 depends on GROUP_SCHED
329 default y 336 default GROUP_SCHED
330 337
331config RT_GROUP_SCHED 338config RT_GROUP_SCHED
332 bool "Group scheduling for SCHED_RR/FIFO" 339 bool "Group scheduling for SCHED_RR/FIFO"
@@ -825,6 +832,15 @@ menuconfig MODULES
825 832
826 If unsure, say Y. 833 If unsure, say Y.
827 834
835config MODULE_FORCE_LOAD
836 bool "Forced module loading"
837 depends on MODULES
838 default n
839 help
840 This option allows loading of modules even if that would set the
841 'F' (forced) taint, due to lack of version info. Which is
842 usually a really bad idea.
843
828config MODULE_UNLOAD 844config MODULE_UNLOAD
829 bool "Module unloading" 845 bool "Module unloading"
830 depends on MODULES 846 depends on MODULES
diff --git a/init/main.c b/init/main.c
index a87d4ca5c36c..ddada7acf363 100644
--- a/init/main.c
+++ b/init/main.c
@@ -602,6 +602,7 @@ asmlinkage void __init start_kernel(void)
602 softirq_init(); 602 softirq_init();
603 timekeeping_init(); 603 timekeeping_init();
604 time_init(); 604 time_init();
605 sched_clock_init();
605 profile_init(); 606 profile_init();
606 if (!irqs_disabled()) 607 if (!irqs_disabled())
607 printk("start_kernel(): bug: interrupts were enabled early\n"); 608 printk("start_kernel(): bug: interrupts were enabled early\n");
diff --git a/kernel/Makefile b/kernel/Makefile
index 188c43223f52..1c9938addb9d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o pm_qos_params.o 12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o
13 13
14obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o 14obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
15obj-$(CONFIG_STACKTRACE) += stacktrace.o 15obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/futex.c b/kernel/futex.c
index 98092c9817f4..449def8074fe 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -104,10 +104,6 @@ struct futex_q {
104 /* Key which the futex is hashed on: */ 104 /* Key which the futex is hashed on: */
105 union futex_key key; 105 union futex_key key;
106 106
107 /* For fd, sigio sent using these: */
108 int fd;
109 struct file *filp;
110
111 /* Optional priority inheritance state: */ 107 /* Optional priority inheritance state: */
112 struct futex_pi_state *pi_state; 108 struct futex_pi_state *pi_state;
113 struct task_struct *task; 109 struct task_struct *task;
@@ -126,9 +122,6 @@ struct futex_hash_bucket {
126 122
127static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; 123static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
128 124
129/* Futex-fs vfsmount entry: */
130static struct vfsmount *futex_mnt;
131
132/* 125/*
133 * Take mm->mmap_sem, when futex is shared 126 * Take mm->mmap_sem, when futex is shared
134 */ 127 */
@@ -610,8 +603,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
610static void wake_futex(struct futex_q *q) 603static void wake_futex(struct futex_q *q)
611{ 604{
612 plist_del(&q->list, &q->list.plist); 605 plist_del(&q->list, &q->list.plist);
613 if (q->filp)
614 send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
615 /* 606 /*
616 * The lock in wake_up_all() is a crucial memory barrier after the 607 * The lock in wake_up_all() is a crucial memory barrier after the
617 * plist_del() and also before assigning to q->lock_ptr. 608 * plist_del() and also before assigning to q->lock_ptr.
@@ -988,14 +979,10 @@ out:
988} 979}
989 980
990/* The key must be already stored in q->key. */ 981/* The key must be already stored in q->key. */
991static inline struct futex_hash_bucket * 982static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
992queue_lock(struct futex_q *q, int fd, struct file *filp)
993{ 983{
994 struct futex_hash_bucket *hb; 984 struct futex_hash_bucket *hb;
995 985
996 q->fd = fd;
997 q->filp = filp;
998
999 init_waitqueue_head(&q->waiters); 986 init_waitqueue_head(&q->waiters);
1000 987
1001 get_futex_key_refs(&q->key); 988 get_futex_key_refs(&q->key);
@@ -1006,7 +993,7 @@ queue_lock(struct futex_q *q, int fd, struct file *filp)
1006 return hb; 993 return hb;
1007} 994}
1008 995
1009static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) 996static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1010{ 997{
1011 int prio; 998 int prio;
1012 999
@@ -1041,15 +1028,6 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
1041 * exactly once. They are called with the hashed spinlock held. 1028 * exactly once. They are called with the hashed spinlock held.
1042 */ 1029 */
1043 1030
1044/* The key must be already stored in q->key. */
1045static void queue_me(struct futex_q *q, int fd, struct file *filp)
1046{
1047 struct futex_hash_bucket *hb;
1048
1049 hb = queue_lock(q, fd, filp);
1050 __queue_me(q, hb);
1051}
1052
1053/* Return 1 if we were still queued (ie. 0 means we were woken) */ 1031/* Return 1 if we were still queued (ie. 0 means we were woken) */
1054static int unqueue_me(struct futex_q *q) 1032static int unqueue_me(struct futex_q *q)
1055{ 1033{
@@ -1194,7 +1172,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1194 if (unlikely(ret != 0)) 1172 if (unlikely(ret != 0))
1195 goto out_release_sem; 1173 goto out_release_sem;
1196 1174
1197 hb = queue_lock(&q, -1, NULL); 1175 hb = queue_lock(&q);
1198 1176
1199 /* 1177 /*
1200 * Access the page AFTER the futex is queued. 1178 * Access the page AFTER the futex is queued.
@@ -1238,7 +1216,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1238 goto out_unlock_release_sem; 1216 goto out_unlock_release_sem;
1239 1217
1240 /* Only actually queue if *uaddr contained val. */ 1218 /* Only actually queue if *uaddr contained val. */
1241 __queue_me(&q, hb); 1219 queue_me(&q, hb);
1242 1220
1243 /* 1221 /*
1244 * Now the futex is queued and we have checked the data, we 1222 * Now the futex is queued and we have checked the data, we
@@ -1386,7 +1364,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1386 goto out_release_sem; 1364 goto out_release_sem;
1387 1365
1388 retry_unlocked: 1366 retry_unlocked:
1389 hb = queue_lock(&q, -1, NULL); 1367 hb = queue_lock(&q);
1390 1368
1391 retry_locked: 1369 retry_locked:
1392 ret = lock_taken = 0; 1370 ret = lock_taken = 0;
@@ -1499,7 +1477,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1499 /* 1477 /*
1500 * Only actually queue now that the atomic ops are done: 1478 * Only actually queue now that the atomic ops are done:
1501 */ 1479 */
1502 __queue_me(&q, hb); 1480 queue_me(&q, hb);
1503 1481
1504 /* 1482 /*
1505 * Now the futex is queued and we have checked the data, we 1483 * Now the futex is queued and we have checked the data, we
@@ -1746,121 +1724,6 @@ pi_faulted:
1746 return ret; 1724 return ret;
1747} 1725}
1748 1726
1749static int futex_close(struct inode *inode, struct file *filp)
1750{
1751 struct futex_q *q = filp->private_data;
1752
1753 unqueue_me(q);
1754 kfree(q);
1755
1756 return 0;
1757}
1758
1759/* This is one-shot: once it's gone off you need a new fd */
1760static unsigned int futex_poll(struct file *filp,
1761 struct poll_table_struct *wait)
1762{
1763 struct futex_q *q = filp->private_data;
1764 int ret = 0;
1765
1766 poll_wait(filp, &q->waiters, wait);
1767
1768 /*
1769 * plist_node_empty() is safe here without any lock.
1770 * q->lock_ptr != 0 is not safe, because of ordering against wakeup.
1771 */
1772 if (plist_node_empty(&q->list))
1773 ret = POLLIN | POLLRDNORM;
1774
1775 return ret;
1776}
1777
1778static const struct file_operations futex_fops = {
1779 .release = futex_close,
1780 .poll = futex_poll,
1781};
1782
1783/*
1784 * Signal allows caller to avoid the race which would occur if they
1785 * set the sigio stuff up afterwards.
1786 */
1787static int futex_fd(u32 __user *uaddr, int signal)
1788{
1789 struct futex_q *q;
1790 struct file *filp;
1791 int ret, err;
1792 struct rw_semaphore *fshared;
1793 static unsigned long printk_interval;
1794
1795 if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
1796 printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
1797 "will be removed from the kernel in June 2007\n",
1798 current->comm);
1799 }
1800
1801 ret = -EINVAL;
1802 if (!valid_signal(signal))
1803 goto out;
1804
1805 ret = get_unused_fd();
1806 if (ret < 0)
1807 goto out;
1808 filp = get_empty_filp();
1809 if (!filp) {
1810 put_unused_fd(ret);
1811 ret = -ENFILE;
1812 goto out;
1813 }
1814 filp->f_op = &futex_fops;
1815 filp->f_path.mnt = mntget(futex_mnt);
1816 filp->f_path.dentry = dget(futex_mnt->mnt_root);
1817 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
1818
1819 if (signal) {
1820 err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
1821 if (err < 0) {
1822 goto error;
1823 }
1824 filp->f_owner.signum = signal;
1825 }
1826
1827 q = kmalloc(sizeof(*q), GFP_KERNEL);
1828 if (!q) {
1829 err = -ENOMEM;
1830 goto error;
1831 }
1832 q->pi_state = NULL;
1833
1834 fshared = &current->mm->mmap_sem;
1835 down_read(fshared);
1836 err = get_futex_key(uaddr, fshared, &q->key);
1837
1838 if (unlikely(err != 0)) {
1839 up_read(fshared);
1840 kfree(q);
1841 goto error;
1842 }
1843
1844 /*
1845 * queue_me() must be called before releasing mmap_sem, because
1846 * key->shared.inode needs to be referenced while holding it.
1847 */
1848 filp->private_data = q;
1849
1850 queue_me(q, ret, filp);
1851 up_read(fshared);
1852
1853 /* Now we map fd to filp, so userspace can access it */
1854 fd_install(ret, filp);
1855out:
1856 return ret;
1857error:
1858 put_unused_fd(ret);
1859 put_filp(filp);
1860 ret = err;
1861 goto out;
1862}
1863
1864/* 1727/*
1865 * Support for robust futexes: the kernel cleans up held futexes at 1728 * Support for robust futexes: the kernel cleans up held futexes at
1866 * thread exit time. 1729 * thread exit time.
@@ -2092,10 +1955,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2092 case FUTEX_WAKE_BITSET: 1955 case FUTEX_WAKE_BITSET:
2093 ret = futex_wake(uaddr, fshared, val, val3); 1956 ret = futex_wake(uaddr, fshared, val, val3);
2094 break; 1957 break;
2095 case FUTEX_FD:
2096 /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
2097 ret = futex_fd(uaddr, val);
2098 break;
2099 case FUTEX_REQUEUE: 1958 case FUTEX_REQUEUE:
2100 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); 1959 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
2101 break; 1960 break;
@@ -2156,19 +2015,6 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
2156 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); 2015 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
2157} 2016}
2158 2017
2159static int futexfs_get_sb(struct file_system_type *fs_type,
2160 int flags, const char *dev_name, void *data,
2161 struct vfsmount *mnt)
2162{
2163 return get_sb_pseudo(fs_type, "futex", NULL, FUTEXFS_SUPER_MAGIC, mnt);
2164}
2165
2166static struct file_system_type futex_fs_type = {
2167 .name = "futexfs",
2168 .get_sb = futexfs_get_sb,
2169 .kill_sb = kill_anon_super,
2170};
2171
2172static int __init futex_init(void) 2018static int __init futex_init(void)
2173{ 2019{
2174 u32 curval; 2020 u32 curval;
@@ -2193,16 +2039,6 @@ static int __init futex_init(void)
2193 spin_lock_init(&futex_queues[i].lock); 2039 spin_lock_init(&futex_queues[i].lock);
2194 } 2040 }
2195 2041
2196 i = register_filesystem(&futex_fs_type);
2197 if (i)
2198 return i;
2199
2200 futex_mnt = kern_mount(&futex_fs_type);
2201 if (IS_ERR(futex_mnt)) {
2202 unregister_filesystem(&futex_fs_type);
2203 return PTR_ERR(futex_mnt);
2204 }
2205
2206 return 0; 2042 return 0;
2207} 2043}
2208__initcall(futex_init); 2044__initcall(futex_init);
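
With FUTEX_FD gone, queue_lock() drops its fd/filp arguments and the old two-step __queue_me() simply becomes queue_me(). A compressed user-space model of the pairing the wait path now follows; the locking is reduced to a flag here, where the real code takes the hash-bucket spinlock:

    #include <stdio.h>

    struct futex_hash_bucket { int locked; };
    struct futex_q { struct futex_hash_bucket *hb; };

    static struct futex_hash_bucket bucket;

    /* queue_lock(): find and lock the hash bucket for q->key. */
    static struct futex_hash_bucket *queue_lock(struct futex_q *q)
    {
            (void)q;            /* the real code hashes q->key here */
            bucket.locked = 1;  /* stands in for spin_lock(&hb->lock) */
            return &bucket;
    }

    /* queue_me(): enqueue q and drop the bucket lock (was __queue_me()). */
    static void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
    {
            q->hb = hb;
            hb->locked = 0;     /* stands in for spin_unlock() */
    }

    int main(void)
    {
            struct futex_q q;
            struct futex_hash_bucket *hb = queue_lock(&q);
            /* ... re-read the user-space futex value under the lock ... */
            queue_me(&q, hb);   /* only queue if the value matched */
            printf("queued with bucket %p\n", (void *)q.hb);
            return 0;
    }
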
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 1bd0ec1c80b2..39e31a036f5b 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -61,7 +61,7 @@ struct kgdb_state {
61 int err_code; 61 int err_code;
62 int cpu; 62 int cpu;
63 int pass_exception; 63 int pass_exception;
64 long threadid; 64 unsigned long threadid;
65 long kgdb_usethreadid; 65 long kgdb_usethreadid;
66 struct pt_regs *linux_regs; 66 struct pt_regs *linux_regs;
67}; 67};
@@ -146,7 +146,7 @@ atomic_t kgdb_cpu_doing_single_step = ATOMIC_INIT(-1);
146 * the other CPUs might interfere with your debugging context, so 146 * the other CPUs might interfere with your debugging context, so
147 * use this with care: 147 * use this with care:
148 */ 148 */
149int kgdb_do_roundup = 1; 149static int kgdb_do_roundup = 1;
150 150
151static int __init opt_nokgdbroundup(char *str) 151static int __init opt_nokgdbroundup(char *str)
152{ 152{
@@ -438,7 +438,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
438 * While we find nice hex chars, build a long_val. 438 * While we find nice hex chars, build a long_val.
439 * Return number of chars processed. 439 * Return number of chars processed.
440 */ 440 */
441int kgdb_hex2long(char **ptr, long *long_val) 441int kgdb_hex2long(char **ptr, unsigned long *long_val)
442{ 442{
443 int hex_val; 443 int hex_val;
444 int num = 0; 444 int num = 0;
@@ -709,7 +709,7 @@ int kgdb_isremovedbreak(unsigned long addr)
709 return 0; 709 return 0;
710} 710}
711 711
712int remove_all_break(void) 712static int remove_all_break(void)
713{ 713{
714 unsigned long addr; 714 unsigned long addr;
715 int error; 715 int error;
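
The kgdb_hex2long() signature change matters because thread IDs and addresses arriving over the GDB remote protocol can occupy the full word, and accumulating them into a signed long overflows. A standalone parser with the new shape; the leading-minus handling of the real function is omitted here:

    #include <stdio.h>

    static int hex_digit(char c)
    {
            if (c >= '0' && c <= '9') return c - '0';
            if (c >= 'a' && c <= 'f') return c - 'a' + 10;
            if (c >= 'A' && c <= 'F') return c - 'A' + 10;
            return -1;
    }

    /* Same shape as kgdb_hex2long(): consume hex chars, advance *ptr,
     * return how many characters were used. */
    static int kgdb_hex2long(char **ptr, unsigned long *long_val)
    {
            int hex_val, num = 0;

            *long_val = 0;
            while ((hex_val = hex_digit(**ptr)) >= 0) {
                    *long_val = (*long_val << 4) | hex_val;
                    num++;
                    (*ptr)++;
            }
            return num;
    }

    int main(void)
    {
            char buf[] = "deadbeef", *p = buf;
            unsigned long v;
            int n = kgdb_hex2long(&p, &v);
            printf("parsed %d chars -> %#lx\n", n, v);
            return 0;
    }
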
diff --git a/kernel/module.c b/kernel/module.c
index 8674a390a2e8..8e4528c9909f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -890,6 +890,19 @@ static struct module_attribute *modinfo_attrs[] = {
890 890
891static const char vermagic[] = VERMAGIC_STRING; 891static const char vermagic[] = VERMAGIC_STRING;
892 892
893static int try_to_force_load(struct module *mod, const char *symname)
894{
895#ifdef CONFIG_MODULE_FORCE_LOAD
896 if (!(tainted & TAINT_FORCED_MODULE))
897 printk("%s: no version for \"%s\" found: kernel tainted.\n",
898 mod->name, symname);
899 add_taint_module(mod, TAINT_FORCED_MODULE);
900 return 0;
901#else
902 return -ENOEXEC;
903#endif
904}
905
893#ifdef CONFIG_MODVERSIONS 906#ifdef CONFIG_MODVERSIONS
894static int check_version(Elf_Shdr *sechdrs, 907static int check_version(Elf_Shdr *sechdrs,
895 unsigned int versindex, 908 unsigned int versindex,
@@ -914,18 +927,18 @@ static int check_version(Elf_Shdr *sechdrs,
914 927
915 if (versions[i].crc == *crc) 928 if (versions[i].crc == *crc)
916 return 1; 929 return 1;
917 printk("%s: disagrees about version of symbol %s\n",
918 mod->name, symname);
919 DEBUGP("Found checksum %lX vs module %lX\n", 930 DEBUGP("Found checksum %lX vs module %lX\n",
920 *crc, versions[i].crc); 931 *crc, versions[i].crc);
921 return 0; 932 goto bad_version;
922 } 933 }
923 /* Not in module's version table. OK, but that taints the kernel. */ 934
924 if (!(tainted & TAINT_FORCED_MODULE)) 935 if (!try_to_force_load(mod, symname))
925 printk("%s: no version for \"%s\" found: kernel tainted.\n", 936 return 1;
926 mod->name, symname); 937
927 add_taint_module(mod, TAINT_FORCED_MODULE); 938bad_version:
928 return 1; 939 printk("%s: disagrees about version of symbol %s\n",
940 mod->name, symname);
941 return 0;
929} 942}
930 943
931static inline int check_modstruct_version(Elf_Shdr *sechdrs, 944static inline int check_modstruct_version(Elf_Shdr *sechdrs,
@@ -1853,9 +1866,9 @@ static struct module *load_module(void __user *umod,
1853 modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); 1866 modmagic = get_modinfo(sechdrs, infoindex, "vermagic");
1854 /* This is allowed: modprobe --force will invalidate it. */ 1867 /* This is allowed: modprobe --force will invalidate it. */
1855 if (!modmagic) { 1868 if (!modmagic) {
1856 add_taint_module(mod, TAINT_FORCED_MODULE); 1869 err = try_to_force_load(mod, "magic");
1857 printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", 1870 if (err)
1858 mod->name); 1871 goto free_hdr;
1859 } else if (!same_magic(modmagic, vermagic)) { 1872 } else if (!same_magic(modmagic, vermagic)) {
1860 printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", 1873 printk(KERN_ERR "%s: version magic '%s' should be '%s'\n",
1861 mod->name, modmagic, vermagic); 1874 mod->name, modmagic, vermagic);
@@ -2006,9 +2019,10 @@ static struct module *load_module(void __user *umod,
2006 (mod->num_gpl_future_syms && !gplfuturecrcindex) || 2019 (mod->num_gpl_future_syms && !gplfuturecrcindex) ||
2007 (mod->num_unused_syms && !unusedcrcindex) || 2020 (mod->num_unused_syms && !unusedcrcindex) ||
2008 (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { 2021 (mod->num_unused_gpl_syms && !unusedgplcrcindex)) {
2009 printk(KERN_WARNING "%s: No versions for exported symbols." 2022 printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
2010 " Tainting kernel.\n", mod->name); 2023 err = try_to_force_load(mod, "nocrc");
2011 add_taint_module(mod, TAINT_FORCED_MODULE); 2024 if (err)
2025 goto cleanup;
2012 } 2026 }
2013#endif 2027#endif
2014 markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); 2028 markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
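
The refactor funnels all three missing-version cases through one helper, making forced loading a single compile-time policy decision. A compressed model of the new flow, with the taint bookkeeping reduced to a flag and the config option modeled as a plain macro:

    #include <errno.h>
    #include <stdio.h>

    #define CONFIG_MODULE_FORCE_LOAD 1   /* flip to 0 to model the strict build */

    static int tainted;

    /* Mirrors try_to_force_load(): succeed and taint when force loading
     * is configured in, hard -ENOEXEC otherwise. */
    static int try_to_force_load(const char *mod, const char *symname)
    {
    #if CONFIG_MODULE_FORCE_LOAD
            if (!tainted)
                    printf("%s: no version for \"%s\" found: kernel tainted.\n",
                           mod, symname);
            tainted = 1;
            return 0;
    #else
            return -ENOEXEC;
    #endif
    }

    int main(void)
    {
            int err = try_to_force_load("demo_mod", "magic");
            printf("load %s (err=%d, tainted=%d)\n",
                   err ? "refused" : "forced", err, tainted);
            return 0;
    }
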
diff --git a/kernel/sched.c b/kernel/sched.c
index 34bcc5bc120e..58fb8af15776 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -75,16 +75,6 @@
75#include <asm/irq_regs.h> 75#include <asm/irq_regs.h>
76 76
77/* 77/*
78 * Scheduler clock - returns current time in nanosec units.
79 * This is default implementation.
80 * Architectures and sub-architectures can override this.
81 */
82unsigned long long __attribute__((weak)) sched_clock(void)
83{
84 return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
85}
86
87/*
88 * Convert user-nice values [ -20 ... 0 ... 19 ] 78 * Convert user-nice values [ -20 ... 0 ... 19 ]
89 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], 79 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
90 * and back. 80 * and back.
@@ -242,6 +232,12 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
242} 232}
243#endif 233#endif
244 234
235/*
236 * sched_domains_mutex serializes calls to arch_init_sched_domains,
237 * detach_destroy_domains and partition_sched_domains.
238 */
239static DEFINE_MUTEX(sched_domains_mutex);
240
245#ifdef CONFIG_GROUP_SCHED 241#ifdef CONFIG_GROUP_SCHED
246 242
247#include <linux/cgroup.h> 243#include <linux/cgroup.h>
@@ -308,9 +304,6 @@ static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
308 */ 304 */
309static DEFINE_SPINLOCK(task_group_lock); 305static DEFINE_SPINLOCK(task_group_lock);
310 306
311/* doms_cur_mutex serializes access to doms_cur[] array */
312static DEFINE_MUTEX(doms_cur_mutex);
313
314#ifdef CONFIG_FAIR_GROUP_SCHED 307#ifdef CONFIG_FAIR_GROUP_SCHED
315#ifdef CONFIG_USER_SCHED 308#ifdef CONFIG_USER_SCHED
316# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) 309# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
@@ -318,7 +311,13 @@ static DEFINE_MUTEX(doms_cur_mutex);
318# define INIT_TASK_GROUP_LOAD NICE_0_LOAD 311# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
319#endif 312#endif
320 313
314/*
315 * A weight of 0, 1 or ULONG_MAX can cause arithmetic problems.
316 * (The default weight is 1024 - so there's no practical
317 * limitation from this.)
318 */
321#define MIN_SHARES 2 319#define MIN_SHARES 2
320#define MAX_SHARES (ULONG_MAX - 1)
322 321
323static int init_task_group_load = INIT_TASK_GROUP_LOAD; 322static int init_task_group_load = INIT_TASK_GROUP_LOAD;
324#endif 323#endif
@@ -358,21 +357,9 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
358#endif 357#endif
359} 358}
360 359
361static inline void lock_doms_cur(void)
362{
363 mutex_lock(&doms_cur_mutex);
364}
365
366static inline void unlock_doms_cur(void)
367{
368 mutex_unlock(&doms_cur_mutex);
369}
370
371#else 360#else
372 361
373static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } 362static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
374static inline void lock_doms_cur(void) { }
375static inline void unlock_doms_cur(void) { }
376 363
377#endif /* CONFIG_GROUP_SCHED */ 364#endif /* CONFIG_GROUP_SCHED */
378 365
@@ -560,13 +547,7 @@ struct rq {
560 unsigned long next_balance; 547 unsigned long next_balance;
561 struct mm_struct *prev_mm; 548 struct mm_struct *prev_mm;
562 549
563 u64 clock, prev_clock_raw; 550 u64 clock;
564 s64 clock_max_delta;
565
566 unsigned int clock_warps, clock_overflows, clock_underflows;
567 u64 idle_clock;
568 unsigned int clock_deep_idle_events;
569 u64 tick_timestamp;
570 551
571 atomic_t nr_iowait; 552 atomic_t nr_iowait;
572 553
@@ -631,82 +612,6 @@ static inline int cpu_of(struct rq *rq)
631#endif 612#endif
632} 613}
633 614
634#ifdef CONFIG_NO_HZ
635static inline bool nohz_on(int cpu)
636{
637 return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE;
638}
639
640static inline u64 max_skipped_ticks(struct rq *rq)
641{
642 return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1;
643}
644
645static inline void update_last_tick_seen(struct rq *rq)
646{
647 rq->last_tick_seen = jiffies;
648}
649#else
650static inline u64 max_skipped_ticks(struct rq *rq)
651{
652 return 1;
653}
654
655static inline void update_last_tick_seen(struct rq *rq)
656{
657}
658#endif
659
660/*
661 * Update the per-runqueue clock, as finegrained as the platform can give
662 * us, but without assuming monotonicity, etc.:
663 */
664static void __update_rq_clock(struct rq *rq)
665{
666 u64 prev_raw = rq->prev_clock_raw;
667 u64 now = sched_clock();
668 s64 delta = now - prev_raw;
669 u64 clock = rq->clock;
670
671#ifdef CONFIG_SCHED_DEBUG
672 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
673#endif
674 /*
675 * Protect against sched_clock() occasionally going backwards:
676 */
677 if (unlikely(delta < 0)) {
678 clock++;
679 rq->clock_warps++;
680 } else {
681 /*
682 * Catch too large forward jumps too:
683 */
684 u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC;
685 u64 max_time = rq->tick_timestamp + max_jump;
686
687 if (unlikely(clock + delta > max_time)) {
688 if (clock < max_time)
689 clock = max_time;
690 else
691 clock++;
692 rq->clock_overflows++;
693 } else {
694 if (unlikely(delta > rq->clock_max_delta))
695 rq->clock_max_delta = delta;
696 clock += delta;
697 }
698 }
699
700 rq->prev_clock_raw = now;
701 rq->clock = clock;
702}
703
704static void update_rq_clock(struct rq *rq)
705{
706 if (likely(smp_processor_id() == cpu_of(rq)))
707 __update_rq_clock(rq);
708}
709
710/* 615/*
711 * The domain tree (rq->sd) is protected by RCU's quiescent state transition. 616 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
712 * See detach_destroy_domains: synchronize_sched for details. 617 * See detach_destroy_domains: synchronize_sched for details.
@@ -722,6 +627,11 @@ static void update_rq_clock(struct rq *rq)
722#define task_rq(p) cpu_rq(task_cpu(p)) 627#define task_rq(p) cpu_rq(task_cpu(p))
723#define cpu_curr(cpu) (cpu_rq(cpu)->curr) 628#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
724 629
630static inline void update_rq_clock(struct rq *rq)
631{
632 rq->clock = sched_clock_cpu(cpu_of(rq));
633}
634
725/* 635/*
726 * Tunables that become constants when CONFIG_SCHED_DEBUG is off: 636 * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
727 */ 637 */
@@ -757,14 +667,14 @@ const_debug unsigned int sysctl_sched_features =
757#define SCHED_FEAT(name, enabled) \ 667#define SCHED_FEAT(name, enabled) \
758 #name , 668 #name ,
759 669
760__read_mostly char *sched_feat_names[] = { 670static __read_mostly char *sched_feat_names[] = {
761#include "sched_features.h" 671#include "sched_features.h"
762 NULL 672 NULL
763}; 673};
764 674
765#undef SCHED_FEAT 675#undef SCHED_FEAT
766 676
767int sched_feat_open(struct inode *inode, struct file *filp) 677static int sched_feat_open(struct inode *inode, struct file *filp)
768{ 678{
769 filp->private_data = inode->i_private; 679 filp->private_data = inode->i_private;
770 return 0; 680 return 0;
@@ -899,7 +809,7 @@ static inline u64 global_rt_runtime(void)
899 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; 809 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
900} 810}
901 811
902static const unsigned long long time_sync_thresh = 100000; 812unsigned long long time_sync_thresh = 100000;
903 813
904static DEFINE_PER_CPU(unsigned long long, time_offset); 814static DEFINE_PER_CPU(unsigned long long, time_offset);
905static DEFINE_PER_CPU(unsigned long long, prev_cpu_time); 815static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
@@ -913,11 +823,14 @@ static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
913static DEFINE_SPINLOCK(time_sync_lock); 823static DEFINE_SPINLOCK(time_sync_lock);
914static unsigned long long prev_global_time; 824static unsigned long long prev_global_time;
915 825
916static unsigned long long __sync_cpu_clock(cycles_t time, int cpu) 826static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
917{ 827{
918 unsigned long flags; 828 /*
919 829 * We want this inlined, to not get tracer function calls
920 spin_lock_irqsave(&time_sync_lock, flags); 830 * in this critical section:
831 */
832 spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_);
833 __raw_spin_lock(&time_sync_lock.raw_lock);
921 834
922 if (time < prev_global_time) { 835 if (time < prev_global_time) {
923 per_cpu(time_offset, cpu) += prev_global_time - time; 836 per_cpu(time_offset, cpu) += prev_global_time - time;
@@ -926,7 +839,8 @@ static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
926 prev_global_time = time; 839 prev_global_time = time;
927 } 840 }
928 841
929 spin_unlock_irqrestore(&time_sync_lock, flags); 842 __raw_spin_unlock(&time_sync_lock.raw_lock);
843 spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_);
930 844
931 return time; 845 return time;
932} 846}
@@ -934,8 +848,6 @@ static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
934static unsigned long long __cpu_clock(int cpu) 848static unsigned long long __cpu_clock(int cpu)
935{ 849{
936 unsigned long long now; 850 unsigned long long now;
937 unsigned long flags;
938 struct rq *rq;
939 851
940 /* 852 /*
941 * Only call sched_clock() if the scheduler has already been 853 * Only call sched_clock() if the scheduler has already been
@@ -944,11 +856,7 @@ static unsigned long long __cpu_clock(int cpu)
944 if (unlikely(!scheduler_running)) 856 if (unlikely(!scheduler_running))
945 return 0; 857 return 0;
946 858
947 local_irq_save(flags); 859 now = sched_clock_cpu(cpu);
948 rq = cpu_rq(cpu);
949 update_rq_clock(rq);
950 now = rq->clock;
951 local_irq_restore(flags);
952 860
953 return now; 861 return now;
954} 862}
@@ -960,13 +868,18 @@ static unsigned long long __cpu_clock(int cpu)
960unsigned long long cpu_clock(int cpu) 868unsigned long long cpu_clock(int cpu)
961{ 869{
962 unsigned long long prev_cpu_time, time, delta_time; 870 unsigned long long prev_cpu_time, time, delta_time;
871 unsigned long flags;
963 872
873 local_irq_save(flags);
964 prev_cpu_time = per_cpu(prev_cpu_time, cpu); 874 prev_cpu_time = per_cpu(prev_cpu_time, cpu);
965 time = __cpu_clock(cpu) + per_cpu(time_offset, cpu); 875 time = __cpu_clock(cpu) + per_cpu(time_offset, cpu);
966 delta_time = time-prev_cpu_time; 876 delta_time = time-prev_cpu_time;
967 877
968 if (unlikely(delta_time > time_sync_thresh)) 878 if (unlikely(delta_time > time_sync_thresh)) {
969 time = __sync_cpu_clock(time, cpu); 879 time = __sync_cpu_clock(time, cpu);
880 per_cpu(prev_cpu_time, cpu) = time;
881 }
882 local_irq_restore(flags);
970 883
971 return time; 884 return time;
972} 885}
@@ -1117,43 +1030,6 @@ static struct rq *this_rq_lock(void)
1117 return rq; 1030 return rq;
1118} 1031}
1119 1032
1120/*
1121 * We are going deep-idle (irqs are disabled):
1122 */
1123void sched_clock_idle_sleep_event(void)
1124{
1125 struct rq *rq = cpu_rq(smp_processor_id());
1126
1127 spin_lock(&rq->lock);
1128 __update_rq_clock(rq);
1129 spin_unlock(&rq->lock);
1130 rq->clock_deep_idle_events++;
1131}
1132EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
1133
1134/*
1135 * We just idled delta nanoseconds (called with irqs disabled):
1136 */
1137void sched_clock_idle_wakeup_event(u64 delta_ns)
1138{
1139 struct rq *rq = cpu_rq(smp_processor_id());
1140 u64 now = sched_clock();
1141
1142 rq->idle_clock += delta_ns;
1143 /*
1144 * Override the previous timestamp and ignore all
1145 * sched_clock() deltas that occured while we idled,
1146 * and use the PM-provided delta_ns to advance the
1147 * rq clock:
1148 */
1149 spin_lock(&rq->lock);
1150 rq->prev_clock_raw = now;
1151 rq->clock += delta_ns;
1152 spin_unlock(&rq->lock);
1153 touch_softlockup_watchdog();
1154}
1155EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
1156
1157static void __resched_task(struct task_struct *p, int tif_bit); 1033static void __resched_task(struct task_struct *p, int tif_bit);
1158 1034
1159static inline void resched_task(struct task_struct *p) 1035static inline void resched_task(struct task_struct *p)
@@ -1189,6 +1065,7 @@ static inline void resched_rq(struct rq *rq)
1189enum { 1065enum {
1190 HRTICK_SET, /* re-program hrtick_timer */ 1066 HRTICK_SET, /* re-program hrtick_timer */
1191 HRTICK_RESET, /* not a new slice */ 1067 HRTICK_RESET, /* not a new slice */
1068 HRTICK_BLOCK, /* stop hrtick operations */
1192}; 1069};
1193 1070
1194/* 1071/*
@@ -1200,6 +1077,8 @@ static inline int hrtick_enabled(struct rq *rq)
1200{ 1077{
1201 if (!sched_feat(HRTICK)) 1078 if (!sched_feat(HRTICK))
1202 return 0; 1079 return 0;
1080 if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags)))
1081 return 0;
1203 return hrtimer_is_hres_active(&rq->hrtick_timer); 1082 return hrtimer_is_hres_active(&rq->hrtick_timer);
1204} 1083}
1205 1084
@@ -1275,14 +1154,70 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
1275 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); 1154 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
1276 1155
1277 spin_lock(&rq->lock); 1156 spin_lock(&rq->lock);
1278 __update_rq_clock(rq); 1157 update_rq_clock(rq);
1279 rq->curr->sched_class->task_tick(rq, rq->curr, 1); 1158 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
1280 spin_unlock(&rq->lock); 1159 spin_unlock(&rq->lock);
1281 1160
1282 return HRTIMER_NORESTART; 1161 return HRTIMER_NORESTART;
1283} 1162}
1284 1163
1285static inline void init_rq_hrtick(struct rq *rq) 1164static void hotplug_hrtick_disable(int cpu)
1165{
1166 struct rq *rq = cpu_rq(cpu);
1167 unsigned long flags;
1168
1169 spin_lock_irqsave(&rq->lock, flags);
1170 rq->hrtick_flags = 0;
1171 __set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
1172 spin_unlock_irqrestore(&rq->lock, flags);
1173
1174 hrtick_clear(rq);
1175}
1176
1177static void hotplug_hrtick_enable(int cpu)
1178{
1179 struct rq *rq = cpu_rq(cpu);
1180 unsigned long flags;
1181
1182 spin_lock_irqsave(&rq->lock, flags);
1183 __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags);
1184 spin_unlock_irqrestore(&rq->lock, flags);
1185}
1186
1187static int
1188hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
1189{
1190 int cpu = (int)(long)hcpu;
1191
1192 switch (action) {
1193 case CPU_UP_CANCELED:
1194 case CPU_UP_CANCELED_FROZEN:
1195 case CPU_DOWN_PREPARE:
1196 case CPU_DOWN_PREPARE_FROZEN:
1197 case CPU_DEAD:
1198 case CPU_DEAD_FROZEN:
1199 hotplug_hrtick_disable(cpu);
1200 return NOTIFY_OK;
1201
1202 case CPU_UP_PREPARE:
1203 case CPU_UP_PREPARE_FROZEN:
1204 case CPU_DOWN_FAILED:
1205 case CPU_DOWN_FAILED_FROZEN:
1206 case CPU_ONLINE:
1207 case CPU_ONLINE_FROZEN:
1208 hotplug_hrtick_enable(cpu);
1209 return NOTIFY_OK;
1210 }
1211
1212 return NOTIFY_DONE;
1213}
1214
1215static void init_hrtick(void)
1216{
1217 hotcpu_notifier(hotplug_hrtick, 0);
1218}
1219
1220static void init_rq_hrtick(struct rq *rq)
1286{ 1221{
1287 rq->hrtick_flags = 0; 1222 rq->hrtick_flags = 0;
1288 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 1223 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -1319,6 +1254,10 @@ static inline void init_rq_hrtick(struct rq *rq)
1319void hrtick_resched(void) 1254void hrtick_resched(void)
1320{ 1255{
1321} 1256}
1257
1258static inline void init_hrtick(void)
1259{
1260}
1322#endif 1261#endif
1323 1262
1324/* 1263/*
@@ -1438,8 +1377,8 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
1438{ 1377{
1439 u64 tmp; 1378 u64 tmp;
1440 1379
1441 if (unlikely(!lw->inv_weight)) 1380 if (!lw->inv_weight)
1442 lw->inv_weight = (WMULT_CONST-lw->weight/2) / (lw->weight+1); 1381 lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1);
1443 1382
1444 tmp = (u64)delta_exec * weight; 1383 tmp = (u64)delta_exec * weight;
1445 /* 1384 /*
@@ -1748,6 +1687,8 @@ __update_group_shares_cpu(struct task_group *tg, struct sched_domain *sd,
1748 1687
1749 if (shares < MIN_SHARES) 1688 if (shares < MIN_SHARES)
1750 shares = MIN_SHARES; 1689 shares = MIN_SHARES;
1690 else if (shares > MAX_SHARES)
1691 shares = MAX_SHARES;
1751 1692
1752 __set_se_shares(tg->se[tcpu], shares); 1693 __set_se_shares(tg->se[tcpu], shares);
1753} 1694}
@@ -4339,8 +4280,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4339 struct rq *rq = this_rq(); 4280 struct rq *rq = this_rq();
4340 cputime64_t tmp; 4281 cputime64_t tmp;
4341 4282
4342 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) 4283 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
4343 return account_guest_time(p, cputime); 4284 account_guest_time(p, cputime);
4285 return;
4286 }
4344 4287
4345 p->stime = cputime_add(p->stime, cputime); 4288 p->stime = cputime_add(p->stime, cputime);
4346 4289
@@ -4404,19 +4347,11 @@ void scheduler_tick(void)
4404 int cpu = smp_processor_id(); 4347 int cpu = smp_processor_id();
4405 struct rq *rq = cpu_rq(cpu); 4348 struct rq *rq = cpu_rq(cpu);
4406 struct task_struct *curr = rq->curr; 4349 struct task_struct *curr = rq->curr;
4407 u64 next_tick = rq->tick_timestamp + TICK_NSEC; 4350
4351 sched_clock_tick();
4408 4352
4409 spin_lock(&rq->lock); 4353 spin_lock(&rq->lock);
4410 __update_rq_clock(rq); 4354 update_rq_clock(rq);
4411 /*
4412 * Let rq->clock advance by at least TICK_NSEC:
4413 */
4414 if (unlikely(rq->clock < next_tick)) {
4415 rq->clock = next_tick;
4416 rq->clock_underflows++;
4417 }
4418 rq->tick_timestamp = rq->clock;
4419 update_last_tick_seen(rq);
4420 update_cpu_load(rq); 4355 update_cpu_load(rq);
4421 curr->sched_class->task_tick(rq, curr, 0); 4356 curr->sched_class->task_tick(rq, curr, 0);
4422 spin_unlock(&rq->lock); 4357 spin_unlock(&rq->lock);
@@ -4570,7 +4505,7 @@ need_resched_nonpreemptible:
4570 * Do the rq-clock update outside the rq lock: 4505 * Do the rq-clock update outside the rq lock:
4571 */ 4506 */
4572 local_irq_disable(); 4507 local_irq_disable();
4573 __update_rq_clock(rq); 4508 update_rq_clock(rq);
4574 spin_lock(&rq->lock); 4509 spin_lock(&rq->lock);
4575 clear_tsk_need_resched(prev); 4510 clear_tsk_need_resched(prev);
4576 4511
@@ -4595,9 +4530,9 @@ need_resched_nonpreemptible:
4595 prev->sched_class->put_prev_task(rq, prev); 4530 prev->sched_class->put_prev_task(rq, prev);
4596 next = pick_next_task(rq, prev); 4531 next = pick_next_task(rq, prev);
4597 4532
4598 sched_info_switch(prev, next);
4599
4600 if (likely(prev != next)) { 4533 if (likely(prev != next)) {
4534 sched_info_switch(prev, next);
4535
4601 rq->nr_switches++; 4536 rq->nr_switches++;
4602 rq->curr = next; 4537 rq->curr = next;
4603 ++*switch_count; 4538 ++*switch_count;
@@ -7755,7 +7690,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
7755{ 7690{
7756 int i, j; 7691 int i, j;
7757 7692
7758 lock_doms_cur(); 7693 mutex_lock(&sched_domains_mutex);
7759 7694
7760 /* always unregister in case we don't destroy any domains */ 7695 /* always unregister in case we don't destroy any domains */
7761 unregister_sched_domain_sysctl(); 7696 unregister_sched_domain_sysctl();
@@ -7804,7 +7739,7 @@ match2:
7804 7739
7805 register_sched_domain_sysctl(); 7740 register_sched_domain_sysctl();
7806 7741
7807 unlock_doms_cur(); 7742 mutex_unlock(&sched_domains_mutex);
7808} 7743}
7809 7744
7810#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 7745#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -7813,8 +7748,10 @@ int arch_reinit_sched_domains(void)
7813 int err; 7748 int err;
7814 7749
7815 get_online_cpus(); 7750 get_online_cpus();
7751 mutex_lock(&sched_domains_mutex);
7816 detach_destroy_domains(&cpu_online_map); 7752 detach_destroy_domains(&cpu_online_map);
7817 err = arch_init_sched_domains(&cpu_online_map); 7753 err = arch_init_sched_domains(&cpu_online_map);
7754 mutex_unlock(&sched_domains_mutex);
7818 put_online_cpus(); 7755 put_online_cpus();
7819 7756
7820 return err; 7757 return err;
@@ -7932,13 +7869,16 @@ void __init sched_init_smp(void)
7932 BUG_ON(sched_group_nodes_bycpu == NULL); 7869 BUG_ON(sched_group_nodes_bycpu == NULL);
7933#endif 7870#endif
7934 get_online_cpus(); 7871 get_online_cpus();
7872 mutex_lock(&sched_domains_mutex);
7935 arch_init_sched_domains(&cpu_online_map); 7873 arch_init_sched_domains(&cpu_online_map);
7936 cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); 7874 cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
7937 if (cpus_empty(non_isolated_cpus)) 7875 if (cpus_empty(non_isolated_cpus))
7938 cpu_set(smp_processor_id(), non_isolated_cpus); 7876 cpu_set(smp_processor_id(), non_isolated_cpus);
7877 mutex_unlock(&sched_domains_mutex);
7939 put_online_cpus(); 7878 put_online_cpus();
7940 /* XXX: Theoretical race here - CPU may be hotplugged now */ 7879 /* XXX: Theoretical race here - CPU may be hotplugged now */
7941 hotcpu_notifier(update_sched_domains, 0); 7880 hotcpu_notifier(update_sched_domains, 0);
7881 init_hrtick();
7942 7882
7943 /* Move init over to a non-isolated CPU */ 7883 /* Move init over to a non-isolated CPU */
7944 if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) 7884 if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0)
@@ -8025,7 +7965,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
8025 7965
8026 se->my_q = cfs_rq; 7966 se->my_q = cfs_rq;
8027 se->load.weight = tg->shares; 7967 se->load.weight = tg->shares;
8028 se->load.inv_weight = div64_u64(1ULL<<32, se->load.weight); 7968 se->load.inv_weight = 0;
8029 se->parent = parent; 7969 se->parent = parent;
8030} 7970}
8031#endif 7971#endif
@@ -8149,8 +8089,6 @@ void __init sched_init(void)
8149 spin_lock_init(&rq->lock); 8089 spin_lock_init(&rq->lock);
8150 lockdep_set_class(&rq->lock, &rq->rq_lock_key); 8090 lockdep_set_class(&rq->lock, &rq->rq_lock_key);
8151 rq->nr_running = 0; 8091 rq->nr_running = 0;
8152 rq->clock = 1;
8153 update_last_tick_seen(rq);
8154 init_cfs_rq(&rq->cfs, rq); 8092 init_cfs_rq(&rq->cfs, rq);
8155 init_rt_rq(&rq->rt, rq); 8093 init_rt_rq(&rq->rt, rq);
8156#ifdef CONFIG_FAIR_GROUP_SCHED 8094#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8294,6 +8232,7 @@ EXPORT_SYMBOL(__might_sleep);
8294static void normalize_task(struct rq *rq, struct task_struct *p) 8232static void normalize_task(struct rq *rq, struct task_struct *p)
8295{ 8233{
8296 int on_rq; 8234 int on_rq;
8235
8297 update_rq_clock(rq); 8236 update_rq_clock(rq);
8298 on_rq = p->se.on_rq; 8237 on_rq = p->se.on_rq;
8299 if (on_rq) 8238 if (on_rq)
@@ -8325,7 +8264,6 @@ void normalize_rt_tasks(void)
8325 p->se.sleep_start = 0; 8264 p->se.sleep_start = 0;
8326 p->se.block_start = 0; 8265 p->se.block_start = 0;
8327#endif 8266#endif
8328 task_rq(p)->clock = 0;
8329 8267
8330 if (!rt_task(p)) { 8268 if (!rt_task(p)) {
8331 /* 8269 /*
@@ -8692,7 +8630,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares)
8692 dequeue_entity(cfs_rq, se, 0); 8630 dequeue_entity(cfs_rq, se, 0);
8693 8631
8694 se->load.weight = shares; 8632 se->load.weight = shares;
8695 se->load.inv_weight = div64_u64((1ULL<<32), shares); 8633 se->load.inv_weight = 0;
8696 8634
8697 if (on_rq) 8635 if (on_rq)
8698 enqueue_entity(cfs_rq, se, 0); 8636 enqueue_entity(cfs_rq, se, 0);
@@ -8722,13 +8660,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
8722 if (!tg->se[0]) 8660 if (!tg->se[0])
8723 return -EINVAL; 8661 return -EINVAL;
8724 8662
8725 /*
8726 * A weight of 0 or 1 can cause arithmetics problems.
8727 * (The default weight is 1024 - so there's no practical
8728 * limitation from this.)
8729 */
8730 if (shares < MIN_SHARES) 8663 if (shares < MIN_SHARES)
8731 shares = MIN_SHARES; 8664 shares = MIN_SHARES;
8665 else if (shares > MAX_SHARES)
8666 shares = MAX_SHARES;
8732 8667
8733 mutex_lock(&shares_mutex); 8668 mutex_lock(&shares_mutex);
8734 if (tg->shares == shares) 8669 if (tg->shares == shares)
@@ -8753,7 +8688,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
8753 * force a rebalance 8688 * force a rebalance
8754 */ 8689 */
8755 cfs_rq_set_shares(tg->cfs_rq[i], 0); 8690 cfs_rq_set_shares(tg->cfs_rq[i], 0);
8756 set_se_shares(tg->se[i], shares/nr_cpu_ids); 8691 set_se_shares(tg->se[i], shares);
8757 } 8692 }
8758 8693
8759 /* 8694 /*
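
sched_group_set_shares() now clamps at both ends of the weight range and hands each per-cpu entity the full group weight rather than shares/nr_cpu_ids, while inv_weight is zeroed so calc_delta_mine() recomputes it lazily. The clamp in isolation, using the same constants the patch introduces:

    #include <limits.h>
    #include <stdio.h>

    #define MIN_SHARES 2UL
    #define MAX_SHARES (ULONG_MAX - 1)

    /* Weights of 0, 1 or ULONG_MAX break the inverse-weight arithmetic,
     * so user-supplied share values are clamped to the valid range. */
    static unsigned long clamp_shares(unsigned long shares)
    {
            if (shares < MIN_SHARES)
                    shares = MIN_SHARES;
            else if (shares > MAX_SHARES)
                    shares = MAX_SHARES;
            return shares;
    }

    int main(void)
    {
            printf("%lu %lu %lu\n", clamp_shares(0),
                   clamp_shares(1024), clamp_shares(ULONG_MAX));
            return 0;
    }
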
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
new file mode 100644
index 000000000000..9c597e37f7de
--- /dev/null
+++ b/kernel/sched_clock.c
@@ -0,0 +1,236 @@
1/*
2 * sched_clock for unstable cpu clocks
3 *
4 * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
5 *
6 * Based on code by:
7 * Ingo Molnar <mingo@redhat.com>
8 * Guillaume Chazarain <guichaz@gmail.com>
9 *
10 * Create a semi-stable clock from a mixture of other events, including:
11 * - gtod
12 * - jiffies
13 * - sched_clock()
14 * - explicit idle events
15 *
16 * We use gtod as the base and layer the unstable clock deltas on top. The
17 * deltas are filtered, keeping the clock monotonic and within an expected
18 * window. This window is set up using jiffies.
19 *
20 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
21 * that is otherwise invisible (TSC gets stopped).
22 *
23 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
24 * consistent between cpus (never more than one jiffy of difference).
25 */
26#include <linux/sched.h>
27#include <linux/percpu.h>
28#include <linux/spinlock.h>
29#include <linux/ktime.h>
30#include <linux/module.h>
31
32
33#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
34
35struct sched_clock_data {
36 /*
37 * Raw spinlock - this is a special case: this might be called
 37 * from within instrumentation code so we don't want to do any
39 * instrumentation ourselves.
40 */
41 raw_spinlock_t lock;
42
43 unsigned long prev_jiffies;
44 u64 prev_raw;
45 u64 tick_raw;
46 u64 tick_gtod;
47 u64 clock;
48};
49
50static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
51
52static inline struct sched_clock_data *this_scd(void)
53{
54 return &__get_cpu_var(sched_clock_data);
55}
56
57static inline struct sched_clock_data *cpu_sdc(int cpu)
58{
59 return &per_cpu(sched_clock_data, cpu);
60}
61
62void sched_clock_init(void)
63{
64 u64 ktime_now = ktime_to_ns(ktime_get());
65 u64 now = 0;
66 int cpu;
67
68 for_each_possible_cpu(cpu) {
69 struct sched_clock_data *scd = cpu_sdc(cpu);
70
71 scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
72 scd->prev_jiffies = jiffies;
73 scd->prev_raw = now;
74 scd->tick_raw = now;
75 scd->tick_gtod = ktime_now;
76 scd->clock = ktime_now;
77 }
78}
79
80/*
81 * update the percpu scd from the raw @now value
82 *
83 * - filter out backward motion
84 * - use jiffies to generate a min,max window to clip the raw values
85 */
86static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
87{
88 unsigned long now_jiffies = jiffies;
89 long delta_jiffies = now_jiffies - scd->prev_jiffies;
90 u64 clock = scd->clock;
91 u64 min_clock, max_clock;
92 s64 delta = now - scd->prev_raw;
93
94 WARN_ON_ONCE(!irqs_disabled());
95 min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
96
97 if (unlikely(delta < 0)) {
98 clock++;
99 goto out;
100 }
101
102 max_clock = min_clock + TICK_NSEC;
103
104 if (unlikely(clock + delta > max_clock)) {
105 if (clock < max_clock)
106 clock = max_clock;
107 else
108 clock++;
109 } else {
110 clock += delta;
111 }
112
113 out:
114 if (unlikely(clock < min_clock))
115 clock = min_clock;
116
117 scd->prev_raw = now;
118 scd->prev_jiffies = now_jiffies;
119 scd->clock = clock;
120}
121
122static void lock_double_clock(struct sched_clock_data *data1,
123 struct sched_clock_data *data2)
124{
125 if (data1 < data2) {
126 __raw_spin_lock(&data1->lock);
127 __raw_spin_lock(&data2->lock);
128 } else {
129 __raw_spin_lock(&data2->lock);
130 __raw_spin_lock(&data1->lock);
131 }
132}
133
134u64 sched_clock_cpu(int cpu)
135{
136 struct sched_clock_data *scd = cpu_sdc(cpu);
137 u64 now, clock;
138
139 WARN_ON_ONCE(!irqs_disabled());
140 now = sched_clock();
141
142 if (cpu != raw_smp_processor_id()) {
143 /*
144 * in order to update a remote cpu's clock based on our
145 * unstable raw time, rebase it against:
146 * tick_raw (offset between raw counters)
147 * tick_gtod (tick offset between cpus)
148 */
149 struct sched_clock_data *my_scd = this_scd();
150
151 lock_double_clock(scd, my_scd);
152
153 now -= my_scd->tick_raw;
154 now += scd->tick_raw;
155
156 now -= my_scd->tick_gtod;
157 now += scd->tick_gtod;
158
159 __raw_spin_unlock(&my_scd->lock);
160 } else {
161 __raw_spin_lock(&scd->lock);
162 }
163
164 __update_sched_clock(scd, now);
165 clock = scd->clock;
166
167 __raw_spin_unlock(&scd->lock);
168
169 return clock;
170}
171
172void sched_clock_tick(void)
173{
174 struct sched_clock_data *scd = this_scd();
175 u64 now, now_gtod;
176
177 WARN_ON_ONCE(!irqs_disabled());
178
179 now = sched_clock();
180 now_gtod = ktime_to_ns(ktime_get());
181
182 __raw_spin_lock(&scd->lock);
183 __update_sched_clock(scd, now);
184 /*
185 * update tick_gtod after __update_sched_clock() because that will
186 * already observe 1 new jiffy; adding a new tick_gtod to that would
187 * increase the clock by 2 jiffies.
188 */
189 scd->tick_raw = now;
190 scd->tick_gtod = now_gtod;
191 __raw_spin_unlock(&scd->lock);
192}
193
194/*
195 * We are going deep-idle (irqs are disabled):
196 */
197void sched_clock_idle_sleep_event(void)
198{
199 sched_clock_cpu(smp_processor_id());
200}
201EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
202
203/*
204 * We just idled delta nanoseconds (called with irqs disabled):
205 */
206void sched_clock_idle_wakeup_event(u64 delta_ns)
207{
208 struct sched_clock_data *scd = this_scd();
209 u64 now = sched_clock();
210
211 /*
212 * Override the previous timestamp and ignore all
213 * sched_clock() deltas that occurred while we idled,
214 * and use the PM-provided delta_ns to advance the
215 * rq clock:
216 */
217 __raw_spin_lock(&scd->lock);
218 scd->prev_raw = now;
219 scd->clock += delta_ns;
220 __raw_spin_unlock(&scd->lock);
221
222 touch_softlockup_watchdog();
223}
224EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
225
226#endif
227
228/*
229 * Scheduler clock - returns current time in nanosec units.
230 * This is default implementation.
231 * Architectures and sub-architectures can override this.
232 */
233unsigned long long __attribute__((weak)) sched_clock(void)
234{
235 return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
236}
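Two details of the new file are worth calling out. lock_double_clock() takes the two per-cpu locks in address order, the usual discipline for avoiding ABBA deadlock when sched_clock_cpu() reads a remote cpu. And __update_sched_clock() keeps the filtered clock inside a one-tick window anchored at the last gtod tick. The window logic, distilled to plain arithmetic in a standalone sketch (TICK_NSEC here is an illustrative 1 ms, not HZ-derived):

#include <stdint.h>
#include <stdio.h>

#define TICK_NSEC 1000000ULL    /* illustrative: 1 ms tick */

/* Clamp a raw sched_clock() delta into [min_clock, min_clock + TICK_NSEC],
 * mirroring the filtering in __update_sched_clock(). */
static uint64_t filter_clock(uint64_t clock, int64_t delta,
                             uint64_t tick_gtod, long delta_jiffies)
{
        uint64_t min_clock = tick_gtod + delta_jiffies * TICK_NSEC;
        uint64_t max_clock = min_clock + TICK_NSEC;

        if (delta < 0)                  /* backward motion: creep forward */
                clock++;
        else if (clock + delta > max_clock)
                clock = (clock < max_clock) ? max_clock : clock + 1;
        else
                clock += delta;

        if (clock < min_clock)          /* never fall behind the window */
                clock = min_clock;
        return clock;
}

int main(void)
{
        /* A wild TSC jump of 50 ticks is clipped to the window edge. */
        printf("%llu\n", (unsigned long long)
               filter_clock(5 * TICK_NSEC, 50 * TICK_NSEC, 4 * TICK_NSEC, 1));
        return 0;
}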
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 6b4a12558e88..5f06118fbc31 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -204,13 +204,6 @@ static void print_cpu(struct seq_file *m, int cpu)
204 PN(next_balance); 204 PN(next_balance);
205 P(curr->pid); 205 P(curr->pid);
206 PN(clock); 206 PN(clock);
207 PN(idle_clock);
208 PN(prev_clock_raw);
209 P(clock_warps);
210 P(clock_overflows);
211 P(clock_underflows);
212 P(clock_deep_idle_events);
213 PN(clock_max_delta);
214 P(cpu_load[0]); 207 P(cpu_load[0]);
215 P(cpu_load[1]); 208 P(cpu_load[1]);
216 P(cpu_load[2]); 209 P(cpu_load[2]);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 89fa32b4edf2..c863663d204d 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -682,6 +682,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
682 * Update run-time statistics of the 'current'. 682 * Update run-time statistics of the 'current'.
683 */ 683 */
684 update_curr(cfs_rq); 684 update_curr(cfs_rq);
685 account_entity_enqueue(cfs_rq, se);
685 686
686 if (wakeup) { 687 if (wakeup) {
687 place_entity(cfs_rq, se, 0); 688 place_entity(cfs_rq, se, 0);
@@ -692,7 +693,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
692 check_spread(cfs_rq, se); 693 check_spread(cfs_rq, se);
693 if (se != cfs_rq->curr) 694 if (se != cfs_rq->curr)
694 __enqueue_entity(cfs_rq, se); 695 __enqueue_entity(cfs_rq, se);
695 account_entity_enqueue(cfs_rq, se);
696} 696}
697 697
698static void update_avg(u64 *avg, u64 sample) 698static void update_avg(u64 *avg, u64 sample)
@@ -841,8 +841,10 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
841 * queued ticks are scheduled to match the slice, so don't bother 841 * queued ticks are scheduled to match the slice, so don't bother
842 * validating it and just reschedule. 842 * validating it and just reschedule.
843 */ 843 */
844 if (queued) 844 if (queued) {
845 return resched_task(rq_of(cfs_rq)->curr); 845 resched_task(rq_of(cfs_rq)->curr);
846 return;
847 }
846 /* 848 /*
847 * don't let the period tick interfere with the hrtick preemption 849 * don't let the period tick interfere with the hrtick preemption
848 */ 850 */
@@ -957,7 +959,7 @@ static void yield_task_fair(struct rq *rq)
957 return; 959 return;
958 960
959 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { 961 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
960 __update_rq_clock(rq); 962 update_rq_clock(rq);
961 /* 963 /*
962 * Update run-time statistics of the 'current'. 964 * Update run-time statistics of the 'current'.
963 */ 965 */
@@ -1007,7 +1009,7 @@ static int wake_idle(int cpu, struct task_struct *p)
1007 * sibling runqueue info. This will avoid the checks and cache miss 1009 * sibling runqueue info. This will avoid the checks and cache miss
1008 * penalties associated with that. 1010 * penalties associated with that.
1009 */ 1011 */
1010 if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1) 1012 if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
1011 return cpu; 1013 return cpu;
1012 1014
1013 for_each_domain(cpu, sd) { 1015 for_each_domain(cpu, sd) {
@@ -1611,30 +1613,6 @@ static const struct sched_class fair_sched_class = {
1611}; 1613};
1612 1614
1613#ifdef CONFIG_SCHED_DEBUG 1615#ifdef CONFIG_SCHED_DEBUG
1614static void
1615print_cfs_rq_tasks(struct seq_file *m, struct cfs_rq *cfs_rq, int depth)
1616{
1617 struct sched_entity *se;
1618
1619 if (!cfs_rq)
1620 return;
1621
1622 list_for_each_entry_rcu(se, &cfs_rq->tasks, group_node) {
1623 int i;
1624
1625 for (i = depth; i; i--)
1626 seq_puts(m, " ");
1627
1628 seq_printf(m, "%lu %s %lu\n",
1629 se->load.weight,
1630 entity_is_task(se) ? "T" : "G",
1631 calc_delta_weight(SCHED_LOAD_SCALE, se)
1632 );
1633 if (!entity_is_task(se))
1634 print_cfs_rq_tasks(m, group_cfs_rq(se), depth + 1);
1635 }
1636}
1637
1638static void print_cfs_stats(struct seq_file *m, int cpu) 1616static void print_cfs_stats(struct seq_file *m, int cpu)
1639{ 1617{
1640 struct cfs_rq *cfs_rq; 1618 struct cfs_rq *cfs_rq;
@@ -1642,9 +1620,6 @@ static void print_cfs_stats(struct seq_file *m, int cpu)
1642 rcu_read_lock(); 1620 rcu_read_lock();
1643 for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq) 1621 for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
1644 print_cfs_rq(m, cpu, cfs_rq); 1622 print_cfs_rq(m, cpu, cfs_rq);
1645
1646 seq_printf(m, "\nWeight tree:\n");
1647 print_cfs_rq_tasks(m, &cpu_rq(cpu)->cfs, 1);
1648 rcu_read_unlock(); 1623 rcu_read_unlock();
1649} 1624}
1650#endif 1625#endif
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 2bcafa375633..3a4f92dbbe66 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -99,7 +99,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
99/* 99/*
100 * Simple, special scheduling class for the per-CPU idle tasks: 100 * Simple, special scheduling class for the per-CPU idle tasks:
101 */ 101 */
102const struct sched_class idle_sched_class = { 102static const struct sched_class idle_sched_class = {
103 /* .next is NULL */ 103 /* .next is NULL */
104 /* no enqueue/yield_task for idle tasks */ 104 /* no enqueue/yield_task for idle tasks */
105 105
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index c2730a5a4f05..060e87b0cb1c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1098,11 +1098,14 @@ static void post_schedule_rt(struct rq *rq)
1098 } 1098 }
1099} 1099}
1100 1100
1101 1101/*
1102 * If we are not running and we are not going to reschedule soon, we should
1103 * try to push tasks away now
1104 */
1102static void task_wake_up_rt(struct rq *rq, struct task_struct *p) 1105static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
1103{ 1106{
1104 if (!task_running(rq, p) && 1107 if (!task_running(rq, p) &&
1105 (p->prio >= rq->rt.highest_prio) && 1108 !test_tsk_need_resched(rq->curr) &&
1106 rq->rt.overloaded) 1109 rq->rt.overloaded)
1107 push_rt_tasks(rq); 1110 push_rt_tasks(rq);
1108} 1111}
@@ -1309,7 +1312,7 @@ static void set_curr_task_rt(struct rq *rq)
1309 p->se.exec_start = rq->clock; 1312 p->se.exec_start = rq->clock;
1310} 1313}
1311 1314
1312const struct sched_class rt_sched_class = { 1315static const struct sched_class rt_sched_class = {
1313 .next = &fair_sched_class, 1316 .next = &fair_sched_class,
1314 .enqueue_task = enqueue_task_rt, 1317 .enqueue_task = enqueue_task_rt,
1315 .dequeue_task = dequeue_task_rt, 1318 .dequeue_task = dequeue_task_rt,
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index f2e01ac5ab09..a5d4b1dac2a5 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -1,4 +1,10 @@
1 1
2config HAVE_ARCH_KGDB_SHADOW_INFO
3 bool
4
5config HAVE_ARCH_KGDB
6 bool
7
2menuconfig KGDB 8menuconfig KGDB
3 bool "KGDB: kernel debugging with remote gdb" 9 bool "KGDB: kernel debugging with remote gdb"
4 select FRAME_POINTER 10 select FRAME_POINTER
@@ -10,15 +16,10 @@ menuconfig KGDB
10 at http://kgdb.sourceforge.net as well as in DocBook form 16 at http://kgdb.sourceforge.net as well as in DocBook form
11 in Documentation/DocBook/. If unsure, say N. 17 in Documentation/DocBook/. If unsure, say N.
12 18
13config HAVE_ARCH_KGDB_SHADOW_INFO 19if KGDB
14 bool
15
16config HAVE_ARCH_KGDB
17 bool
18 20
19config KGDB_SERIAL_CONSOLE 21config KGDB_SERIAL_CONSOLE
20 tristate "KGDB: use kgdb over the serial console" 22 tristate "KGDB: use kgdb over the serial console"
21 depends on KGDB
22 select CONSOLE_POLL 23 select CONSOLE_POLL
23 select MAGIC_SYSRQ 24 select MAGIC_SYSRQ
24 default y 25 default y
@@ -28,7 +29,6 @@ config KGDB_SERIAL_CONSOLE
28 29
29config KGDB_TESTS 30config KGDB_TESTS
30 bool "KGDB: internal test suite" 31 bool "KGDB: internal test suite"
31 depends on KGDB
32 default n 32 default n
33 help 33 help
34 This is a kgdb I/O module specifically designed to test 34 This is a kgdb I/O module specifically designed to test
@@ -56,3 +56,5 @@ config KGDB_TESTS_BOOT_STRING
56 boot. See the drivers/misc/kgdbts.c for detailed 56 boot. See the drivers/misc/kgdbts.c for detailed
57 information about other strings you could use beyond the 57 information about other strings you could use beyond the
58 default of V1F100. 58 default of V1F100.
59
60endif # KGDB
diff --git a/lib/devres.c b/lib/devres.c
index 26c87c49d776..72c8909006da 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -2,7 +2,7 @@
2#include <linux/io.h> 2#include <linux/io.h>
3#include <linux/module.h> 3#include <linux/module.h>
4 4
5static void devm_ioremap_release(struct device *dev, void *res) 5void devm_ioremap_release(struct device *dev, void *res)
6{ 6{
7 iounmap(*(void __iomem **)res); 7 iounmap(*(void __iomem **)res);
8} 8}
diff --git a/mm/memory.c b/mm/memory.c
index bbab1e37055e..48c122d42ed7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -969,7 +969,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
969 goto no_page_table; 969 goto no_page_table;
970 970
971 pmd = pmd_offset(pud, address); 971 pmd = pmd_offset(pud, address);
972 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) 972 if (pmd_none(*pmd))
973 goto no_page_table; 973 goto no_page_table;
974 974
975 if (pmd_huge(*pmd)) { 975 if (pmd_huge(*pmd)) {
@@ -978,6 +978,9 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
978 goto out; 978 goto out;
979 } 979 }
980 980
981 if (unlikely(pmd_bad(*pmd)))
982 goto no_page_table;
983
981 ptep = pte_offset_map_lock(mm, pmd, address, &ptl); 984 ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
982 if (!ptep) 985 if (!ptep)
983 goto out; 986 goto out;
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 1b228065e745..9d52ebfc1962 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -346,9 +346,9 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
346 /* skb==NULL means VCC is being destroyed */ 346 /* skb==NULL means VCC is being destroyed */
347 br2684_close_vcc(brvcc); 347 br2684_close_vcc(brvcc);
348 if (list_empty(&brdev->brvccs)) { 348 if (list_empty(&brdev->brvccs)) {
349 read_lock(&devs_lock); 349 write_lock_irq(&devs_lock);
350 list_del(&brdev->br2684_devs); 350 list_del(&brdev->br2684_devs);
351 read_unlock(&devs_lock); 351 write_unlock_irq(&devs_lock);
352 unregister_netdev(net_dev); 352 unregister_netdev(net_dev);
353 free_netdev(net_dev); 353 free_netdev(net_dev);
354 } 354 }
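The br2684 fix above is a lock promotion: list_del() modifies brdev->br2684_devs, and a read lock only licenses traversal; since br2684_push() can run in softirq context, the writer side also has to disable interrupts, hence write_lock_irq(). A runnable userspace analogue of the rule, using a pthread rwlock over a toy list:

#include <pthread.h>
#include <stdio.h>

/* Userspace analogue: rwlocks let many readers traverse, but any
 * unlink of a node needs the exclusive (write) side of the lock. */
static pthread_rwlock_t devs_lock = PTHREAD_RWLOCK_INITIALIZER;

struct node { struct node *next; };
static struct node a, b;
static struct node *head = &a;

static void del_first(void)
{
        pthread_rwlock_wrlock(&devs_lock);      /* not rdlock! */
        head = head->next;
        pthread_rwlock_unlock(&devs_lock);
}

int main(void)
{
        a.next = &b;

        pthread_rwlock_rdlock(&devs_lock);      /* readers may only walk */
        for (struct node *n = head; n; n = n->next)
                printf("node %p\n", (void *)n);
        pthread_rwlock_unlock(&devs_lock);

        del_first();
        return 0;
}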
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 77a981a1ee52..c2397f503b0f 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -273,15 +273,13 @@ int br_add_bridge(const char *name)
273 rtnl_lock(); 273 rtnl_lock();
274 if (strchr(dev->name, '%')) { 274 if (strchr(dev->name, '%')) {
275 ret = dev_alloc_name(dev, dev->name); 275 ret = dev_alloc_name(dev, dev->name);
276 if (ret < 0) { 276 if (ret < 0)
277 free_netdev(dev); 277 goto out_free;
278 goto out;
279 }
280 } 278 }
281 279
282 ret = register_netdevice(dev); 280 ret = register_netdevice(dev);
283 if (ret) 281 if (ret)
284 goto out; 282 goto out_free;
285 283
286 ret = br_sysfs_addbr(dev); 284 ret = br_sysfs_addbr(dev);
287 if (ret) 285 if (ret)
@@ -289,6 +287,10 @@ int br_add_bridge(const char *name)
289 out: 287 out:
290 rtnl_unlock(); 288 rtnl_unlock();
291 return ret; 289 return ret;
290
291out_free:
292 free_netdev(dev);
293 goto out;
292} 294}
293 295
294int br_del_bridge(const char *name) 296int br_del_bridge(const char *name)
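The br_add_bridge() rework funnels both failure sites through a single out_free label so free_netdev() is written once, and jumps back to out so rtnl_unlock() also stays in one place. A sketch of the same single-exit idiom with plain malloc/free; setup() and the hand-off variable are invented for illustration:

#include <stdio.h>
#include <stdlib.h>

static char *saved;

/* Single-exit error handling: every failure path frees through one
 * label, so the cleanup is written (and audited) exactly once. */
static int setup(void)
{
        int ret = 0;
        char *buf = malloc(64);

        if (!buf)
                return -1;

        if (0 /* pretend a later registration step failed */) {
                ret = -1;
                goto out_free;
        }
        saved = buf;    /* success: ownership handed off */
out:
        return ret;

out_free:
        free(buf);
        goto out;
}

int main(void)
{
        printf("setup() = %d\n", setup());
        return 0;
}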
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4fe605fa6f8a..5c459f2b7985 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -200,7 +200,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
200 goto nodata; 200 goto nodata;
201 201
202 /* 202 /*
203 * See comment in sk_buff definition, just before the 'tail' member 203 * Only clear those fields we need to clear, not those that we will
204 * actually initialise below. Hence, don't put any more fields after
205 * the tail pointer in struct sk_buff!
204 */ 206 */
205 memset(skb, 0, offsetof(struct sk_buff, tail)); 207 memset(skb, 0, offsetof(struct sk_buff, tail));
206 skb->truesize = size + sizeof(struct sk_buff); 208 skb->truesize = size + sizeof(struct sk_buff);
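The reworded sk_buff comment states the invariant precisely: the memset clears only up to offsetof(struct sk_buff, tail) because everything from tail onward is assigned explicitly right after, so new fields must not be added past tail. The offsetof technique in isolation, with a made-up struct:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct pkt {
        int flags;              /* cleared by the memset */
        int len;                /* cleared by the memset */
        unsigned char *tail;    /* everything from here on is set by hand */
        size_t truesize;
};

int main(void)
{
        struct pkt p;

        /* Clear only the fields we will not initialise explicitly. */
        memset(&p, 0, offsetof(struct pkt, tail));
        p.tail = NULL;
        p.truesize = sizeof(p);

        printf("flags=%d len=%d truesize=%zu\n", p.flags, p.len, p.truesize);
        return 0;
}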
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 4a4f6ce4498d..933a0ecf8d46 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -32,7 +32,7 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
32 32
33 if (len > 3) { 33 if (len > 3) {
34 DCCP_WARN("invalid length %d\n", len); 34 DCCP_WARN("invalid length %d\n", len);
35 return 1; 35 return -EINVAL;
36 } 36 }
37 /* XXX add further sanity checks */ 37 /* XXX add further sanity checks */
38 38
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 2f665a516476..f50e88bf2661 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -235,14 +235,14 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
235 else 235 else
236 min_mtu -= 21; 236 min_mtu -= 21;
237 237
238 if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= min_mtu) { 238 if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
239 if (!(dst_metric_locked(dst, RTAX_MTU))) { 239 if (!(dst_metric_locked(dst, RTAX_MTU))) {
240 dst->metrics[RTAX_MTU-1] = mtu; 240 dst->metrics[RTAX_MTU-1] = mtu;
241 dst_set_expires(dst, dn_rt_mtu_expires); 241 dst_set_expires(dst, dn_rt_mtu_expires);
242 } 242 }
243 if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { 243 if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
244 u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; 244 u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
245 if (dst->metrics[RTAX_ADVMSS-1] > mss) 245 if (dst_metric(dst, RTAX_ADVMSS) > mss)
246 dst->metrics[RTAX_ADVMSS-1] = mss; 246 dst->metrics[RTAX_ADVMSS-1] = mss;
247 } 247 }
248 } 248 }
@@ -805,12 +805,12 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
805 rt->u.dst.neighbour = n; 805 rt->u.dst.neighbour = n;
806 } 806 }
807 807
808 if (rt->u.dst.metrics[RTAX_MTU-1] == 0 || 808 if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 ||
809 rt->u.dst.metrics[RTAX_MTU-1] > rt->u.dst.dev->mtu) 809 dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu)
810 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; 810 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
811 mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); 811 mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst));
812 if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0 || 812 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 ||
813 rt->u.dst.metrics[RTAX_ADVMSS-1] > mss) 813 dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss)
814 rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; 814 rt->u.dst.metrics[RTAX_ADVMSS-1] = mss;
815 return 0; 815 return 0;
816} 816}
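This hunk, and the ipv4/ipv6 and TCP hunks below, convert open-coded dst->metrics[RTAX_FOO-1] reads to the dst_metric() accessor while leaving writes as direct array stores. The helper is presumably just a typed wrapper over the same 1-based indexing; a self-contained mock (the RTAX values are illustrative, not the kernel enum):

#include <stdint.h>
#include <stdio.h>

#define RTAX_MTU 2      /* illustrative index, not the kernel's enum */
#define RTAX_MAX 16

struct dst_entry { uint32_t metrics[RTAX_MAX]; };

/* The accessor presumably wraps the same 1-based indexing: */
static inline uint32_t dst_metric(const struct dst_entry *dst, int metric)
{
        return dst->metrics[metric - 1];
}

int main(void)
{
        struct dst_entry d = { .metrics = { [RTAX_MTU - 1] = 1500 } };

        /* Reads go through the helper; writes still index directly. */
        printf("mtu=%u\n", dst_metric(&d, RTAX_MTU));
        d.metrics[RTAX_MTU - 1] = 1400;
        printf("mtu=%u\n", dst_metric(&d, RTAX_MTU));
        return 0;
}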
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5e3685c5c407..92f90ae46f4a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1468,14 +1468,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1468 1468
1469 /* BSD 4.2 compatibility hack :-( */ 1469 /* BSD 4.2 compatibility hack :-( */
1470 if (mtu == 0 && 1470 if (mtu == 0 &&
1471 old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] && 1471 old_mtu >= dst_metric(&rth->u.dst, RTAX_MTU) &&
1472 old_mtu >= 68 + (iph->ihl << 2)) 1472 old_mtu >= 68 + (iph->ihl << 2))
1473 old_mtu -= iph->ihl << 2; 1473 old_mtu -= iph->ihl << 2;
1474 1474
1475 mtu = guess_mtu(old_mtu); 1475 mtu = guess_mtu(old_mtu);
1476 } 1476 }
1477 if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) { 1477 if (mtu <= dst_metric(&rth->u.dst, RTAX_MTU)) {
1478 if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) { 1478 if (mtu < dst_metric(&rth->u.dst, RTAX_MTU)) {
1479 dst_confirm(&rth->u.dst); 1479 dst_confirm(&rth->u.dst);
1480 if (mtu < ip_rt_min_pmtu) { 1480 if (mtu < ip_rt_min_pmtu) {
1481 mtu = ip_rt_min_pmtu; 1481 mtu = ip_rt_min_pmtu;
@@ -1497,7 +1497,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1497 1497
1498static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1498static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1499{ 1499{
1500 if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 && 1500 if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= 68 &&
1501 !(dst_metric_locked(dst, RTAX_MTU))) { 1501 !(dst_metric_locked(dst, RTAX_MTU))) {
1502 if (mtu < ip_rt_min_pmtu) { 1502 if (mtu < ip_rt_min_pmtu) {
1503 mtu = ip_rt_min_pmtu; 1503 mtu = ip_rt_min_pmtu;
@@ -1613,7 +1613,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1613 sizeof(rt->u.dst.metrics)); 1613 sizeof(rt->u.dst.metrics));
1614 if (fi->fib_mtu == 0) { 1614 if (fi->fib_mtu == 0) {
1615 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; 1615 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
1616 if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) && 1616 if (dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1617 rt->rt_gateway != rt->rt_dst && 1617 rt->rt_gateway != rt->rt_dst &&
1618 rt->u.dst.dev->mtu > 576) 1618 rt->u.dst.dev->mtu > 576)
1619 rt->u.dst.metrics[RTAX_MTU-1] = 576; 1619 rt->u.dst.metrics[RTAX_MTU-1] = 576;
@@ -1624,14 +1624,14 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1624 } else 1624 } else
1625 rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; 1625 rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
1626 1626
1627 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) 1627 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1628 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; 1628 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
1629 if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU) 1629 if (dst_metric(&rt->u.dst, RTAX_MTU) > IP_MAX_MTU)
1630 rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; 1630 rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
1631 if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0) 1631 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0)
1632 rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, 1632 rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
1633 ip_rt_min_advmss); 1633 ip_rt_min_advmss);
1634 if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40) 1634 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40)
1635 rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; 1635 rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
1636 1636
1637#ifdef CONFIG_NET_CLS_ROUTE 1637#ifdef CONFIG_NET_CLS_ROUTE
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eda4f4a233f3..8ac15a604e08 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -66,6 +66,7 @@
66#include <linux/mm.h> 66#include <linux/mm.h>
67#include <linux/module.h> 67#include <linux/module.h>
68#include <linux/sysctl.h> 68#include <linux/sysctl.h>
69#include <net/dst.h>
69#include <net/tcp.h> 70#include <net/tcp.h>
70#include <net/inet_common.h> 71#include <net/inet_common.h>
71#include <linux/ipsec.h> 72#include <linux/ipsec.h>
@@ -605,7 +606,7 @@ static u32 tcp_rto_min(struct sock *sk)
605 u32 rto_min = TCP_RTO_MIN; 606 u32 rto_min = TCP_RTO_MIN;
606 607
607 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) 608 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
608 rto_min = dst->metrics[RTAX_RTO_MIN - 1]; 609 rto_min = dst_metric(dst, RTAX_RTO_MIN);
609 return rto_min; 610 return rto_min;
610} 611}
611 612
@@ -769,7 +770,7 @@ void tcp_update_metrics(struct sock *sk)
769 dst->metrics[RTAX_RTTVAR - 1] = m; 770 dst->metrics[RTAX_RTTVAR - 1] = m;
770 else 771 else
771 dst->metrics[RTAX_RTTVAR-1] -= 772 dst->metrics[RTAX_RTTVAR-1] -=
772 (dst->metrics[RTAX_RTTVAR-1] - m)>>2; 773 (dst_metric(dst, RTAX_RTTVAR) - m)>>2;
773 } 774 }
774 775
775 if (tp->snd_ssthresh >= 0xFFFF) { 776 if (tp->snd_ssthresh >= 0xFFFF) {
@@ -788,21 +789,21 @@ void tcp_update_metrics(struct sock *sk)
788 dst->metrics[RTAX_SSTHRESH-1] = 789 dst->metrics[RTAX_SSTHRESH-1] =
789 max(tp->snd_cwnd >> 1, tp->snd_ssthresh); 790 max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
790 if (!dst_metric_locked(dst, RTAX_CWND)) 791 if (!dst_metric_locked(dst, RTAX_CWND))
791 dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1; 792 dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
792 } else { 793 } else {
793 /* Else slow start did not finish, cwnd is non-sense, 794 /* Else slow start did not finish, cwnd is non-sense,
794 ssthresh may be also invalid. 795 ssthresh may be also invalid.
795 */ 796 */
796 if (!dst_metric_locked(dst, RTAX_CWND)) 797 if (!dst_metric_locked(dst, RTAX_CWND))
797 dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1; 798 dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
798 if (dst->metrics[RTAX_SSTHRESH-1] && 799 if (dst_metric(dst, RTAX_SSTHRESH) &&
799 !dst_metric_locked(dst, RTAX_SSTHRESH) && 800 !dst_metric_locked(dst, RTAX_SSTHRESH) &&
800 tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1]) 801 tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
801 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; 802 dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
802 } 803 }
803 804
804 if (!dst_metric_locked(dst, RTAX_REORDERING)) { 805 if (!dst_metric_locked(dst, RTAX_REORDERING)) {
805 if (dst->metrics[RTAX_REORDERING-1] < tp->reordering && 806 if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
806 tp->reordering != sysctl_tcp_reordering) 807 tp->reordering != sysctl_tcp_reordering)
807 dst->metrics[RTAX_REORDERING-1] = tp->reordering; 808 dst->metrics[RTAX_REORDERING-1] = tp->reordering;
808 } 809 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a493ad9b8914..12bba0880345 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1243,11 +1243,11 @@ install_route:
1243 } 1243 }
1244 } 1244 }
1245 1245
1246 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) 1246 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1247 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1247 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1248 if (!rt->u.dst.metrics[RTAX_MTU-1]) 1248 if (!dst_metric(&rt->u.dst, RTAX_MTU))
1249 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1249 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1250 if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) 1250 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
1251 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1251 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1252 rt->u.dst.dev = dev; 1252 rt->u.dst.dev = dev;
1253 rt->rt6i_idev = idev; 1253 rt->rt6i_idev = idev;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 9ad4e3631b6b..915afadb0602 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1766,6 +1766,7 @@ fail_wep:
1766fail_rate: 1766fail_rate:
1767 ieee80211_debugfs_remove_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev)); 1767 ieee80211_debugfs_remove_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev));
1768 unregister_netdevice(local->mdev); 1768 unregister_netdevice(local->mdev);
1769 local->mdev = NULL;
1769fail_dev: 1770fail_dev:
1770 rtnl_unlock(); 1771 rtnl_unlock();
1771 sta_info_stop(local); 1772 sta_info_stop(local);
@@ -1773,8 +1774,10 @@ fail_sta_info:
1773 debugfs_hw_del(local); 1774 debugfs_hw_del(local);
1774 destroy_workqueue(local->hw.workqueue); 1775 destroy_workqueue(local->hw.workqueue);
1775fail_workqueue: 1776fail_workqueue:
1776 ieee80211_if_free(local->mdev); 1777 if (local->mdev != NULL) {
1777 local->mdev = NULL; 1778 ieee80211_if_free(local->mdev);
1779 local->mdev = NULL;
1780 }
1778fail_mdev_alloc: 1781fail_mdev_alloc:
1779 wiphy_unregister(local->hw.wiphy); 1782 wiphy_unregister(local->hw.wiphy);
1780 return result; 1783 return result;
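The mac80211 fix clears local->mdev at the point it is unregistered and makes the later fail_workqueue label check for NULL, so the overlapping unwind paths cannot free the device twice. The same free-and-NULL guard as a generic runnable sketch:

#include <stdlib.h>

/* Null the pointer at the point of release and guard later releases;
 * unwind paths that overlap then stay idempotent. */
struct ctx { char *mdev; };

static void teardown_late(struct ctx *c)
{
        free(c->mdev);
        c->mdev = NULL;         /* mark as gone for earlier labels */
}

static void teardown_early(struct ctx *c)
{
        if (c->mdev != NULL) {  /* skip if the late path already ran */
                free(c->mdev);
                c->mdev = NULL;
        }
}

int main(void)
{
        struct ctx c = { .mdev = malloc(16) };

        teardown_late(&c);      /* frees once */
        teardown_early(&c);     /* safely does nothing */
        return 0;
}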
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 64b2d136c78e..1d421d059caf 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -6,7 +6,7 @@
6 * as published by the Free Software Foundation; either version 6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version. 7 * 2 of the License, or (at your option) any later version.
8 * 8 *
9 * Authors: Jamal Hadi Salim (2005) 9 * Authors: Jamal Hadi Salim (2005-8)
10 * 10 *
11 */ 11 */
12 12
@@ -34,6 +34,7 @@ static struct tcf_hashinfo simp_hash_info = {
34 .lock = &simp_lock, 34 .lock = &simp_lock,
35}; 35};
36 36
37#define SIMP_MAX_DATA 32
37static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) 38static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
38{ 39{
39 struct tcf_defact *d = a->priv; 40 struct tcf_defact *d = a->priv;
@@ -69,23 +70,28 @@ static int tcf_simp_release(struct tcf_defact *d, int bind)
69 return ret; 70 return ret;
70} 71}
71 72
72static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) 73static int alloc_defdata(struct tcf_defact *d, char *defdata)
73{ 74{
74 d->tcfd_defdata = kmemdup(defdata, datalen, GFP_KERNEL); 75 d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL);
75 if (unlikely(!d->tcfd_defdata)) 76 if (unlikely(!d->tcfd_defdata))
76 return -ENOMEM; 77 return -ENOMEM;
77 d->tcfd_datalen = datalen; 78
78 return 0; 79 return 0;
79} 80}
80 81
81static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) 82static void reset_policy(struct tcf_defact *d, char *defdata,
83 struct tc_defact *p)
82{ 84{
83 kfree(d->tcfd_defdata); 85 spin_lock_bh(&d->tcf_lock);
84 return alloc_defdata(d, datalen, defdata); 86 d->tcf_action = p->action;
87 memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
88 strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
89 spin_unlock_bh(&d->tcf_lock);
85} 90}
86 91
87static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { 92static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
88 [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) }, 93 [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) },
94 [TCA_DEF_DATA] = { .type = NLA_STRING, .len = SIMP_MAX_DATA },
89}; 95};
90 96
91static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, 97static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
@@ -95,28 +101,24 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
95 struct tc_defact *parm; 101 struct tc_defact *parm;
96 struct tcf_defact *d; 102 struct tcf_defact *d;
97 struct tcf_common *pc; 103 struct tcf_common *pc;
98 void *defdata; 104 char *defdata;
99 u32 datalen = 0;
100 int ret = 0, err; 105 int ret = 0, err;
101 106
102 if (nla == NULL) 107 if (nla == NULL)
103 return -EINVAL; 108 return -EINVAL;
104 109
105 err = nla_parse_nested(tb, TCA_DEF_MAX, nla, NULL); 110 err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy);
106 if (err < 0) 111 if (err < 0)
107 return err; 112 return err;
108 113
109 if (tb[TCA_DEF_PARMS] == NULL) 114 if (tb[TCA_DEF_PARMS] == NULL)
110 return -EINVAL; 115 return -EINVAL;
111 116
112 parm = nla_data(tb[TCA_DEF_PARMS]); 117 if (tb[TCA_DEF_DATA] == NULL)
113 defdata = nla_data(tb[TCA_DEF_DATA]);
114 if (defdata == NULL)
115 return -EINVAL; 118 return -EINVAL;
116 119
117 datalen = nla_len(tb[TCA_DEF_DATA]); 120 parm = nla_data(tb[TCA_DEF_PARMS]);
118 if (datalen == 0) 121 defdata = nla_data(tb[TCA_DEF_DATA]);
119 return -EINVAL;
120 122
121 pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); 123 pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info);
122 if (!pc) { 124 if (!pc) {
@@ -126,11 +128,12 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
126 return -ENOMEM; 128 return -ENOMEM;
127 129
128 d = to_defact(pc); 130 d = to_defact(pc);
129 ret = alloc_defdata(d, datalen, defdata); 131 ret = alloc_defdata(d, defdata);
130 if (ret < 0) { 132 if (ret < 0) {
131 kfree(pc); 133 kfree(pc);
132 return ret; 134 return ret;
133 } 135 }
136 d->tcf_action = parm->action;
134 ret = ACT_P_CREATED; 137 ret = ACT_P_CREATED;
135 } else { 138 } else {
136 d = to_defact(pc); 139 d = to_defact(pc);
@@ -138,13 +141,9 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
138 tcf_simp_release(d, bind); 141 tcf_simp_release(d, bind);
139 return -EEXIST; 142 return -EEXIST;
140 } 143 }
141 realloc_defdata(d, datalen, defdata); 144 reset_policy(d, defdata, parm);
142 } 145 }
143 146
144 spin_lock_bh(&d->tcf_lock);
145 d->tcf_action = parm->action;
146 spin_unlock_bh(&d->tcf_lock);
147
148 if (ret == ACT_P_CREATED) 147 if (ret == ACT_P_CREATED)
149 tcf_hash_insert(pc, &simp_hash_info); 148 tcf_hash_insert(pc, &simp_hash_info);
150 return ret; 149 return ret;
@@ -172,7 +171,7 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
172 opt.bindcnt = d->tcf_bindcnt - bind; 171 opt.bindcnt = d->tcf_bindcnt - bind;
173 opt.action = d->tcf_action; 172 opt.action = d->tcf_action;
174 NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); 173 NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
175 NLA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); 174 NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata);
176 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); 175 t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
177 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); 176 t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
178 t.expires = jiffies_to_clock_t(d->tcf_tm.expires); 177 t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
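Besides moving the d->tcf_action update under reset_policy(), the act_simple rewrite swaps kmemdup() of a caller-supplied length for kstrndup() with a hard SIMP_MAX_DATA cap, and declares TCA_DEF_DATA as NLA_STRING in the policy so the netlink parser rejects oversized attributes up front. kstrndup() behaves like userspace strndup(): a bounded copy with a guaranteed NUL terminator, e.g.:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int main(void)
{
        /* strndup() mirrors kstrndup(): copy at most n bytes, add NUL. */
        char *s = strndup("a-rather-long-default-policy-string", 8);

        if (!s)
                return 1;
        printf("%s (len=%zu)\n", s, strlen(s)); /* a-rather (len=8) */
        free(s);
        return 0;
}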
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 66148cc4759e..5bc1ed490180 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1197,12 +1197,16 @@ static inline int htb_parent_last_child(struct htb_class *cl)
1197 return 1; 1197 return 1;
1198} 1198}
1199 1199
1200static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q) 1200static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1201 struct Qdisc *new_q)
1201{ 1202{
1202 struct htb_class *parent = cl->parent; 1203 struct htb_class *parent = cl->parent;
1203 1204
1204 BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity); 1205 BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);
1205 1206
1207 if (parent->cmode != HTB_CAN_SEND)
1208 htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1209
1206 parent->level = 0; 1210 parent->level = 0;
1207 memset(&parent->un.inner, 0, sizeof(parent->un.inner)); 1211 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1208 INIT_LIST_HEAD(&parent->un.leaf.drop_list); 1212 INIT_LIST_HEAD(&parent->un.leaf.drop_list);
@@ -1300,7 +1304,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1300 htb_deactivate(q, cl); 1304 htb_deactivate(q, cl);
1301 1305
1302 if (last_child) 1306 if (last_child)
1303 htb_parent_to_leaf(cl, new_q); 1307 htb_parent_to_leaf(q, cl, new_q);
1304 1308
1305 if (--cl->refcnt == 0) 1309 if (--cl->refcnt == 0)
1306 htb_destroy_class(sch, cl); 1310 htb_destroy_class(sch, cl);
diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index 4bc68f20a73c..96521cb087ec 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -11,9 +11,9 @@
11#ifndef KBUILD_NO_NLS 11#ifndef KBUILD_NO_NLS
12# include <libintl.h> 12# include <libintl.h>
13#else 13#else
14# define gettext(Msgid) ((const char *) (Msgid)) 14static inline const char *gettext(const char *txt) { return txt; }
15# define textdomain(Domainname) ((const char *) (Domainname)) 15static inline void textdomain(const char *domainname) {}
16# define bindtextdomain(Domainname, Dirname) ((const char *) (Dirname)) 16static inline void bindtextdomain(const char *name, const char *dir) {}
17#endif 17#endif
18 18
19#ifdef __cplusplus 19#ifdef __cplusplus
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 734cf4f3131e..6841e95c0989 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -773,7 +773,7 @@ static void conf_string(struct menu *menu)
773 773
774 while (1) { 774 while (1) {
775 int res; 775 int res;
776 char *heading; 776 const char *heading;
777 777
778 switch (sym_get_type(menu->sym)) { 778 switch (sym_get_type(menu->sym)) {
779 case S_INT: 779 case S_INT:
@@ -925,3 +925,4 @@ int main(int ac, char **av)
925 925
926 return 0; 926 return 0;
927} 927}
928
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index e04c4218cb52..cea4a790e1e9 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -51,6 +51,15 @@ do { \
51 sprintf(str + strlen(str), "*"); \ 51 sprintf(str + strlen(str), "*"); \
52} while(0) 52} while(0)
53 53
54/* Always end in a wildcard, for future extension */
55static inline void add_wildcard(char *str)
56{
57 int len = strlen(str);
58
59 if (str[len - 1] != '*')
60 strcat(str + len, "*");
61}
62
54unsigned int cross_build = 0; 63unsigned int cross_build = 0;
55/** 64/**
56 * Check that sizeof(device_id type) are consistent with size of section 65 * Check that sizeof(device_id type) are consistent with size of section
@@ -133,9 +142,7 @@ static void do_usb_entry(struct usb_device_id *id,
133 id->match_flags&USB_DEVICE_ID_MATCH_INT_PROTOCOL, 142 id->match_flags&USB_DEVICE_ID_MATCH_INT_PROTOCOL,
134 id->bInterfaceProtocol); 143 id->bInterfaceProtocol);
135 144
136 /* Always end in a wildcard, for future extension */ 145 add_wildcard(alias);
137 if (alias[strlen(alias)-1] != '*')
138 strcat(alias, "*");
139 buf_printf(&mod->dev_table_buf, 146 buf_printf(&mod->dev_table_buf,
140 "MODULE_ALIAS(\"%s\");\n", alias); 147 "MODULE_ALIAS(\"%s\");\n", alias);
141} 148}
@@ -219,6 +226,7 @@ static int do_ieee1394_entry(const char *filename,
219 ADD(alias, "ver", id->match_flags & IEEE1394_MATCH_VERSION, 226 ADD(alias, "ver", id->match_flags & IEEE1394_MATCH_VERSION,
220 id->version); 227 id->version);
221 228
229 add_wildcard(alias);
222 return 1; 230 return 1;
223} 231}
224 232
@@ -261,6 +269,7 @@ static int do_pci_entry(const char *filename,
261 ADD(alias, "bc", baseclass_mask == 0xFF, baseclass); 269 ADD(alias, "bc", baseclass_mask == 0xFF, baseclass);
262 ADD(alias, "sc", subclass_mask == 0xFF, subclass); 270 ADD(alias, "sc", subclass_mask == 0xFF, subclass);
263 ADD(alias, "i", interface_mask == 0xFF, interface); 271 ADD(alias, "i", interface_mask == 0xFF, interface);
272 add_wildcard(alias);
264 return 1; 273 return 1;
265} 274}
266 275
@@ -283,6 +292,7 @@ static int do_ccw_entry(const char *filename,
283 id->dev_type); 292 id->dev_type);
284 ADD(alias, "dm", id->match_flags&CCW_DEVICE_ID_MATCH_DEVICE_MODEL, 293 ADD(alias, "dm", id->match_flags&CCW_DEVICE_ID_MATCH_DEVICE_MODEL,
285 id->dev_model); 294 id->dev_model);
295 add_wildcard(alias);
286 return 1; 296 return 1;
287} 297}
288 298
@@ -290,7 +300,7 @@ static int do_ccw_entry(const char *filename,
290static int do_ap_entry(const char *filename, 300static int do_ap_entry(const char *filename,
291 struct ap_device_id *id, char *alias) 301 struct ap_device_id *id, char *alias)
292{ 302{
293 sprintf(alias, "ap:t%02X", id->dev_type); 303 sprintf(alias, "ap:t%02X*", id->dev_type);
294 return 1; 304 return 1;
295} 305}
296 306
@@ -309,6 +319,7 @@ static int do_serio_entry(const char *filename,
309 ADD(alias, "id", id->id != SERIO_ANY, id->id); 319 ADD(alias, "id", id->id != SERIO_ANY, id->id);
310 ADD(alias, "ex", id->extra != SERIO_ANY, id->extra); 320 ADD(alias, "ex", id->extra != SERIO_ANY, id->extra);
311 321
322 add_wildcard(alias);
312 return 1; 323 return 1;
313} 324}
314 325
@@ -316,7 +327,7 @@ static int do_serio_entry(const char *filename,
316static int do_acpi_entry(const char *filename, 327static int do_acpi_entry(const char *filename,
317 struct acpi_device_id *id, char *alias) 328 struct acpi_device_id *id, char *alias)
318{ 329{
319 sprintf(alias, "acpi*:%s:", id->id); 330 sprintf(alias, "acpi*:%s:*", id->id);
320 return 1; 331 return 1;
321} 332}
322 333
@@ -324,7 +335,7 @@ static int do_acpi_entry(const char *filename,
324static int do_pnp_entry(const char *filename, 335static int do_pnp_entry(const char *filename,
325 struct pnp_device_id *id, char *alias) 336 struct pnp_device_id *id, char *alias)
326{ 337{
327 sprintf(alias, "pnp:d%s", id->id); 338 sprintf(alias, "pnp:d%s*", id->id);
328 return 1; 339 return 1;
329} 340}
330 341
@@ -409,6 +420,7 @@ static int do_pcmcia_entry(const char *filename,
409 ADD(alias, "pc", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID3, id->prod_id_hash[2]); 420 ADD(alias, "pc", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID3, id->prod_id_hash[2]);
410 ADD(alias, "pd", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID4, id->prod_id_hash[3]); 421 ADD(alias, "pd", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID4, id->prod_id_hash[3]);
411 422
423 add_wildcard(alias);
412 return 1; 424 return 1;
413} 425}
414 426
@@ -432,6 +444,7 @@ static int do_of_entry (const char *filename, struct of_device_id *of, char *ali
432 if (isspace (*tmp)) 444 if (isspace (*tmp))
433 *tmp = '_'; 445 *tmp = '_';
434 446
447 add_wildcard(alias);
435 return 1; 448 return 1;
436} 449}
437 450
@@ -448,6 +461,7 @@ static int do_vio_entry(const char *filename, struct vio_device_id *vio,
448 if (isspace (*tmp)) 461 if (isspace (*tmp))
449 *tmp = '_'; 462 *tmp = '_';
450 463
464 add_wildcard(alias);
451 return 1; 465 return 1;
452} 466}
453 467
@@ -511,6 +525,8 @@ static int do_eisa_entry(const char *filename, struct eisa_device_id *eisa,
511{ 525{
512 if (eisa->sig[0]) 526 if (eisa->sig[0])
513 sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", eisa->sig); 527 sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", eisa->sig);
528 else
529 strcat(alias, "*");
514 return 1; 530 return 1;
515} 531}
516 532
@@ -529,6 +545,7 @@ static int do_parisc_entry(const char *filename, struct parisc_device_id *id,
529 ADD(alias, "rev", id->hversion_rev != PA_HVERSION_REV_ANY_ID, id->hversion_rev); 545 ADD(alias, "rev", id->hversion_rev != PA_HVERSION_REV_ANY_ID, id->hversion_rev);
530 ADD(alias, "sv", id->sversion != PA_SVERSION_ANY_ID, id->sversion); 546 ADD(alias, "sv", id->sversion != PA_SVERSION_ANY_ID, id->sversion);
531 547
548 add_wildcard(alias);
532 return 1; 549 return 1;
533} 550}
534 551
@@ -544,6 +561,7 @@ static int do_sdio_entry(const char *filename,
544 ADD(alias, "c", id->class != (__u8)SDIO_ANY_ID, id->class); 561 ADD(alias, "c", id->class != (__u8)SDIO_ANY_ID, id->class);
545 ADD(alias, "v", id->vendor != (__u16)SDIO_ANY_ID, id->vendor); 562 ADD(alias, "v", id->vendor != (__u16)SDIO_ANY_ID, id->vendor);
546 ADD(alias, "d", id->device != (__u16)SDIO_ANY_ID, id->device); 563 ADD(alias, "d", id->device != (__u16)SDIO_ANY_ID, id->device);
564 add_wildcard(alias);
547 return 1; 565 return 1;
548} 566}
549 567
@@ -559,6 +577,7 @@ static int do_ssb_entry(const char *filename,
559 ADD(alias, "v", id->vendor != SSB_ANY_VENDOR, id->vendor); 577 ADD(alias, "v", id->vendor != SSB_ANY_VENDOR, id->vendor);
560 ADD(alias, "id", id->coreid != SSB_ANY_ID, id->coreid); 578 ADD(alias, "id", id->coreid != SSB_ANY_ID, id->coreid);
561 ADD(alias, "rev", id->revision != SSB_ANY_REV, id->revision); 579 ADD(alias, "rev", id->revision != SSB_ANY_REV, id->revision);
580 add_wildcard(alias);
562 return 1; 581 return 1;
563} 582}
564 583
@@ -573,6 +592,7 @@ static int do_virtio_entry(const char *filename, struct virtio_device_id *id,
573 ADD(alias, "d", 1, id->device); 592 ADD(alias, "d", 1, id->device);
574 ADD(alias, "v", id->vendor != VIRTIO_DEV_ANY_ID, id->vendor); 593 ADD(alias, "v", id->vendor != VIRTIO_DEV_ANY_ID, id->vendor);
575 594
595 add_wildcard(alias);
576 return 1; 596 return 1;
577} 597}
578 598
@@ -612,9 +632,6 @@ static void do_table(void *symval, unsigned long size,
612 632
613 for (i = 0; i < size; i += id_size) { 633 for (i = 0; i < size; i += id_size) {
614 if (do_entry(mod->name, symval+i, alias)) { 634 if (do_entry(mod->name, symval+i, alias)) {
615 /* Always end in a wildcard, for future extension */
616 if (alias[strlen(alias)-1] != '*')
617 strcat(alias, "*");
618 buf_printf(&mod->dev_table_buf, 635 buf_printf(&mod->dev_table_buf,
619 "MODULE_ALIAS(\"%s\");\n", alias); 636 "MODULE_ALIAS(\"%s\");\n", alias);
620 } 637 }
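The file2alias.c change hoists the repeated append-a-trailing-star logic into add_wildcard() and calls it from every do_*_entry(), which also adds the wildcard to entry types (ieee1394, pci, ccw, serio, and so on) that previously skipped it. The helper's behaviour in a standalone demo; the modalias strings are made up:

#include <stdio.h>
#include <string.h>

/* Always end a modalias in a wildcard, for future extension. */
static void add_wildcard(char *str)
{
        size_t len = strlen(str);

        if (len == 0 || str[len - 1] != '*')
                strcat(str, "*");
}

int main(void)
{
        char a[32] = "pci:v00008086d00001234";
        char b[32] = "usb:v1D6Bp0002*";

        add_wildcard(a);
        add_wildcard(b);        /* already wildcarded: unchanged */
        printf("%s\n%s\n", a, b);
        return 0;
}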
diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c
index 59203511e77d..54a1f9036c66 100644
--- a/sound/drivers/pcsp/pcsp.c
+++ b/sound/drivers/pcsp/pcsp.c
@@ -194,6 +194,7 @@ static void pcsp_stop_beep(struct snd_pcsp *chip)
194 spin_unlock_irq(&chip->substream_lock); 194 spin_unlock_irq(&chip->substream_lock);
195} 195}
196 196
197#ifdef CONFIG_PM
197static int pcsp_suspend(struct platform_device *dev, pm_message_t state) 198static int pcsp_suspend(struct platform_device *dev, pm_message_t state)
198{ 199{
199 struct snd_pcsp *chip = platform_get_drvdata(dev); 200 struct snd_pcsp *chip = platform_get_drvdata(dev);
@@ -201,6 +202,9 @@ static int pcsp_suspend(struct platform_device *dev, pm_message_t state)
201 snd_pcm_suspend_all(chip->pcm); 202 snd_pcm_suspend_all(chip->pcm);
202 return 0; 203 return 0;
203} 204}
205#else
206#define pcsp_suspend NULL
207#endif /* CONFIG_PM */
204 208
205static void pcsp_shutdown(struct platform_device *dev) 209static void pcsp_shutdown(struct platform_device *dev)
206{ 210{
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 581debf37dcb..7e4742109572 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -515,19 +515,16 @@ config SND_FM801
515config SND_FM801_TEA575X_BOOL 515config SND_FM801_TEA575X_BOOL
516 bool "ForteMedia FM801 + TEA5757 tuner" 516 bool "ForteMedia FM801 + TEA5757 tuner"
517 depends on SND_FM801 517 depends on SND_FM801
518 depends on VIDEO_V4L1=y || VIDEO_V4L1=SND_FM801
518 help 519 help
519 Say Y here to include support for soundcards based on the ForteMedia 520 Say Y here to include support for soundcards based on the ForteMedia
520 FM801 chip with a TEA5757 tuner connected to GPIO1-3 pins (Media 521 FM801 chip with a TEA5757 tuner connected to GPIO1-3 pins (Media
521 Forte SF256-PCS-02) into the snd-fm801 driver. 522 Forte SF256-PCS-02) into the snd-fm801 driver.
522 523
523 This will enable support for the old V4L1 API.
524
525config SND_FM801_TEA575X 524config SND_FM801_TEA575X
526 tristate 525 tristate
527 depends on SND_FM801_TEA575X_BOOL 526 depends on SND_FM801_TEA575X_BOOL
528 default SND_FM801 527 default SND_FM801
529 select VIDEO_V4L1
530 select VIDEO_DEV
531 528
532config SND_HDA_INTEL 529config SND_HDA_INTEL
533 tristate "Intel HD Audio" 530 tristate "Intel HD Audio"
diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 39198e505b12..2da89810ca10 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -3446,6 +3446,7 @@ static const struct snd_kcontrol_new snd_ac97_controls_vt1617a[] = {
3446int patch_vt1617a(struct snd_ac97 * ac97) 3446int patch_vt1617a(struct snd_ac97 * ac97)
3447{ 3447{
3448 int err = 0; 3448 int err = 0;
3449 int val;
3449 3450
3450 /* we choose to not fail out at this point, but we tell the 3451 /* we choose to not fail out at this point, but we tell the
3451 caller when we return */ 3452 caller when we return */
@@ -3456,7 +3457,13 @@ int patch_vt1617a(struct snd_ac97 * ac97)
3456 /* bring analog power consumption to normal by turning off the 3457 /* bring analog power consumption to normal by turning off the
3457 * headphone amplifier, like WinXP driver for EPIA SP 3458 * headphone amplifier, like WinXP driver for EPIA SP
3458 */ 3459 */
3459 snd_ac97_write_cache(ac97, 0x5c, 0x20); 3460 /* We need to check the bit before writing it.
3461 * On some (many?) hardware, setting the bit actually clears it!
3462 */
3463 val = snd_ac97_read(ac97, 0x5c);
3464 if (!(val & 0x20))
3465 snd_ac97_write_cache(ac97, 0x5c, 0x20);
3466
3460 ac97->ext_id |= AC97_EI_SPDIF; /* force the detection of spdif */ 3467 ac97->ext_id |= AC97_EI_SPDIF; /* force the detection of spdif */
3461 ac97->rates[AC97_RATES_SPDIF] = SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000; 3468 ac97->rates[AC97_RATES_SPDIF] = SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000;
3462 ac97->build_ops = &patch_vt1616_ops; 3469 ac97->build_ops = &patch_vt1616_ops;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index d9783a4263e0..6d4df45e81e0 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -11902,7 +11902,10 @@ static void alc861_auto_set_output_and_unmute(struct hda_codec *codec,
11902 hda_nid_t nid, 11902 hda_nid_t nid,
11903 int pin_type, int dac_idx) 11903 int pin_type, int dac_idx)
11904{ 11904{
11905 alc_set_pin_output(codec, nid, pin_type); 11905 snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
11906 pin_type);
11907 snd_hda_codec_write(codec, dac_idx, 0, AC_VERB_SET_AMP_GAIN_MUTE,
11908 AMP_OUT_UNMUTE);
11906} 11909}
11907 11910
11908static void alc861_auto_init_multi_out(struct hda_codec *codec) 11911static void alc861_auto_init_multi_out(struct hda_codec *codec)
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index b3a15d616873..393f7fd2b1be 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -4289,6 +4289,8 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = {
4289 { .id = 0x83847635, .name = "STAC9250D", .patch = patch_stac925x }, 4289 { .id = 0x83847635, .name = "STAC9250D", .patch = patch_stac925x },
4290 { .id = 0x83847636, .name = "STAC9251", .patch = patch_stac925x }, 4290 { .id = 0x83847636, .name = "STAC9251", .patch = patch_stac925x },
4291 { .id = 0x83847637, .name = "STAC9250D", .patch = patch_stac925x }, 4291 { .id = 0x83847637, .name = "STAC9250D", .patch = patch_stac925x },
4292 { .id = 0x83847645, .name = "92HD206X", .patch = patch_stac927x },
4293 { .id = 0x83847646, .name = "92HD206D", .patch = patch_stac927x },
4292 /* The following does not take into account .id=0x83847661 when subsys = 4294 /* The following does not take into account .id=0x83847661 when subsys =
4293 * 104D0C00 which is STAC9225s. Because of this, some SZ Notebooks are 4295 * 104D0C00 which is STAC9225s. Because of this, some SZ Notebooks are
4294 * currently not fully supported. 4296 * currently not fully supported.
diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c
index 4ebcd6a8bf28..1ed6afd45459 100644
--- a/sound/soc/s3c24xx/s3c24xx-i2s.c
+++ b/sound/soc/s3c24xx/s3c24xx-i2s.c
@@ -224,6 +224,7 @@ static int s3c24xx_i2s_set_fmt(struct snd_soc_cpu_dai *cpu_dai,
224 iismod |= S3C2410_IISMOD_SLAVE; 224 iismod |= S3C2410_IISMOD_SLAVE;
225 break; 225 break;
226 case SND_SOC_DAIFMT_CBS_CFS: 226 case SND_SOC_DAIFMT_CBS_CFS:
227 iismod &= ~S3C2410_IISMOD_SLAVE;
227 break; 228 break;
228 default: 229 default:
229 return -EINVAL; 230 return -EINVAL;
@@ -234,6 +235,7 @@ static int s3c24xx_i2s_set_fmt(struct snd_soc_cpu_dai *cpu_dai,
234 iismod |= S3C2410_IISMOD_MSB; 235 iismod |= S3C2410_IISMOD_MSB;
235 break; 236 break;
236 case SND_SOC_DAIFMT_I2S: 237 case SND_SOC_DAIFMT_I2S:
238 iismod &= ~S3C2410_IISMOD_MSB;
237 break; 239 break;
238 default: 240 default:
239 return -EINVAL; 241 return -EINVAL;
diff --git a/sound/soc/s3c24xx/s3c24xx-pcm.c b/sound/soc/s3c24xx/s3c24xx-pcm.c
index 6c70a81c730c..7806ae614617 100644
--- a/sound/soc/s3c24xx/s3c24xx-pcm.c
+++ b/sound/soc/s3c24xx/s3c24xx-pcm.c
@@ -171,7 +171,7 @@ static int s3c24xx_pcm_hw_params(struct snd_pcm_substream *substream,
171 ret = s3c2410_dma_request(prtd->params->channel, 171 ret = s3c2410_dma_request(prtd->params->channel,
172 prtd->params->client, NULL); 172 prtd->params->client, NULL);
173 173
174 if (ret) { 174 if (ret < 0) {
175 DBG(KERN_ERR "failed to get dma channel\n"); 175 DBG(KERN_ERR "failed to get dma channel\n");
176 return ret; 176 return ret;
177 } 177 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e89338e2b043..f7ba099049ea 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -522,6 +522,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
522 return bad_hva(); 522 return bad_hva();
523 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 523 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
524} 524}
525EXPORT_SYMBOL_GPL(gfn_to_hva);
525 526
526/* 527/*
527 * Requires current->mm->mmap_sem to be held 528 * Requires current->mm->mmap_sem to be held