-rw-r--r--  Documentation/ABI/testing/debugfs-pktcdvd |    6
-rw-r--r--  Documentation/cgroups/memory.txt |   55
-rw-r--r--  Documentation/cgroups/resource_counter.txt |   27
-rw-r--r--  Documentation/sysctl/net.txt |    2
-rw-r--r--  Documentation/tomoyo.txt |   55
-rw-r--r--  Documentation/vm/00-INDEX |    2
-rw-r--r--  Documentation/vm/active_mm.txt |   83
-rw-r--r--  Documentation/vm/unevictable-lru.txt | 1041
-rw-r--r--  MAINTAINERS |   30
-rw-r--r--  arch/arm/mach-omap2/usb-musb.c |    8
-rw-r--r--  arch/ia64/kernel/pci-swiotlb.c |    2
-rw-r--r--  arch/powerpc/include/asm/parport.h |    2
-rw-r--r--  arch/sparc/include/asm/parport.h |    5
-rw-r--r--  arch/x86/include/asm/required-features.h |    2
-rw-r--r--  arch/x86/include/asm/xen/page.h |    3
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c |    2
-rw-r--r--  arch/x86/xen/enlighten.c |   89
-rw-r--r--  arch/x86/xen/mmu.c |  116
-rw-r--r--  arch/x86/xen/mmu.h |    3
-rw-r--r--  arch/x86/xen/smp.c |    4
-rw-r--r--  arch/x86/xen/xen-ops.h |    2
-rw-r--r--  drivers/atm/solos-pci.c |    2
-rw-r--r--  drivers/block/cciss.c |    2
-rw-r--r--  drivers/char/agp/intel-agp.c |    3
-rw-r--r--  drivers/char/sysrq.c |    1
-rw-r--r--  drivers/edac/edac_core.h |   12
-rw-r--r--  drivers/edac/edac_device.c |    2
-rw-r--r--  drivers/edac/edac_mc.c |    2
-rw-r--r--  drivers/edac/edac_pci.c |    2
-rw-r--r--  drivers/hwmon/Kconfig |   10
-rw-r--r--  drivers/hwmon/Makefile |    1
-rw-r--r--  drivers/hwmon/hp_accel.c |    1
-rw-r--r--  drivers/hwmon/sht15.c |  692
-rw-r--r--  drivers/misc/eeprom/at24.c |    8
-rw-r--r--  drivers/misc/eeprom/at25.c |    5
-rw-r--r--  drivers/misc/sgi-xp/xpc.h |  254
-rw-r--r--  drivers/misc/sgi-xp/xpc_channel.c |  138
-rw-r--r--  drivers/misc/sgi-xp/xpc_main.c |  128
-rw-r--r--  drivers/misc/sgi-xp/xpc_partition.c |   20
-rw-r--r--  drivers/misc/sgi-xp/xpc_sn2.c |  164
-rw-r--r--  drivers/misc/sgi-xp/xpc_uv.c |  257
-rw-r--r--  drivers/net/atl1c/atl1c_main.c |    4
-rw-r--r--  drivers/net/benet/be_main.c |    4
-rw-r--r--  drivers/net/jme.c |    8
-rw-r--r--  drivers/net/wireless/ath9k/pci.c |    4
-rw-r--r--  drivers/net/wireless/p54/p54pci.c |    4
-rw-r--r--  drivers/parisc/superio.c |    3
-rw-r--r--  drivers/scsi/3w-9xxx.c |    8
-rw-r--r--  drivers/scsi/aacraid/aachba.c |    2
-rw-r--r--  drivers/scsi/lpfc/lpfc_scsi.c |    2
-rw-r--r--  drivers/scsi/mpt2sas/mpt2sas_base.c |   10
-rw-r--r--  drivers/spi/spi.c |   22
-rw-r--r--  drivers/staging/b3dfg/b3dfg.c |    2
-rw-r--r--  drivers/usb/class/cdc-acm.c |    4
-rw-r--r--  drivers/usb/otg/nop-usb-xceiv.c |    4
-rw-r--r--  drivers/usb/serial/ti_usb_3410_5052.c |   26
-rw-r--r--  drivers/video/aty/radeon_base.c |    4
-rw-r--r--  drivers/video/backlight/backlight.c |    3
-rw-r--r--  drivers/video/backlight/lcd.c |    3
-rw-r--r--  drivers/video/cirrusfb.c |    4
-rw-r--r--  drivers/video/console/fbcon.c |   55
-rw-r--r--  drivers/video/efifb.c |    7
-rw-r--r--  drivers/video/fbmem.c |   19
-rw-r--r--  drivers/video/intelfb/intelfb.h |    2
-rw-r--r--  drivers/video/intelfb/intelfb_i2c.c |    1
-rw-r--r--  drivers/video/intelfb/intelfbdrv.c |    1
-rw-r--r--  drivers/video/intelfb/intelfbhw.c |    5
-rw-r--r--  drivers/video/s3fb.c |    6
-rw-r--r--  drivers/video/sa1100fb.c |   15
-rw-r--r--  drivers/video/sa1100fb.h |    7
-rw-r--r--  drivers/video/sis/sis_main.c |    2
-rw-r--r--  drivers/video/skeletonfb.c |    8
-rw-r--r--  drivers/video/uvesafb.c |   35
-rw-r--r--  drivers/video/vfb.c |   11
-rw-r--r--  drivers/xen/cpu_hotplug.c |   40
-rw-r--r--  drivers/xen/manage.c |    5
-rw-r--r--  fs/ext2/inode.c |   44
-rw-r--r--  fs/hfs/inode.c |    4
-rw-r--r--  fs/hfs/mdb.c |    1
-rw-r--r--  fs/jbd/revoke.c |   24
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c |   38
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.h |    1
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c |    9
-rw-r--r--  fs/xfs/linux-2.6/xfs_fs_subr.c |   14
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.c |   18
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c |   78
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h |    9
-rw-r--r--  fs/xfs/xfs_iget.c |   23
-rw-r--r--  fs/xfs/xfs_iomap.c |   61
-rw-r--r--  fs/xfs/xfs_iomap.h |    3
-rw-r--r--  fs/xfs/xfs_log.c |   78
-rw-r--r--  fs/xfs/xfs_mount.h |    2
-rw-r--r--  fs/xfs/xfs_vnodeops.c |    7
-rw-r--r--  include/asm-generic/siginfo.h |    2
-rw-r--r--  include/drm/drm_pciids.h |    2
-rw-r--r--  include/linux/fb.h |    8
-rw-r--r--  include/linux/fiemap.h |    2
-rw-r--r--  include/linux/init_task.h |   13
-rw-r--r--  include/linux/pci_ids.h |    2
-rw-r--r--  include/linux/sht15.h |   24
-rw-r--r--  include/linux/usb/serial.h |    7
-rw-r--r--  include/video/cyblafb.h |  175
-rw-r--r--  init/initramfs.c |    5
-rw-r--r--  ipc/mq_sysctl.c |    2
-rw-r--r--  kernel/ptrace.c |    7
-rw-r--r--  kernel/sys.c |   24
-rw-r--r--  kernel/sysctl.c |   20
-rw-r--r--  mm/Kconfig |    2
-rw-r--r--  mm/filemap.c |    4
-rw-r--r--  mm/memcontrol.c |    2
-rw-r--r--  mm/shmem.c |   27
-rw-r--r--  mm/util.c |   16
-rw-r--r--  security/tomoyo/common.c |    6
-rw-r--r--  security/tomoyo/common.h |    2
-rw-r--r--  security/tomoyo/domain.c |    2
-rw-r--r--  security/tomoyo/file.c |    2
-rw-r--r--  security/tomoyo/realpath.c |    2
-rw-r--r--  security/tomoyo/realpath.h |    2
-rw-r--r--  security/tomoyo/tomoyo.c |    2
-rw-r--r--  security/tomoyo/tomoyo.h |    2
-rw-r--r--  sound/pci/hda/hda_intel.c |    8
121 files changed, 2690 insertions, 1679 deletions
diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd
index bf9c16b64c34..cf11736acb76 100644
--- a/Documentation/ABI/testing/debugfs-pktcdvd
+++ b/Documentation/ABI/testing/debugfs-pktcdvd
@@ -1,4 +1,4 @@
-What:		/debug/pktcdvd/pktcdvd[0-7]
+What:		/sys/kernel/debug/pktcdvd/pktcdvd[0-7]
 Date:		Oct. 2006
 KernelVersion:	2.6.20
 Contact:	Thomas Maier <balagi@justmail.de>
@@ -10,10 +10,10 @@ debugfs interface
 The pktcdvd module (packet writing driver) creates
 these files in debugfs:
 
-/debug/pktcdvd/pktcdvd[0-7]/
+/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
     info            (0444) Lots of driver statistics and infos.
 
 Example:
 -------
 
-cat /debug/pktcdvd/pktcdvd0/info
+cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index a98a7fe7aabb..1a608877b14e 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -6,15 +6,14 @@ used here with the memory controller that is used in hardware.
 
 Salient features
 
-a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages
+a. Enable control of Anonymous, Page Cache (mapped and unmapped) and
+   Swap Cache memory pages.
 b. The infrastructure allows easy addition of other types of memory to control
 c. Provides *zero overhead* for non memory controller users
 d. Provides a double LRU: global memory pressure causes reclaim from the
    global LRU; a cgroup on hitting a limit, reclaims from the per
    cgroup LRU
 
-NOTE: Swap Cache (unmapped) is not accounted now.
-
 Benefits and Purpose of the memory controller
 
 The memory controller isolates the memory behaviour of a group of tasks
@@ -290,34 +289,44 @@ will be charged as a new owner of it.
 moved to the parent. If you want to avoid that, force_empty will be useful.
 
 5.2 stat file
-	memory.stat file includes following statistics (now)
-	cache	- # of pages from page-cache and shmem.
-	rss	- # of pages from anonymous memory.
-	pgpgin	- # of event of charging
-	pgpgout	- # of event of uncharging
-	active_anon	- # of pages on active lru of anon, shmem.
-	inactive_anon	- # of pages on active lru of anon, shmem
-	active_file	- # of pages on active lru of file-cache
-	inactive_file	- # of pages on inactive lru of file cache
-	unevictable	- # of pages cannot be reclaimed.(mlocked etc)
-
-	Below is depend on CONFIG_DEBUG_VM.
-	inactive_ratio	- VM internal parameter. (see mm/page_alloc.c)
-	recent_rotated_anon	- VM internal parameter. (see mm/vmscan.c)
-	recent_rotated_file	- VM internal parameter. (see mm/vmscan.c)
-	recent_scanned_anon	- VM internal parameter. (see mm/vmscan.c)
-	recent_scanned_file	- VM internal parameter. (see mm/vmscan.c)
-
-	Memo:
+
+memory.stat file includes following statistics
+
+cache		- # of bytes of page cache memory.
+rss		- # of bytes of anonymous and swap cache memory.
+pgpgin		- # of pages paged in (equivalent to # of charging events).
+pgpgout		- # of pages paged out (equivalent to # of uncharging events).
+active_anon	- # of bytes of anonymous and swap cache memory on active
+		  lru list.
+inactive_anon	- # of bytes of anonymous memory and swap cache memory on
+		  inactive lru list.
+active_file	- # of bytes of file-backed memory on active lru list.
+inactive_file	- # of bytes of file-backed memory on inactive lru list.
+unevictable	- # of bytes of memory that cannot be reclaimed (mlocked etc).
+
+The following additional stats are dependent on CONFIG_DEBUG_VM.
+
+inactive_ratio		- VM internal parameter. (see mm/page_alloc.c)
+recent_rotated_anon	- VM internal parameter. (see mm/vmscan.c)
+recent_rotated_file	- VM internal parameter. (see mm/vmscan.c)
+recent_scanned_anon	- VM internal parameter. (see mm/vmscan.c)
+recent_scanned_file	- VM internal parameter. (see mm/vmscan.c)
+
+Memo:
 	recent_rotated means recent frequency of lru rotation.
 	recent_scanned means recent # of scans to lru.
 	showing for better debug please see the code for meanings.
 
+Note:
+	Only anonymous and swap cache memory is listed as part of 'rss' stat.
+	This should not be confused with the true 'resident set size' or the
+	amount of physical memory used by the cgroup. Per-cgroup rss
+	accounting is not done yet.
 
 5.3 swappiness
 	Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
 
-	Following cgroup's swapiness can't be changed.
+	Following cgroups' swapiness can't be changed.
 	- root cgroup (uses /proc/sys/vm/swappiness).
 	- a cgroup which uses hierarchy and it has child cgroup.
 	- a cgroup which uses hierarchy and not the root of hierarchy.
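
The memory.stat interface documented above is plain text, one "<name> <value>"
pair per line. A minimal userspace reader follows, as a hedged sketch only -
the /cgroups/0 path assumes the example mount layout used elsewhere in
memory.txt:

	#include <stdio.h>

	int main(void)
	{
		char name[64];
		unsigned long long val;
		FILE *f = fopen("/cgroups/0/memory.stat", "r");

		if (!f)
			return 1;
		/* cache, rss and the lru counters are reported in bytes;
		 * pgpgin/pgpgout are page event counts. */
		while (fscanf(f, "%63s %llu", name, &val) == 2)
			printf("%-20s %llu\n", name, val);
		fclose(f);
		return 0;
	}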
diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt
index f196ac1d7d25..95b24d766eab 100644
--- a/Documentation/cgroups/resource_counter.txt
+++ b/Documentation/cgroups/resource_counter.txt
@@ -47,13 +47,18 @@ to work with it.
 
 2. Basic accounting routines
 
- a. void res_counter_init(struct res_counter *rc)
+ a. void res_counter_init(struct res_counter *rc,
+				struct res_counter *rc_parent)
 
 	Initializes the resource counter. As usual, should be the first
 	routine called for a new counter.
 
- b. int res_counter_charge[_locked]
-			(struct res_counter *rc, unsigned long val)
+	The struct res_counter *parent can be used to define a hierarchical
+	child -> parent relationship directly in the res_counter structure,
+	NULL can be used to define no relationship.
+
+ c. int res_counter_charge(struct res_counter *rc, unsigned long val,
+				struct res_counter **limit_fail_at)
 
 	When a resource is about to be allocated it has to be accounted
 	with the appropriate resource counter (controller should determine
@@ -67,15 +72,25 @@ to work with it.
 	* if the charging is performed first, then it should be uncharged
 	  on error path (if the one is called).
 
- c. void res_counter_uncharge[_locked]
+	If the charging fails and a hierarchical dependency exists, the
+	limit_fail_at parameter is set to the particular res_counter element
+	where the charging failed.
+
+ d. int res_counter_charge_locked
+			(struct res_counter *rc, unsigned long val)
+
+	The same as res_counter_charge(), but it must not acquire/release the
+	res_counter->lock internally (it must be called with res_counter->lock
+	held).
+
+ e. void res_counter_uncharge[_locked]
 			(struct res_counter *rc, unsigned long val)
 
 	When a resource is released (freed) it should be de-accounted
 	from the resource counter it was accounted to. This is called
 	"uncharging".
 
 	The _locked routines imply that the res_counter->lock is taken.
-
 
 2.1 Other accounting routines
 
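
Taken together, the two hunks above describe a hierarchical charging API. The
following is a hedged usage sketch, not part of the patch: the my_* names and
the -ENOMEM policy are invented for illustration, and only the signatures
documented above are assumed.

	#include <linux/res_counter.h>
	#include <linux/errno.h>

	static struct res_counter parent_cnt, child_cnt;

	static void my_counters_init(void)
	{
		res_counter_init(&parent_cnt, NULL);	   /* top of hierarchy */
		res_counter_init(&child_cnt, &parent_cnt); /* child -> parent */
	}

	static int my_alloc(unsigned long val)
	{
		struct res_counter *fail_at;

		/* The charge propagates up the hierarchy; on failure,
		 * fail_at points at the counter whose limit was hit. */
		if (res_counter_charge(&child_cnt, val, &fail_at))
			return -ENOMEM;
		return 0;
	}

	static void my_free(unsigned long val)
	{
		/* De-account ("uncharge") what was charged. */
		res_counter_uncharge(&child_cnt, val);
	}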
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index a34d55b65441..df38ef046f8d 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -95,7 +95,7 @@ of struct cmsghdr structures with appended data.
 
 There is only one file in this directory.
 unix_dgram_qlen limits the max number of datagrams queued in Unix domain
-socket's buffer. It will not take effect unless PF_UNIX flag is spicified.
+socket's buffer. It will not take effect unless PF_UNIX flag is specified.
 
 
 3. /proc/sys/net/ipv4 - IPV4 settings
diff --git a/Documentation/tomoyo.txt b/Documentation/tomoyo.txt
new file mode 100644
index 000000000000..b3a232cae7f8
--- /dev/null
+++ b/Documentation/tomoyo.txt
@@ -0,0 +1,55 @@
+--- What is TOMOYO? ---
+
+TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel.
+
+LiveCD-based tutorials are available at
+http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/ubuntu8.04-live/
+http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/centos5-live/ .
+Though these tutorials use non-LSM version of TOMOYO, they are useful for you
+to know what TOMOYO is.
+
+--- How to enable TOMOYO? ---
+
+Build the kernel with CONFIG_SECURITY_TOMOYO=y and pass "security=tomoyo" on
+kernel's command line.
+
+Please see http://tomoyo.sourceforge.jp/en/2.2.x/ for details.
+
+--- Where is documentation? ---
+
+User <-> Kernel interface documentation is available at
+http://tomoyo.sourceforge.jp/en/2.2.x/policy-reference.html .
+
+Materials we prepared for seminars and symposiums are available at
+http://sourceforge.jp/projects/tomoyo/docs/?category_id=532&language_id=1 .
+Below lists are chosen from three aspects.
+
+What is TOMOYO?
+  TOMOYO Linux Overview
+    http://sourceforge.jp/projects/tomoyo/docs/lca2009-takeda.pdf
+  TOMOYO Linux: pragmatic and manageable security for Linux
+    http://sourceforge.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf
+  TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box
+    http://sourceforge.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf
+
+What can TOMOYO do?
+  Deep inside TOMOYO Linux
+    http://sourceforge.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf
+  The role of "pathname based access control" in security.
+    http://sourceforge.jp/projects/tomoyo/docs/lfj2008-bof.pdf
+
+History of TOMOYO?
+  Realities of Mainlining
+    http://sourceforge.jp/projects/tomoyo/docs/lfj2008.pdf
+
+--- What is future plan? ---
+
+We believe that inode based security and name based security are complementary
+and both should be used together. But unfortunately, so far, we cannot enable
+multiple LSM modules at the same time. We feel sorry that you have to give up
+SELinux/SMACK/AppArmor etc. when you want to use TOMOYO.
+
+We hope that LSM becomes stackable in future. Meanwhile, you can use non-LSM
+version of TOMOYO, available at http://tomoyo.sourceforge.jp/en/1.6.x/ .
+LSM version of TOMOYO is a subset of non-LSM version of TOMOYO. We are planning
+to port non-LSM version's functionalities to LSM versions.
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index 2131b00b63f6..2f77ced35df7 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -1,5 +1,7 @@
 00-INDEX
 	- this file.
+active_mm.txt
+	- An explanation from Linus about tsk->active_mm vs tsk->mm.
 balance
 	- various information on memory balancing.
 hugetlbpage.txt
diff --git a/Documentation/vm/active_mm.txt b/Documentation/vm/active_mm.txt
new file mode 100644
index 000000000000..4ee1f643d897
--- /dev/null
+++ b/Documentation/vm/active_mm.txt
@@ -0,0 +1,83 @@
+List: linux-kernel
+Subject: Re: active_mm
+From: Linus Torvalds <torvalds () transmeta ! com>
+Date: 1999-07-30 21:36:24
+
+Cc'd to linux-kernel, because I don't write explanations all that often,
+and when I do I feel better about more people reading them.
+
+On Fri, 30 Jul 1999, David Mosberger wrote:
+>
+> Is there a brief description someplace on how "mm" vs. "active_mm" in
+> the task_struct are supposed to be used? (My apologies if this was
+> discussed on the mailing lists---I just returned from vacation and
+> wasn't able to follow linux-kernel for a while).
+
+Basically, the new setup is:
+
+ - we have "real address spaces" and "anonymous address spaces". The
+   difference is that an anonymous address space doesn't care about the
+   user-level page tables at all, so when we do a context switch into an
+   anonymous address space we just leave the previous address space
+   active.
+
+   The obvious use for a "anonymous address space" is any thread that
+   doesn't need any user mappings - all kernel threads basically fall into
+   this category, but even "real" threads can temporarily say that for
+   some amount of time they are not going to be interested in user space,
+   and that the scheduler might as well try to avoid wasting time on
+   switching the VM state around. Currently only the old-style bdflush
+   sync does that.
+
+ - "tsk->mm" points to the "real address space". For an anonymous process,
+   tsk->mm will be NULL, for the logical reason that an anonymous process
+   really doesn't _have_ a real address space at all.
+
+ - however, we obviously need to keep track of which address space we
+   "stole" for such an anonymous user. For that, we have "tsk->active_mm",
+   which shows what the currently active address space is.
+
+   The rule is that for a process with a real address space (ie tsk->mm is
+   non-NULL) the active_mm obviously always has to be the same as the real
+   one.
+
+   For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the
+   "borrowed" mm while the anonymous process is running. When the
+   anonymous process gets scheduled away, the borrowed address space is
+   returned and cleared.
+
+To support all that, the "struct mm_struct" now has two counters: a
+"mm_users" counter that is how many "real address space users" there are,
+and a "mm_count" counter that is the number of "lazy" users (ie anonymous
+users) plus one if there are any real users.
+
+Usually there is at least one real user, but it could be that the real
+user exited on another CPU while a lazy user was still active, so you do
+actually get cases where you have a address space that is _only_ used by
+lazy users. That is often a short-lived state, because once that thread
+gets scheduled away in favour of a real thread, the "zombie" mm gets
+released because "mm_users" becomes zero.
+
+Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
+more. "init_mm" should be considered just a "lazy context when no other
+context is available", and in fact it is mainly used just at bootup when
+no real VM has yet been created. So code that used to check
+
+	if (current->mm == &init_mm)
+
+should generally just do
+
+	if (!current->mm)
+
+instead (which makes more sense anyway - the test is basically one of "do
+we have a user context", and is generally done by the page fault handler
+and things like that).
+
+Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago,
+because it slightly changes the interfaces to accomodate the alpha (who
+would have thought it, but the alpha actually ends up having one of the
+ugliest context switch codes - unlike the other architectures where the MM
+and register state is separate, the alpha PALcode joins the two, and you
+need to switch both together).
+
+(From http://marc.info/?l=linux-kernel&m=93337278602211&w=2)
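
The borrowing rules described in this mail are what the scheduler's
context-switch path implements. A simplified, hedged sketch of that logic
follows (not kernel source: locking, enter_lazy_tlb() and the architecture
hooks are omitted; only the mm/active_mm/mm_count semantics stated above are
assumed, and the sketch_* name is invented):

	#include <linux/sched.h>
	#include <asm/mmu_context.h>

	static void sketch_context_switch(struct task_struct *prev,
					  struct task_struct *next)
	{
		struct mm_struct *oldmm = prev->active_mm;

		if (!next->mm) {
			/* Anonymous address space: borrow whatever is
			 * currently active and count one more lazy user. */
			next->active_mm = oldmm;
			atomic_inc(&oldmm->mm_count);
		} else {
			/* Real address space: active_mm == mm, always. */
			next->active_mm = next->mm;
			switch_mm(oldmm, next->mm, next);
		}

		if (!prev->mm) {
			/* The previous task was lazy: hand back the
			 * borrowed mm; this may free a "zombie" mm. */
			prev->active_mm = NULL;
			mmdrop(oldmm);
		}
	}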
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
index 0706a7282a8c..2d70d0d95108 100644
--- a/Documentation/vm/unevictable-lru.txt
+++ b/Documentation/vm/unevictable-lru.txt
@@ -1,588 +1,691 @@
-
-This document describes the Linux memory management "Unevictable LRU"
-infrastructure and the use of this infrastructure to manage several types
-of "unevictable" pages. The document attempts to provide the overall
-rationale behind this mechanism and the rationale for some of the design
-decisions that drove the implementation. The latter design rationale is
-discussed in the context of an implementation description. Admittedly, one
-can obtain the implementation details--the "what does it do?"--by reading the
-code. One hopes that the descriptions below add value by provide the answer
-to "why does it do that?".
-
-Unevictable LRU Infrastructure:
-
-The Unevictable LRU adds an additional LRU list to track unevictable pages
-and to hide these pages from vmscan. This mechanism is based on a patch by
-Larry Woodman of Red Hat to address several scalability problems with page
+			==============================
+			UNEVICTABLE LRU INFRASTRUCTURE
+			==============================
+
+========
+CONTENTS
+========
+
+ (*) The Unevictable LRU
+
+     - The unevictable page list.
+     - Memory control group interaction.
+     - Marking address spaces unevictable.
+     - Detecting Unevictable Pages.
+     - vmscan's handling of unevictable pages.
+
+ (*) mlock()'d pages.
+
+     - History.
+     - Basic management.
+     - mlock()/mlockall() system call handling.
+     - Filtering special vmas.
+     - munlock()/munlockall() system call handling.
+     - Migrating mlocked pages.
+     - mmap(MAP_LOCKED) system call handling.
+     - munmap()/exit()/exec() system call handling.
+     - try_to_unmap().
+     - try_to_munlock() reverse map scan.
+     - Page reclaim in shrink_*_list().
+
+
+============
+INTRODUCTION
+============
+
+This document describes the Linux memory manager's "Unevictable LRU"
+infrastructure and the use of this to manage several types of "unevictable"
+pages.
+
+The document attempts to provide the overall rationale behind this mechanism
+and the rationale for some of the design decisions that drove the
+implementation. The latter design rationale is discussed in the context of an
+implementation description. Admittedly, one can obtain the implementation
+details - the "what does it do?" - by reading the code. One hopes that the
+descriptions below add value by provide the answer to "why does it do that?".
+
+
+===================
+THE UNEVICTABLE LRU
+===================
+
+The Unevictable LRU facility adds an additional LRU list to track unevictable
+pages and to hide these pages from vmscan. This mechanism is based on a patch
+by Larry Woodman of Red Hat to address several scalability problems with page
 reclaim in Linux. The problems have been observed at customer sites on large
-memory x86_64 systems. For example, a non-numal x86_64 platform with 128GB
-of main memory will have over 32 million 4k pages in a single zone. When a
-large fraction of these pages are not evictable for any reason [see below],
-vmscan will spend a lot of time scanning the LRU lists looking for the small
-fraction of pages that are evictable. This can result in a situation where
-all cpus are spending 100% of their time in vmscan for hours or days on end,
-with the system completely unresponsive.
-
-The Unevictable LRU infrastructure addresses the following classes of
-unevictable pages:
-
-+ page owned by ramfs
-+ page mapped into SHM_LOCKed shared memory regions
-+ page mapped into VM_LOCKED [mlock()ed] vmas
-
-The infrastructure might be able to handle other conditions that make pages
+memory x86_64 systems.
+
+To illustrate this with an example, a non-NUMA x86_64 platform with 128GB of
+main memory will have over 32 million 4k pages in a single zone. When a large
+fraction of these pages are not evictable for any reason [see below], vmscan
+will spend a lot of time scanning the LRU lists looking for the small fraction
+of pages that are evictable. This can result in a situation where all CPUs are
+spending 100% of their time in vmscan for hours or days on end, with the system
+completely unresponsive.
+
+The unevictable list addresses the following classes of unevictable pages:
+
+ (*) Those owned by ramfs.
+
+ (*) Those mapped into SHM_LOCK'd shared memory regions.
+
+ (*) Those mapped into VM_LOCKED [mlock()ed] VMAs.
+
+The infrastructure may also be able to handle other conditions that make pages
 unevictable, either by definition or by circumstance, in the future.
 
 
-The Unevictable LRU List
+THE UNEVICTABLE PAGE LIST
+-------------------------
 
 The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
 called the "unevictable" list and an associated page flag, PG_unevictable, to
-indicate that the page is being managed on the unevictable list. The
-PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active
-flag in that it indicates on which LRU list a page resides when PG_lru is set.
-The unevictable LRU list is source configurable based on the UNEVICTABLE_LRU
-Kconfig option.
+indicate that the page is being managed on the unevictable list.
+
+The PG_unevictable flag is analogous to, and mutually exclusive with, the
+PG_active flag in that it indicates on which LRU list a page resides when
+PG_lru is set. The unevictable list is compile-time configurable based on the
+UNEVICTABLE_LRU Kconfig option.
 
 The Unevictable LRU infrastructure maintains unevictable pages on an additional
 LRU list for a few reasons:
 
-1) We get to "treat unevictable pages just like we treat other pages in the
-   system, which means we get to use the same code to manipulate them, the
-   same code to isolate them (for migrate, etc.), the same code to keep track
-   of the statistics, etc..." [Rik van Riel]
+ (1) We get to "treat unevictable pages just like we treat other pages in the
+     system - which means we get to use the same code to manipulate them, the
+     same code to isolate them (for migrate, etc.), the same code to keep track
+     of the statistics, etc..." [Rik van Riel]
+
+ (2) We want to be able to migrate unevictable pages between nodes for memory
+     defragmentation, workload management and memory hotplug. The linux kernel
+     can only migrate pages that it can successfully isolate from the LRU
+     lists. If we were to maintain pages elsewhere than on an LRU-like list,
+     where they can be found by isolate_lru_page(), we would prevent their
+     migration, unless we reworked migration code to find the unevictable pages
+     itself.
 
-2) We want to be able to migrate unevictable pages between nodes--for memory
-   defragmentation, workload management and memory hotplug. The linux kernel
-   can only migrate pages that it can successfully isolate from the lru lists.
-   If we were to maintain pages elsewise than on an lru-like list, where they
-   can be found by isolate_lru_page(), we would prevent their migration, unless
-   we reworked migration code to find the unevictable pages.
 
+The unevictable list does not differentiate between file-backed and anonymous,
+swap-backed pages. This differentiation is only important while the pages are,
+in fact, evictable.
 
-The unevictable LRU list does not differentiate between file backed and swap
-backed [anon] pages. This differentiation is only important while the pages
-are, in fact, evictable.
+The unevictable list benefits from the "arrayification" of the per-zone LRU
+lists and statistics originally proposed and posted by Christoph Lameter.
 
-The unevictable LRU list benefits from the "arrayification" of the per-zone
-LRU lists and statistics originally proposed and posted by Christoph Lameter.
+The unevictable list does not use the LRU pagevec mechanism. Rather,
+unevictable pages are placed directly on the page's zone's unevictable list
+under the zone lru_lock. This allows us to prevent the stranding of pages on
+the unevictable list when one task has the page isolated from the LRU and other
+tasks are changing the "evictability" state of the page.
 
-The unevictable list does not use the lru pagevec mechanism. Rather,
-unevictable pages are placed directly on the page's zone's unevictable
-list under the zone lru_lock. The reason for this is to prevent stranding
-of pages on the unevictable list when one task has the page isolated from the
-lru and other tasks are changing the "evictability" state of the page.
 
+MEMORY CONTROL GROUP INTERACTION
+--------------------------------
 
-Unevictable LRU and Memory Controller Interaction
+The unevictable LRU facility interacts with the memory control group [aka
+memory controller; see Documentation/cgroups/memory.txt] by extending the
+lru_list enum.
+
+The memory controller data structure automatically gets a per-zone unevictable
+list as a result of the "arrayification" of the per-zone LRU lists (one per
+lru_list enum element). The memory controller tracks the movement of pages to
+and from the unevictable list.
 
-The memory controller data structure automatically gets a per zone unevictable
-lru list as a result of the "arrayification" of the per-zone LRU lists. The
-memory controller tracks the movement of pages to and from the unevictable list.
 When a memory control group comes under memory pressure, the controller will
 not attempt to reclaim pages on the unevictable list. This has a couple of
-effects. Because the pages are "hidden" from reclaim on the unevictable list,
-the reclaim process can be more efficient, dealing only with pages that have
-a chance of being reclaimed. On the other hand, if too many of the pages
-charged to the control group are unevictable, the evictable portion of the
-working set of the tasks in the control group may not fit into the available
-memory. This can cause the control group to thrash or to oom-kill tasks.
-
-
-Unevictable LRU: Detecting Unevictable Pages
-
-The function page_evictable(page, vma) in vmscan.c determines whether a
-page is evictable or not. For ramfs pages and pages in SHM_LOCKed regions,
-page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's
-address space using a wrapper function. Wrapper functions are used to set,
-clear and test the flag to reduce the requirement for #ifdef's throughout the
-source code. AS_UNEVICTABLE is set on ramfs inode/mapping when it is created.
-This flag remains for the life of the inode.
-
-For shared memory regions, AS_UNEVICTABLE is set when an application
-successfully SHM_LOCKs the region and is removed when the region is
-SHM_UNLOCKed. Note that shmctl(SHM_LOCK, ...) does not populate the page
-tables for the region as does, for example, mlock(). So, we make no special
-effort to push any pages in the SHM_LOCKed region to the unevictable list.
-Vmscan will do this when/if it encounters the pages during reclaim. On
-SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the
-unevictable list if no other condition keeps them unevictable. If a SHM_LOCKed
-region is destroyed, the pages are also "rescued" from the unevictable list in
-the process of freeing them.
-
-page_evictable() detects mlock()ed pages by testing an additional page flag,
-PG_mlocked via the PageMlocked() wrapper. If the page is NOT mlocked, and a
-non-NULL vma is supplied, page_evictable() will check whether the vma is
+effects:
+
+ (1) Because the pages are "hidden" from reclaim on the unevictable list, the
+     reclaim process can be more efficient, dealing only with pages that have a
+     chance of being reclaimed.
+
+ (2) On the other hand, if too many of the pages charged to the control group
+     are unevictable, the evictable portion of the working set of the tasks in
+     the control group may not fit into the available memory. This can cause
+     the control group to thrash or to OOM-kill tasks.
+
+
+MARKING ADDRESS SPACES UNEVICTABLE
+----------------------------------
+
+For facilities such as ramfs none of the pages attached to the address space
+may be evicted. To prevent eviction of any such pages, the AS_UNEVICTABLE
+address space flag is provided, and this can be manipulated by a filesystem
+using a number of wrapper functions:
+
+ (*) void mapping_set_unevictable(struct address_space *mapping);
+
+	Mark the address space as being completely unevictable.
+
+ (*) void mapping_clear_unevictable(struct address_space *mapping);
+
+	Mark the address space as being evictable.
+
+ (*) int mapping_unevictable(struct address_space *mapping);
+
+	Query the address space, and return true if it is completely
+	unevictable.
+
+These are currently used in two places in the kernel:
+
+ (1) By ramfs to mark the address spaces of its inodes when they are created,
+     and this mark remains for the life of the inode.
+
+ (2) By SYSV SHM to mark SHM_LOCK'd address spaces until SHM_UNLOCK is called.
+
+     Note that SHM_LOCK is not required to page in the locked pages if they're
+     swapped out; the application must touch the pages manually if it wants to
+     ensure they're in memory.
+
+
+DETECTING UNEVICTABLE PAGES
+---------------------------
+
+The function page_evictable() in vmscan.c determines whether a page is
+evictable or not using the query function outlined above [see section "Marking
+address spaces unevictable"] to check the AS_UNEVICTABLE flag.
+
+For address spaces that are so marked after being populated (as SHM regions
+might be), the lock action (eg: SHM_LOCK) can be lazy, and need not populate
+the page tables for the region as does, for example, mlock(), nor need it make
+any special effort to push any pages in the SHM_LOCK'd area to the unevictable
+list. Instead, vmscan will do this if and when it encounters the pages during
+a reclamation scan.
+
+On an unlock action (such as SHM_UNLOCK), the unlocker (eg: shmctl()) must scan
+the pages in the region and "rescue" them from the unevictable list if no other
+condition is keeping them unevictable. If an unevictable region is destroyed,
+the pages are also "rescued" from the unevictable list in the process of
+freeing them.
+
+page_evictable() also checks for mlocked pages by testing an additional page
+flag, PG_mlocked (as wrapped by PageMlocked()). If the page is NOT mlocked,
+and a non-NULL VMA is supplied, page_evictable() will check whether the VMA is
 VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and
 update the appropriate statistics if the vma is VM_LOCKED. This method allows
 efficient "culling" of pages in the fault path that are being faulted in to
-VM_LOCKED vmas.
+VM_LOCKED VMAs.
 
 
-Unevictable Pages and Vmscan [shrink_*_list()]
+VMSCAN'S HANDLING OF UNEVICTABLE PAGES
+--------------------------------------
 
 If unevictable pages are culled in the fault path, or moved to the unevictable
-list at mlock() or mmap() time, vmscan will never encounter the pages until
-they have become evictable again, for example, via munlock() and have been
-"rescued" from the unevictable list. However, there may be situations where we
-decide, for the sake of expediency, to leave a unevictable page on one of the
-regular active/inactive LRU lists for vmscan to deal with. Vmscan checks for
-such pages in all of the shrink_{active|inactive|page}_list() functions and
-will "cull" such pages that it encounters--that is, it diverts those pages to
-the unevictable list for the zone being scanned.
+list at mlock() or mmap() time, vmscan will not encounter the pages until they
+have become evictable again (via munlock() for example) and have been "rescued"
+from the unevictable list. However, there may be situations where we decide,
+for the sake of expediency, to leave a unevictable page on one of the regular
+active/inactive LRU lists for vmscan to deal with. vmscan checks for such
+pages in all of the shrink_{active|inactive|page}_list() functions and will
+"cull" such pages that it encounters: that is, it diverts those pages to the
+unevictable list for the zone being scanned.
 
-There may be situations where a page is mapped into a VM_LOCKED vma, but the
-page is not marked as PageMlocked. Such pages will make it all the way to
+There may be situations where a page is mapped into a VM_LOCKED VMA, but the
+page is not marked as PG_mlocked. Such pages will make it all the way to
 shrink_page_list() where they will be detected when vmscan walks the reverse
-map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
-will cull the page at that point.
+map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK,
+shrink_page_list() will cull the page at that point.
 
-To "cull" an unevictable page, vmscan simply puts the page back on the lru
-list using putback_lru_page()--the inverse operation to isolate_lru_page()--
-after dropping the page lock. Because the condition which makes the page
-unevictable may change once the page is unlocked, putback_lru_page() will
-recheck the unevictable state of a page that it places on the unevictable lru
-list. If the page has become unevictable, putback_lru_page() removes it from
-the list and retries, including the page_unevictable() test. Because such a
-race is a rare event and movement of pages onto the unevictable list should be
-rare, these extra evictabilty checks should not occur in the majority of calls
-to putback_lru_page().
+To "cull" an unevictable page, vmscan simply puts the page back on the LRU list
+using putback_lru_page() - the inverse operation to isolate_lru_page() - after
+dropping the page lock. Because the condition which makes the page unevictable
+may change once the page is unlocked, putback_lru_page() will recheck the
+unevictable state of a page that it places on the unevictable list. If the
+page has become unevictable, putback_lru_page() removes it from the list and
+retries, including the page_unevictable() test. Because such a race is a rare
+event and movement of pages onto the unevictable list should be rare, these
+extra evictabilty checks should not occur in the majority of calls to
+putback_lru_page().
 
 
-Mlocked Page: Prior Work
+=============
+MLOCKED PAGES
+=============
 
-The "Unevictable Mlocked Pages" infrastructure is based on work originally
+The unevictable page list is also useful for mlock(), in addition to ramfs and
+SYSV SHM. Note that mlock() is only available in CONFIG_MMU=y situations; in
+NOMMU situations, all mappings are effectively mlocked.
+
+
+HISTORY
+-------
+
+The "Unevictable mlocked Pages" infrastructure is based on work originally
 posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
-Nick posted his patch as an alternative to a patch posted by Christoph
-Lameter to achieve the same objective--hiding mlocked pages from vmscan.
-In Nick's patch, he used one of the struct page lru list link fields as a count
-of VM_LOCKED vmas that map the page. This use of the link field for a count
-prevented the management of the pages on an LRU list. Thus, mlocked pages were
-not migratable as isolate_lru_page() could not find them and the lru list link
-field was not available to the migration subsystem. Nick resolved this by
-putting mlocked pages back on the lru list before attempting to isolate them,
-thus abandoning the count of VM_LOCKED vmas. When Nick's patch was integrated
-with the Unevictable LRU work, the count was replaced by walking the reverse
-map to determine whether any VM_LOCKED vmas mapped the page. More on this
-below.
-
-
-Mlocked Pages: Basic Management
+Nick posted his patch as an alternative to a patch posted by Christoph Lameter
+to achieve the same objective: hiding mlocked pages from vmscan.
+
+In Nick's patch, he used one of the struct page LRU list link fields as a count
+of VM_LOCKED VMAs that map the page. This use of the link field for a count
+prevented the management of the pages on an LRU list, and thus mlocked pages
+were not migratable as isolate_lru_page() could not find them, and the LRU list
+link field was not available to the migration subsystem.
+
+Nick resolved this by putting mlocked pages back on the lru list before
+attempting to isolate them, thus abandoning the count of VM_LOCKED VMAs. When
+Nick's patch was integrated with the Unevictable LRU work, the count was
+replaced by walking the reverse map to determine whether any VM_LOCKED VMAs
+mapped the page. More on this below.
+
 
-Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of
-unevictable pages. When such a page has been "noticed" by the memory
-management subsystem, the page is marked with the PG_mlocked [PageMlocked()]
-flag. A PageMlocked() page will be placed on the unevictable LRU list when
-it is added to the LRU. Pages can be "noticed" by memory management in
-several places:
-
-1) in the mlock()/mlockall() system call handlers.
-2) in the mmap() system call handler when mmap()ing a region with the
-   MAP_LOCKED flag, or mmap()ing a region in a task that has called
-   mlockall() with the MCL_FUTURE flag. Both of these conditions result
-   in the VM_LOCKED flag being set for the vma.
-3) in the fault path, if mlocked pages are "culled" in the fault path,
-   and when a VM_LOCKED stack segment is expanded.
-4) as mentioned above, in vmscan:shrink_page_list() when attempting to
-   reclaim a page in a VM_LOCKED vma via try_to_unmap().
-
-Mlocked pages become unlocked and rescued from the unevictable list when:
-
-1) mapped in a range unlocked via the munlock()/munlockall() system calls.
-2) munmapped() out of the last VM_LOCKED vma that maps the page, including
-   unmapping at task exit.
-3) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file.
-4) before a page is COWed in a VM_LOCKED vma.
-
-
-Mlocked Pages: mlock()/mlockall() System Call Handling
+BASIC MANAGEMENT
+----------------
+
+mlocked pages - pages mapped into a VM_LOCKED VMA - are a class of unevictable
+pages. When such a page has been "noticed" by the memory management subsystem,
+the page is marked with the PG_mlocked flag. This can be manipulated using the
+PageMlocked() functions.
+
+A PG_mlocked page will be placed on the unevictable list when it is added to
+the LRU. Such pages can be "noticed" by memory management in several places:
+
+ (1) in the mlock()/mlockall() system call handlers;
+
+ (2) in the mmap() system call handler when mmapping a region with the
+     MAP_LOCKED flag;
+
+ (3) mmapping a region in a task that has called mlockall() with the MCL_FUTURE
+     flag
+
+ (4) in the fault path, if mlocked pages are "culled" in the fault path,
+     and when a VM_LOCKED stack segment is expanded; or
+
+ (5) as mentioned above, in vmscan:shrink_page_list() when attempting to
+     reclaim a page in a VM_LOCKED VMA via try_to_unmap()
+
+all of which result in the VM_LOCKED flag being set for the VMA if it doesn't
+already have it set.
+
+mlocked pages become unlocked and rescued from the unevictable list when:
+
+ (1) mapped in a range unlocked via the munlock()/munlockall() system calls;
+
+ (2) munmap()'d out of the last VM_LOCKED VMA that maps the page, including
+     unmapping at task exit;
+
+ (3) when the page is truncated from the last VM_LOCKED VMA of an mmapped file;
+     or
+
+ (4) before a page is COW'd in a VM_LOCKED VMA.
+
+
+mlock()/mlockall() SYSTEM CALL HANDLING
+---------------------------------------
 
 Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
-for each vma in the range specified by the call. In the case of mlockall(),
+for each VMA in the range specified by the call. In the case of mlockall(),
 this is the entire active address space of the task. Note that mlock_fixup()
-is used for both mlock()ing and munlock()ing a range of memory. A call to
-mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED
-is treated as a no-op--mlock_fixup() simply returns.
+is used for both mlocking and munlocking a range of memory. A call to mlock()
+an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED is
+treated as a no-op, and mlock_fixup() simply returns.
 
-If the vma passes some filtering described in "Mlocked Pages: Filtering Vmas"
-below, mlock_fixup() will attempt to merge the vma with its neighbors or split
-off a subset of the vma if the range does not cover the entire vma. Once the
-vma has been merged or split or neither, mlock_fixup() will call
-__mlock_vma_pages_range() to fault in the pages via get_user_pages() and
-to mark the pages as mlocked via mlock_vma_page().
+If the VMA passes some filtering as described in "Filtering Special Vmas"
+below, mlock_fixup() will attempt to merge the VMA with its neighbors or split
+off a subset of the VMA if the range does not cover the entire VMA. Once the
+VMA has been merged or split or neither, mlock_fixup() will call
+__mlock_vma_pages_range() to fault in the pages via get_user_pages() and to
+mark the pages as mlocked via mlock_vma_page().
 
-Note that the vma being mlocked might be mapped with PROT_NONE. In this case,
-get_user_pages() will be unable to fault in the pages. That's OK. If pages
-do end up getting faulted into this VM_LOCKED vma, we'll handle them in the
-fault path or in vmscan.
+Note that the VMA being mlocked might be mapped with PROT_NONE. In this case,
+get_user_pages() will be unable to fault in the pages. That's okay. If pages
+do end up getting faulted into this VM_LOCKED VMA, we'll handle them in the
+fault path or in vmscan.
 
 Also note that a page returned by get_user_pages() could be truncated or
-migrated out from under us, while we're trying to mlock it. To detect
-this, __mlock_vma_pages_range() tests the page_mapping after acquiring
-the page lock. If the page is still associated with its mapping, we'll
-go ahead and call mlock_vma_page(). If the mapping is gone, we just
-unlock the page and move on. Worse case, this results in page mapped
-in a VM_LOCKED vma remaining on a normal LRU list without being
-PageMlocked(). Again, vmscan will detect and cull such pages.
+migrated out from under us, while we're trying to mlock it. To detect this,
+__mlock_vma_pages_range() checks page_mapping() after acquiring the page lock.
+If the page is still associated with its mapping, we'll go ahead and call
+mlock_vma_page(). If the mapping is gone, we just unlock the page and move on.
+In the worst case, this will result in a page mapped in a VM_LOCKED VMA
+remaining on a normal LRU list without being PageMlocked(). Again, vmscan will
+detect and cull such pages.
 
-mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will
-TestSetPageMlocked() for each page returned by get_user_pages(). We use
-TestSetPageMlocked() because the page might already be mlocked by another
-task/vma and we don't want to do extra work. We especially do not want to
-count an mlocked page more than once in the statistics. If the page was
-already mlocked, mlock_vma_page() is done.
+mlock_vma_page() will call TestSetPageMlocked() for each page returned by
+get_user_pages(). We use TestSetPageMlocked() because the page might already
+be mlocked by another task/VMA and we don't want to do extra work. We
+especially do not want to count an mlocked page more than once in the
+statistics. If the page was already mlocked, mlock_vma_page() need do nothing
+more.
 
 If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
 page from the LRU, as it is likely on the appropriate active or inactive list
-at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will
-putback the page--putback_lru_page()--which will notice that the page is now
-mlocked and divert the page to the zone's unevictable LRU list. If
+at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will put
+back the page - by calling putback_lru_page() - which will notice that the page
+is now mlocked and divert the page to the zone's unevictable list. If
 mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
-it later if/when it attempts to reclaim the page.
+it later if and when it attempts to reclaim the page.
 
 
-Mlocked Pages: Filtering Special Vmas
+FILTERING SPECIAL VMAS
+----------------------
 
-mlock_fixup() filters several classes of "special" vmas:
+mlock_fixup() filters several classes of "special" VMAs:
 
-1) vmas with VM_IO|VM_PFNMAP set are skipped entirely. The pages behind
+1) VMAs with VM_IO or VM_PFNMAP set are skipped entirely. The pages behind
    these mappings are inherently pinned, so we don't need to mark them as
-   mlocked. In any case, most of the pages have no struct page in which to
-   so mark the page. Because of this, get_user_pages() will fail for these
-   vmas, so there is no sense in attempting to visit them.
+   mlocked. In any case, most of the pages have no struct page in which to so
+   mark the page. Because of this, get_user_pages() will fail for these VMAs,
+   so there is no sense in attempting to visit them.
 
-2) vmas mapping hugetlbfs page are already effectively pinned into memory.
-   We don't need nor want to mlock() these pages. However, to preserve the
-   prior behavior of mlock()--before the unevictable/mlock changes--
-   mlock_fixup() will call make_pages_present() in the hugetlbfs vma range
-   to allocate the huge pages and populate the ptes.
+2) VMAs mapping hugetlbfs page are already effectively pinned into memory. We
+   neither need nor want to mlock() these pages. However, to preserve the
+   prior behavior of mlock() - before the unevictable/mlock changes -
+   mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to
+   allocate the huge pages and populate the ptes.
 
-3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
-   kernel pages, such as the vdso page, relay channel pages, etc. These pages
+3) VMAs with VM_DONTEXPAND or VM_RESERVED are generally userspace mappings of
+   kernel pages, such as the VDSO page, relay channel pages, etc. These pages
    are inherently unevictable and are not managed on the LRU lists.
-   mlock_fixup() treats these vmas the same as hugetlbfs vmas. It calls
+   mlock_fixup() treats these VMAs the same as hugetlbfs VMAs. It calls
    make_pages_present() to populate the ptes.
 
-Note that for all of these special vmas, mlock_fixup() does not set the
+Note that for all of these special VMAs, mlock_fixup() does not set the
 VM_LOCKED flag. Therefore, we won't have to deal with them later during
-munlock() or munmap()--for example, at task exit. Neither does mlock_fixup()
-account these vmas against the task's "locked_vm".
+munlock(), munmap() or task exit. Neither does mlock_fixup() account these
+VMAs against the task's "locked_vm".
 
271Mlocked Pages: Downgrading the Mmap Semaphore. 386
272 387munlock()/munlockall() SYSTEM CALL HANDLING
273mlock_fixup() must be called with the mmap semaphore held for write, because 388-------------------------------------------
274it may have to merge or split vmas. However, mlocking a large region of 389
275memory can take a long time--especially if vmscan must reclaim pages to 390The munlock() and munlockall() system calls are handled by the same functions -
276satisfy the regions requirements. Faulting in a large region with the mmap 391do_mlock[all]() - as the mlock() and mlockall() system calls with the unlock vs
277semaphore held for write can hold off other faults on the address space, in 392lock operation indicated by an argument. So, these system calls are also
278the case of a multi-threaded task. It can also hold off scans of the task's 393handled by mlock_fixup(). Again, if called for an already munlocked VMA,
279address space via /proc. While testing under heavy load, it was observed that 394mlock_fixup() simply returns. Because of the VMA filtering discussed above,
280the ps(1) command could be held off for many minutes while a large segment was 395VM_LOCKED will not be set in any "special" VMAs. So, these VMAs will be
281mlock()ed down.
282
283To address this issue, and to make the system more responsive during mlock()ing
284of large segments, mlock_fixup() downgrades the mmap semaphore to read mode
285during the call to __mlock_vma_pages_range(). This works fine. However, the
286callers of mlock_fixup() expect the semaphore to be returned in write mode.
287So, mlock_fixup() "upgrades" the semphore to write mode. Linux does not
288support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore
289and reacquire it in write mode. In a multi-threaded task, it is possible for
290the task memory map to change while the semaphore is dropped. Therefore,
291mlock_fixup() looks up the vma at the range start address after reacquiring
292the semaphore in write mode and verifies that it still covers the original
293range. If not, mlock_fixup() returns an error [-EAGAIN]. All callers of
294mlock_fixup() have been changed to deal with this new error condition.
295
296Note: when munlocking a region, all of the pages should already be resident--
297unless we have racing threads mlocking() and munlocking() regions. So,
298unlocking should not have to wait for page allocations nor faults of any kind.
299Therefore mlock_fixup() does not downgrade the semaphore for munlock().
300
301
302Mlocked Pages: munlock()/munlockall() System Call Handling
303
304The munlock() and munlockall() system calls are handled by the same functions--
305do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock
306vs lock operation indicated by an argument. So, these system calls are also
307handled by mlock_fixup(). Again, if called for an already munlock()ed vma,
308mlock_fixup() simply returns. Because of the vma filtering discussed above,
309VM_LOCKED will not be set in any "special" vmas. So, these vmas will be
310ignored for munlock. 396ignored for munlock.
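
For reference, the entry path is small; a sketch of the munlock() system call
(after mm/mlock.c, slightly simplified) shows the shared helper being invoked
with the lock argument cleared:

    SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
    {
        int ret;

        down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;
        ret = do_mlock(start, len, 0);  /* on == 0: unlock */
        up_write(&current->mm->mmap_sem);
        return ret;
    }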

If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the
specified range. The range is then munlocked via the function
__mlock_vma_pages_range() - the same function used to mlock a VMA range -
passing a flag to indicate that munlock() is being performed.

Because the VMA access protections could have been changed to PROT_NONE after
faulting in and mlocking pages, get_user_pages() was unreliable for visiting
these pages for munlocking. Because we don't want to leave pages mlocked,
get_user_pages() was enhanced to accept a flag to ignore the permissions when
fetching the pages - all of which should be resident as a result of previous
mlocking.

For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling
munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
flag using TestClearPageMlocked(). As with mlock_vma_page(),
munlock_vma_page() uses the Test*PageMlocked() function to handle the case
where the page might have already been unlocked by another task. If the page
was mlocked, munlock_vma_page() updates the zone statistics for the number of
mlocked pages. Note, however, that at this point we haven't checked whether
the page is mapped by other VM_LOCKED VMAs.

We can't call try_to_munlock(), the function that walks the reverse map to
check for other VM_LOCKED VMAs, without first isolating the page from the LRU.
try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
not be on an LRU list [more on these below]. However, the call to
isolate_lru_page() could fail, in which case we couldn't try_to_munlock(). So,
we go ahead and clear PG_mlocked up front, as this might be the only chance we
have. If we can successfully isolate the page, we go ahead and
try_to_munlock(), which will restore the PG_mlocked flag and update the zone
page statistics if it finds another VMA holding the page mlocked. If we fail
to isolate the page, we'll have left a potentially mlocked page on the LRU.
This is fine, because we'll catch it later if and when vmscan tries to reclaim
the page. This should be relatively rare.
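
A condensed sketch of munlock_vma_page() - simplified from mm/mlock.c, with
event counting and return handling trimmed - ties the above steps together:

    static void munlock_vma_page(struct page *page)
    {
        BUG_ON(!PageLocked(page));

        if (TestClearPageMlocked(page)) {
            dec_zone_page_state(page, NR_MLOCK);
            if (!isolate_lru_page(page)) {
                /* recheck the reverse map; this may re-mlock the
                 * page if another VM_LOCKED VMA still maps it */
                try_to_munlock(page);
                putback_lru_page(page);
            }
            /* else: page stays on the LRU; vmscan catches it later */
        }
    }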


MIGRATING MLOCKED PAGES
-----------------------

A page that is being migrated has been isolated from the LRU lists and is held
locked across unmapping of the page, updating the page's address space entry
and copying the contents and state, until the page table entry has been
replaced with an entry that refers to the new page. Linux supports migration
of mlocked pages and other unevictable pages. This involves simply moving the
PG_mlocked and PG_unevictable states from the old page to the new page.

Note that page migration can race with mlocking or munlocking of the same page.
This has been discussed from the mlock/munlock perspective in the respective
sections above. Both processes (migration and m[un]locking) hold the page
locked. This provides the first level of synchronization. Page migration
zeros out the page_mapping of the old page before unlocking it, so m[un]lock
can skip these pages by testing the page mapping under page lock.
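
A minimal sketch of that test - purely illustrative, and assuming, as both
paths do, that the caller holds the page lock:

    /* page is locked; migration zeroes page->mapping of the old page
     * before unlocking it, so a NULL mapping means "being migrated" */
    if (!page->mapping)
        return;         /* nothing for m[un]lock to do here */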

To complete page migration, we place the new and old pages back onto the LRU
after dropping the page lock. The "unneeded" page - old page on success, new
page on failure - will be freed when the reference count held by the migration
process is released. To ensure that we don't strand pages on the unevictable
list because of a race between munlock and migration, page migration uses the
putback_lru_page() function to add migrated pages back to the LRU.


mmap(MAP_LOCKED) SYSTEM CALL HANDLING
-------------------------------------

In addition to the mlock()/mlockall() system calls, an application can request
that a region of memory be mlocked by supplying the MAP_LOCKED flag to the
mmap() call. Furthermore, any mmap() call or brk() call that expands the heap
by a task that has previously called mlockall() with the MCL_FUTURE flag will
result in the newly mapped memory being mlocked. Before the unevictable/mlock
changes, the kernel simply called make_pages_present() to allocate pages and
populate the page table.

To mlock a range of memory under the unevictable/mlock infrastructure, the
mmap() handler and task address space expansion functions call
mlock_vma_pages_range() specifying the VMA and the address range to mlock.
mlock_vma_pages_range() filters VMAs like mlock_fixup(), as described above in
"Filtering Special VMAs". It will clear the VM_LOCKED flag, which will have
already been set by the caller, in filtered VMAs. Thus these VMAs need not be
visited for munlock when the region is unmapped.

For "normal" VMAs, mlock_vma_pages_range() calls __mlock_vma_pages_range() to
fault/allocate the pages and mlock them. Again, like mlock_fixup(),
mlock_vma_pages_range() downgrades the mmap semaphore to read mode before
attempting to fault/allocate and mlock the pages and "upgrades" the semaphore
back to write mode before returning.

The callers of mlock_vma_pages_range() will have already added the memory range
to be mlocked to the task's "locked_vm". To account for filtered VMAs,
mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the
callers then subtract a non-negative return value from the task's locked_vm. A
negative return value represents an error - for example, from get_user_pages()
attempting to fault in a VMA with PROT_NONE access. In this case, we leave the
memory range accounted as locked_vm, as the protections could be changed later
and pages allocated into that region.
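
The resulting caller pattern - sketched here with an invented local variable
name, and with error handling simplified - subtracts only a non-negative
return:

    mm->locked_vm += (end - start) >> PAGE_SHIFT;
    nr_not_mlocked = mlock_vma_pages_range(vma, start, end);
    if (nr_not_mlocked > 0)     /* filtered VMA: pages never mlocked */
        mm->locked_vm -= nr_not_mlocked;
    /* a negative return is an error; leave locked_vm as accounted */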


munmap()/exit()/exec() SYSTEM CALL HANDLING
-------------------------------------------

When unmapping an mlocked region of memory, whether by an explicit call to
munmap() or via an internal unmap from exit() or exec() processing, we must
munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages.
Before the unevictable/mlock changes, mlocking did not mark the pages in any
way, so unmapping them required no processing.

To munlock a range of memory under the unevictable/mlock infrastructure, the
munmap() handler and task address space tear down function call
munlock_vma_pages_all(). The name reflects the observation that one always
specifies the entire VMA range when munlock()ing during unmap of a region.
Because of the VMA filtering when mlocking regions, only "normal" VMAs that
actually contain mlocked pages will be passed to munlock_vma_pages_all().

munlock_vma_pages_all() clears the VM_LOCKED VMA flag and, like mlock_fixup()
for the munlock case, calls __munlock_vma_pages_range() to walk the page table
for the VMA's memory range and munlock_vma_page() each resident page mapped by
the VMA. This effectively munlocks the page, but only if this is the last
VM_LOCKED VMA that maps the page.
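
A rough sketch of this entry point - the kernel splits the real code between
mm/internal.h and mm/mlock.c - is simply:

    static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
    {
        vma->vm_flags &= ~VM_LOCKED;
        __munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
    }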


try_to_unmap()
--------------

Pages can, of course, be mapped into multiple VMAs. Some of these VMAs may
have the VM_LOCKED flag set. It is possible for a page mapped into one or more
VM_LOCKED VMAs not to have the PG_mlocked flag set and therefore reside on one
of the active or inactive LRU lists. This could happen if, for example, a task
in the process of munlocking the page could not isolate the page from the LRU.
As a result, vmscan/shrink_page_list() might encounter such a page as described
in section "vmscan's handling of unevictable pages". To handle this situation,
try_to_unmap() checks for VM_LOCKED VMAs while it is walking a page's reverse
map.

try_to_unmap() is always called, by either vmscan for reclaim or for page
migration, with the argument page locked and isolated from the LRU. Separate
functions handle anonymous and mapped file pages, as these types of pages have
different reverse map mechanisms.

 (*) try_to_unmap_anon()

     To unmap anonymous pages, each VMA in the list anchored in the anon_vma
     must be visited - at least until a VM_LOCKED VMA is encountered. If the
     page is being unmapped for migration, VM_LOCKED VMAs do not stop the
     process because mlocked pages are migratable. However, for reclaim, if
     the page is mapped into a VM_LOCKED VMA, the scan stops.

     try_to_unmap_anon() attempts to acquire in read mode the mmap semaphore of
     the mm_struct to which the VMA belongs. If this is successful, it will
     mlock the page via mlock_vma_page() - we wouldn't have gotten to
     try_to_unmap_anon() if the page were already mlocked - and will return
     SWAP_MLOCK, indicating that the page is unevictable.

     If the mmap semaphore cannot be acquired, we are not sure whether the page
     is really unevictable or not. In this case, try_to_unmap_anon() will
     return SWAP_AGAIN.

 (*) try_to_unmap_file() - linear mappings

     Unmapping of a mapped file page works the same as for anonymous mappings,
     except that the scan visits all VMAs that map the page's index/page offset
     in the page's mapping's reverse map priority search tree. It also visits
     each VMA in the page's mapping's non-linear list, if the list is
     non-empty.

     As for anonymous pages, on encountering a VM_LOCKED VMA for a mapped file
     page, try_to_unmap_file() will attempt to acquire the associated
     mm_struct's mmap semaphore to mlock the page, returning SWAP_MLOCK if this
     is successful, and SWAP_AGAIN, if not.

 (*) try_to_unmap_file() - non-linear mappings

     If a page's mapping contains a non-empty non-linear mapping VMA list, then
     try_to_un{map|lock}() must also visit each VMA in that list to determine
     whether the page is mapped in a VM_LOCKED VMA. Again, the scan must visit
     all VMAs in the non-linear list to ensure that the page is not/should not
     be mlocked.

     If a VM_LOCKED VMA is found in the list, the scan could terminate.
     However, there is no easy way to determine whether the page is actually
     mapped in a given VMA - either for unmapping or testing whether the
     VM_LOCKED VMA actually pins the page.

     try_to_unmap_file() handles non-linear mappings by scanning a certain
     number of pages - a "cluster" - in each non-linear VMA associated with the
     page's mapping, for each file mapped page that vmscan tries to unmap. If
     this happens to unmap the page we're trying to unmap, try_to_unmap() will
     notice this on return (page_mapcount(page) will be 0) and return
     SWAP_SUCCESS. Otherwise, it will return SWAP_AGAIN, causing vmscan to
     recirculate this page. We take advantage of the cluster scan in
     try_to_unmap_cluster() as follows:

     For each non-linear VMA, try_to_unmap_cluster() attempts to acquire the
     mmap semaphore of the associated mm_struct for read without blocking.

     If this attempt is successful and the VMA is VM_LOCKED,
     try_to_unmap_cluster() will retain the mmap semaphore for the scan;
     otherwise it drops it here.

     Then, for each page in the cluster, if we're holding the mmap semaphore
     for a locked VMA, try_to_unmap_cluster() calls mlock_vma_page() to
     mlock the page. This call is a no-op if the page is already locked,
     but will mlock any pages in the non-linear mapping that happen to be
     unlocked.

     If one of the pages so mlocked is the page passed in to try_to_unmap(),
     try_to_unmap_cluster() will return SWAP_MLOCK, rather than the default
     SWAP_AGAIN. This will allow vmscan to cull the page, rather than
     recirculating it on the inactive list.

     Again, if try_to_unmap_cluster() cannot acquire the VMA's mmap sem, it
     returns SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED
     VMA, but couldn't be mlocked.

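The common shape of the VM_LOCKED check in these reverse map walkers -
simplified here, and not the literal code from mm/rmap.c - is roughly:

    static int try_to_mlock_page(struct page *page,
                                 struct vm_area_struct *vma)
    {
        int mlocked = 0;

        if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
            if (vma->vm_flags & VM_LOCKED) {
                mlock_vma_page(page);
                mlocked = 1;    /* really mlocked the page */
            }
            up_read(&vma->vm_mm->mmap_sem);
        }
        return mlocked; /* 0 means "couldn't tell": caller uses SWAP_AGAIN */
    }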

try_to_munlock() REVERSE MAP SCAN
---------------------------------

 [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the
     page_referenced() reverse map walker.

When munlock_vma_page() [see section "munlock()/munlockall() System Call
Handling" above] tries to munlock a page, it needs to determine whether or not
the page is mapped by any VM_LOCKED VMA without actually attempting to unmap
all PTEs from the page. For this purpose, the unevictable/mlock infrastructure
introduced a variant of try_to_unmap() called try_to_munlock().
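
The dispatch itself is tiny; a sketch of try_to_munlock() (after mm/rmap.c,
where the second argument selects munlock-only processing):

    int try_to_munlock(struct page *page)
    {
        VM_BUG_ON(!PageLocked(page) || PageLRU(page));

        if (PageAnon(page))
            return try_to_unmap_anon(page, 1);  /* unlock only */
        else
            return try_to_unmap_file(page, 1);  /* unlock only */
    }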

try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
mapped file pages with an additional argument specifying unlock versus unmap
processing. Again, these functions walk the respective reverse maps looking
for VM_LOCKED VMAs. When such a VMA is found for anonymous pages and file
pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
attempt to acquire the associated mmap semaphore, mlock the page via
mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the
pre-clearing of the page's PG_mlocked done by munlock_vma_page().

If try_to_unmap() is unable to acquire a VM_LOCKED VMA's associated mmap
semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list() to
recycle the page on the inactive list and hope that it has better luck with the
page next time.

For file pages mapped into non-linear VMAs, the try_to_munlock() logic works
slightly differently. On encountering a VM_LOCKED non-linear VMA that might
map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking the
page. munlock_vma_page() will just leave the page unlocked and let vmscan deal
with it - the usual fallback position.

Note that try_to_munlock()'s reverse map walk must visit every VMA in a page's
reverse map to determine that a page is NOT mapped into any VM_LOCKED VMA.
However, the scan can terminate when it encounters a VM_LOCKED VMA and can
successfully acquire the VMA's mmap semaphore for read and mlock the page.
Although try_to_munlock() might be called a great many times when munlocking a
large region or tearing down a large address space that has been mlocked via
mlockall(), overall this is a fairly rare event.


PAGE RECLAIM IN shrink_*_list()
-------------------------------

shrink_active_list() culls any obviously unevictable pages - i.e.
!page_evictable(page, NULL) - diverting these to the unevictable list.
However, shrink_active_list() only sees unevictable pages that made it onto the
active/inactive LRU lists. Note that these pages do not have PageUnevictable
set - otherwise they would be on the unevictable list and shrink_active_list()
would never see them.
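
The divert itself is a short test; sketched from the shrink_active_list()
scan loop (simplified):

    if (unlikely(!page_evictable(page, NULL))) {
        putback_lru_page(page); /* goes to the unevictable list */
        continue;
    }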

Some examples of these unevictable pages on the LRU lists are:

 (1) ramfs pages that have been placed on the LRU lists when first allocated.

 (2) SHM_LOCK'd shared memory pages. shmctl(SHM_LOCK) does not attempt to
     allocate or fault in the pages in the shared memory region. This happens
     when an application accesses the page the first time after SHM_LOCK'ing
     the segment.

 (3) mlocked pages that could not be isolated from the LRU and moved to the
     unevictable list in mlock_vma_page().

 (4) Pages mapped into multiple VM_LOCKED VMAs, but try_to_munlock() couldn't
     acquire the VMA's mmap semaphore to test the flags and set PageMlocked.
     munlock_vma_page() was forced to let the page back on to the normal LRU
     list for vmscan to handle.

shrink_inactive_list() also diverts any unevictable pages that it finds on the
inactive lists to the appropriate zone's unevictable list.

shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd
after shrink_active_list() had moved them to the inactive list, or pages mapped
into VM_LOCKED VMAs that munlock_vma_page() couldn't isolate from the LRU to
recheck via try_to_munlock(). shrink_inactive_list() won't notice the latter,
but will pass them on to shrink_page_list().

shrink_page_list() again culls obviously unevictable pages that it could
encounter for similar reasons to shrink_inactive_list(). Pages mapped into
VM_LOCKED VMAs but without PG_mlocked set will make it all the way to
try_to_unmap(). shrink_page_list() will divert them to the unevictable list
when try_to_unmap() returns SWAP_MLOCK, as discussed above.
diff --git a/MAINTAINERS b/MAINTAINERS
index 597cd0cc29ce..57969bf67b94 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -461,7 +461,7 @@ F: arch/x86/include/asm/amd_iommu*.h
 
 AMD MICROCODE UPDATE SUPPORT
 P:	Andreas Herrmann
-M:	andeas.herrmann3@amd.com
+M:	andreas.herrmann3@amd.com
 L:	amd64-microcode@amd64.org
 S:	Supported
 F:	arch/x86/kernel/microcode_amd.c
@@ -1894,7 +1894,7 @@ F: fs/ecryptfs/
 EDAC-CORE
 P:	Doug Thompson
 M:	dougthompson@xmission.com
-L:	bluesmoke-devel@lists.sourceforge.net
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
 S:	Supported
 F:	Documentation/edac.txt
@@ -1906,7 +1906,7 @@ P: Mark Gross
 P:	Doug Thompson
 M:	mark.gross@intel.com
 M:	dougthompson@xmission.com
-L:	bluesmoke-devel@lists.sourceforge.net
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
 S:	Maintained
 F:	drivers/edac/e752x_edac.c
@@ -1914,7 +1914,7 @@ F: drivers/edac/e752x_edac.c
 EDAC-E7XXX
 P:	Doug Thompson
 M:	dougthompson@xmission.com
-L:	bluesmoke-devel@lists.sourceforge.net
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
 S:	Maintained
 F:	drivers/edac/e7xxx_edac.c
@@ -1922,7 +1922,7 @@ F: drivers/edac/e7xxx_edac.c
 EDAC-I82443BXGX
 P:	Tim Small
 M:	tim@buttersideup.com
-L:	bluesmoke-devel@lists.sourceforge.net
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
 S:	Maintained
 F:	drivers/edac/i82443bxgx_edac.c
@@ -1930,7 +1930,7 @@ F: drivers/edac/i82443bxgx_edac.c
 EDAC-I3000
 P:	Jason Uhlenkott
 M:	juhlenko@akamai.com
-L:	bluesmoke-devel@lists.sourceforge.net
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
 S:	Maintained
 F:	drivers/edac/i3000_edac.c
@@ -1938,7 +1938,7 @@ F: drivers/edac/i3000_edac.c
 EDAC-I5000
 P:	Doug Thompson
 M:	dougthompson@xmission.com
-L:	bluesmoke-devel@lists.sourceforge.net
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
 S:	Maintained
 F:	drivers/edac/i5000_edac.c
@@ -1946,7 +1946,7 @@ F: drivers/edac/i5000_edac.c
 EDAC-I5400
 P:	Mauro Carvalho Chehab
 M:	mchehab@redhat.com
-L:	bluesmoke-devel@lists.sourceforge.net
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
 S:	Maintained
 F:	drivers/edac/i5400_edac.c
@@ -1956,7 +1956,7 @@ P: Ranganathan Desikan
 P:	Arvind R.
 M:	rdesikan@jetzbroadband.com
 M:	arvind@acarlab.com
-L:	bluesmoke-devel@lists.sourceforge.net
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
 S:	Maintained
 F:	drivers/edac/i82975x_edac.c
@@ -1964,7 +1964,7 @@ F: drivers/edac/i82975x_edac.c
 EDAC-PASEMI
 P:	Egor Martovetsky
 M:	egor@pasemi.com
-L:	bluesmoke-devel@lists.sourceforge.net
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
 S:	Maintained
 F:	drivers/edac/pasemi_edac.c
@@ -1972,7 +1972,7 @@ F: drivers/edac/pasemi_edac.c
 EDAC-R82600
 P:	Tim Small
 M:	tim@buttersideup.com
-L:	bluesmoke-devel@lists.sourceforge.net
+L:	bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:	bluesmoke.sourceforge.net
 S:	Maintained
 F:	drivers/edac/r82600_edac.c
@@ -2592,8 +2592,8 @@ S: Maintained
 F:	fs/hpfs/
 
 HSO 3G MODEM DRIVER
-P:	Denis Joseph Barrow
-M:	d.barow@option.com
+P:	Jan Dumon
+M:	j.dumon@option.com
 W:	http://www.pharscape.org
 S:	Maintained
 F:	drivers/net/usb/hso.c
@@ -4978,8 +4978,8 @@ S: Maintained for 2.6.
 F:	Documentation/sgi-visws.txt
 
 SGI XP/XPC/XPNET DRIVER
-P:	Dean Nelson
-M:	dcn@sgi.com
+P:	Robin Holt
+M:	holt@sgi.com
 S:	Maintained
 F:	drivers/misc/sgi-xp/
 
diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c
index fc74e913c415..34a56a136efd 100644
--- a/arch/arm/mach-omap2/usb-musb.c
+++ b/arch/arm/mach-omap2/usb-musb.c
@@ -131,14 +131,14 @@ static struct musb_hdrc_platform_data musb_plat = {
 	.power		= 50,			/* up to 100 mA */
 };
 
-static u64 musb_dmamask = DMA_32BIT_MASK;
+static u64 musb_dmamask = DMA_BIT_MASK(32);
 
 static struct platform_device musb_device = {
 	.name		= "musb_hdrc",
 	.id		= -1,
 	.dev = {
 		.dma_mask		= &musb_dmamask,
-		.coherent_dma_mask	= DMA_32BIT_MASK,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
 		.platform_data		= &musb_plat,
 	},
 	.num_resources	= ARRAY_SIZE(musb_resources),
@@ -146,14 +146,14 @@ static struct platform_device musb_device = {
 };
 
 #ifdef CONFIG_NOP_USB_XCEIV
-static u64 nop_xceiv_dmamask = DMA_32BIT_MASK;
+static u64 nop_xceiv_dmamask = DMA_BIT_MASK(32);
 
 static struct platform_device nop_xceiv_device = {
 	.name		= "nop_usb_xceiv",
 	.id		= -1,
 	.dev = {
 		.dma_mask		= &nop_xceiv_dmamask,
-		.coherent_dma_mask	= DMA_32BIT_MASK,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
 		.platform_data		= NULL,
 	},
 };
diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
index 573f02c39a00..285aae8431c6 100644
--- a/arch/ia64/kernel/pci-swiotlb.c
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -16,7 +16,7 @@ EXPORT_SYMBOL(swiotlb);
 static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size,
 					 dma_addr_t *dma_handle, gfp_t gfp)
 {
-	if (dev->coherent_dma_mask != DMA_64BIT_MASK)
+	if (dev->coherent_dma_mask != DMA_BIT_MASK(64))
 		gfp |= GFP_DMA;
 	return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
 }
diff --git a/arch/powerpc/include/asm/parport.h b/arch/powerpc/include/asm/parport.h
index 414c50e2e881..94942d60ddfd 100644
--- a/arch/powerpc/include/asm/parport.h
+++ b/arch/powerpc/include/asm/parport.h
@@ -29,7 +29,7 @@ static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma)
 		prop = of_get_property(np, "interrupts", NULL);
 		if (!prop)
 			continue;
-		if (parport_pc_probe_port(io1, io2, prop[0], autodma, NULL) != NULL)
+		if (parport_pc_probe_port(io1, io2, prop[0], autodma, NULL, 0) != NULL)
 			count++;
 	}
 	return count;
diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h
index dff3f0253aa8..ff9ead640c4a 100644
--- a/arch/sparc/include/asm/parport.h
+++ b/arch/sparc/include/asm/parport.h
@@ -117,7 +117,7 @@ static int __devinit ecpp_probe(struct of_device *op, const struct of_device_id
 	if (!strcmp(parent->name, "dma")) {
 		p = parport_pc_probe_port(base, base + 0x400,
 					  op->irqs[0], PARPORT_DMA_NOFIFO,
-					  op->dev.parent->parent);
+					  op->dev.parent->parent, 0);
 		if (!p)
 			return -ENOMEM;
 		dev_set_drvdata(&op->dev, p);
@@ -168,7 +168,8 @@ static int __devinit ecpp_probe(struct of_device *op, const struct of_device_id
 	p = parport_pc_probe_port(base, base + 0x400,
 				  op->irqs[0],
 				  slot,
-				  op->dev.parent);
+				  op->dev.parent,
+				  0);
 	err = -ENOMEM;
 	if (!p)
 		goto out_disable_irq;
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index d5cd6c586881..a4737dddfd58 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -50,7 +50,7 @@
 #ifdef CONFIG_X86_64
 #define NEED_PSE	0
 #define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
-#define NEED_PGE	(1<<(X86_FEATURE_PGE & 31))
+#define NEED_PGE	0
 #define NEED_FXSR	(1<<(X86_FEATURE_FXSR & 31))
 #define NEED_XMM	(1<<(X86_FEATURE_XMM & 31))
 #define NEED_XMM2	(1<<(X86_FEATURE_XMM2 & 31))
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 1a918dde46b5..018a0a400799 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
 
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
-#define virt_to_mfn(v)		(pfn_to_mfn(PFN_DOWN(__pa(v))))
+#define virt_to_pfn(v)		(PFN_DOWN(__pa(v)))
+#define virt_to_mfn(v)		(pfn_to_mfn(virt_to_pfn(v)))
 #define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
 static inline unsigned long pte_mfn(pte_t pte)
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 3e3cd3db7a0c..837c2c4cc203 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -277,7 +277,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
 	unsigned int perf_percent;
 	unsigned int retval;
 
-	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &cur, 1))
+	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1))
 		return 0;
 
 	cur.aperf.whole = readin.aperf.whole -
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 82cd39a6cbd3..f09e8c36ee80 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -42,6 +42,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>
+#include <asm/proto.h>
 #include <asm/msr-index.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
@@ -168,21 +169,23 @@ static void __init xen_banner(void)
 	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 }
 
+static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
+static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
+
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 		      unsigned int *cx, unsigned int *dx)
 {
+	unsigned maskecx = ~0;
 	unsigned maskedx = ~0;
 
 	/*
 	 * Mask out inconvenient features, to try and disable as many
 	 * unsupported kernel subsystems as possible.
 	 */
-	if (*ax == 1)
-		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
-			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
-			    (1 << X86_FEATURE_MCE)  |  /* disable MCE */
-			    (1 << X86_FEATURE_MCA)  |  /* disable MCA */
-			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+	if (*ax == 1) {
+		maskecx = cpuid_leaf1_ecx_mask;
+		maskedx = cpuid_leaf1_edx_mask;
+	}
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
 		: "=a" (*ax),
@@ -190,9 +193,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 		  "=c" (*cx),
 		  "=d" (*dx)
 		: "0" (*ax), "2" (*cx));
+
+	*cx &= maskecx;
 	*dx &= maskedx;
 }
 
+static __init void xen_init_cpuid_mask(void)
+{
+	unsigned int ax, bx, cx, dx;
+
+	cpuid_leaf1_edx_mask =
+		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
+		  (1 << X86_FEATURE_MCA)  |  /* disable MCA */
+		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+
+	if (!xen_initial_domain())
+		cpuid_leaf1_edx_mask &=
+			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
+			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
+
+	ax = 1;
+	xen_cpuid(&ax, &bx, &cx, &dx);
+
+	/* cpuid claims we support xsave; try enabling it to see what happens */
+	if (cx & (1 << (X86_FEATURE_XSAVE % 32))) {
+		unsigned long cr4;
+
+		set_in_cr4(X86_CR4_OSXSAVE);
+
+		cr4 = read_cr4();
+
+		if ((cr4 & X86_CR4_OSXSAVE) == 0)
+			cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
+
+		clear_in_cr4(X86_CR4_OSXSAVE);
+	}
+}
+
 static void xen_set_debugreg(int reg, unsigned long val)
 {
 	HYPERVISOR_set_debugreg(reg, val);
@@ -284,12 +321,11 @@ static void xen_set_ldt(const void *addr, unsigned entries)
 
 static void xen_load_gdt(const struct desc_ptr *dtr)
 {
-	unsigned long *frames;
 	unsigned long va = dtr->address;
 	unsigned int size = dtr->size + 1;
 	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	unsigned long frames[pages];
 	int f;
-	struct multicall_space mcs;
 
 	/* A GDT can be up to 64k in size, which corresponds to 8192
 	   8-byte entries, or 16 4k pages.. */
@@ -297,19 +333,26 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 	BUG_ON(size > 65536);
 	BUG_ON(va & ~PAGE_MASK);
 
-	mcs = xen_mc_entry(sizeof(*frames) * pages);
-	frames = mcs.args;
-
 	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
-		frames[f] = arbitrary_virt_to_mfn((void *)va);
+		int level;
+		pte_t *ptep = lookup_address(va, &level);
+		unsigned long pfn, mfn;
+		void *virt;
+
+		BUG_ON(ptep == NULL);
+
+		pfn = pte_pfn(*ptep);
+		mfn = pfn_to_mfn(pfn);
+		virt = __va(PFN_PHYS(pfn));
+
+		frames[f] = mfn;
 
 		make_lowmem_page_readonly((void *)va);
-		make_lowmem_page_readonly(mfn_to_virt(frames[f]));
+		make_lowmem_page_readonly(virt);
 	}
 
-	MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
-
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
+	if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+		BUG();
 }
 
 static void load_TLS_descriptor(struct thread_struct *t,
@@ -385,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
 {
-	if (val->type != 0xf && val->type != 0xe)
+	if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
 		return 0;
 
 	info->vector = vector;
@@ -393,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 	info->cs = gate_segment(*val);
 	info->flags = val->dpl;
 	/* interrupt gates clear IF */
-	if (val->type == 0xe)
-		info->flags |= 4;
+	if (val->type == GATE_INTERRUPT)
+		info->flags |= 1 << 2;
 
 	return 1;
 }
@@ -872,7 +915,6 @@ static const struct machine_ops __initdata xen_machine_ops = {
 	.emergency_restart = xen_emergency_restart,
 };
 
-
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -897,6 +939,8 @@ asmlinkage void __init xen_start_kernel(void)
 
 	xen_init_irq_ops();
 
+	xen_init_cpuid_mask();
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	/*
 	 * set up the basic apic ops.
@@ -938,6 +982,11 @@ asmlinkage void __init xen_start_kernel(void)
 	if (!xen_initial_domain())
 		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
+#ifdef CONFIG_X86_64
+	/* Work out if we support NX */
+	check_efer();
+#endif
+
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2a81838a9ab7..9842b1212407 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn)
184} 184}
185 185
186/* Build the parallel p2m_top_mfn structures */ 186/* Build the parallel p2m_top_mfn structures */
187void xen_setup_mfn_list_list(void) 187static void __init xen_build_mfn_list_list(void)
188{ 188{
189 unsigned pfn, idx; 189 unsigned pfn, idx;
190 190
@@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void)
198 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; 198 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
199 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); 199 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
200 } 200 }
201}
201 202
203void xen_setup_mfn_list_list(void)
204{
202 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); 205 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
203 206
204 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = 207 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void)
218 221
219 p2m_top[topidx] = &mfn_list[pfn]; 222 p2m_top[topidx] = &mfn_list[pfn];
220 } 223 }
224
225 xen_build_mfn_list_list();
221} 226}
222 227
223unsigned long get_phys_to_machine(unsigned long pfn) 228unsigned long get_phys_to_machine(unsigned long pfn)
@@ -233,47 +238,74 @@ unsigned long get_phys_to_machine(unsigned long pfn)
233} 238}
234EXPORT_SYMBOL_GPL(get_phys_to_machine); 239EXPORT_SYMBOL_GPL(get_phys_to_machine);
235 240
236static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) 241/* install a new p2m_top page */
242bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
237{ 243{
238 unsigned long *p; 244 unsigned topidx = p2m_top_index(pfn);
245 unsigned long **pfnp, *mfnp;
239 unsigned i; 246 unsigned i;
240 247
241 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); 248 pfnp = &p2m_top[topidx];
242 BUG_ON(p == NULL); 249 mfnp = &p2m_top_mfn[topidx];
243 250
244 for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) 251 for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
245 p[i] = INVALID_P2M_ENTRY; 252 p[i] = INVALID_P2M_ENTRY;
246 253
247 if (cmpxchg(pp, p2m_missing, p) != p2m_missing) 254 if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
248 free_page((unsigned long)p);
249 else
250 *mfnp = virt_to_mfn(p); 255 *mfnp = virt_to_mfn(p);
256 return true;
257 }
258
259 return false;
251} 260}
252 261
253void set_phys_to_machine(unsigned long pfn, unsigned long mfn) 262static void alloc_p2m(unsigned long pfn)
254{ 263{
255 unsigned topidx, idx; 264 unsigned long *p;
256 265
257 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { 266 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
258 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); 267 BUG_ON(p == NULL);
259 return; 268
260 } 269 if (!install_p2mtop_page(pfn, p))
270 free_page((unsigned long)p);
271}
272
273/* Try to install p2m mapping; fail if intermediate bits missing */
274bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
275{
276 unsigned topidx, idx;
261 277
262 if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { 278 if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
263 BUG_ON(mfn != INVALID_P2M_ENTRY); 279 BUG_ON(mfn != INVALID_P2M_ENTRY);
264 return; 280 return true;
265 } 281 }
266 282
267 topidx = p2m_top_index(pfn); 283 topidx = p2m_top_index(pfn);
268 if (p2m_top[topidx] == p2m_missing) { 284 if (p2m_top[topidx] == p2m_missing) {
269 /* no need to allocate a page to store an invalid entry */
270 if (mfn == INVALID_P2M_ENTRY) 285 if (mfn == INVALID_P2M_ENTRY)
271 return; 286 return true;
272 alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); 287 return false;
273 } 288 }
274 289
275 idx = p2m_index(pfn); 290 idx = p2m_index(pfn);
276 p2m_top[topidx][idx] = mfn; 291 p2m_top[topidx][idx] = mfn;
292
293 return true;
294}
295
296void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
297{
298 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
299 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
300 return;
301 }
302
303 if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
304 alloc_p2m(pfn);
305
306 if (!__set_phys_to_machine(pfn, mfn))
307 BUG();
308 }
277} 309}
278 310
279unsigned long arbitrary_virt_to_mfn(void *vaddr) 311unsigned long arbitrary_virt_to_mfn(void *vaddr)
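The refactoring above splits the old alloc-on-write path in two: __set_phys_to_machine() is a fast store that fails if the leaf page is absent, and set_phys_to_machine() allocates a leaf and retries on failure. The leaf is published with cmpxchg() against the p2m_missing sentinel and freed if another CPU won the race. A standalone sketch of that install-or-free idiom (hypothetical table; GCC builtins stand in for the kernel's cmpxchg(), NULL stands in for p2m_missing):

    #include <stdlib.h>

    #define ENTRIES 1024

    static long *top[ENTRIES];      /* NULL plays the role of p2m_missing */

    static void install_leaf(unsigned idx)
    {
            long *p = calloc(ENTRIES, sizeof(*p));

            /* Publish the leaf only if the slot is still unpopulated;
             * if another thread raced ahead, ours is surplus - free it. */
            if (!__sync_bool_compare_and_swap(&top[idx], (long *)0, p))
                    free(p);
    }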
@@ -987,7 +1019,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
987 return 0; 1019 return 0;
988} 1020}
989 1021
990void __init xen_mark_init_mm_pinned(void) 1022static void __init xen_mark_init_mm_pinned(void)
991{ 1023{
992 xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); 1024 xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
993} 1025}
@@ -1270,8 +1302,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
1270 } *args; 1302 } *args;
1271 struct multicall_space mcs; 1303 struct multicall_space mcs;
1272 1304
1273 BUG_ON(cpumask_empty(cpus)); 1305 if (cpumask_empty(cpus))
1274 BUG_ON(!mm); 1306 return; /* nothing to do */
1275 1307
1276 mcs = xen_mc_entry(sizeof(*args)); 1308 mcs = xen_mc_entry(sizeof(*args));
1277 args = mcs.args; 1309 args = mcs.args;
@@ -1438,6 +1470,15 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
1438} 1470}
1439#endif 1471#endif
1440 1472
1473static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1474{
1475 struct mmuext_op op;
1476 op.cmd = cmd;
1477 op.arg1.mfn = pfn_to_mfn(pfn);
1478 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
1479 BUG();
1480}
1481
1441/* Early in boot, while setting up the initial pagetable, assume 1482/* Early in boot, while setting up the initial pagetable, assume
1442 everything is pinned. */ 1483 everything is pinned. */
1443static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) 1484static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
@@ -1446,22 +1487,29 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1446 BUG_ON(mem_map); /* should only be used early */ 1487 BUG_ON(mem_map); /* should only be used early */
1447#endif 1488#endif
1448 make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); 1489 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1490 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1491}
1492
1493/* Used for pmd and pud */
1494static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1495{
1496#ifdef CONFIG_FLATMEM
1497 BUG_ON(mem_map); /* should only be used early */
1498#endif
1499 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1449} 1500}
1450 1501
1451/* Early release_pte assumes that all pts are pinned, since there's 1502/* Early release_pte assumes that all pts are pinned, since there's
1452 only init_mm and anything attached to that is pinned. */ 1503 only init_mm and anything attached to that is pinned. */
1453static void xen_release_pte_init(unsigned long pfn) 1504static __init void xen_release_pte_init(unsigned long pfn)
1454{ 1505{
1506 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1455 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 1507 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1456} 1508}
1457 1509
1458static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) 1510static __init void xen_release_pmd_init(unsigned long pfn)
1459{ 1511{
1460 struct mmuext_op op; 1512 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1461 op.cmd = cmd;
1462 op.arg1.mfn = pfn_to_mfn(pfn);
1463 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
1464 BUG();
1465} 1513}
1466 1514
1467/* This needs to make sure the new pte page is pinned iff its being 1515/* This needs to make sure the new pte page is pinned iff its being
@@ -1773,6 +1821,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1773#ifdef CONFIG_X86_LOCAL_APIC 1821#ifdef CONFIG_X86_LOCAL_APIC
1774 case FIX_APIC_BASE: /* maps dummy local APIC */ 1822 case FIX_APIC_BASE: /* maps dummy local APIC */
1775#endif 1823#endif
1824 case FIX_TEXT_POKE0:
1825 case FIX_TEXT_POKE1:
1826 /* All local page mappings */
1776 pte = pfn_pte(phys, prot); 1827 pte = pfn_pte(phys, prot);
1777 break; 1828 break;
1778 1829
@@ -1819,7 +1870,6 @@ __init void xen_post_allocator_init(void)
1819 xen_mark_init_mm_pinned(); 1870 xen_mark_init_mm_pinned();
1820} 1871}
1821 1872
1822
1823const struct pv_mmu_ops xen_mmu_ops __initdata = { 1873const struct pv_mmu_ops xen_mmu_ops __initdata = {
1824 .pagetable_setup_start = xen_pagetable_setup_start, 1874 .pagetable_setup_start = xen_pagetable_setup_start,
1825 .pagetable_setup_done = xen_pagetable_setup_done, 1875 .pagetable_setup_done = xen_pagetable_setup_done,
@@ -1843,9 +1893,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
1843 1893
1844 .alloc_pte = xen_alloc_pte_init, 1894 .alloc_pte = xen_alloc_pte_init,
1845 .release_pte = xen_release_pte_init, 1895 .release_pte = xen_release_pte_init,
1846 .alloc_pmd = xen_alloc_pte_init, 1896 .alloc_pmd = xen_alloc_pmd_init,
1847 .alloc_pmd_clone = paravirt_nop, 1897 .alloc_pmd_clone = paravirt_nop,
1848 .release_pmd = xen_release_pte_init, 1898 .release_pmd = xen_release_pmd_init,
1849 1899
1850#ifdef CONFIG_HIGHPTE 1900#ifdef CONFIG_HIGHPTE
1851 .kmap_atomic_pte = xen_kmap_atomic_pte, 1901 .kmap_atomic_pte = xen_kmap_atomic_pte,
@@ -1883,8 +1933,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
1883 .make_pud = PV_CALLEE_SAVE(xen_make_pud), 1933 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
1884 .set_pgd = xen_set_pgd_hyper, 1934 .set_pgd = xen_set_pgd_hyper,
1885 1935
1886 .alloc_pud = xen_alloc_pte_init, 1936 .alloc_pud = xen_alloc_pmd_init,
1887 .release_pud = xen_release_pte_init, 1937 .release_pud = xen_release_pmd_init,
1888#endif /* PAGETABLE_LEVELS == 4 */ 1938#endif /* PAGETABLE_LEVELS == 4 */
1889 1939
1890 .activate_mm = xen_activate_mm, 1940 .activate_mm = xen_activate_mm,
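xen_mmu_ops wires up the __init variants; once the real allocator is running, xen_post_allocator_init() (seen earlier in this diff) repoints the table at the runtime hooks. A sketch of that switch-over, with the runtime hook names being assumptions rather than quotes from this patch:

    static void xen_switch_to_runtime_ops(void)
    {
            /* replace the early-boot helpers installed in xen_mmu_ops */
            pv_mmu_ops.alloc_pte   = xen_alloc_pte;     /* assumed name */
            pv_mmu_ops.release_pte = xen_release_pte;   /* assumed name */
            pv_mmu_ops.alloc_pmd   = xen_alloc_pmd;     /* assumed name */
            pv_mmu_ops.release_pmd = xen_release_pmd;   /* assumed name */
    }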
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 24d1b44a337d..da7302624897 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -11,6 +11,9 @@ enum pt_level {
11}; 11};
12 12
13 13
14bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
15bool install_p2mtop_page(unsigned long pfn, unsigned long *p);
16
14void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); 17void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
15 18
16 19
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 585a6e330837..429834ec1687 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
317 BUG_ON(rc); 317 BUG_ON(rc);
318 318
319 while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { 319 while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
320 HYPERVISOR_sched_op(SCHEDOP_yield, 0); 320 HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
321 barrier(); 321 barrier();
322 } 322 }
323 323
@@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
422 /* Make sure other vcpus get a chance to run if they need to. */ 422 /* Make sure other vcpus get a chance to run if they need to. */
423 for_each_cpu(cpu, mask) { 423 for_each_cpu(cpu, mask) {
424 if (xen_vcpu_stolen(cpu)) { 424 if (xen_vcpu_stolen(cpu)) {
425 HYPERVISOR_sched_op(SCHEDOP_yield, 0); 425 HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
426 break; 426 break;
427 } 427 }
428 } 428 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f5ef2632ea2..20139464943c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -57,8 +57,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
57 57
58bool xen_vcpu_stolen(int vcpu); 58bool xen_vcpu_stolen(int vcpu);
59 59
60void xen_mark_init_mm_pinned(void);
61
62void xen_setup_vcpu_info_placement(void); 60void xen_setup_vcpu_info_placement(void);
63 61
64#ifdef CONFIG_SMP 62#ifdef CONFIG_SMP
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index be204308cc1b..9359613addc5 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -1059,7 +1059,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id)
1059 goto out; 1059 goto out;
1060 } 1060 }
1061 1061
1062 err = pci_set_dma_mask(dev, DMA_32BIT_MASK); 1062 err = pci_set_dma_mask(dev, DMA_BIT_MASK(32));
1063 if (err) { 1063 if (err) {
1064 dev_warn(&dev->dev, "Failed to set 32-bit DMA mask\n"); 1064 dev_warn(&dev->dev, "Failed to set 32-bit DMA mask\n");
1065 goto out; 1065 goto out;
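DMA_32BIT_MASK was being retired in favour of DMA_BIT_MASK(n), which derives the mask from the bit count. The macro, as defined in linux/dma-mapping.h of this era:

    #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

    /* DMA_BIT_MASK(32) == 0x00000000ffffffffULL; the special case for
     * n == 64 avoids the undefined behaviour of a 64-bit shift by 64. */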
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 0ef6f08aa6ea..4d4d5e0d3fa6 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3505,7 +3505,7 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u
3505 /* The Inbound Post Queue only accepts 32-bit physical addresses for the 3505 /* The Inbound Post Queue only accepts 32-bit physical addresses for the
3506 CCISS commands, so they must be allocated from the lower 4GiB of 3506 CCISS commands, so they must be allocated from the lower 4GiB of
3507 memory. */ 3507 memory. */
3508 err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); 3508 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3509 if (err) { 3509 if (err) {
3510 iounmap(vaddr); 3510 iounmap(vaddr);
3511 return -ENOMEM; 3511 return -ENOMEM;
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 9d9490e22e07..3686912427ba 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -2131,6 +2131,8 @@ static const struct intel_driver_description {
2131 { PCI_DEVICE_ID_INTEL_82845G_HB, PCI_DEVICE_ID_INTEL_82845G_IG, 0, "830M", 2131 { PCI_DEVICE_ID_INTEL_82845G_HB, PCI_DEVICE_ID_INTEL_82845G_IG, 0, "830M",
2132 &intel_845_driver, &intel_830_driver }, 2132 &intel_845_driver, &intel_830_driver },
2133 { PCI_DEVICE_ID_INTEL_82850_HB, 0, 0, "i850", &intel_850_driver, NULL }, 2133 { PCI_DEVICE_ID_INTEL_82850_HB, 0, 0, "i850", &intel_850_driver, NULL },
2134 { PCI_DEVICE_ID_INTEL_82854_HB, PCI_DEVICE_ID_INTEL_82854_IG, 0, "854",
2135 &intel_845_driver, &intel_830_driver },
2134 { PCI_DEVICE_ID_INTEL_82855PM_HB, 0, 0, "855PM", &intel_845_driver, NULL }, 2136 { PCI_DEVICE_ID_INTEL_82855PM_HB, 0, 0, "855PM", &intel_845_driver, NULL },
2135 { PCI_DEVICE_ID_INTEL_82855GM_HB, PCI_DEVICE_ID_INTEL_82855GM_IG, 0, "855GM", 2137 { PCI_DEVICE_ID_INTEL_82855GM_HB, PCI_DEVICE_ID_INTEL_82855GM_IG, 0, "855GM",
2136 &intel_845_driver, &intel_830_driver }, 2138 &intel_845_driver, &intel_830_driver },
@@ -2355,6 +2357,7 @@ static struct pci_device_id agp_intel_pci_table[] = {
2355 ID(PCI_DEVICE_ID_INTEL_82845_HB), 2357 ID(PCI_DEVICE_ID_INTEL_82845_HB),
2356 ID(PCI_DEVICE_ID_INTEL_82845G_HB), 2358 ID(PCI_DEVICE_ID_INTEL_82845G_HB),
2357 ID(PCI_DEVICE_ID_INTEL_82850_HB), 2359 ID(PCI_DEVICE_ID_INTEL_82850_HB),
2360 ID(PCI_DEVICE_ID_INTEL_82854_HB),
2358 ID(PCI_DEVICE_ID_INTEL_82855PM_HB), 2361 ID(PCI_DEVICE_ID_INTEL_82855PM_HB),
2359 ID(PCI_DEVICE_ID_INTEL_82855GM_HB), 2362 ID(PCI_DEVICE_ID_INTEL_82855GM_HB),
2360 ID(PCI_DEVICE_ID_INTEL_82860_HB), 2363 ID(PCI_DEVICE_ID_INTEL_82860_HB),
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 6de020d078e1..b0a6a3e51924 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -35,7 +35,6 @@
35#include <linux/vt_kern.h> 35#include <linux/vt_kern.h>
36#include <linux/workqueue.h> 36#include <linux/workqueue.h>
37#include <linux/kexec.h> 37#include <linux/kexec.h>
38#include <linux/interrupt.h>
39#include <linux/hrtimer.h> 38#include <linux/hrtimer.h>
40#include <linux/oom.h> 39#include <linux/oom.h>
41 40
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 28f2c3f959b5..6ad95c8d6363 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -767,11 +767,19 @@ static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
767 pci_write_config_word(pdev, offset, value); 767 pci_write_config_word(pdev, offset, value);
768} 768}
769 769
770/* write all or some bits in a dword-register*/ 770/*
771 * pci_write_bits32
772 *
773 * edac local routine to do pci_write_config_dword, but adds
774 * a mask parameter. If mask is all ones, ignore the mask.
775 * Otherwise utilize the mask to isolate specified bits
776 *
777 * write all or some bits in a dword-register
778 */
771static inline void pci_write_bits32(struct pci_dev *pdev, int offset, 779static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
772 u32 value, u32 mask) 780 u32 value, u32 mask)
773{ 781{
774 if (mask != 0xffff) { 782 if (mask != 0xffffffff) {
775 u32 buf; 783 u32 buf;
776 784
777 pci_read_config_dword(pdev, offset, &buf); 785 pci_read_config_dword(pdev, offset, &buf);
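The fix matters in both directions: before it, a caller passing mask 0x0000ffff skipped the read-modify-write entirely (because mask == 0xffff) and clobbered the upper 16 bits of the dword. A standalone sketch of the corrected logic, with a plain variable standing in for the PCI config access:

    #include <stdint.h>

    static uint32_t reg;    /* stands in for the PCI config dword */

    static void write_bits32(uint32_t value, uint32_t mask)
    {
            if (mask != 0xffffffff) {       /* partial write: keep other bits */
                    uint32_t buf = reg;     /* pci_read_config_dword() */

                    value &= mask;
                    buf &= ~mask;
                    value |= buf;
            }
            reg = value;                    /* pci_write_config_dword() */
    }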
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index ca9113e1c106..a7d2c717d033 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -389,7 +389,7 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info
389 */ 389 */
390static void edac_device_workq_function(struct work_struct *work_req) 390static void edac_device_workq_function(struct work_struct *work_req)
391{ 391{
392 struct delayed_work *d_work = (struct delayed_work *)work_req; 392 struct delayed_work *d_work = to_delayed_work(work_req);
393 struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work); 393 struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work);
394 394
395 mutex_lock(&device_ctls_mutex); 395 mutex_lock(&device_ctls_mutex);
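The open-coded cast only worked because the work_struct happens to be the first member of struct delayed_work; to_delayed_work() expresses the same conversion via container_of(), which stays correct regardless of member position and is type-checked. The helper, as added to linux/workqueue.h in this timeframe:

    static inline struct delayed_work *to_delayed_work(struct work_struct *work)
    {
            return container_of(work, struct delayed_work, work);
    }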
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 25d66940b4fa..335b7ebdb11c 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -260,7 +260,7 @@ static int edac_mc_assert_error_check_and_clear(void)
260 */ 260 */
261static void edac_mc_workq_function(struct work_struct *work_req) 261static void edac_mc_workq_function(struct work_struct *work_req)
262{ 262{
263 struct delayed_work *d_work = (struct delayed_work *)work_req; 263 struct delayed_work *d_work = to_delayed_work(work_req);
264 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); 264 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
265 265
266 mutex_lock(&mem_ctls_mutex); 266 mutex_lock(&mem_ctls_mutex);
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index 5b150aea703a..30b585b1d60b 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(edac_pci_find);
233 */ 233 */
234static void edac_pci_workq_function(struct work_struct *work_req) 234static void edac_pci_workq_function(struct work_struct *work_req)
235{ 235{
236 struct delayed_work *d_work = (struct delayed_work *)work_req; 236 struct delayed_work *d_work = to_delayed_work(work_req);
237 struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work); 237 struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work);
238 int msec; 238 int msec;
239 unsigned long delay; 239 unsigned long delay;
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 0e8a9185f676..d73f5f473e38 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -692,6 +692,16 @@ config SENSORS_PCF8591
692 These devices are hard to detect and rarely found on mainstream 692 These devices are hard to detect and rarely found on mainstream
693 hardware. If unsure, say N. 693 hardware. If unsure, say N.
694 694
695config SENSORS_SHT15
696	tristate "Sensirion humidity and temperature sensors. SHT15 and compat."
697 depends on GENERIC_GPIO
698 help
699	  If you say yes here you get support for the Sensirion SHT10, SHT11,
700 SHT15, SHT71, SHT75 humidity and temperature sensors.
701
702 This driver can also be built as a module. If so, the module
703 will be called sht15.
704
695config SENSORS_SIS5595 705config SENSORS_SIS5595
696 tristate "Silicon Integrated Systems Corp. SiS5595" 706 tristate "Silicon Integrated Systems Corp. SiS5595"
697 depends on PCI 707 depends on PCI
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 1d3757837b4f..0ae26984ba45 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_SENSORS_MAX6650) += max6650.o
76obj-$(CONFIG_SENSORS_PC87360) += pc87360.o 76obj-$(CONFIG_SENSORS_PC87360) += pc87360.o
77obj-$(CONFIG_SENSORS_PC87427) += pc87427.o 77obj-$(CONFIG_SENSORS_PC87427) += pc87427.o
78obj-$(CONFIG_SENSORS_PCF8591) += pcf8591.o 78obj-$(CONFIG_SENSORS_PCF8591) += pcf8591.o
79obj-$(CONFIG_SENSORS_SHT15) += sht15.o
79obj-$(CONFIG_SENSORS_SIS5595) += sis5595.o 80obj-$(CONFIG_SENSORS_SIS5595) += sis5595.o
80obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o 81obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o
81obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o 82obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o
diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c
index 55d3dc565be6..abca7e9f953b 100644
--- a/drivers/hwmon/hp_accel.c
+++ b/drivers/hwmon/hp_accel.c
@@ -34,7 +34,6 @@
34#include <linux/wait.h> 34#include <linux/wait.h>
35#include <linux/poll.h> 35#include <linux/poll.h>
36#include <linux/freezer.h> 36#include <linux/freezer.h>
37#include <linux/version.h>
38#include <linux/uaccess.h> 37#include <linux/uaccess.h>
39#include <linux/leds.h> 38#include <linux/leds.h>
40#include <acpi/acpi_drivers.h> 39#include <acpi/acpi_drivers.h>
diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c
new file mode 100644
index 000000000000..6cbdc2fea734
--- /dev/null
+++ b/drivers/hwmon/sht15.c
@@ -0,0 +1,692 @@
1/*
2 * sht15.c - support for the SHT15 Temperature and Humidity Sensor
3 *
4 * Copyright (c) 2009 Jonathan Cameron
5 *
6 * Copyright (c) 2007 Wouter Horre
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 *
12 * Currently ignoring checksum on readings.
13 * Default resolution only (14bit temp, 12bit humidity)
14 * Ignoring battery status.
15 * Heater not enabled.
16 * Timings are all conservative.
17 *
18 * Data sheet available (1/2009) at
19 * http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf
20 *
21 * Regulator supply name = vcc
22 */
23
24#include <linux/interrupt.h>
25#include <linux/irq.h>
26#include <linux/gpio.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <linux/hwmon.h>
30#include <linux/hwmon-sysfs.h>
31#include <linux/mutex.h>
32#include <linux/platform_device.h>
33#include <linux/delay.h>
34#include <linux/jiffies.h>
35#include <linux/err.h>
36#include <linux/sht15.h>
37#include <linux/regulator/consumer.h>
38#include <asm/atomic.h>
39
40#define SHT15_MEASURE_TEMP 3
41#define SHT15_MEASURE_RH 5
42
43#define SHT15_READING_NOTHING 0
44#define SHT15_READING_TEMP 1
45#define SHT15_READING_HUMID 2
46
47/* Min timings in nsecs */
48#define SHT15_TSCKL 100 /* clock low */
49#define SHT15_TSCKH 100 /* clock high */
50#define SHT15_TSU 150 /* data setup time */
51
52/**
53 * struct sht15_temppair - elements of voltage-dependent temp calc
54 * @vdd: supply voltage in microvolts
55 * @d1: see data sheet
56 */
57struct sht15_temppair {
58 int vdd; /* microvolts */
59 int d1;
60};
61
62/* Table 9 from data sheet - relates temperature calculation
63 * to supply voltage.
64 */
65static const struct sht15_temppair temppoints[] = {
66 { 2500000, -39400 },
67 { 3000000, -39600 },
68 { 3500000, -39700 },
69 { 4000000, -39800 },
70 { 5000000, -40100 },
71};
72
73/**
74 * struct sht15_data - device instance specific data
75 * @pdata: platform data (GPIOs etc.)
76 * @read_work: bottom half of the interrupt handler
77 * @wait_queue: wait queue for getting values from device
78 * @val_temp: last temperature value read from device
79 * @val_humid: last humidity value read from device
80 * @flag: status flag used to identify what the last request was
81 * @valid: are the current stored values valid (start condition)
82 * @last_updat: time of last update
83 * @read_lock: mutex to ensure only one read in progress
84 * at a time.
85 * @dev: associated device structure
86 * @hwmon_dev: device associated with hwmon subsystem
87 * @reg: associated regulator (if specified)
88 * @nb: notifier block to handle notifications of voltage changes
89 * @supply_uV: local copy of supply voltage used to allow
90 * use of regulator consumer if available
91 * @supply_uV_valid: false when a voltage-change event has been
92 *	received from the regulator but the updated value has not
93 *	yet been read back, so calculations based on supply_uV may be stale.
94 * @update_supply_work: work struct that is used to update the supply_uV
95 * @interrupt_handled: flag used to indicate a handler has been scheduled
96 */
97struct sht15_data {
98 struct sht15_platform_data *pdata;
99 struct work_struct read_work;
100 wait_queue_head_t wait_queue;
101 uint16_t val_temp;
102 uint16_t val_humid;
103 u8 flag;
104 u8 valid;
105 unsigned long last_updat;
106 struct mutex read_lock;
107 struct device *dev;
108 struct device *hwmon_dev;
109 struct regulator *reg;
110 struct notifier_block nb;
111 int supply_uV;
112 int supply_uV_valid;
113 struct work_struct update_supply_work;
114 atomic_t interrupt_handled;
115};
116
117/**
118 * sht15_connection_reset() - reset the comms interface
119 * @data: sht15 specific data
120 *
121 * This implements section 3.4 of the data sheet
122 */
123static void sht15_connection_reset(struct sht15_data *data)
124{
125 int i;
126 gpio_direction_output(data->pdata->gpio_data, 1);
127 ndelay(SHT15_TSCKL);
128 gpio_set_value(data->pdata->gpio_sck, 0);
129 ndelay(SHT15_TSCKL);
130 for (i = 0; i < 9; ++i) {
131 gpio_set_value(data->pdata->gpio_sck, 1);
132 ndelay(SHT15_TSCKH);
133 gpio_set_value(data->pdata->gpio_sck, 0);
134 ndelay(SHT15_TSCKL);
135 }
136}
137/**
138 * sht15_send_bit() - send an individual bit to the device
139 * @data: device state data
140 * @val: value of bit to be sent
141 **/
142static inline void sht15_send_bit(struct sht15_data *data, int val)
143{
144
145 gpio_set_value(data->pdata->gpio_data, val);
146 ndelay(SHT15_TSU);
147 gpio_set_value(data->pdata->gpio_sck, 1);
148 ndelay(SHT15_TSCKH);
149 gpio_set_value(data->pdata->gpio_sck, 0);
150 ndelay(SHT15_TSCKL); /* clock low time */
151}
152
153/**
154 * sht15_transmission_start() - specific sequence for new transmission
155 *
156 * @data: device state data
157 * Timings for this are not documented on the data sheet, so very
158 * conservative ones are used in this implementation. This implements
159 * figure 12 on the data sheet.
160 **/
161static void sht15_transmission_start(struct sht15_data *data)
162{
163 /* ensure data is high and output */
164 gpio_direction_output(data->pdata->gpio_data, 1);
165 ndelay(SHT15_TSU);
166 gpio_set_value(data->pdata->gpio_sck, 0);
167 ndelay(SHT15_TSCKL);
168 gpio_set_value(data->pdata->gpio_sck, 1);
169 ndelay(SHT15_TSCKH);
170 gpio_set_value(data->pdata->gpio_data, 0);
171 ndelay(SHT15_TSU);
172 gpio_set_value(data->pdata->gpio_sck, 0);
173 ndelay(SHT15_TSCKL);
174 gpio_set_value(data->pdata->gpio_sck, 1);
175 ndelay(SHT15_TSCKH);
176 gpio_set_value(data->pdata->gpio_data, 1);
177 ndelay(SHT15_TSU);
178 gpio_set_value(data->pdata->gpio_sck, 0);
179 ndelay(SHT15_TSCKL);
180}
181/**
182 * sht15_send_byte() - send a single byte to the device
183 * @data: device state
184 * @byte: value to be sent
185 **/
186static void sht15_send_byte(struct sht15_data *data, u8 byte)
187{
188 int i;
189 for (i = 0; i < 8; i++) {
190 sht15_send_bit(data, !!(byte & 0x80));
191 byte <<= 1;
192 }
193}
194/**
195 * sht15_wait_for_response() - checks for ack from device
196 * @data: device state
197 **/
198static int sht15_wait_for_response(struct sht15_data *data)
199{
200 gpio_direction_input(data->pdata->gpio_data);
201 gpio_set_value(data->pdata->gpio_sck, 1);
202 ndelay(SHT15_TSCKH);
203 if (gpio_get_value(data->pdata->gpio_data)) {
204 gpio_set_value(data->pdata->gpio_sck, 0);
205 dev_err(data->dev, "Command not acknowledged\n");
206 sht15_connection_reset(data);
207 return -EIO;
208 }
209 gpio_set_value(data->pdata->gpio_sck, 0);
210 ndelay(SHT15_TSCKL);
211 return 0;
212}
213
214/**
215 * sht15_send_cmd() - Sends a command to the device.
216 * @data: device state
217 * @cmd: command byte to be sent
218 *
219 * On entry, sck is an output driven low, data is an output driven
220 * high and the interrupt is disabled.
221 **/
222static int sht15_send_cmd(struct sht15_data *data, u8 cmd)
223{
224 int ret = 0;
225 sht15_transmission_start(data);
226 sht15_send_byte(data, cmd);
227 ret = sht15_wait_for_response(data);
228 return ret;
229}
230/**
231 * sht15_update_single_val() - get a new value from device
232 * @data: device instance specific data
233 * @command: command sent to request value
234 * @timeout_msecs: timeout after which comms are assumed
235 * to have failed and are reset.
236 **/
237static inline int sht15_update_single_val(struct sht15_data *data,
238 int command,
239 int timeout_msecs)
240{
241 int ret;
242 ret = sht15_send_cmd(data, command);
243 if (ret)
244 return ret;
245
246 gpio_direction_input(data->pdata->gpio_data);
247 atomic_set(&data->interrupt_handled, 0);
248
249 enable_irq(gpio_to_irq(data->pdata->gpio_data));
250 if (gpio_get_value(data->pdata->gpio_data) == 0) {
251 disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data));
252		/* Only relevant if the interrupt hasn't occurred. */
253 if (!atomic_read(&data->interrupt_handled))
254 schedule_work(&data->read_work);
255 }
256 ret = wait_event_timeout(data->wait_queue,
257 (data->flag == SHT15_READING_NOTHING),
258 msecs_to_jiffies(timeout_msecs));
259 if (ret == 0) {/* timeout occurred */
260		disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data));
261 sht15_connection_reset(data);
262 return -ETIME;
263 }
264 return 0;
265}
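/*
 * Note the race handling above: if the sensor pulled the data line low
 * before enable_irq() ran, no falling edge will ever fire, so the read
 * work is scheduled by hand - but only if the interrupt handler hasn't
 * already done so. A hypothetical caller sees just the combined result:
 *
 *	if (sht15_update_single_val(data, SHT15_MEASURE_TEMP, 400))
 *		dev_err(data->dev, "temperature read failed\n");
 */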
266
267/**
268 * sht15_update_vals() - get updated readings from device if too old
269 * @data: device state
270 **/
271static int sht15_update_vals(struct sht15_data *data)
272{
273 int ret = 0;
274 int timeout = HZ;
275
276 mutex_lock(&data->read_lock);
277 if (time_after(jiffies, data->last_updat + timeout)
278 || !data->valid) {
279 data->flag = SHT15_READING_HUMID;
280 ret = sht15_update_single_val(data, SHT15_MEASURE_RH, 160);
281 if (ret)
282 goto error_ret;
283 data->flag = SHT15_READING_TEMP;
284 ret = sht15_update_single_val(data, SHT15_MEASURE_TEMP, 400);
285 if (ret)
286 goto error_ret;
287 data->valid = 1;
288 data->last_updat = jiffies;
289 }
290error_ret:
291 mutex_unlock(&data->read_lock);
292
293 return ret;
294}
295
296/**
297 * sht15_calc_temp() - convert the raw reading to a temperature
298 * @data: device state
299 *
300 * As per section 4.3 of the data sheet.
301 **/
302static inline int sht15_calc_temp(struct sht15_data *data)
303{
304 int d1 = 0;
305 int i;
306
307	for (i = ARRAY_SIZE(temppoints) - 1; i > 0; i--)
308		/* Find the interval to interpolate over */
309		if (data->supply_uV > temppoints[i - 1].vdd) {
310			d1 = (data->supply_uV - temppoints[i - 1].vdd)
311 * (temppoints[i].d1 - temppoints[i - 1].d1)
312 / (temppoints[i].vdd - temppoints[i - 1].vdd)
313 + temppoints[i - 1].d1;
314 break;
315 }
316
317 return data->val_temp*10 + d1;
318}
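/*
 * Worked example of the conversion above, using table 9 values: at a
 * 3.5 V supply the interpolation yields d1 = -39700, so a raw reading
 * of val_temp = 6000 gives 6000 * 10 + (-39700) = 20300, i.e. 20.3
 * degrees C in the milli-degree units reported via temp1_input.
 */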
319
320/**
321 * sht15_calc_humid() - using last temperature convert raw to humid
322 * @data: device state
323 *
324 * This is the temperature compensated version as per section 4.2 of
325 * the data sheet.
326 **/
327static inline int sht15_calc_humid(struct sht15_data *data)
328{
329 int RHlinear; /* milli percent */
330 int temp = sht15_calc_temp(data);
331
332 const int c1 = -4;
333 const int c2 = 40500; /* x 10 ^ -6 */
334	const int c3 = -2800; /* x10 ^ -9 */
335
336 RHlinear = c1*1000
337 + c2 * data->val_humid/1000
338 + (data->val_humid * data->val_humid * c3)/1000000;
339	return (temp - 25000) * (10000 + 80 * data->val_humid)
340 / 1000000 + RHlinear;
341}
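/*
 * Worked example: for val_humid = 1500, RHlinear = -4000 + 40500*1500/1000
 * + 1500*1500*(-2800)/1000000 = -4000 + 60750 - 6300 = 50450 milli-percent.
 * At exactly 25 degrees C (temp == 25000) the compensation term vanishes,
 * so humidity1_input would report 50450, i.e. about 50.5 %RH.
 */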
342
343static ssize_t sht15_show_temp(struct device *dev,
344 struct device_attribute *attr,
345 char *buf)
346{
347 int ret;
348 struct sht15_data *data = dev_get_drvdata(dev);
349
350 /* Technically no need to read humidity as well */
351 ret = sht15_update_vals(data);
352
353 return ret ? ret : sprintf(buf, "%d\n",
354 sht15_calc_temp(data));
355}
356
357static ssize_t sht15_show_humidity(struct device *dev,
358 struct device_attribute *attr,
359 char *buf)
360{
361 int ret;
362 struct sht15_data *data = dev_get_drvdata(dev);
363
364 ret = sht15_update_vals(data);
365
366 return ret ? ret : sprintf(buf, "%d\n", sht15_calc_humid(data));
367
368}
369static ssize_t show_name(struct device *dev,
370 struct device_attribute *attr,
371 char *buf)
372{
373 struct platform_device *pdev = to_platform_device(dev);
374 return sprintf(buf, "%s\n", pdev->name);
375}
376
377static SENSOR_DEVICE_ATTR(temp1_input,
378 S_IRUGO, sht15_show_temp,
379 NULL, 0);
380static SENSOR_DEVICE_ATTR(humidity1_input,
381 S_IRUGO, sht15_show_humidity,
382 NULL, 0);
383static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
384static struct attribute *sht15_attrs[] = {
385 &sensor_dev_attr_temp1_input.dev_attr.attr,
386 &sensor_dev_attr_humidity1_input.dev_attr.attr,
387 &dev_attr_name.attr,
388 NULL,
389};
390
391static const struct attribute_group sht15_attr_group = {
392 .attrs = sht15_attrs,
393};
394
395static irqreturn_t sht15_interrupt_fired(int irq, void *d)
396{
397 struct sht15_data *data = d;
398 /* First disable the interrupt */
399 disable_irq_nosync(irq);
400 atomic_inc(&data->interrupt_handled);
401 /* Then schedule a reading work struct */
402 if (data->flag != SHT15_READING_NOTHING)
403 schedule_work(&data->read_work);
404 return IRQ_HANDLED;
405}
406
407/* Each byte of data is acknowledged by pulling the data line
408 * low for one clock pulse.
409 */
410static void sht15_ack(struct sht15_data *data)
411{
412 gpio_direction_output(data->pdata->gpio_data, 0);
413 ndelay(SHT15_TSU);
414 gpio_set_value(data->pdata->gpio_sck, 1);
415 ndelay(SHT15_TSU);
416 gpio_set_value(data->pdata->gpio_sck, 0);
417 ndelay(SHT15_TSU);
418 gpio_set_value(data->pdata->gpio_data, 1);
419
420 gpio_direction_input(data->pdata->gpio_data);
421}
422/**
423 * sht15_end_transmission() - notify device of end of transmission
424 * @data: device state
425 *
426 * This is basically a NAK. (single clock pulse, data high)
427 **/
428static void sht15_end_transmission(struct sht15_data *data)
429{
430 gpio_direction_output(data->pdata->gpio_data, 1);
431 ndelay(SHT15_TSU);
432 gpio_set_value(data->pdata->gpio_sck, 1);
433 ndelay(SHT15_TSCKH);
434 gpio_set_value(data->pdata->gpio_sck, 0);
435 ndelay(SHT15_TSCKL);
436}
437
438static void sht15_bh_read_data(struct work_struct *work_s)
439{
440 int i;
441 uint16_t val = 0;
442 struct sht15_data *data
443 = container_of(work_s, struct sht15_data,
444 read_work);
445 /* Firstly, verify the line is low */
446 if (gpio_get_value(data->pdata->gpio_data)) {
447		/* If not, then re-enable the interrupt - take care
448		   here as the line could have gone low in the meantime,
449		   so verify it hasn't!
450		*/
451 atomic_set(&data->interrupt_handled, 0);
452 enable_irq(gpio_to_irq(data->pdata->gpio_data));
453		/* If it still hasn't occurred, or another handler has been scheduled */
454 if (gpio_get_value(data->pdata->gpio_data)
455 || atomic_read(&data->interrupt_handled))
456 return;
457 }
458 /* Read the data back from the device */
459 for (i = 0; i < 16; ++i) {
460 val <<= 1;
461 gpio_set_value(data->pdata->gpio_sck, 1);
462 ndelay(SHT15_TSCKH);
463 val |= !!gpio_get_value(data->pdata->gpio_data);
464 gpio_set_value(data->pdata->gpio_sck, 0);
465 ndelay(SHT15_TSCKL);
466 if (i == 7)
467 sht15_ack(data);
468 }
469 /* Tell the device we are done */
470 sht15_end_transmission(data);
471
472 switch (data->flag) {
473 case SHT15_READING_TEMP:
474 data->val_temp = val;
475 break;
476 case SHT15_READING_HUMID:
477 data->val_humid = val;
478 break;
479 }
480
481 data->flag = SHT15_READING_NOTHING;
482 wake_up(&data->wait_queue);
483}
484
485static void sht15_update_voltage(struct work_struct *work_s)
486{
487 struct sht15_data *data
488 = container_of(work_s, struct sht15_data,
489 update_supply_work);
490 data->supply_uV = regulator_get_voltage(data->reg);
491}
492
493/**
494 * sht15_invalidate_voltage() - mark supply voltage invalid when notified by reg
495 * @nb: associated notification structure
496 * @event: voltage regulator state change event code
497 * @ignored: function parameter - ignored here
498 *
499 * Note that as the notification code holds the regulator lock, we have
500 * to schedule an update of the supply voltage rather than getting it directly.
501 **/
502static int sht15_invalidate_voltage(struct notifier_block *nb,
503 unsigned long event,
504 void *ignored)
505{
506 struct sht15_data *data = container_of(nb, struct sht15_data, nb);
507
508 if (event == REGULATOR_EVENT_VOLTAGE_CHANGE)
509 data->supply_uV_valid = false;
510 schedule_work(&data->update_supply_work);
511
512 return NOTIFY_OK;
513}
514
515static int __devinit sht15_probe(struct platform_device *pdev)
516{
517 int ret = 0;
518 struct sht15_data *data = kzalloc(sizeof(*data), GFP_KERNEL);
519
520 if (!data) {
521 ret = -ENOMEM;
522 dev_err(&pdev->dev, "kzalloc failed");
523 goto error_ret;
524 }
525
526 INIT_WORK(&data->read_work, sht15_bh_read_data);
527 INIT_WORK(&data->update_supply_work, sht15_update_voltage);
528 platform_set_drvdata(pdev, data);
529 mutex_init(&data->read_lock);
530 data->dev = &pdev->dev;
531 init_waitqueue_head(&data->wait_queue);
532
533 if (pdev->dev.platform_data == NULL) {
534 dev_err(&pdev->dev, "no platform data supplied");
535 goto err_free_data;
536 }
537 data->pdata = pdev->dev.platform_data;
538 data->supply_uV = data->pdata->supply_mv*1000;
539
540/* If a regulator is available, query what the supply voltage actually is! */
541 data->reg = regulator_get(data->dev, "vcc");
542 if (!IS_ERR(data->reg)) {
543 data->supply_uV = regulator_get_voltage(data->reg);
544 regulator_enable(data->reg);
545 /* setup a notifier block to update this if another device
546 * causes the voltage to change */
547 data->nb.notifier_call = &sht15_invalidate_voltage;
548 ret = regulator_register_notifier(data->reg, &data->nb);
549 }
550/* Try requesting the GPIOs */
551 ret = gpio_request(data->pdata->gpio_sck, "SHT15 sck");
552 if (ret) {
553 dev_err(&pdev->dev, "gpio request failed");
554 goto err_free_data;
555 }
556 gpio_direction_output(data->pdata->gpio_sck, 0);
557 ret = gpio_request(data->pdata->gpio_data, "SHT15 data");
558 if (ret) {
559 dev_err(&pdev->dev, "gpio request failed");
560 goto err_release_gpio_sck;
561 }
562 ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group);
563 if (ret) {
564 dev_err(&pdev->dev, "sysfs create failed");
565		goto err_release_gpio_data;
566 }
567
568 ret = request_irq(gpio_to_irq(data->pdata->gpio_data),
569 sht15_interrupt_fired,
570 IRQF_TRIGGER_FALLING,
571 "sht15 data",
572 data);
573 if (ret) {
574 dev_err(&pdev->dev, "failed to get irq for data line");
575 goto err_release_gpio_data;
576 }
577 disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data));
578 sht15_connection_reset(data);
579 sht15_send_cmd(data, 0x1E);
580
581 data->hwmon_dev = hwmon_device_register(data->dev);
582 if (IS_ERR(data->hwmon_dev)) {
583 ret = PTR_ERR(data->hwmon_dev);
584 goto err_release_gpio_data;
585 }
586 return 0;
587
588err_release_gpio_data:
589 gpio_free(data->pdata->gpio_data);
590err_release_gpio_sck:
591 gpio_free(data->pdata->gpio_sck);
592err_free_data:
593 kfree(data);
594error_ret:
595
596 return ret;
597}
598
599static int __devexit sht15_remove(struct platform_device *pdev)
600{
601 struct sht15_data *data = platform_get_drvdata(pdev);
602
603 /* Make sure any reads from the device are done and
604	 * prevent new ones beginning */
605 mutex_lock(&data->read_lock);
606 hwmon_device_unregister(data->hwmon_dev);
607 sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group);
608 if (!IS_ERR(data->reg)) {
609 regulator_unregister_notifier(data->reg, &data->nb);
610 regulator_disable(data->reg);
611 regulator_put(data->reg);
612 }
613
614 free_irq(gpio_to_irq(data->pdata->gpio_data), data);
615 gpio_free(data->pdata->gpio_data);
616 gpio_free(data->pdata->gpio_sck);
617 mutex_unlock(&data->read_lock);
618 kfree(data);
619 return 0;
620}
621
622
623static struct platform_driver sht_drivers[] = {
624 {
625 .driver = {
626 .name = "sht10",
627 .owner = THIS_MODULE,
628 },
629 .probe = sht15_probe,
630 .remove = sht15_remove,
631 }, {
632 .driver = {
633 .name = "sht11",
634 .owner = THIS_MODULE,
635 },
636 .probe = sht15_probe,
637 .remove = sht15_remove,
638 }, {
639 .driver = {
640 .name = "sht15",
641 .owner = THIS_MODULE,
642 },
643 .probe = sht15_probe,
644 .remove = sht15_remove,
645 }, {
646 .driver = {
647 .name = "sht71",
648 .owner = THIS_MODULE,
649 },
650 .probe = sht15_probe,
651 .remove = sht15_remove,
652 }, {
653 .driver = {
654 .name = "sht75",
655 .owner = THIS_MODULE,
656 },
657 .probe = sht15_probe,
658 .remove = sht15_remove,
659 },
660};
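/*
 * The driver binds purely by name, so board code supplies the wiring via
 * platform data. A hypothetical board file for an SHT71 on GPIOs 100/101
 * with a 3.3 V rail (gpio numbers and device id are examples only):
 *
 *	static struct sht15_platform_data sht71_pdata = {
 *		.gpio_data = 100,
 *		.gpio_sck  = 101,
 *		.supply_mv = 3300,
 *	};
 *
 *	static struct platform_device sht71_device = {
 *		.name = "sht71",
 *		.id   = -1,
 *		.dev  = { .platform_data = &sht71_pdata },
 *	};
 */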
661
662
663static int __init sht15_init(void)
664{
665 int ret;
666 int i;
667
668 for (i = 0; i < ARRAY_SIZE(sht_drivers); i++) {
669 ret = platform_driver_register(&sht_drivers[i]);
670 if (ret)
671 goto error_unreg;
672 }
673
674 return 0;
675
676error_unreg:
677 while (--i >= 0)
678 platform_driver_unregister(&sht_drivers[i]);
679
680 return ret;
681}
682module_init(sht15_init);
683
684static void __exit sht15_exit(void)
685{
686 int i;
687 for (i = ARRAY_SIZE(sht_drivers) - 1; i >= 0; i--)
688 platform_driver_unregister(&sht_drivers[i]);
689}
690module_exit(sht15_exit);
691
692MODULE_LICENSE("GPL");
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index d184dfab9631..db39f4a52f53 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -278,7 +278,7 @@ static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr,
278 * We only use page mode writes; the alternative is sloooow. This routine 278 * We only use page mode writes; the alternative is sloooow. This routine
279 * writes at most one page. 279 * writes at most one page.
280 */ 280 */
281static ssize_t at24_eeprom_write(struct at24_data *at24, char *buf, 281static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf,
282 unsigned offset, size_t count) 282 unsigned offset, size_t count)
283{ 283{
284 struct i2c_client *client; 284 struct i2c_client *client;
@@ -347,8 +347,8 @@ static ssize_t at24_eeprom_write(struct at24_data *at24, char *buf,
347 return -ETIMEDOUT; 347 return -ETIMEDOUT;
348} 348}
349 349
350static ssize_t at24_write(struct at24_data *at24, 350static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off,
351 char *buf, loff_t off, size_t count) 351 size_t count)
352{ 352{
353 ssize_t retval = 0; 353 ssize_t retval = 0;
354 354
@@ -406,7 +406,7 @@ static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf,
406 return at24_read(at24, buf, offset, count); 406 return at24_read(at24, buf, offset, count);
407} 407}
408 408
409static ssize_t at24_macc_write(struct memory_accessor *macc, char *buf, 409static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf,
410 off_t offset, size_t count) 410 off_t offset, size_t count)
411{ 411{
412 struct at24_data *at24 = container_of(macc, struct at24_data, macc); 412 struct at24_data *at24 = container_of(macc, struct at24_data, macc);
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 6bc0dac5c1e8..b34cb5f79eea 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -140,7 +140,8 @@ at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr,
140 140
141 141
142static ssize_t 142static ssize_t
143at25_ee_write(struct at25_data *at25, char *buf, loff_t off, size_t count) 143at25_ee_write(struct at25_data *at25, const char *buf, loff_t off,
144 size_t count)
144{ 145{
145 ssize_t status = 0; 146 ssize_t status = 0;
146 unsigned written = 0; 147 unsigned written = 0;
@@ -276,7 +277,7 @@ static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf,
276 return at25_ee_read(at25, buf, offset, count); 277 return at25_ee_read(at25, buf, offset, count);
277} 278}
278 279
279static ssize_t at25_mem_write(struct memory_accessor *mem, char *buf, 280static ssize_t at25_mem_write(struct memory_accessor *mem, const char *buf,
280 off_t offset, size_t count) 281 off_t offset, size_t count)
281{ 282{
282 struct at25_data *at25 = container_of(mem, struct at25_data, mem); 283 struct at25_data *at25 = container_of(mem, struct at25_data, mem);
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 114444cfd496..b94d5f767703 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -90,18 +90,21 @@ struct xpc_rsvd_page {
90 short max_npartitions; /* value of XPC_MAX_PARTITIONS */ 90 short max_npartitions; /* value of XPC_MAX_PARTITIONS */
91 u8 version; 91 u8 version;
92 u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ 92 u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */
93 unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
93 union { 94 union {
94 unsigned long vars_pa; /* phys address of struct xpc_vars */ 95 struct {
95 unsigned long activate_gru_mq_desc_gpa; /* phys addr of */ 96 unsigned long vars_pa; /* phys addr */
96 /* activate mq's */ 97 } sn2;
97 /* gru mq descriptor */ 98 struct {
99 unsigned long heartbeat_gpa; /* phys addr */
100 unsigned long activate_gru_mq_desc_gpa; /* phys addr */
101 } uv;
98 } sn; 102 } sn;
99 unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ 103 u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */
100 u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */
101 u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */ 104 u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */
102}; 105};
103 106
104#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */ 107#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */
105 108
106/* 109/*
107 * Define the structures by which XPC variables can be exported to other 110 * Define the structures by which XPC variables can be exported to other
@@ -182,6 +185,17 @@ struct xpc_vars_part_sn2 {
182 (XPC_RP_MACH_NASIDS(_rp) + \ 185 (XPC_RP_MACH_NASIDS(_rp) + \
183 xpc_nasid_mask_nlongs)) 186 xpc_nasid_mask_nlongs))
184 187
188
189/*
190 * The following structure describes the partition's heartbeat info which
191 * will be periodically read by other partitions to determine whether this
192 * XPC is still 'alive'.
193 */
194struct xpc_heartbeat_uv {
195 unsigned long value;
196 unsigned long offline; /* if 0, heartbeat should be changing */
197};
198
185/* 199/*
186 * Info pertinent to a GRU message queue using a watch list for irq generation. 200 * Info pertinent to a GRU message queue using a watch list for irq generation.
187 */ 201 */
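With heartbeats pulled out of the activate_mq, a partition now just reads the other side's xpc_heartbeat_uv through heartbeat_gpa and compares it with its cached copy. A sketch of the liveness decision that structure supports (helper and enum names are assumptions, not the XPC API):

    enum liveness { ALIVE, OFFLINE, DEAD };

    static enum liveness check_heartbeat(struct xpc_heartbeat_uv *cached,
                                         const struct xpc_heartbeat_uv *remote)
    {
            if (remote->offline)
                    return OFFLINE;         /* deliberately not changing */
            if (remote->value == cached->value)
                    return DEAD;            /* no progress since last poll */
            cached->value = remote->value;  /* remember for the next poll */
            return ALIVE;
    }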
@@ -198,7 +212,7 @@ struct xpc_gru_mq_uv {
198 212
199/* 213/*
200 * The activate_mq is used to send/receive GRU messages that affect XPC's 214 * The activate_mq is used to send/receive GRU messages that affect XPC's
201 * heartbeat, partition active state, and channel state. This is UV only. 215 * partition active state and channel state. This is uv only.
202 */ 216 */
203struct xpc_activate_mq_msghdr_uv { 217struct xpc_activate_mq_msghdr_uv {
204 unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ 218 unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
@@ -210,33 +224,27 @@ struct xpc_activate_mq_msghdr_uv {
210 224
211/* activate_mq defined message types */ 225/* activate_mq defined message types */
212#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0 226#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0
213#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV 1
214#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV 2
215#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV 3
216 227
217#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 4 228#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1
218#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 5 229#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2
219 230
220#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 6 231#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3
221#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 7 232#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4
222#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 8 233#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5
223#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 9 234#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6
235#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV 7
224 236
225#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 10 237#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 8
226#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 11 238#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 9
227 239
228struct xpc_activate_mq_msg_uv { 240struct xpc_activate_mq_msg_uv {
229 struct xpc_activate_mq_msghdr_uv hdr; 241 struct xpc_activate_mq_msghdr_uv hdr;
230}; 242};
231 243
232struct xpc_activate_mq_msg_heartbeat_req_uv {
233 struct xpc_activate_mq_msghdr_uv hdr;
234 u64 heartbeat;
235};
236
237struct xpc_activate_mq_msg_activate_req_uv { 244struct xpc_activate_mq_msg_activate_req_uv {
238 struct xpc_activate_mq_msghdr_uv hdr; 245 struct xpc_activate_mq_msghdr_uv hdr;
239 unsigned long rp_gpa; 246 unsigned long rp_gpa;
247 unsigned long heartbeat_gpa;
240 unsigned long activate_gru_mq_desc_gpa; 248 unsigned long activate_gru_mq_desc_gpa;
241}; 249};
242 250
@@ -271,6 +279,11 @@ struct xpc_activate_mq_msg_chctl_openreply_uv {
271 unsigned long notify_gru_mq_desc_gpa; 279 unsigned long notify_gru_mq_desc_gpa;
272}; 280};
273 281
282struct xpc_activate_mq_msg_chctl_opencomplete_uv {
283 struct xpc_activate_mq_msghdr_uv hdr;
284 short ch_number;
285};
286
274/* 287/*
275 * Functions registered by add_timer() or called by kernel_thread() only 288 * Functions registered by add_timer() or called by kernel_thread() only
276 * allow for a single 64-bit argument. The following macros can be used to 289 * allow for a single 64-bit argument. The following macros can be used to
@@ -576,30 +589,32 @@ struct xpc_channel {
576 589
577#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ 590#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */
578 591
579#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */ 592#define XPC_C_ROPENCOMPLETE 0x00000002 /* remote open channel complete */
580#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */ 593#define XPC_C_OPENCOMPLETE 0x00000004 /* local open channel complete */
581#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */ 594#define XPC_C_ROPENREPLY 0x00000008 /* remote open channel reply */
582#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ 595#define XPC_C_OPENREPLY 0x00000010 /* local open channel reply */
596#define XPC_C_ROPENREQUEST 0x00000020 /* remote open channel request */
597#define XPC_C_OPENREQUEST 0x00000040 /* local open channel request */
583 598
584#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ 599#define XPC_C_SETUP 0x00000080 /* channel's msgqueues are alloc'd */
585#define XPC_C_CONNECTEDCALLOUT 0x00000040 /* connected callout initiated */ 600#define XPC_C_CONNECTEDCALLOUT 0x00000100 /* connected callout initiated */
586#define XPC_C_CONNECTEDCALLOUT_MADE \ 601#define XPC_C_CONNECTEDCALLOUT_MADE \
587 0x00000080 /* connected callout completed */ 602 0x00000200 /* connected callout completed */
588#define XPC_C_CONNECTED 0x00000100 /* local channel is connected */ 603#define XPC_C_CONNECTED 0x00000400 /* local channel is connected */
589#define XPC_C_CONNECTING 0x00000200 /* channel is being connected */ 604#define XPC_C_CONNECTING 0x00000800 /* channel is being connected */
590 605
591#define XPC_C_RCLOSEREPLY 0x00000400 /* remote close channel reply */ 606#define XPC_C_RCLOSEREPLY 0x00001000 /* remote close channel reply */
592#define XPC_C_CLOSEREPLY 0x00000800 /* local close channel reply */ 607#define XPC_C_CLOSEREPLY 0x00002000 /* local close channel reply */
593#define XPC_C_RCLOSEREQUEST 0x00001000 /* remote close channel request */ 608#define XPC_C_RCLOSEREQUEST 0x00004000 /* remote close channel request */
594#define XPC_C_CLOSEREQUEST 0x00002000 /* local close channel request */ 609#define XPC_C_CLOSEREQUEST 0x00008000 /* local close channel request */
595 610
596#define XPC_C_DISCONNECTED 0x00004000 /* channel is disconnected */ 611#define XPC_C_DISCONNECTED 0x00010000 /* channel is disconnected */
597#define XPC_C_DISCONNECTING 0x00008000 /* channel is being disconnected */ 612#define XPC_C_DISCONNECTING 0x00020000 /* channel is being disconnected */
598#define XPC_C_DISCONNECTINGCALLOUT \ 613#define XPC_C_DISCONNECTINGCALLOUT \
599 0x00010000 /* disconnecting callout initiated */ 614 0x00040000 /* disconnecting callout initiated */
600#define XPC_C_DISCONNECTINGCALLOUT_MADE \ 615#define XPC_C_DISCONNECTINGCALLOUT_MADE \
601 0x00020000 /* disconnecting callout completed */ 616 0x00080000 /* disconnecting callout completed */
602#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */ 617#define XPC_C_WDISCONNECT 0x00100000 /* waiting for channel disconnect */
603 618
604/* 619/*
605 * The channel control flags (chctl) union consists of a 64-bit variable which 620 * The channel control flags (chctl) union consists of a 64-bit variable which
@@ -618,11 +633,13 @@ union xpc_channel_ctl_flags {
618#define XPC_CHCTL_CLOSEREPLY 0x02 633#define XPC_CHCTL_CLOSEREPLY 0x02
619#define XPC_CHCTL_OPENREQUEST 0x04 634#define XPC_CHCTL_OPENREQUEST 0x04
620#define XPC_CHCTL_OPENREPLY 0x08 635#define XPC_CHCTL_OPENREPLY 0x08
621#define XPC_CHCTL_MSGREQUEST 0x10 636#define XPC_CHCTL_OPENCOMPLETE 0x10
637#define XPC_CHCTL_MSGREQUEST 0x20
622 638
623#define XPC_OPENCLOSE_CHCTL_FLAGS \ 639#define XPC_OPENCLOSE_CHCTL_FLAGS \
624 (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \ 640 (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \
625 XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY) 641 XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \
642 XPC_CHCTL_OPENCOMPLETE)
626#define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST 643#define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST
627 644
628static inline int 645static inline int
@@ -687,6 +704,9 @@ struct xpc_partition_sn2 {
687}; 704};
688 705
689struct xpc_partition_uv { 706struct xpc_partition_uv {
707 unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */
708 struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */
709 /* partition's heartbeat */
690	unsigned long activate_gru_mq_desc_gpa; /* phys addr of partition's */ 710	unsigned long activate_gru_mq_desc_gpa; /* phys addr of partition's */
691 /* activate mq's gru mq */ 711 /* activate mq's gru mq */
692 /* descriptor */ 712 /* descriptor */
@@ -698,14 +718,12 @@ struct xpc_partition_uv {
698 u8 remote_act_state; /* remote partition's act_state */ 718 u8 remote_act_state; /* remote partition's act_state */
699 u8 act_state_req; /* act_state request from remote partition */ 719 u8 act_state_req; /* act_state request from remote partition */
700 enum xp_retval reason; /* reason for deactivate act_state request */ 720 enum xp_retval reason; /* reason for deactivate act_state request */
701 u64 heartbeat; /* incremented by remote partition */
702}; 721};
703 722
704/* struct xpc_partition_uv flags */ 723/* struct xpc_partition_uv flags */
705 724
706#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001 725#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001
707#define XPC_P_ENGAGED_UV 0x00000002 726#define XPC_P_ENGAGED_UV 0x00000002
708#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000004
709 727
710/* struct xpc_partition_uv act_state change requests */ 728/* struct xpc_partition_uv act_state change requests */
711 729
@@ -762,6 +780,62 @@ struct xpc_partition {
762 780
763} ____cacheline_aligned; 781} ____cacheline_aligned;
764 782
783struct xpc_arch_operations {
784 int (*setup_partitions) (void);
785 void (*teardown_partitions) (void);
786 void (*process_activate_IRQ_rcvd) (void);
787 enum xp_retval (*get_partition_rsvd_page_pa)
788 (void *, u64 *, unsigned long *, size_t *);
789 int (*setup_rsvd_page) (struct xpc_rsvd_page *);
790
791 void (*allow_hb) (short);
792 void (*disallow_hb) (short);
793 void (*disallow_all_hbs) (void);
794 void (*increment_heartbeat) (void);
795 void (*offline_heartbeat) (void);
796 void (*online_heartbeat) (void);
797 void (*heartbeat_init) (void);
798 void (*heartbeat_exit) (void);
799 enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *);
800
801 void (*request_partition_activation) (struct xpc_rsvd_page *,
802 unsigned long, int);
803 void (*request_partition_reactivation) (struct xpc_partition *);
804 void (*request_partition_deactivation) (struct xpc_partition *);
805 void (*cancel_partition_deactivation_request) (struct xpc_partition *);
806 enum xp_retval (*setup_ch_structures) (struct xpc_partition *);
807 void (*teardown_ch_structures) (struct xpc_partition *);
808
809 enum xp_retval (*make_first_contact) (struct xpc_partition *);
810
811 u64 (*get_chctl_all_flags) (struct xpc_partition *);
812 void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *);
813 void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *);
814 void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *);
815 void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *);
816 void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *);
817 void (*process_msg_chctl_flags) (struct xpc_partition *, int);
818
819 enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *,
820 unsigned long);
821
822 enum xp_retval (*setup_msg_structures) (struct xpc_channel *);
823 void (*teardown_msg_structures) (struct xpc_channel *);
824
825 void (*indicate_partition_engaged) (struct xpc_partition *);
826 void (*indicate_partition_disengaged) (struct xpc_partition *);
827 void (*assume_partition_disengaged) (short);
828 int (*partition_engaged) (short);
829 int (*any_partition_engaged) (void);
830
831 int (*n_of_deliverable_payloads) (struct xpc_channel *);
832 enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *,
833 u16, u8, xpc_notify_func, void *);
834 void *(*get_deliverable_payload) (struct xpc_channel *);
835 void (*received_payload) (struct xpc_channel *, void *);
836 void (*notify_senders_of_disconnect) (struct xpc_channel *);
837};
838
765/* struct xpc_partition act_state values (for XPC HB) */ 839/* struct xpc_partition act_state values (for XPC HB) */
766 840
767#define XPC_P_AS_INACTIVE 0x00 /* partition is not active */ 841#define XPC_P_AS_INACTIVE 0x00 /* partition is not active */
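All the per-function pointer variables removed further down are collapsed into this single xpc_arch_operations table: the sn2 or uv init code fills it in once, and common code calls through it. A sketch of the pattern (the initializer contents are assumptions about the sn2/uv files, not quotes from them):

    struct xpc_arch_operations xpc_arch_ops;

    int xpc_init_uv(void)
    {
            static struct xpc_arch_operations ops_uv = {
                    .increment_heartbeat  = xpc_increment_heartbeat_uv,
                    .get_remote_heartbeat = xpc_get_remote_heartbeat_uv,
                    /* ... remaining hooks ... */
            };

            xpc_arch_ops = ops_uv;
            return 0;
    }

    /* common code then dispatches: xpc_arch_ops.increment_heartbeat(); */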
@@ -802,67 +876,17 @@ extern struct xpc_registration xpc_registrations[];
802/* found in xpc_main.c */ 876/* found in xpc_main.c */
803extern struct device *xpc_part; 877extern struct device *xpc_part;
804extern struct device *xpc_chan; 878extern struct device *xpc_chan;
879extern struct xpc_arch_operations xpc_arch_ops;
805extern int xpc_disengage_timelimit; 880extern int xpc_disengage_timelimit;
806extern int xpc_disengage_timedout; 881extern int xpc_disengage_timedout;
807extern int xpc_activate_IRQ_rcvd; 882extern int xpc_activate_IRQ_rcvd;
808extern spinlock_t xpc_activate_IRQ_rcvd_lock; 883extern spinlock_t xpc_activate_IRQ_rcvd_lock;
809extern wait_queue_head_t xpc_activate_IRQ_wq; 884extern wait_queue_head_t xpc_activate_IRQ_wq;
810extern void *xpc_heartbeating_to_mask;
811extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **); 885extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
812extern void xpc_activate_partition(struct xpc_partition *); 886extern void xpc_activate_partition(struct xpc_partition *);
813extern void xpc_activate_kthreads(struct xpc_channel *, int); 887extern void xpc_activate_kthreads(struct xpc_channel *, int);
814extern void xpc_create_kthreads(struct xpc_channel *, int, int); 888extern void xpc_create_kthreads(struct xpc_channel *, int, int);
815extern void xpc_disconnect_wait(int); 889extern void xpc_disconnect_wait(int);
816extern int (*xpc_setup_partitions_sn) (void);
817extern void (*xpc_teardown_partitions_sn) (void);
818extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *,
819 unsigned long *,
820 size_t *);
821extern int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *);
822extern void (*xpc_heartbeat_init) (void);
823extern void (*xpc_heartbeat_exit) (void);
824extern void (*xpc_increment_heartbeat) (void);
825extern void (*xpc_offline_heartbeat) (void);
826extern void (*xpc_online_heartbeat) (void);
827extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *);
828extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
829extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *);
830extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *);
831extern void (*xpc_teardown_msg_structures) (struct xpc_channel *);
832extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *);
833extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int);
834extern int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *);
835extern void *(*xpc_get_deliverable_payload) (struct xpc_channel *);
836extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *,
837 unsigned long, int);
838extern void (*xpc_request_partition_reactivation) (struct xpc_partition *);
839extern void (*xpc_request_partition_deactivation) (struct xpc_partition *);
840extern void (*xpc_cancel_partition_deactivation_request) (
841 struct xpc_partition *);
842extern void (*xpc_process_activate_IRQ_rcvd) (void);
843extern enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *);
844extern void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *);
845
846extern void (*xpc_indicate_partition_engaged) (struct xpc_partition *);
847extern int (*xpc_partition_engaged) (short);
848extern int (*xpc_any_partition_engaged) (void);
849extern void (*xpc_indicate_partition_disengaged) (struct xpc_partition *);
850extern void (*xpc_assume_partition_disengaged) (short);
851
852extern void (*xpc_send_chctl_closerequest) (struct xpc_channel *,
853 unsigned long *);
854extern void (*xpc_send_chctl_closereply) (struct xpc_channel *,
855 unsigned long *);
856extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *,
857 unsigned long *);
858extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *);
859
860extern enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *,
861 unsigned long);
862
863extern enum xp_retval (*xpc_send_payload) (struct xpc_channel *, u32, void *,
864 u16, u8, xpc_notify_func, void *);
865extern void (*xpc_received_payload) (struct xpc_channel *, void *);
866 890
867/* found in xpc_sn2.c */ 891/* found in xpc_sn2.c */
868extern int xpc_init_sn2(void); 892extern int xpc_init_sn2(void);
@@ -909,40 +933,6 @@ extern void xpc_disconnect_channel(const int, struct xpc_channel *,
909extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); 933extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
910extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval); 934extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval);
911 935
912static inline int
913xpc_hb_allowed(short partid, void *heartbeating_to_mask)
914{
915 return test_bit(partid, heartbeating_to_mask);
916}
917
918static inline int
919xpc_any_hbs_allowed(void)
920{
921 DBUG_ON(xpc_heartbeating_to_mask == NULL);
922 return !bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions);
923}
924
925static inline void
926xpc_allow_hb(short partid)
927{
928 DBUG_ON(xpc_heartbeating_to_mask == NULL);
929 set_bit(partid, xpc_heartbeating_to_mask);
930}
931
932static inline void
933xpc_disallow_hb(short partid)
934{
935 DBUG_ON(xpc_heartbeating_to_mask == NULL);
936 clear_bit(partid, xpc_heartbeating_to_mask);
937}
938
939static inline void
940xpc_disallow_all_hbs(void)
941{
942 DBUG_ON(xpc_heartbeating_to_mask == NULL);
943 bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions);
944}
945
946static inline void 936static inline void
947xpc_wakeup_channel_mgr(struct xpc_partition *part) 937xpc_wakeup_channel_mgr(struct xpc_partition *part)
948{ 938{
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 99a2534c38a1..652593fc486d 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -3,7 +3,7 @@
3 * License. See the file "COPYING" in the main directory of this archive 3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details. 4 * for more details.
5 * 5 *
6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. 6 * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved.
7 */ 7 */
8 8
9/* 9/*
@@ -39,34 +39,38 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
39 39
40 if (!(ch->flags & XPC_C_SETUP)) { 40 if (!(ch->flags & XPC_C_SETUP)) {
41 spin_unlock_irqrestore(&ch->lock, *irq_flags); 41 spin_unlock_irqrestore(&ch->lock, *irq_flags);
42 ret = xpc_setup_msg_structures(ch); 42 ret = xpc_arch_ops.setup_msg_structures(ch);
43 spin_lock_irqsave(&ch->lock, *irq_flags); 43 spin_lock_irqsave(&ch->lock, *irq_flags);
44 44
45 if (ret != xpSuccess) 45 if (ret != xpSuccess)
46 XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); 46 XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
47 else
48 ch->flags |= XPC_C_SETUP;
47 49
48 ch->flags |= XPC_C_SETUP; 50 if (ch->flags & XPC_C_DISCONNECTING)
49
50 if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING))
51 return; 51 return;
52 } 52 }
53 53
54 if (!(ch->flags & XPC_C_OPENREPLY)) { 54 if (!(ch->flags & XPC_C_OPENREPLY)) {
55 ch->flags |= XPC_C_OPENREPLY; 55 ch->flags |= XPC_C_OPENREPLY;
56 xpc_send_chctl_openreply(ch, irq_flags); 56 xpc_arch_ops.send_chctl_openreply(ch, irq_flags);
57 } 57 }
58 58
59 if (!(ch->flags & XPC_C_ROPENREPLY)) 59 if (!(ch->flags & XPC_C_ROPENREPLY))
60 return; 60 return;
61 61
62 ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ 62 if (!(ch->flags & XPC_C_OPENCOMPLETE)) {
63 ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED);
64 xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags);
65 }
66
67 if (!(ch->flags & XPC_C_ROPENCOMPLETE))
68 return;
63 69
64 dev_info(xpc_chan, "channel %d to partition %d connected\n", 70 dev_info(xpc_chan, "channel %d to partition %d connected\n",
65 ch->number, ch->partid); 71 ch->number, ch->partid);
66 72
67 spin_unlock_irqrestore(&ch->lock, *irq_flags); 73 ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */
68 xpc_create_kthreads(ch, 1, 0);
69 spin_lock_irqsave(&ch->lock, *irq_flags);
70} 74}
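
The rewritten xpc_process_connect() gives the open handshake a third leg: a channel now counts as connected only once OPENREQUEST, OPENREPLY, and the new OPENCOMPLETE have each been recorded locally (the XPC_C_* flags) and from the remote side (the XPC_C_R* flags). A sketch of just the flag progression, with hypothetical send_* helpers and the disconnect and setup-failure paths omitted:

if (!(ch->flags & XPC_C_OPENREPLY)) {
	ch->flags |= XPC_C_OPENREPLY;
	send_openreply(ch);			/* hypothetical helper */
}
if (!(ch->flags & XPC_C_ROPENREPLY))
	return;					/* remote reply not seen yet */

if (!(ch->flags & XPC_C_OPENCOMPLETE)) {
	ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED);
	send_opencomplete(ch);			/* hypothetical helper */
}
if (!(ch->flags & XPC_C_ROPENCOMPLETE))
	return;					/* remote not finished either */

/* both sides completed all three legs: the channel is usable */

The extra round trip appears intended to keep either side from delivering payloads before the other has finished setting up its message structures.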
71 75
72/* 76/*
@@ -96,7 +100,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
96 100
97 if (part->act_state == XPC_P_AS_DEACTIVATING) { 101 if (part->act_state == XPC_P_AS_DEACTIVATING) {
98 /* can't proceed until the other side disengages from us */ 102 /* can't proceed until the other side disengages from us */
99 if (xpc_partition_engaged(ch->partid)) 103 if (xpc_arch_ops.partition_engaged(ch->partid))
100 return; 104 return;
101 105
102 } else { 106 } else {
@@ -108,7 +112,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
108 112
109 if (!(ch->flags & XPC_C_CLOSEREPLY)) { 113 if (!(ch->flags & XPC_C_CLOSEREPLY)) {
110 ch->flags |= XPC_C_CLOSEREPLY; 114 ch->flags |= XPC_C_CLOSEREPLY;
111 xpc_send_chctl_closereply(ch, irq_flags); 115 xpc_arch_ops.send_chctl_closereply(ch, irq_flags);
112 } 116 }
113 117
114 if (!(ch->flags & XPC_C_RCLOSEREPLY)) 118 if (!(ch->flags & XPC_C_RCLOSEREPLY))
@@ -118,7 +122,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
118 /* wake those waiting for notify completion */ 122 /* wake those waiting for notify completion */
119 if (atomic_read(&ch->n_to_notify) > 0) { 123 if (atomic_read(&ch->n_to_notify) > 0) {
120 /* we do callout while holding ch->lock, callout can't block */ 124 /* we do callout while holding ch->lock, callout can't block */
121 xpc_notify_senders_of_disconnect(ch); 125 xpc_arch_ops.notify_senders_of_disconnect(ch);
122 } 126 }
123 127
124 /* both sides are disconnected now */ 128 /* both sides are disconnected now */
@@ -132,7 +136,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
132 DBUG_ON(atomic_read(&ch->n_to_notify) != 0); 136 DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
133 137
134 /* it's now safe to free the channel's message queues */ 138 /* it's now safe to free the channel's message queues */
135 xpc_teardown_msg_structures(ch); 139 xpc_arch_ops.teardown_msg_structures(ch);
136 140
137 ch->func = NULL; 141 ch->func = NULL;
138 ch->key = NULL; 142 ch->key = NULL;
@@ -144,8 +148,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
144 148
145 /* 149 /*
146 * Mark the channel disconnected and clear all other flags, including 150 * Mark the channel disconnected and clear all other flags, including
147 * XPC_C_SETUP (because of call to xpc_teardown_msg_structures()) but 151 * XPC_C_SETUP (because of call to
148 * not including XPC_C_WDISCONNECT (if it was set). 152 * xpc_arch_ops.teardown_msg_structures()) but not including
153 * XPC_C_WDISCONNECT (if it was set).
149 */ 154 */
150 ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); 155 ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
151 156
@@ -184,6 +189,7 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number,
184 struct xpc_channel *ch = &part->channels[ch_number]; 189 struct xpc_channel *ch = &part->channels[ch_number];
185 enum xp_retval reason; 190 enum xp_retval reason;
186 enum xp_retval ret; 191 enum xp_retval ret;
192 int create_kthread = 0;
187 193
188 spin_lock_irqsave(&ch->lock, irq_flags); 194 spin_lock_irqsave(&ch->lock, irq_flags);
189 195
@@ -196,8 +202,7 @@ again:
196 * has had a chance to see that the channel is disconnected. 202 * has had a chance to see that the channel is disconnected.
197 */ 203 */
198 ch->delayed_chctl_flags |= chctl_flags; 204 ch->delayed_chctl_flags |= chctl_flags;
199 spin_unlock_irqrestore(&ch->lock, irq_flags); 205 goto out;
200 return;
201 } 206 }
202 207
203 if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) { 208 if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) {
@@ -239,8 +244,7 @@ again:
239 XPC_CHCTL_CLOSEREQUEST; 244 XPC_CHCTL_CLOSEREQUEST;
240 spin_unlock(&part->chctl_lock); 245 spin_unlock(&part->chctl_lock);
241 } 246 }
242 spin_unlock_irqrestore(&ch->lock, irq_flags); 247 goto out;
243 return;
244 } 248 }
245 249
246 XPC_SET_REASON(ch, 0, 0); 250 XPC_SET_REASON(ch, 0, 0);
@@ -250,7 +254,8 @@ again:
250 ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); 254 ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
251 } 255 }
252 256
253 chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY); 257 chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY |
258 XPC_CHCTL_OPENCOMPLETE);
254 259
255 /* 260 /*
256 * The meaningful CLOSEREQUEST connection state fields are: 261 * The meaningful CLOSEREQUEST connection state fields are:
@@ -269,8 +274,7 @@ again:
269 XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); 274 XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
270 275
271 DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY); 276 DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY);
272 spin_unlock_irqrestore(&ch->lock, irq_flags); 277 goto out;
273 return;
274 } 278 }
275 279
276 xpc_process_disconnect(ch, &irq_flags); 280 xpc_process_disconnect(ch, &irq_flags);
@@ -283,8 +287,7 @@ again:
283 287
284 if (ch->flags & XPC_C_DISCONNECTED) { 288 if (ch->flags & XPC_C_DISCONNECTED) {
285 DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING); 289 DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING);
286 spin_unlock_irqrestore(&ch->lock, irq_flags); 290 goto out;
287 return;
288 } 291 }
289 292
290 DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); 293 DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
@@ -299,8 +302,7 @@ again:
299 XPC_CHCTL_CLOSEREPLY; 302 XPC_CHCTL_CLOSEREPLY;
300 spin_unlock(&part->chctl_lock); 303 spin_unlock(&part->chctl_lock);
301 } 304 }
302 spin_unlock_irqrestore(&ch->lock, irq_flags); 305 goto out;
303 return;
304 } 306 }
305 307
306 ch->flags |= XPC_C_RCLOSEREPLY; 308 ch->flags |= XPC_C_RCLOSEREPLY;
@@ -320,14 +322,12 @@ again:
320 322
321 if (part->act_state == XPC_P_AS_DEACTIVATING || 323 if (part->act_state == XPC_P_AS_DEACTIVATING ||
322 (ch->flags & XPC_C_ROPENREQUEST)) { 324 (ch->flags & XPC_C_ROPENREQUEST)) {
323 spin_unlock_irqrestore(&ch->lock, irq_flags); 325 goto out;
324 return;
325 } 326 }
326 327
327 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { 328 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
328 ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST; 329 ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST;
329 spin_unlock_irqrestore(&ch->lock, irq_flags); 330 goto out;
330 return;
331 } 331 }
332 DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | 332 DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED |
333 XPC_C_OPENREQUEST))); 333 XPC_C_OPENREQUEST)));
@@ -341,8 +341,7 @@ again:
341 */ 341 */
342 if (args->entry_size == 0 || args->local_nentries == 0) { 342 if (args->entry_size == 0 || args->local_nentries == 0) {
343 /* assume OPENREQUEST was delayed by mistake */ 343 /* assume OPENREQUEST was delayed by mistake */
344 spin_unlock_irqrestore(&ch->lock, irq_flags); 344 goto out;
345 return;
346 } 345 }
347 346
348 ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); 347 ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
@@ -352,8 +351,7 @@ again:
352 if (args->entry_size != ch->entry_size) { 351 if (args->entry_size != ch->entry_size) {
353 XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, 352 XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes,
354 &irq_flags); 353 &irq_flags);
355 spin_unlock_irqrestore(&ch->lock, irq_flags); 354 goto out;
356 return;
357 } 355 }
358 } else { 356 } else {
359 ch->entry_size = args->entry_size; 357 ch->entry_size = args->entry_size;
@@ -375,15 +373,13 @@ again:
375 args->local_msgqueue_pa, args->local_nentries, 373 args->local_msgqueue_pa, args->local_nentries,
376 args->remote_nentries, ch->partid, ch->number); 374 args->remote_nentries, ch->partid, ch->number);
377 375
378 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { 376 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
379 spin_unlock_irqrestore(&ch->lock, irq_flags); 377 goto out;
380 return; 378
381 }
382 if (!(ch->flags & XPC_C_OPENREQUEST)) { 379 if (!(ch->flags & XPC_C_OPENREQUEST)) {
383 XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, 380 XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError,
384 &irq_flags); 381 &irq_flags);
385 spin_unlock_irqrestore(&ch->lock, irq_flags); 382 goto out;
386 return;
387 } 383 }
388 384
389 DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); 385 DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
@@ -400,11 +396,11 @@ again:
400 DBUG_ON(args->local_nentries == 0); 396 DBUG_ON(args->local_nentries == 0);
401 DBUG_ON(args->remote_nentries == 0); 397 DBUG_ON(args->remote_nentries == 0);
402 398
403 ret = xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa); 399 ret = xpc_arch_ops.save_remote_msgqueue_pa(ch,
400 args->local_msgqueue_pa);
404 if (ret != xpSuccess) { 401 if (ret != xpSuccess) {
405 XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags); 402 XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags);
406 spin_unlock_irqrestore(&ch->lock, irq_flags); 403 goto out;
407 return;
408 } 404 }
409 ch->flags |= XPC_C_ROPENREPLY; 405 ch->flags |= XPC_C_ROPENREPLY;
410 406
@@ -430,7 +426,36 @@ again:
430 xpc_process_connect(ch, &irq_flags); 426 xpc_process_connect(ch, &irq_flags);
431 } 427 }
432 428
429 if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) {
430
431 dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from "
432 "partid=%d, channel=%d\n", ch->partid, ch->number);
433
434 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
435 goto out;
436
437 if (!(ch->flags & XPC_C_OPENREQUEST) ||
438 !(ch->flags & XPC_C_OPENREPLY)) {
439 XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError,
440 &irq_flags);
441 goto out;
442 }
443
444 DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
445 DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY));
446 DBUG_ON(!(ch->flags & XPC_C_CONNECTED));
447
448 ch->flags |= XPC_C_ROPENCOMPLETE;
449
450 xpc_process_connect(ch, &irq_flags);
451 create_kthread = 1;
452 }
453
454out:
433 spin_unlock_irqrestore(&ch->lock, irq_flags); 455 spin_unlock_irqrestore(&ch->lock, irq_flags);
456
457 if (create_kthread)
458 xpc_create_kthreads(ch, 1, 0);
434} 459}
435 460
436/* 461/*
@@ -508,7 +533,7 @@ xpc_connect_channel(struct xpc_channel *ch)
508 /* initiate the connection */ 533 /* initiate the connection */
509 534
510 ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); 535 ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
511 xpc_send_chctl_openrequest(ch, &irq_flags); 536 xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags);
512 537
513 xpc_process_connect(ch, &irq_flags); 538 xpc_process_connect(ch, &irq_flags);
514 539
@@ -526,7 +551,7 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part)
526 int ch_number; 551 int ch_number;
527 u32 ch_flags; 552 u32 ch_flags;
528 553
529 chctl.all_flags = xpc_get_chctl_all_flags(part); 554 chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part);
530 555
531 /* 556 /*
532 * Initiate channel connections for registered channels. 557 * Initiate channel connections for registered channels.
@@ -564,10 +589,6 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part)
564 if (!(ch_flags & XPC_C_OPENREQUEST)) { 589 if (!(ch_flags & XPC_C_OPENREQUEST)) {
565 DBUG_ON(ch_flags & XPC_C_SETUP); 590 DBUG_ON(ch_flags & XPC_C_SETUP);
566 (void)xpc_connect_channel(ch); 591 (void)xpc_connect_channel(ch);
567 } else {
568 spin_lock_irqsave(&ch->lock, irq_flags);
569 xpc_process_connect(ch, &irq_flags);
570 spin_unlock_irqrestore(&ch->lock, irq_flags);
571 } 592 }
572 continue; 593 continue;
573 } 594 }
@@ -579,7 +600,7 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part)
579 */ 600 */
580 601
581 if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS) 602 if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
582 xpc_process_msg_chctl_flags(part, ch_number); 603 xpc_arch_ops.process_msg_chctl_flags(part, ch_number);
583 } 604 }
584} 605}
585 606
@@ -755,7 +776,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
755 XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | 776 XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
756 XPC_C_CONNECTING | XPC_C_CONNECTED); 777 XPC_C_CONNECTING | XPC_C_CONNECTED);
757 778
758 xpc_send_chctl_closerequest(ch, irq_flags); 779 xpc_arch_ops.send_chctl_closerequest(ch, irq_flags);
759 780
760 if (channel_was_connected) 781 if (channel_was_connected)
761 ch->flags |= XPC_C_WASCONNECTED; 782 ch->flags |= XPC_C_WASCONNECTED;
@@ -862,8 +883,8 @@ xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload,
862 DBUG_ON(payload == NULL); 883 DBUG_ON(payload == NULL);
863 884
864 if (xpc_part_ref(part)) { 885 if (xpc_part_ref(part)) {
865 ret = xpc_send_payload(&part->channels[ch_number], flags, 886 ret = xpc_arch_ops.send_payload(&part->channels[ch_number],
866 payload, payload_size, 0, NULL, NULL); 887 flags, payload, payload_size, 0, NULL, NULL);
867 xpc_part_deref(part); 888 xpc_part_deref(part);
868 } 889 }
869 890
@@ -914,9 +935,8 @@ xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload,
914 DBUG_ON(func == NULL); 935 DBUG_ON(func == NULL);
915 936
916 if (xpc_part_ref(part)) { 937 if (xpc_part_ref(part)) {
917 ret = xpc_send_payload(&part->channels[ch_number], flags, 938 ret = xpc_arch_ops.send_payload(&part->channels[ch_number],
918 payload, payload_size, XPC_N_CALL, func, 939 flags, payload, payload_size, XPC_N_CALL, func, key);
919 key);
920 xpc_part_deref(part); 940 xpc_part_deref(part);
921 } 941 }
922 return ret; 942 return ret;
@@ -930,7 +950,7 @@ xpc_deliver_payload(struct xpc_channel *ch)
930{ 950{
931 void *payload; 951 void *payload;
932 952
933 payload = xpc_get_deliverable_payload(ch); 953 payload = xpc_arch_ops.get_deliverable_payload(ch);
934 if (payload != NULL) { 954 if (payload != NULL) {
935 955
936 /* 956 /*
@@ -984,7 +1004,7 @@ xpc_initiate_received(short partid, int ch_number, void *payload)
984 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); 1004 DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
985 1005
986 ch = &part->channels[ch_number]; 1006 ch = &part->channels[ch_number];
987 xpc_received_payload(ch, payload); 1007 xpc_arch_ops.received_payload(ch, payload);
988 1008
989 /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */ 1009 /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */
990 xpc_msgqueue_deref(ch); 1010 xpc_msgqueue_deref(ch);
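
Two mechanical cleanups run through xpc_process_openclose_chctl_flags() above: the repeated unlock-and-return pairs collapse into a single out: label, and kthread creation moves past the unlock via the new create_kthread flag, since starting a kthread can sleep while ch->lock is a spinlock held with IRQs disabled. The shape of the pattern, sketched with placeholder ready(), open_completed(), and start_worker() helpers:

static void process_flags(struct xpc_channel *ch)
{
	unsigned long irq_flags;
	int create_kthread = 0;		/* decided under the lock */

	spin_lock_irqsave(&ch->lock, irq_flags);

	if (!ready(ch))
		goto out;		/* every early exit funnels here */

	if (open_completed(ch))
		create_kthread = 1;
out:
	spin_unlock_irqrestore(&ch->lock, irq_flags);

	if (create_kthread)
		start_worker(ch);	/* may sleep, so must run unlocked */
}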
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 1ab9fda87fab..fd3688a3e23f 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -3,7 +3,7 @@
3 * License. See the file "COPYING" in the main directory of this archive 3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details. 4 * for more details.
5 * 5 *
6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. 6 * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved.
7 */ 7 */
8 8
9/* 9/*
@@ -150,7 +150,6 @@ DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);
150 150
151static unsigned long xpc_hb_check_timeout; 151static unsigned long xpc_hb_check_timeout;
152static struct timer_list xpc_hb_timer; 152static struct timer_list xpc_hb_timer;
153void *xpc_heartbeating_to_mask;
154 153
155/* notification that the xpc_hb_checker thread has exited */ 154/* notification that the xpc_hb_checker thread has exited */
156static DECLARE_COMPLETION(xpc_hb_checker_exited); 155static DECLARE_COMPLETION(xpc_hb_checker_exited);
@@ -170,62 +169,7 @@ static struct notifier_block xpc_die_notifier = {
170 .notifier_call = xpc_system_die, 169 .notifier_call = xpc_system_die,
171}; 170};
172 171
173int (*xpc_setup_partitions_sn) (void); 172struct xpc_arch_operations xpc_arch_ops;
174void (*xpc_teardown_partitions_sn) (void);
175enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie,
176 unsigned long *rp_pa,
177 size_t *len);
178int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp);
179void (*xpc_heartbeat_init) (void);
180void (*xpc_heartbeat_exit) (void);
181void (*xpc_increment_heartbeat) (void);
182void (*xpc_offline_heartbeat) (void);
183void (*xpc_online_heartbeat) (void);
184enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part);
185
186enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
187void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
188u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part);
189enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch);
190void (*xpc_teardown_msg_structures) (struct xpc_channel *ch);
191void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number);
192int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch);
193void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch);
194
195void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp,
196 unsigned long remote_rp_pa,
197 int nasid);
198void (*xpc_request_partition_reactivation) (struct xpc_partition *part);
199void (*xpc_request_partition_deactivation) (struct xpc_partition *part);
200void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part);
201
202void (*xpc_process_activate_IRQ_rcvd) (void);
203enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part);
204void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part);
205
206void (*xpc_indicate_partition_engaged) (struct xpc_partition *part);
207int (*xpc_partition_engaged) (short partid);
208int (*xpc_any_partition_engaged) (void);
209void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part);
210void (*xpc_assume_partition_disengaged) (short partid);
211
212void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch,
213 unsigned long *irq_flags);
214void (*xpc_send_chctl_closereply) (struct xpc_channel *ch,
215 unsigned long *irq_flags);
216void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch,
217 unsigned long *irq_flags);
218void (*xpc_send_chctl_openreply) (struct xpc_channel *ch,
219 unsigned long *irq_flags);
220
221enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch,
222 unsigned long msgqueue_pa);
223
224enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags,
225 void *payload, u16 payload_size,
226 u8 notify_type, xpc_notify_func func,
227 void *key);
228void (*xpc_received_payload) (struct xpc_channel *ch, void *payload);
229 173
230/* 174/*
231 * Timer function to enforce the timelimit on the partition disengage. 175 * Timer function to enforce the timelimit on the partition disengage.
@@ -240,7 +184,7 @@ xpc_timeout_partition_disengage(unsigned long data)
240 (void)xpc_partition_disengaged(part); 184 (void)xpc_partition_disengaged(part);
241 185
242 DBUG_ON(part->disengage_timeout != 0); 186 DBUG_ON(part->disengage_timeout != 0);
243 DBUG_ON(xpc_partition_engaged(XPC_PARTID(part))); 187 DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part)));
244} 188}
245 189
246/* 190/*
@@ -251,7 +195,7 @@ xpc_timeout_partition_disengage(unsigned long data)
251static void 195static void
252xpc_hb_beater(unsigned long dummy) 196xpc_hb_beater(unsigned long dummy)
253{ 197{
254 xpc_increment_heartbeat(); 198 xpc_arch_ops.increment_heartbeat();
255 199
256 if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) 200 if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
257 wake_up_interruptible(&xpc_activate_IRQ_wq); 201 wake_up_interruptible(&xpc_activate_IRQ_wq);
@@ -263,7 +207,7 @@ xpc_hb_beater(unsigned long dummy)
263static void 207static void
264xpc_start_hb_beater(void) 208xpc_start_hb_beater(void)
265{ 209{
266 xpc_heartbeat_init(); 210 xpc_arch_ops.heartbeat_init();
267 init_timer(&xpc_hb_timer); 211 init_timer(&xpc_hb_timer);
268 xpc_hb_timer.function = xpc_hb_beater; 212 xpc_hb_timer.function = xpc_hb_beater;
269 xpc_hb_beater(0); 213 xpc_hb_beater(0);
@@ -273,7 +217,7 @@ static void
273xpc_stop_hb_beater(void) 217xpc_stop_hb_beater(void)
274{ 218{
275 del_timer_sync(&xpc_hb_timer); 219 del_timer_sync(&xpc_hb_timer);
276 xpc_heartbeat_exit(); 220 xpc_arch_ops.heartbeat_exit();
277} 221}
278 222
279/* 223/*
@@ -302,7 +246,7 @@ xpc_check_remote_hb(void)
302 continue; 246 continue;
303 } 247 }
304 248
305 ret = xpc_get_remote_heartbeat(part); 249 ret = xpc_arch_ops.get_remote_heartbeat(part);
306 if (ret != xpSuccess) 250 if (ret != xpSuccess)
307 XPC_DEACTIVATE_PARTITION(part, ret); 251 XPC_DEACTIVATE_PARTITION(part, ret);
308 } 252 }
@@ -353,7 +297,7 @@ xpc_hb_checker(void *ignore)
353 force_IRQ = 0; 297 force_IRQ = 0;
354 dev_dbg(xpc_part, "processing activate IRQs " 298 dev_dbg(xpc_part, "processing activate IRQs "
355 "received\n"); 299 "received\n");
356 xpc_process_activate_IRQ_rcvd(); 300 xpc_arch_ops.process_activate_IRQ_rcvd();
357 } 301 }
358 302
359 /* wait for IRQ or timeout */ 303 /* wait for IRQ or timeout */
@@ -528,7 +472,7 @@ xpc_setup_ch_structures(struct xpc_partition *part)
528 init_waitqueue_head(&ch->idle_wq); 472 init_waitqueue_head(&ch->idle_wq);
529 } 473 }
530 474
531 ret = xpc_setup_ch_structures_sn(part); 475 ret = xpc_arch_ops.setup_ch_structures(part);
532 if (ret != xpSuccess) 476 if (ret != xpSuccess)
533 goto out_2; 477 goto out_2;
534 478
@@ -572,7 +516,7 @@ xpc_teardown_ch_structures(struct xpc_partition *part)
572 516
573 /* now we can begin tearing down the infrastructure */ 517 /* now we can begin tearing down the infrastructure */
574 518
575 xpc_teardown_ch_structures_sn(part); 519 xpc_arch_ops.teardown_ch_structures(part);
576 520
577 kfree(part->remote_openclose_args_base); 521 kfree(part->remote_openclose_args_base);
578 part->remote_openclose_args = NULL; 522 part->remote_openclose_args = NULL;
@@ -620,12 +564,12 @@ xpc_activating(void *__partid)
620 564
621 dev_dbg(xpc_part, "activating partition %d\n", partid); 565 dev_dbg(xpc_part, "activating partition %d\n", partid);
622 566
623 xpc_allow_hb(partid); 567 xpc_arch_ops.allow_hb(partid);
624 568
625 if (xpc_setup_ch_structures(part) == xpSuccess) { 569 if (xpc_setup_ch_structures(part) == xpSuccess) {
626 (void)xpc_part_ref(part); /* this will always succeed */ 570 (void)xpc_part_ref(part); /* this will always succeed */
627 571
628 if (xpc_make_first_contact(part) == xpSuccess) { 572 if (xpc_arch_ops.make_first_contact(part) == xpSuccess) {
629 xpc_mark_partition_active(part); 573 xpc_mark_partition_active(part);
630 xpc_channel_mgr(part); 574 xpc_channel_mgr(part);
631 /* won't return until partition is deactivating */ 575 /* won't return until partition is deactivating */
@@ -635,12 +579,12 @@ xpc_activating(void *__partid)
635 xpc_teardown_ch_structures(part); 579 xpc_teardown_ch_structures(part);
636 } 580 }
637 581
638 xpc_disallow_hb(partid); 582 xpc_arch_ops.disallow_hb(partid);
639 xpc_mark_partition_inactive(part); 583 xpc_mark_partition_inactive(part);
640 584
641 if (part->reason == xpReactivating) { 585 if (part->reason == xpReactivating) {
642 /* interrupting ourselves results in activating partition */ 586 /* interrupting ourselves results in activating partition */
643 xpc_request_partition_reactivation(part); 587 xpc_arch_ops.request_partition_reactivation(part);
644 } 588 }
645 589
646 return 0; 590 return 0;
@@ -713,10 +657,13 @@ xpc_activate_kthreads(struct xpc_channel *ch, int needed)
713static void 657static void
714xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) 658xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
715{ 659{
660 int (*n_of_deliverable_payloads) (struct xpc_channel *) =
661 xpc_arch_ops.n_of_deliverable_payloads;
662
716 do { 663 do {
717 /* deliver messages to their intended recipients */ 664 /* deliver messages to their intended recipients */
718 665
719 while (xpc_n_of_deliverable_payloads(ch) > 0 && 666 while (n_of_deliverable_payloads(ch) > 0 &&
720 !(ch->flags & XPC_C_DISCONNECTING)) { 667 !(ch->flags & XPC_C_DISCONNECTING)) {
721 xpc_deliver_payload(ch); 668 xpc_deliver_payload(ch);
722 } 669 }
@@ -732,7 +679,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
732 "wait_event_interruptible_exclusive()\n"); 679 "wait_event_interruptible_exclusive()\n");
733 680
734 (void)wait_event_interruptible_exclusive(ch->idle_wq, 681 (void)wait_event_interruptible_exclusive(ch->idle_wq,
735 (xpc_n_of_deliverable_payloads(ch) > 0 || 682 (n_of_deliverable_payloads(ch) > 0 ||
736 (ch->flags & XPC_C_DISCONNECTING))); 683 (ch->flags & XPC_C_DISCONNECTING)));
737 684
738 atomic_dec(&ch->kthreads_idle); 685 atomic_dec(&ch->kthreads_idle);
@@ -749,6 +696,8 @@ xpc_kthread_start(void *args)
749 struct xpc_channel *ch; 696 struct xpc_channel *ch;
750 int n_needed; 697 int n_needed;
751 unsigned long irq_flags; 698 unsigned long irq_flags;
699 int (*n_of_deliverable_payloads) (struct xpc_channel *) =
700 xpc_arch_ops.n_of_deliverable_payloads;
752 701
753 dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", 702 dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
754 partid, ch_number); 703 partid, ch_number);
@@ -777,7 +726,7 @@ xpc_kthread_start(void *args)
777 * additional kthreads to help deliver them. We only 726 * additional kthreads to help deliver them. We only
778 * need one less than total #of messages to deliver. 727 * need one less than total #of messages to deliver.
779 */ 728 */
780 n_needed = xpc_n_of_deliverable_payloads(ch) - 1; 729 n_needed = n_of_deliverable_payloads(ch) - 1;
781 if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) 730 if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
782 xpc_activate_kthreads(ch, n_needed); 731 xpc_activate_kthreads(ch, n_needed);
783 732
@@ -805,7 +754,7 @@ xpc_kthread_start(void *args)
805 754
806 if (atomic_dec_return(&ch->kthreads_assigned) == 0 && 755 if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
807 atomic_dec_return(&part->nchannels_engaged) == 0) { 756 atomic_dec_return(&part->nchannels_engaged) == 0) {
808 xpc_indicate_partition_disengaged(part); 757 xpc_arch_ops.indicate_partition_disengaged(part);
809 } 758 }
810 759
811 xpc_msgqueue_deref(ch); 760 xpc_msgqueue_deref(ch);
@@ -837,6 +786,8 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
837 u64 args = XPC_PACK_ARGS(ch->partid, ch->number); 786 u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
838 struct xpc_partition *part = &xpc_partitions[ch->partid]; 787 struct xpc_partition *part = &xpc_partitions[ch->partid];
839 struct task_struct *kthread; 788 struct task_struct *kthread;
789 void (*indicate_partition_disengaged) (struct xpc_partition *) =
790 xpc_arch_ops.indicate_partition_disengaged;
840 791
841 while (needed-- > 0) { 792 while (needed-- > 0) {
842 793
@@ -858,7 +809,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
858 809
859 } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 && 810 } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
860 atomic_inc_return(&part->nchannels_engaged) == 1) { 811 atomic_inc_return(&part->nchannels_engaged) == 1) {
861 xpc_indicate_partition_engaged(part); 812 xpc_arch_ops.indicate_partition_engaged(part);
862 } 813 }
863 (void)xpc_part_ref(part); 814 (void)xpc_part_ref(part);
864 xpc_msgqueue_ref(ch); 815 xpc_msgqueue_ref(ch);
@@ -880,7 +831,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
880 831
881 if (atomic_dec_return(&ch->kthreads_assigned) == 0 && 832 if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
882 atomic_dec_return(&part->nchannels_engaged) == 0) { 833 atomic_dec_return(&part->nchannels_engaged) == 0) {
883 xpc_indicate_partition_disengaged(part); 834 indicate_partition_disengaged(part);
884 } 835 }
885 xpc_msgqueue_deref(ch); 836 xpc_msgqueue_deref(ch);
886 xpc_part_deref(part); 837 xpc_part_deref(part);
@@ -993,13 +944,13 @@ xpc_setup_partitions(void)
993 atomic_set(&part->references, 0); 944 atomic_set(&part->references, 0);
994 } 945 }
995 946
996 return xpc_setup_partitions_sn(); 947 return xpc_arch_ops.setup_partitions();
997} 948}
998 949
999static void 950static void
1000xpc_teardown_partitions(void) 951xpc_teardown_partitions(void)
1001{ 952{
1002 xpc_teardown_partitions_sn(); 953 xpc_arch_ops.teardown_partitions();
1003 kfree(xpc_partitions); 954 kfree(xpc_partitions);
1004} 955}
1005 956
@@ -1055,7 +1006,7 @@ xpc_do_exit(enum xp_retval reason)
1055 disengage_timeout = part->disengage_timeout; 1006 disengage_timeout = part->disengage_timeout;
1056 } 1007 }
1057 1008
1058 if (xpc_any_partition_engaged()) { 1009 if (xpc_arch_ops.any_partition_engaged()) {
1059 if (time_is_before_jiffies(printmsg_time)) { 1010 if (time_is_before_jiffies(printmsg_time)) {
1060 dev_info(xpc_part, "waiting for remote " 1011 dev_info(xpc_part, "waiting for remote "
1061 "partitions to deactivate, timeout in " 1012 "partitions to deactivate, timeout in "
@@ -1086,8 +1037,7 @@ xpc_do_exit(enum xp_retval reason)
1086 1037
1087 } while (1); 1038 } while (1);
1088 1039
1089 DBUG_ON(xpc_any_partition_engaged()); 1040 DBUG_ON(xpc_arch_ops.any_partition_engaged());
1090 DBUG_ON(xpc_any_hbs_allowed() != 0);
1091 1041
1092 xpc_teardown_rsvd_page(); 1042 xpc_teardown_rsvd_page();
1093 1043
@@ -1152,15 +1102,15 @@ xpc_die_deactivate(void)
1152 /* keep xpc_hb_checker thread from doing anything (just in case) */ 1102 /* keep xpc_hb_checker thread from doing anything (just in case) */
1153 xpc_exiting = 1; 1103 xpc_exiting = 1;
1154 1104
1155 	xpc_disallow_all_hbs();	/* indicate we're deactivated */ 1105 	xpc_arch_ops.disallow_all_hbs(); /* indicate we're deactivated */
1156 1106
1157 for (partid = 0; partid < xp_max_npartitions; partid++) { 1107 for (partid = 0; partid < xp_max_npartitions; partid++) {
1158 part = &xpc_partitions[partid]; 1108 part = &xpc_partitions[partid];
1159 1109
1160 if (xpc_partition_engaged(partid) || 1110 if (xpc_arch_ops.partition_engaged(partid) ||
1161 part->act_state != XPC_P_AS_INACTIVE) { 1111 part->act_state != XPC_P_AS_INACTIVE) {
1162 xpc_request_partition_deactivation(part); 1112 xpc_arch_ops.request_partition_deactivation(part);
1163 xpc_indicate_partition_disengaged(part); 1113 xpc_arch_ops.indicate_partition_disengaged(part);
1164 } 1114 }
1165 } 1115 }
1166 1116
@@ -1177,7 +1127,7 @@ xpc_die_deactivate(void)
1177 wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5; 1127 wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;
1178 1128
1179 while (1) { 1129 while (1) {
1180 any_engaged = xpc_any_partition_engaged(); 1130 any_engaged = xpc_arch_ops.any_partition_engaged();
1181 if (!any_engaged) { 1131 if (!any_engaged) {
1182 dev_info(xpc_part, "all partitions have deactivated\n"); 1132 dev_info(xpc_part, "all partitions have deactivated\n");
1183 break; 1133 break;
@@ -1186,7 +1136,7 @@ xpc_die_deactivate(void)
1186 if (!keep_waiting--) { 1136 if (!keep_waiting--) {
1187 for (partid = 0; partid < xp_max_npartitions; 1137 for (partid = 0; partid < xp_max_npartitions;
1188 partid++) { 1138 partid++) {
1189 if (xpc_partition_engaged(partid)) { 1139 if (xpc_arch_ops.partition_engaged(partid)) {
1190 dev_info(xpc_part, "deactivate from " 1140 dev_info(xpc_part, "deactivate from "
1191 "remote partition %d timed " 1141 "remote partition %d timed "
1192 "out\n", partid); 1142 "out\n", partid);
@@ -1233,7 +1183,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1233 /* fall through */ 1183 /* fall through */
1234 case DIE_MCA_MONARCH_ENTER: 1184 case DIE_MCA_MONARCH_ENTER:
1235 case DIE_INIT_MONARCH_ENTER: 1185 case DIE_INIT_MONARCH_ENTER:
1236 xpc_offline_heartbeat(); 1186 xpc_arch_ops.offline_heartbeat();
1237 break; 1187 break;
1238 1188
1239 case DIE_KDEBUG_LEAVE: 1189 case DIE_KDEBUG_LEAVE:
@@ -1244,7 +1194,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1244 /* fall through */ 1194 /* fall through */
1245 case DIE_MCA_MONARCH_LEAVE: 1195 case DIE_MCA_MONARCH_LEAVE:
1246 case DIE_INIT_MONARCH_LEAVE: 1196 case DIE_INIT_MONARCH_LEAVE:
1247 xpc_online_heartbeat(); 1197 xpc_arch_ops.online_heartbeat();
1248 break; 1198 break;
1249 } 1199 }
1250#else 1200#else
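
One wrinkle in the xpc_main.c conversion above: in the hot kthread paths the code loads a table entry into a local function pointer once (n_of_deliverable_payloads, indicate_partition_disengaged) and calls through the local, keeping the loop lines short and skipping a re-read of the global struct on each iteration. In miniature, with names taken from the patch:

/* cache one table entry locally before a tight delivery loop */
int (*n_deliverable)(struct xpc_channel *) =
	xpc_arch_ops.n_of_deliverable_payloads;

while (n_deliverable(ch) > 0 && !(ch->flags & XPC_C_DISCONNECTING))
	xpc_deliver_payload(ch);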
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 6722f6fe4dc7..65877bc5edaa 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -70,6 +70,9 @@ xpc_get_rsvd_page_pa(int nasid)
70 size_t buf_len = 0; 70 size_t buf_len = 0;
71 void *buf = buf; 71 void *buf = buf;
72 void *buf_base = NULL; 72 void *buf_base = NULL;
73 enum xp_retval (*get_partition_rsvd_page_pa)
74 (void *, u64 *, unsigned long *, size_t *) =
75 xpc_arch_ops.get_partition_rsvd_page_pa;
73 76
74 while (1) { 77 while (1) {
75 78
@@ -79,8 +82,7 @@ xpc_get_rsvd_page_pa(int nasid)
79 * ??? function or have two versions? Rename rp_pa for UV to 82 * ??? function or have two versions? Rename rp_pa for UV to
80 * ??? rp_gpa? 83 * ??? rp_gpa?
81 */ 84 */
82 ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, 85 ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);
83 &len);
84 86
85 dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " 87 dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
86 "address=0x%016lx, len=0x%016lx\n", ret, 88 "address=0x%016lx, len=0x%016lx\n", ret,
@@ -172,7 +174,7 @@ xpc_setup_rsvd_page(void)
172 xpc_part_nasids = XPC_RP_PART_NASIDS(rp); 174 xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
173 xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); 175 xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
174 176
175 ret = xpc_setup_rsvd_page_sn(rp); 177 ret = xpc_arch_ops.setup_rsvd_page(rp);
176 if (ret != 0) 178 if (ret != 0)
177 return ret; 179 return ret;
178 180
@@ -264,7 +266,7 @@ xpc_partition_disengaged(struct xpc_partition *part)
264 short partid = XPC_PARTID(part); 266 short partid = XPC_PARTID(part);
265 int disengaged; 267 int disengaged;
266 268
267 disengaged = !xpc_partition_engaged(partid); 269 disengaged = !xpc_arch_ops.partition_engaged(partid);
268 if (part->disengage_timeout) { 270 if (part->disengage_timeout) {
269 if (!disengaged) { 271 if (!disengaged) {
270 if (time_is_after_jiffies(part->disengage_timeout)) { 272 if (time_is_after_jiffies(part->disengage_timeout)) {
@@ -280,7 +282,7 @@ xpc_partition_disengaged(struct xpc_partition *part)
280 dev_info(xpc_part, "deactivate request to remote " 282 dev_info(xpc_part, "deactivate request to remote "
281 "partition %d timed out\n", partid); 283 "partition %d timed out\n", partid);
282 xpc_disengage_timedout = 1; 284 xpc_disengage_timedout = 1;
283 xpc_assume_partition_disengaged(partid); 285 xpc_arch_ops.assume_partition_disengaged(partid);
284 disengaged = 1; 286 disengaged = 1;
285 } 287 }
286 part->disengage_timeout = 0; 288 part->disengage_timeout = 0;
@@ -294,7 +296,7 @@ xpc_partition_disengaged(struct xpc_partition *part)
294 if (part->act_state != XPC_P_AS_INACTIVE) 296 if (part->act_state != XPC_P_AS_INACTIVE)
295 xpc_wakeup_channel_mgr(part); 297 xpc_wakeup_channel_mgr(part);
296 298
297 xpc_cancel_partition_deactivation_request(part); 299 xpc_arch_ops.cancel_partition_deactivation_request(part);
298 } 300 }
299 return disengaged; 301 return disengaged;
300} 302}
@@ -339,7 +341,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
339 spin_unlock_irqrestore(&part->act_lock, irq_flags); 341 spin_unlock_irqrestore(&part->act_lock, irq_flags);
340 if (reason == xpReactivating) { 342 if (reason == xpReactivating) {
341 /* we interrupt ourselves to reactivate partition */ 343 /* we interrupt ourselves to reactivate partition */
342 xpc_request_partition_reactivation(part); 344 xpc_arch_ops.request_partition_reactivation(part);
343 } 345 }
344 return; 346 return;
345 } 347 }
@@ -358,7 +360,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
358 spin_unlock_irqrestore(&part->act_lock, irq_flags); 360 spin_unlock_irqrestore(&part->act_lock, irq_flags);
359 361
360 /* ask remote partition to deactivate with regard to us */ 362 /* ask remote partition to deactivate with regard to us */
361 xpc_request_partition_deactivation(part); 363 xpc_arch_ops.request_partition_deactivation(part);
362 364
363 /* set a timelimit on the disengage phase of the deactivation request */ 365 /* set a timelimit on the disengage phase of the deactivation request */
364 part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); 366 part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
@@ -496,7 +498,7 @@ xpc_discovery(void)
496 continue; 498 continue;
497 } 499 }
498 500
499 xpc_request_partition_activation(remote_rp, 501 xpc_arch_ops.request_partition_activation(remote_rp,
500 remote_rp_pa, nasid); 502 remote_rp_pa, nasid);
501 } 503 }
502 } 504 }
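
The disengage phase in xpc_partition.c stays bounded by a jiffies deadline: it is armed as jiffies + xpc_disengage_timelimit * HZ and later tested with time_is_after_jiffies(), which stays correct across jiffies wraparound. The same pattern in isolation, with an assumed 10-second limit standing in for the tunable:

/* "deadline" stands in for part->disengage_timeout; 10 s is illustrative */
unsigned long deadline = jiffies + 10 * HZ;

if (time_is_after_jiffies(deadline)) {
	/* still inside the window: keep polling partition_engaged() */
} else {
	/* window expired: assume the remote side is gone and move on */
}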
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index eaaa964942de..915a3b495da5 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -3,7 +3,7 @@
3 * License. See the file "COPYING" in the main directory of this archive 3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details. 4 * for more details.
5 * 5 *
6 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 6 * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
7 */ 7 */
8 8
9/* 9/*
@@ -60,14 +60,14 @@ static struct xpc_vars_sn2 *xpc_vars_sn2;
60static struct xpc_vars_part_sn2 *xpc_vars_part_sn2; 60static struct xpc_vars_part_sn2 *xpc_vars_part_sn2;
61 61
62static int 62static int
63xpc_setup_partitions_sn_sn2(void) 63xpc_setup_partitions_sn2(void)
64{ 64{
65 /* nothing needs to be done */ 65 /* nothing needs to be done */
66 return 0; 66 return 0;
67} 67}
68 68
69static void 69static void
70xpc_teardown_partitions_sn_sn2(void) 70xpc_teardown_partitions_sn2(void)
71{ 71{
72 /* nothing needs to be done */ 72 /* nothing needs to be done */
73} 73}
@@ -431,6 +431,13 @@ xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
431} 431}
432 432
433static void 433static void
434xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch,
435 unsigned long *irq_flags)
436{
437 XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags);
438}
439
440static void
434xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch) 441xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch)
435{ 442{
436 XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL); 443 XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL);
@@ -621,7 +628,7 @@ xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa,
621 628
622 629
623static int 630static int
624xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp) 631xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp)
625{ 632{
626 struct amo *amos_page; 633 struct amo *amos_page;
627 int i; 634 int i;
@@ -629,7 +636,7 @@ xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp)
629 636
630 xpc_vars_sn2 = XPC_RP_VARS(rp); 637 xpc_vars_sn2 = XPC_RP_VARS(rp);
631 638
632 rp->sn.vars_pa = xp_pa(xpc_vars_sn2); 639 rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2);
633 640
634 /* vars_part array follows immediately after vars */ 641 /* vars_part array follows immediately after vars */
635 xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) + 642 xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
@@ -693,6 +700,33 @@ xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp)
693 return 0; 700 return 0;
694} 701}
695 702
703static int
704xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask)
705{
706 return test_bit(partid, heartbeating_to_mask);
707}
708
709static void
710xpc_allow_hb_sn2(short partid)
711{
712 DBUG_ON(xpc_vars_sn2 == NULL);
713 set_bit(partid, xpc_vars_sn2->heartbeating_to_mask);
714}
715
716static void
717xpc_disallow_hb_sn2(short partid)
718{
719 DBUG_ON(xpc_vars_sn2 == NULL);
720 clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask);
721}
722
723static void
724xpc_disallow_all_hbs_sn2(void)
725{
726 DBUG_ON(xpc_vars_sn2 == NULL);
727 bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions);
728}
729
696static void 730static void
697xpc_increment_heartbeat_sn2(void) 731xpc_increment_heartbeat_sn2(void)
698{ 732{
@@ -719,7 +753,6 @@ xpc_heartbeat_init_sn2(void)
719 DBUG_ON(xpc_vars_sn2 == NULL); 753 DBUG_ON(xpc_vars_sn2 == NULL);
720 754
721 bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); 755 bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
722 xpc_heartbeating_to_mask = &xpc_vars_sn2->heartbeating_to_mask[0];
723 xpc_online_heartbeat_sn2(); 756 xpc_online_heartbeat_sn2();
724} 757}
725 758
@@ -751,9 +784,9 @@ xpc_get_remote_heartbeat_sn2(struct xpc_partition *part)
751 remote_vars->heartbeating_to_mask[0]); 784 remote_vars->heartbeating_to_mask[0]);
752 785
753 if ((remote_vars->heartbeat == part->last_heartbeat && 786 if ((remote_vars->heartbeat == part->last_heartbeat &&
754 remote_vars->heartbeat_offline == 0) || 787 !remote_vars->heartbeat_offline) ||
755 !xpc_hb_allowed(sn_partition_id, 788 !xpc_hb_allowed_sn2(sn_partition_id,
756 &remote_vars->heartbeating_to_mask)) { 789 remote_vars->heartbeating_to_mask)) {
757 ret = xpNoHeartbeat; 790 ret = xpNoHeartbeat;
758 } else { 791 } else {
759 part->last_heartbeat = remote_vars->heartbeat; 792 part->last_heartbeat = remote_vars->heartbeat;
@@ -972,7 +1005,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid)
972 return; 1005 return;
973 } 1006 }
974 1007
975 remote_vars_pa = remote_rp->sn.vars_pa; 1008 remote_vars_pa = remote_rp->sn.sn2.vars_pa;
976 remote_rp_version = remote_rp->version; 1009 remote_rp_version = remote_rp->version;
977 remote_rp_ts_jiffies = remote_rp->ts_jiffies; 1010 remote_rp_ts_jiffies = remote_rp->ts_jiffies;
978 1011
@@ -1129,7 +1162,7 @@ xpc_process_activate_IRQ_rcvd_sn2(void)
1129 * Setup the channel structures that are sn2 specific. 1162 * Setup the channel structures that are sn2 specific.
1130 */ 1163 */
1131static enum xp_retval 1164static enum xp_retval
1132xpc_setup_ch_structures_sn_sn2(struct xpc_partition *part) 1165xpc_setup_ch_structures_sn2(struct xpc_partition *part)
1133{ 1166{
1134 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; 1167 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
1135 struct xpc_channel_sn2 *ch_sn2; 1168 struct xpc_channel_sn2 *ch_sn2;
@@ -1251,7 +1284,7 @@ out_1:
1251 * Teardown the channel structures that are sn2 specific. 1284 * Teardown the channel structures that are sn2 specific.
1252 */ 1285 */
1253static void 1286static void
1254xpc_teardown_ch_structures_sn_sn2(struct xpc_partition *part) 1287xpc_teardown_ch_structures_sn2(struct xpc_partition *part)
1255{ 1288{
1256 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; 1289 struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
1257 short partid = XPC_PARTID(part); 1290 short partid = XPC_PARTID(part);
@@ -2315,61 +2348,70 @@ xpc_received_payload_sn2(struct xpc_channel *ch, void *payload)
2315 xpc_acknowledge_msgs_sn2(ch, get, msg->flags); 2348 xpc_acknowledge_msgs_sn2(ch, get, msg->flags);
2316} 2349}
2317 2350
2351static struct xpc_arch_operations xpc_arch_ops_sn2 = {
2352 .setup_partitions = xpc_setup_partitions_sn2,
2353 .teardown_partitions = xpc_teardown_partitions_sn2,
2354 .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2,
2355 .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2,
2356 .setup_rsvd_page = xpc_setup_rsvd_page_sn2,
2357
2358 .allow_hb = xpc_allow_hb_sn2,
2359 .disallow_hb = xpc_disallow_hb_sn2,
2360 .disallow_all_hbs = xpc_disallow_all_hbs_sn2,
2361 .increment_heartbeat = xpc_increment_heartbeat_sn2,
2362 .offline_heartbeat = xpc_offline_heartbeat_sn2,
2363 .online_heartbeat = xpc_online_heartbeat_sn2,
2364 .heartbeat_init = xpc_heartbeat_init_sn2,
2365 .heartbeat_exit = xpc_heartbeat_exit_sn2,
2366 .get_remote_heartbeat = xpc_get_remote_heartbeat_sn2,
2367
2368 .request_partition_activation =
2369 xpc_request_partition_activation_sn2,
2370 .request_partition_reactivation =
2371 xpc_request_partition_reactivation_sn2,
2372 .request_partition_deactivation =
2373 xpc_request_partition_deactivation_sn2,
2374 .cancel_partition_deactivation_request =
2375 xpc_cancel_partition_deactivation_request_sn2,
2376
2377 .setup_ch_structures = xpc_setup_ch_structures_sn2,
2378 .teardown_ch_structures = xpc_teardown_ch_structures_sn2,
2379
2380 .make_first_contact = xpc_make_first_contact_sn2,
2381
2382 .get_chctl_all_flags = xpc_get_chctl_all_flags_sn2,
2383 .send_chctl_closerequest = xpc_send_chctl_closerequest_sn2,
2384 .send_chctl_closereply = xpc_send_chctl_closereply_sn2,
2385 .send_chctl_openrequest = xpc_send_chctl_openrequest_sn2,
2386 .send_chctl_openreply = xpc_send_chctl_openreply_sn2,
2387 .send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2,
2388 .process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2,
2389
2390 .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2,
2391
2392 .setup_msg_structures = xpc_setup_msg_structures_sn2,
2393 .teardown_msg_structures = xpc_teardown_msg_structures_sn2,
2394
2395 .indicate_partition_engaged = xpc_indicate_partition_engaged_sn2,
2396 .indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2,
2397 .partition_engaged = xpc_partition_engaged_sn2,
2398 .any_partition_engaged = xpc_any_partition_engaged_sn2,
2399 .assume_partition_disengaged = xpc_assume_partition_disengaged_sn2,
2400
2401 .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2,
2402 .send_payload = xpc_send_payload_sn2,
2403 .get_deliverable_payload = xpc_get_deliverable_payload_sn2,
2404 .received_payload = xpc_received_payload_sn2,
2405 .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2,
2406};
2407
2318int 2408int
2319xpc_init_sn2(void) 2409xpc_init_sn2(void)
2320{ 2410{
2321 int ret; 2411 int ret;
2322 size_t buf_size; 2412 size_t buf_size;
2323 2413
2324 xpc_setup_partitions_sn = xpc_setup_partitions_sn_sn2; 2414 xpc_arch_ops = xpc_arch_ops_sn2;
2325 xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_sn2;
2326 xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2;
2327 xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_sn2;
2328 xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
2329 xpc_offline_heartbeat = xpc_offline_heartbeat_sn2;
2330 xpc_online_heartbeat = xpc_online_heartbeat_sn2;
2331 xpc_heartbeat_init = xpc_heartbeat_init_sn2;
2332 xpc_heartbeat_exit = xpc_heartbeat_exit_sn2;
2333 xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_sn2;
2334
2335 xpc_request_partition_activation = xpc_request_partition_activation_sn2;
2336 xpc_request_partition_reactivation =
2337 xpc_request_partition_reactivation_sn2;
2338 xpc_request_partition_deactivation =
2339 xpc_request_partition_deactivation_sn2;
2340 xpc_cancel_partition_deactivation_request =
2341 xpc_cancel_partition_deactivation_request_sn2;
2342
2343 xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2;
2344 xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_sn2;
2345 xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_sn2;
2346 xpc_make_first_contact = xpc_make_first_contact_sn2;
2347
2348 xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2;
2349 xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2;
2350 xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2;
2351 xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2;
2352 xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2;
2353
2354 xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2;
2355
2356 xpc_setup_msg_structures = xpc_setup_msg_structures_sn2;
2357 xpc_teardown_msg_structures = xpc_teardown_msg_structures_sn2;
2358
2359 xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2;
2360 xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2;
2361 xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2;
2362 xpc_get_deliverable_payload = xpc_get_deliverable_payload_sn2;
2363
2364 xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2;
2365 xpc_indicate_partition_disengaged =
2366 xpc_indicate_partition_disengaged_sn2;
2367 xpc_partition_engaged = xpc_partition_engaged_sn2;
2368 xpc_any_partition_engaged = xpc_any_partition_engaged_sn2;
2369 xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2;
2370
2371 xpc_send_payload = xpc_send_payload_sn2;
2372 xpc_received_payload = xpc_received_payload_sn2;
2373 2415
2374 if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) { 2416 if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) {
2375 dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is " 2417 dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is "
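
With the heartbeat mask now private to the sn2 backend (the inline xpc_allow_hb()/xpc_disallow_hb() helpers leave xpc.h, and xpc_heartbeating_to_mask leaves xpc_main.c), the bitmap plumbing reduces to a few primitives. A self-contained sketch of the same operations, sized to a hypothetical 64 partitions instead of XP_MAX_NPARTITIONS_SN2:

#include <linux/bitmap.h>	/* DECLARE_BITMAP, bitmap_zero */
#include <linux/bitops.h>	/* set_bit, clear_bit, test_bit */

/* "mask" plays the role of xpc_vars_sn2->heartbeating_to_mask */
static DECLARE_BITMAP(mask, 64);

static void allow_hb(short partid)	{ set_bit(partid, mask); }
static void disallow_hb(short partid)	{ clear_bit(partid, mask); }
static void disallow_all_hbs(void)	{ bitmap_zero(mask, 64); }
static int  hb_allowed(short partid)	{ return test_bit(partid, mask); }

xpc_init_sn2() then installs the whole backend with the single assignment xpc_arch_ops = xpc_arch_ops_sn2, replacing the forty-odd individual pointer stores the patch deletes.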
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index f7fff4727edb..9172fcdee4e2 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -46,8 +46,7 @@ struct uv_IO_APIC_route_entry {
46}; 46};
47#endif 47#endif
48 48
49static atomic64_t xpc_heartbeat_uv; 49static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
50static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
51 50
52#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) 51#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES)
53#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ 52#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
@@ -63,7 +62,7 @@ static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
63static struct xpc_gru_mq_uv *xpc_notify_mq_uv; 62static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
64 63
65static int 64static int
66xpc_setup_partitions_sn_uv(void) 65xpc_setup_partitions_uv(void)
67{ 66{
68 short partid; 67 short partid;
69 struct xpc_partition_uv *part_uv; 68 struct xpc_partition_uv *part_uv;
@@ -79,7 +78,7 @@ xpc_setup_partitions_sn_uv(void)
79} 78}
80 79
81static void 80static void
82xpc_teardown_partitions_sn_uv(void) 81xpc_teardown_partitions_uv(void)
83{ 82{
84 short partid; 83 short partid;
85 struct xpc_partition_uv *part_uv; 84 struct xpc_partition_uv *part_uv;
@@ -423,41 +422,6 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
 		/* syncing of remote_act_state was just done above */
 		break;
 
-	case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: {
-		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
-		msg = container_of(msg_hdr,
-				   struct xpc_activate_mq_msg_heartbeat_req_uv,
-				   hdr);
-		part_uv->heartbeat = msg->heartbeat;
-		break;
-	}
-	case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: {
-		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
-		msg = container_of(msg_hdr,
-				   struct xpc_activate_mq_msg_heartbeat_req_uv,
-				   hdr);
-		part_uv->heartbeat = msg->heartbeat;
-
-		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
-		part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV;
-		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
-		break;
-	}
-	case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: {
-		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
-		msg = container_of(msg_hdr,
-				   struct xpc_activate_mq_msg_heartbeat_req_uv,
-				   hdr);
-		part_uv->heartbeat = msg->heartbeat;
-
-		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
-		part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV;
-		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
-		break;
-	}
 	case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
 		struct xpc_activate_mq_msg_activate_req_uv *msg;
 
@@ -475,6 +439,7 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
 		part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
 		part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
 		part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
+		part_uv->heartbeat_gpa = msg->heartbeat_gpa;
 
 		if (msg->activate_gru_mq_desc_gpa !=
 		    part_uv->activate_gru_mq_desc_gpa) {
@@ -569,6 +534,17 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
 		xpc_wakeup_channel_mgr(part);
 		break;
 	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: {
+		struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg;
+
+		msg = container_of(msg_hdr, struct
+				xpc_activate_mq_msg_chctl_opencomplete_uv, hdr);
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+	}
 	case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
 		part_uv->flags |= XPC_P_ENGAGED_UV;
@@ -759,7 +735,7 @@ xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
 
 	/*
 	 * !!! Make our side think that the remote partition sent an activate
-	 * !!! message our way by doing what the activate IRQ handler would
+	 * !!! mq message our way by doing what the activate IRQ handler would
 	 * !!! do had one really been sent.
 	 */
 
@@ -806,90 +782,82 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
 }
 
 static int
-xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
+xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp)
 {
-	rp->sn.activate_gru_mq_desc_gpa =
+	xpc_heartbeat_uv =
+		&xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
+	rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
+	rp->sn.uv.activate_gru_mq_desc_gpa =
 	    uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
 	return 0;
 }
 
 static void
-xpc_send_heartbeat_uv(int msg_type)
+xpc_allow_hb_uv(short partid)
 {
-	short partid;
-	struct xpc_partition *part;
-	struct xpc_activate_mq_msg_heartbeat_req_uv msg;
-
-	/*
-	 * !!! On uv we're broadcasting a heartbeat message every 5 seconds.
-	 * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20
-	 * !!! seconds. This is an increase in numalink traffic.
-	 * ??? Is this good?
-	 */
-
-	msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv);
-
-	partid = find_first_bit(xpc_heartbeating_to_mask_uv,
-				XP_MAX_NPARTITIONS_UV);
-
-	while (partid < XP_MAX_NPARTITIONS_UV) {
-		part = &xpc_partitions[partid];
+}
 
-		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
-					      msg_type);
+static void
+xpc_disallow_hb_uv(short partid)
+{
+}
 
-		partid = find_next_bit(xpc_heartbeating_to_mask_uv,
-				       XP_MAX_NPARTITIONS_UV, partid + 1);
-	}
-}
+static void
+xpc_disallow_all_hbs_uv(void)
+{
+}
 
 static void
 xpc_increment_heartbeat_uv(void)
 {
-	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV);
+	xpc_heartbeat_uv->value++;
 }
 
 static void
 xpc_offline_heartbeat_uv(void)
 {
-	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
+	xpc_increment_heartbeat_uv();
+	xpc_heartbeat_uv->offline = 1;
 }
 
 static void
 xpc_online_heartbeat_uv(void)
 {
-	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV);
+	xpc_increment_heartbeat_uv();
+	xpc_heartbeat_uv->offline = 0;
 }
 
 static void
 xpc_heartbeat_init_uv(void)
 {
-	atomic64_set(&xpc_heartbeat_uv, 0);
-	bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
-	xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0];
+	xpc_heartbeat_uv->value = 1;
+	xpc_heartbeat_uv->offline = 0;
 }
 
 static void
 xpc_heartbeat_exit_uv(void)
 {
-	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
+	xpc_offline_heartbeat_uv();
 }
 
 static enum xp_retval
 xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
 {
 	struct xpc_partition_uv *part_uv = &part->sn.uv;
-	enum xp_retval ret = xpNoHeartbeat;
+	enum xp_retval ret;
 
-	if (part_uv->remote_act_state != XPC_P_AS_INACTIVE &&
-	    part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) {
+	ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
+			       part_uv->heartbeat_gpa,
+			       sizeof(struct xpc_heartbeat_uv));
+	if (ret != xpSuccess)
+		return ret;
 
-		if (part_uv->heartbeat != part->last_heartbeat ||
-		    (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) {
+	if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
+	    !part_uv->cached_heartbeat.offline) {
 
-			part->last_heartbeat = part_uv->heartbeat;
-			ret = xpSuccess;
-		}
-	}
+		ret = xpNoHeartbeat;
+	} else {
+		part->last_heartbeat = part_uv->cached_heartbeat.value;
+	}
 	return ret;
 }
@@ -904,8 +872,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
 
 	part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
 	part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+	part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
 	part->sn.uv.activate_gru_mq_desc_gpa =
-	    remote_rp->sn.activate_gru_mq_desc_gpa;
+	    remote_rp->sn.uv.activate_gru_mq_desc_gpa;
 
 	/*
 	 * ??? Is it a good idea to make this conditional on what is
@@ -913,8 +882,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
 	 */
 	if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
 		msg.rp_gpa = uv_gpa(xpc_rsvd_page);
+		msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
 		msg.activate_gru_mq_desc_gpa =
-		    xpc_rsvd_page->sn.activate_gru_mq_desc_gpa;
+		    xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
 		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
 					      XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
 	}
@@ -1010,7 +980,7 @@ xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
  * Setup the channel structures that are uv specific.
  */
 static enum xp_retval
-xpc_setup_ch_structures_sn_uv(struct xpc_partition *part)
+xpc_setup_ch_structures_uv(struct xpc_partition *part)
 {
 	struct xpc_channel_uv *ch_uv;
 	int ch_number;
@@ -1029,7 +999,7 @@ xpc_setup_ch_structures_sn_uv(struct xpc_partition *part)
  * Teardown the channel structures that are uv specific.
  */
 static void
-xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part)
+xpc_teardown_ch_structures_uv(struct xpc_partition *part)
 {
 	/* nothing needs to be done */
 	return;
@@ -1243,6 +1213,16 @@ xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
 }
 
 static void
+xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_opencomplete_uv msg;
+
+	msg.ch_number = ch->number;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV);
+}
+
+static void
 xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
 {
 	unsigned long irq_flags;
@@ -1669,58 +1649,67 @@ xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
 	msg->hdr.msg_slot_number += ch->remote_nentries;
 }
 
+static struct xpc_arch_operations xpc_arch_ops_uv = {
+	.setup_partitions = xpc_setup_partitions_uv,
+	.teardown_partitions = xpc_teardown_partitions_uv,
+	.process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv,
+	.get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv,
+	.setup_rsvd_page = xpc_setup_rsvd_page_uv,
+
+	.allow_hb = xpc_allow_hb_uv,
+	.disallow_hb = xpc_disallow_hb_uv,
+	.disallow_all_hbs = xpc_disallow_all_hbs_uv,
+	.increment_heartbeat = xpc_increment_heartbeat_uv,
+	.offline_heartbeat = xpc_offline_heartbeat_uv,
+	.online_heartbeat = xpc_online_heartbeat_uv,
+	.heartbeat_init = xpc_heartbeat_init_uv,
+	.heartbeat_exit = xpc_heartbeat_exit_uv,
+	.get_remote_heartbeat = xpc_get_remote_heartbeat_uv,
+
+	.request_partition_activation =
+		xpc_request_partition_activation_uv,
+	.request_partition_reactivation =
+		xpc_request_partition_reactivation_uv,
+	.request_partition_deactivation =
+		xpc_request_partition_deactivation_uv,
+	.cancel_partition_deactivation_request =
+		xpc_cancel_partition_deactivation_request_uv,
+
+	.setup_ch_structures = xpc_setup_ch_structures_uv,
+	.teardown_ch_structures = xpc_teardown_ch_structures_uv,
+
+	.make_first_contact = xpc_make_first_contact_uv,
+
+	.get_chctl_all_flags = xpc_get_chctl_all_flags_uv,
+	.send_chctl_closerequest = xpc_send_chctl_closerequest_uv,
+	.send_chctl_closereply = xpc_send_chctl_closereply_uv,
+	.send_chctl_openrequest = xpc_send_chctl_openrequest_uv,
+	.send_chctl_openreply = xpc_send_chctl_openreply_uv,
+	.send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv,
+	.process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv,
+
+	.save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv,
+
+	.setup_msg_structures = xpc_setup_msg_structures_uv,
+	.teardown_msg_structures = xpc_teardown_msg_structures_uv,
+
+	.indicate_partition_engaged = xpc_indicate_partition_engaged_uv,
+	.indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv,
+	.assume_partition_disengaged = xpc_assume_partition_disengaged_uv,
+	.partition_engaged = xpc_partition_engaged_uv,
+	.any_partition_engaged = xpc_any_partition_engaged_uv,
+
+	.n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv,
+	.send_payload = xpc_send_payload_uv,
+	.get_deliverable_payload = xpc_get_deliverable_payload_uv,
+	.received_payload = xpc_received_payload_uv,
+	.notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
+};
+
 int
 xpc_init_uv(void)
 {
-	xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv;
-	xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_uv;
-	xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv;
-	xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv;
-	xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv;
-	xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
-	xpc_offline_heartbeat = xpc_offline_heartbeat_uv;
-	xpc_online_heartbeat = xpc_online_heartbeat_uv;
-	xpc_heartbeat_init = xpc_heartbeat_init_uv;
-	xpc_heartbeat_exit = xpc_heartbeat_exit_uv;
-	xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_uv;
-
-	xpc_request_partition_activation = xpc_request_partition_activation_uv;
-	xpc_request_partition_reactivation =
-	    xpc_request_partition_reactivation_uv;
-	xpc_request_partition_deactivation =
-	    xpc_request_partition_deactivation_uv;
-	xpc_cancel_partition_deactivation_request =
-	    xpc_cancel_partition_deactivation_request_uv;
-
-	xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv;
-	xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv;
-
-	xpc_make_first_contact = xpc_make_first_contact_uv;
-
-	xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv;
-	xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_uv;
-	xpc_send_chctl_closereply = xpc_send_chctl_closereply_uv;
-	xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_uv;
-	xpc_send_chctl_openreply = xpc_send_chctl_openreply_uv;
-
-	xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv;
-
-	xpc_setup_msg_structures = xpc_setup_msg_structures_uv;
-	xpc_teardown_msg_structures = xpc_teardown_msg_structures_uv;
-
-	xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_uv;
-	xpc_indicate_partition_disengaged =
-	    xpc_indicate_partition_disengaged_uv;
-	xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_uv;
-	xpc_partition_engaged = xpc_partition_engaged_uv;
-	xpc_any_partition_engaged = xpc_any_partition_engaged_uv;
-
-	xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv;
-	xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv;
-	xpc_send_payload = xpc_send_payload_uv;
-	xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv;
-	xpc_get_deliverable_payload = xpc_get_deliverable_payload_uv;
-	xpc_received_payload = xpc_received_payload_uv;
+	xpc_arch_ops = xpc_arch_ops_uv;
 
 	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
 		dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
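The heartbeat rework above drops the INC/OFFLINE/ONLINE broadcast messages entirely: each partition now increments a local struct xpc_heartbeat_uv, advertises its address via heartbeat_gpa in the reserved page, and watchers pull it with xp_remote_memcpy(). A condensed sketch of the two halves, with the struct layout assumed from the fields used in the hunks:

    /* Assumed layout, inferred from the hunks above. */
    struct xpc_heartbeat_uv {
    	unsigned long value;	/* bumped periodically by the owner */
    	unsigned long offline;	/* set while the owner is going down */
    };

    /* Owner side: no message traffic, just a local store. */
    static void beat(struct xpc_heartbeat_uv *hb)
    {
    	hb->value++;
    }

    /* Watcher side: the caller has already xp_remote_memcpy()'d the
     * remote struct into *cached; no change and not offline means the
     * remote partition made no progress (maps to xpNoHeartbeat). */
    static int remote_alive(struct xpc_heartbeat_uv *cached,
    			unsigned long *last_seen)
    {
    	if (cached->value == *last_seen && !cached->offline)
    		return 0;
    	*last_seen = cached->value;
    	return 1;
    }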
diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c
index deb7b53167ee..83a12125b94e 100644
--- a/drivers/net/atl1c/atl1c_main.c
+++ b/drivers/net/atl1c/atl1c_main.c
@@ -2532,8 +2532,8 @@ static int __devinit atl1c_probe(struct pci_dev *pdev,
 	 * various kernel subsystems to support the mechanics required by a
 	 * fixed-high-32-bit system.
 	 */
-	if ((pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) ||
-	    (pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK) != 0)) {
+	if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) ||
+	    (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) {
 		dev_err(&pdev->dev, "No usable DMA configuration,aborting\n");
 		goto err_dma;
 	}
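This and the following driver conversions replace the retired DMA_nnBIT_MASK constants with the DMA_BIT_MASK() macro. As a reminder (definition quoted from memory of linux/dma-mapping.h; the 64-bit special case exists because a full-width shift of 1ULL would be undefined):

    #define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

    /* Equivalences used throughout this series:
     *   DMA_32BIT_MASK == DMA_BIT_MASK(32) == 0x00000000ffffffffULL
     *   DMA_40BIT_MASK == DMA_BIT_MASK(40) == 0x000000ffffffffffULL
     *   DMA_64BIT_MASK == DMA_BIT_MASK(64) == 0xffffffffffffffffULL
     */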
diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 9b75aa630062..30d0c81c989e 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -1821,11 +1821,11 @@ static int __devinit be_probe(struct pci_dev *pdev,
 
 	be_msix_enable(adapter);
 
-	status = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+	status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
 	if (!status) {
 		netdev->features |= NETIF_F_HIGHDMA;
 	} else {
-		status = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+		status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (status) {
 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
 			goto free_netdev;
diff --git a/drivers/net/jme.c b/drivers/net/jme.c
index ece35040288c..621a7c0c46ba 100644
--- a/drivers/net/jme.c
+++ b/drivers/net/jme.c
@@ -2591,13 +2591,13 @@ static int
 jme_pci_dma64(struct pci_dev *pdev)
 {
 	if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 &&
-	    !pci_set_dma_mask(pdev, DMA_64BIT_MASK))
-		if (!pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))
+	    !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
+		if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))
 			return 1;
 
 	if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 &&
-	    !pci_set_dma_mask(pdev, DMA_40BIT_MASK))
-		if (!pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK))
+	    !pci_set_dma_mask(pdev, DMA_BIT_MASK(40)))
+		if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)))
 			return 1;
 
 	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))
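jme's 64/40/32 ladder is the usual descending-mask probe. A generic sketch of the idiom (a hypothetical helper, not part of the driver, and without jme's per-device gating):

    #include <linux/pci.h>
    #include <linux/dma-mapping.h>

    /* Try the widest mask first, fall back, and report the width that
     * stuck (or -EIO if none did). */
    static int set_widest_dma_mask(struct pci_dev *pdev)
    {
    	static const int widths[] = { 64, 40, 32 };
    	int i;

    	for (i = 0; i < ARRAY_SIZE(widths); i++) {
    		if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(widths[i])) &&
    		    !pci_set_consistent_dma_mask(pdev,
    						 DMA_BIT_MASK(widths[i])))
    			return widths[i];
    	}
    	return -EIO;
    }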
diff --git a/drivers/net/wireless/ath9k/pci.c b/drivers/net/wireless/ath9k/pci.c
index 6dbc58580abb..168411d322a2 100644
--- a/drivers/net/wireless/ath9k/pci.c
+++ b/drivers/net/wireless/ath9k/pci.c
@@ -93,14 +93,14 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (pci_enable_device(pdev))
 		return -EIO;
 
-	ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 
 	if (ret) {
 		printk(KERN_ERR "ath9k: 32-bit DMA not available\n");
 		goto bad;
 	}
 
-	ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+	ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 
 	if (ret) {
 		printk(KERN_ERR "ath9k: 32-bit DMA consistent "
diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
index e3569a0a952d..b1610ea4bb3d 100644
--- a/drivers/net/wireless/p54/p54pci.c
+++ b/drivers/net/wireless/p54/p54pci.c
@@ -492,8 +492,8 @@ static int __devinit p54p_probe(struct pci_dev *pdev,
 		goto err_disable_dev;
 	}
 
-	if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) ||
-	    pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) {
+	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) ||
+	    pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
 		dev_err(&pdev->dev, "No suitable DMA available\n");
 		goto err_free_reg;
 	}
diff --git a/drivers/parisc/superio.c b/drivers/parisc/superio.c
index 4fa3bb2ddfe4..33e5ade774ca 100644
--- a/drivers/parisc/superio.c
+++ b/drivers/parisc/superio.c
@@ -434,7 +434,8 @@ static void __init superio_parport_init(void)
 			0 /*base_hi*/,
 			PAR_IRQ,
 			PARPORT_DMA_NONE /* dma */,
-			NULL /*struct pci_dev* */) )
+			NULL /*struct pci_dev* */),
+			0 /* shared irq flags */ )
 
 		printk(KERN_WARNING PFX "Probing parallel port failed.\n");
 #endif	/* CONFIG_PARPORT_PC */
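parport_pc_probe_port() grew a trailing IRQ-flags argument in this cycle, which is what the extra `0 /* shared irq flags */` supplies. The prototype below is what this caller implies; treat it as an assumption and see include/linux/parport_pc.h for the authoritative form:

    /* Assumed prototype; a driver sharing its IRQ would pass IRQF_SHARED
     * instead of 0. */
    struct parport *parport_pc_probe_port(unsigned long base,
    				      unsigned long base_hi,
    				      int irq, int dma,
    				      struct device *dev,
    				      int irqflags);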
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index fdb14ec4fd47..8b7983aba8f7 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -2234,10 +2234,10 @@ static int twa_resume(struct pci_dev *pdev)
 	pci_set_master(pdev);
 	pci_try_set_mwi(pdev);
 
-	if (pci_set_dma_mask(pdev, DMA_64BIT_MASK)
-	    || pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))
-		if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)
-		    || pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) {
+	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))
+	    || pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))
+		if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
+		    || pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
 			TW_PRINTK(host, TW_DRIVER, 0x40, "Failed to set dma mask during resume");
 			retval = -ENODEV;
 			goto out_disable_device;
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 280261c451d6..2a889853a106 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -1378,7 +1378,7 @@ int aac_get_adapter_info(struct aac_dev* dev)
 	if (dev->nondasd_support && !dev->in_reset)
 		printk(KERN_INFO "%s%d: Non-DASD support enabled.\n",dev->name, dev->id);
 
-	if (dma_get_required_mask(&dev->pdev->dev) > DMA_32BIT_MASK)
+	if (dma_get_required_mask(&dev->pdev->dev) > DMA_BIT_MASK(32))
 		dev->needs_dac = 1;
 	dev->dac_support = 0;
 	if ((sizeof(dma_addr_t) > 4) && dev->needs_dac &&
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index b1bd3fc7bae8..36fd2e75da1c 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1394,7 +1394,7 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd,
 	 */
 	cmd->sense_buffer[8] = 0;     /* Information */
 	cmd->sense_buffer[9] = 0xa;   /* Add. length */
-	do_div(bghm, cmd->device->sector_size);
+	bghm /= cmd->device->sector_size;
 
 	failing_sector = scsi_get_lba(cmd);
 	failing_sector += bghm;
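The dropped do_div() call deserves a note, since its contract is easy to misuse: it divides a 64-bit lvalue in place and returns the 32-bit remainder, so it is the wrong tool for a dividend that is not a u64. A sketch of correct usage, next to the plain division the patch settles on:

    #include <linux/types.h>
    #include <asm/div64.h>

    static u64 bytes_to_sectors(u64 bytes, u32 sector_size)
    {
    	/* do_div() replaces its first argument with the quotient and
    	 * returns the remainder; the argument must be a u64 lvalue.
    	 * For a native-width dividend, as with bghm above, plain
    	 * division is simpler and correct. */
    	u32 rem = do_div(bytes, sector_size);

    	(void)rem;		/* remainder unused in this sketch */
    	return bytes;
    }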
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index 52427a8324f5..a91f5143ceac 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -855,9 +855,9 @@ _base_config_dma_addressing(struct MPT2SAS_ADAPTER *ioc, struct pci_dev *pdev)
855 if (sizeof(dma_addr_t) > 4) { 855 if (sizeof(dma_addr_t) > 4) {
856 const uint64_t required_mask = 856 const uint64_t required_mask =
857 dma_get_required_mask(&pdev->dev); 857 dma_get_required_mask(&pdev->dev);
858 if ((required_mask > DMA_32BIT_MASK) && !pci_set_dma_mask(pdev, 858 if ((required_mask > DMA_BIT_MASK(32)) && !pci_set_dma_mask(pdev,
859 DMA_64BIT_MASK) && !pci_set_consistent_dma_mask(pdev, 859 DMA_BIT_MASK(64)) && !pci_set_consistent_dma_mask(pdev,
860 DMA_64BIT_MASK)) { 860 DMA_BIT_MASK(64))) {
861 ioc->base_add_sg_single = &_base_add_sg_single_64; 861 ioc->base_add_sg_single = &_base_add_sg_single_64;
862 ioc->sge_size = sizeof(Mpi2SGESimple64_t); 862 ioc->sge_size = sizeof(Mpi2SGESimple64_t);
863 desc = "64"; 863 desc = "64";
@@ -865,8 +865,8 @@ _base_config_dma_addressing(struct MPT2SAS_ADAPTER *ioc, struct pci_dev *pdev)
865 } 865 }
866 } 866 }
867 867
868 if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK) 868 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))
869 && !pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) { 869 && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
870 ioc->base_add_sg_single = &_base_add_sg_single_32; 870 ioc->base_add_sg_single = &_base_add_sg_single_32;
871 ioc->sge_size = sizeof(Mpi2SGESimple32_t); 871 ioc->sge_size = sizeof(Mpi2SGESimple32_t);
872 desc = "32"; 872 desc = "32";
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 643908b74bc0..8eba98c8ed1e 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -658,7 +658,7 @@ int spi_write_then_read(struct spi_device *spi,
 
 	int			status;
 	struct spi_message	message;
-	struct spi_transfer	x;
+	struct spi_transfer	x[2];
 	u8			*local_buf;
 
 	/* Use preallocated DMA-safe buffer.  We can't avoid copying here,
@@ -669,9 +669,15 @@ int spi_write_then_read(struct spi_device *spi,
 		return -EINVAL;
 
 	spi_message_init(&message);
-	memset(&x, 0, sizeof x);
-	x.len = n_tx + n_rx;
-	spi_message_add_tail(&x, &message);
+	memset(x, 0, sizeof x);
+	if (n_tx) {
+		x[0].len = n_tx;
+		spi_message_add_tail(&x[0], &message);
+	}
+	if (n_rx) {
+		x[1].len = n_rx;
+		spi_message_add_tail(&x[1], &message);
+	}
 
 	/* ... unless someone else is using the pre-allocated buffer */
 	if (!mutex_trylock(&lock)) {
@@ -682,15 +688,15 @@ int spi_write_then_read(struct spi_device *spi,
 		local_buf = buf;
 
 	memcpy(local_buf, txbuf, n_tx);
-	x.tx_buf = local_buf;
-	x.rx_buf = local_buf;
+	x[0].tx_buf = local_buf;
+	x[1].rx_buf = local_buf + n_tx;
 
 	/* do the i/o */
 	status = spi_sync(spi, &message);
 	if (status == 0)
-		memcpy(rxbuf, x.rx_buf + n_tx, n_rx);
+		memcpy(rxbuf, x[1].rx_buf, n_rx);
 
-	if (x.tx_buf == buf)
+	if (x[0].tx_buf == buf)
 		mutex_unlock(&lock);
 	else
 		kfree(local_buf);
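After this fix, the helper queues a pure-write transfer followed by a pure-read transfer, so the received bytes start at offset 0 of the rx segment instead of trailing the echoed command bytes. A hedged usage sketch, with the command byte and lengths invented for illustration:

    /* Hypothetical caller: send one command byte, read two bytes back. */
    static int read_chip_id(struct spi_device *spi, u8 *id /* 2 bytes */)
    {
    	u8 cmd = 0x8f;		/* invented command/register value */

    	return spi_write_then_read(spi, &cmd, 1, id, 2);
    }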
diff --git a/drivers/staging/b3dfg/b3dfg.c b/drivers/staging/b3dfg/b3dfg.c
index 0348072b3ab5..75ebe338c6f2 100644
--- a/drivers/staging/b3dfg/b3dfg.c
+++ b/drivers/staging/b3dfg/b3dfg.c
@@ -1000,7 +1000,7 @@ static int __devinit b3dfg_probe(struct pci_dev *pdev,
 
 	pci_set_master(pdev);
 
-	r = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+	r = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 	if (r) {
 		dev_err(&pdev->dev, "no usable DMA configuration\n");
 		goto err_free_res;
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 869d47cb6db3..0a69c0977e3f 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -546,10 +546,6 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
 	tty->driver_data = acm;
 	acm->tty = tty;
 
-	/* force low_latency on so that our tty_push actually forces the data through,
-	   otherwise it is scheduled, and with high data rates data can get lost. */
-	tty->low_latency = 1;
-
 	if (usb_autopm_get_interface(acm->control) < 0)
 		goto early_bail;
 	else
diff --git a/drivers/usb/otg/nop-usb-xceiv.c b/drivers/usb/otg/nop-usb-xceiv.c
index 4b933f646f2e..c567168f89af 100644
--- a/drivers/usb/otg/nop-usb-xceiv.c
+++ b/drivers/usb/otg/nop-usb-xceiv.c
@@ -36,14 +36,14 @@ struct nop_usb_xceiv {
 	struct device		*dev;
 };
 
-static u64 nop_xceiv_dmamask = DMA_32BIT_MASK;
+static u64 nop_xceiv_dmamask = DMA_BIT_MASK(32);
 
 static struct platform_device nop_xceiv_device = {
 	.name           = "nop_usb_xceiv",
 	.id             = -1,
 	.dev = {
 		.dma_mask               = &nop_xceiv_dmamask,
-		.coherent_dma_mask      = DMA_32BIT_MASK,
+		.coherent_dma_mask      = DMA_BIT_MASK(32),
 		.platform_data          = NULL,
 	},
 };
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 2620bf6fe5e1..9c4c700c7cc6 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -1215,20 +1215,22 @@ static void ti_bulk_in_callback(struct urb *urb)
 	}
 
 	tty = tty_port_tty_get(&port->port);
-	if (tty && urb->actual_length) {
-		usb_serial_debug_data(debug, dev, __func__,
-			urb->actual_length, urb->transfer_buffer);
-
-		if (!tport->tp_is_open)
-			dbg("%s - port closed, dropping data", __func__);
-		else
-			ti_recv(&urb->dev->dev, tty,
+	if (tty) {
+		if (urb->actual_length) {
+			usb_serial_debug_data(debug, dev, __func__,
+				urb->actual_length, urb->transfer_buffer);
+
+			if (!tport->tp_is_open)
+				dbg("%s - port closed, dropping data",
+					__func__);
+			else
+				ti_recv(&urb->dev->dev, tty,
 				urb->transfer_buffer,
 				urb->actual_length);
-
-		spin_lock(&tport->tp_lock);
-		tport->tp_icount.rx += urb->actual_length;
-		spin_unlock(&tport->tp_lock);
+			spin_lock(&tport->tp_lock);
+			tport->tp_icount.rx += urb->actual_length;
+			spin_unlock(&tport->tp_lock);
+		}
 		tty_kref_put(tty);
 	}
 
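The restructuring matters because tty_port_tty_get() takes a reference that the old code leaked whenever a zero-length URB arrived (the put sat inside the actual_length branch). The pattern, reduced to its skeleton; push_data() is a hypothetical stand-in for the debug/ti_recv/icount work:

    static void handle_bulk_in(struct usb_serial_port *port, struct urb *urb)
    {
    	struct tty_struct *tty = tty_port_tty_get(&port->port);

    	if (tty) {
    		if (urb->actual_length)
    			push_data(tty, urb);	/* hypothetical helper */
    		tty_kref_put(tty);	/* drop the ref on every path */
    	}
    }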
diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 16bb7e3c0310..6c37e8ee5efe 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -698,8 +698,8 @@ static void __devinit radeon_get_pllinfo(struct radeonfb_info *rinfo)
 found:
 	/*
 	 * Some methods fail to retrieve SCLK and MCLK values, we apply default
-	 * settings in this case (200Mhz). If that really happne often, we could
-	 * fetch from registers instead...
+	 * settings in this case (200Mhz). If that really happens often, we
+	 * could fetch from registers instead...
 	 */
 	if (rinfo->pll.mclk == 0)
 		rinfo->pll.mclk = 20000;
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index dd37cbcaf8ce..157057c79ca3 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -35,8 +35,6 @@ static int fb_notifier_callback(struct notifier_block *self,
 		return 0;
 
 	bd = container_of(self, struct backlight_device, fb_notif);
-	if (!lock_fb_info(evdata->info))
-		return -ENODEV;
 	mutex_lock(&bd->ops_lock);
 	if (bd->ops)
 		if (!bd->ops->check_fb ||
@@ -49,7 +47,6 @@ static int fb_notifier_callback(struct notifier_block *self,
 			backlight_update_status(bd);
 		}
 	mutex_unlock(&bd->ops_lock);
-	unlock_fb_info(evdata->info);
 	return 0;
 }
 
diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c
index 0bb13df0fa89..b6449470106c 100644
--- a/drivers/video/backlight/lcd.c
+++ b/drivers/video/backlight/lcd.c
@@ -40,8 +40,6 @@ static int fb_notifier_callback(struct notifier_block *self,
 	if (!ld->ops)
 		return 0;
 
-	if (!lock_fb_info(evdata->info))
-		return -ENODEV;
 	mutex_lock(&ld->ops_lock);
 	if (!ld->ops->check_fb || ld->ops->check_fb(ld, evdata->info)) {
 		if (event == FB_EVENT_BLANK) {
@@ -53,7 +51,6 @@ static int fb_notifier_callback(struct notifier_block *self,
 		}
 	}
 	mutex_unlock(&ld->ops_lock);
-	unlock_fb_info(evdata->info);
 	return 0;
 }
 
diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c
index d42e385f091c..4c2bf923418c 100644
--- a/drivers/video/cirrusfb.c
+++ b/drivers/video/cirrusfb.c
@@ -567,9 +567,7 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var,
 	default:
 		dev_dbg(info->device,
 			"Unsupported bpp size: %d\n", var->bits_per_pixel);
-		assert(false);
-		/* should never occur */
-		break;
+		return -EINVAL;
 	}
 
 	if (var->xres_virtual < var->xres)
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 2cd500a304f2..471a9a60376a 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -2263,9 +2263,12 @@ static void fbcon_generic_blank(struct vc_data *vc, struct fb_info *info,
 	}
 
 
+	if (!lock_fb_info(info))
+		return;
 	event.info = info;
 	event.data = &blank;
 	fb_notifier_call_chain(FB_EVENT_CONBLANK, &event);
+	unlock_fb_info(info);
 }
 
 static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch)
@@ -2956,8 +2959,6 @@ static int fbcon_fb_unregistered(struct fb_info *info)
 {
 	int i, idx;
 
-	if (!lock_fb_info(info))
-		return -ENODEV;
 	idx = info->node;
 	for (i = first_fb_vc; i <= last_fb_vc; i++) {
 		if (con2fb_map[i] == idx)
@@ -2985,8 +2986,6 @@ static int fbcon_fb_unregistered(struct fb_info *info)
 	if (primary_device == idx)
 		primary_device = -1;
 
-	unlock_fb_info(info);
-
 	if (!num_registered_fb)
 		unregister_con_driver(&fb_con);
 
@@ -3027,11 +3026,8 @@ static int fbcon_fb_registered(struct fb_info *info)
 {
 	int ret = 0, i, idx;
 
-	if (!lock_fb_info(info))
-		return -ENODEV;
 	idx = info->node;
 	fbcon_select_primary(info);
-	unlock_fb_info(info);
 
 	if (info_idx == -1) {
 		for (i = first_fb_vc; i <= last_fb_vc; i++) {
@@ -3152,53 +3148,23 @@ static int fbcon_event_notify(struct notifier_block *self,
 
 	switch(action) {
 	case FB_EVENT_SUSPEND:
-		if (!lock_fb_info(info)) {
-			ret = -ENODEV;
-			goto done;
-		}
 		fbcon_suspended(info);
-		unlock_fb_info(info);
 		break;
 	case FB_EVENT_RESUME:
-		if (!lock_fb_info(info)) {
-			ret = -ENODEV;
-			goto done;
-		}
 		fbcon_resumed(info);
-		unlock_fb_info(info);
 		break;
 	case FB_EVENT_MODE_CHANGE:
-		if (!lock_fb_info(info)) {
-			ret = -ENODEV;
-			goto done;
-		}
 		fbcon_modechanged(info);
-		unlock_fb_info(info);
 		break;
 	case FB_EVENT_MODE_CHANGE_ALL:
-		if (!lock_fb_info(info)) {
-			ret = -ENODEV;
-			goto done;
-		}
 		fbcon_set_all_vcs(info);
-		unlock_fb_info(info);
 		break;
 	case FB_EVENT_MODE_DELETE:
 		mode = event->data;
-		if (!lock_fb_info(info)) {
-			ret = -ENODEV;
-			goto done;
-		}
 		ret = fbcon_mode_deleted(info, mode);
-		unlock_fb_info(info);
 		break;
 	case FB_EVENT_FB_UNBIND:
-		if (!lock_fb_info(info)) {
-			ret = -ENODEV;
-			goto done;
-		}
 		idx = info->node;
-		unlock_fb_info(info);
 		ret = fbcon_fb_unbind(idx);
 		break;
 	case FB_EVENT_FB_REGISTERED:
@@ -3217,29 +3183,14 @@ static int fbcon_event_notify(struct notifier_block *self,
 		con2fb->framebuffer = con2fb_map[con2fb->console - 1];
 		break;
 	case FB_EVENT_BLANK:
-		if (!lock_fb_info(info)) {
-			ret = -ENODEV;
-			goto done;
-		}
 		fbcon_fb_blanked(info, *(int *)event->data);
-		unlock_fb_info(info);
 		break;
 	case FB_EVENT_NEW_MODELIST:
-		if (!lock_fb_info(info)) {
-			ret = -ENODEV;
-			goto done;
-		}
 		fbcon_new_modelist(info);
-		unlock_fb_info(info);
 		break;
 	case FB_EVENT_GET_REQ:
 		caps = event->data;
-		if (!lock_fb_info(info)) {
-			ret = -ENODEV;
-			goto done;
-		}
 		fbcon_get_requirement(info, caps);
-		unlock_fb_info(info);
 		break;
 	}
 done:
diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c
index 0c5b9a9fd56f..8dea2bc92705 100644
--- a/drivers/video/efifb.c
+++ b/drivers/video/efifb.c
@@ -210,12 +210,15 @@ static int __init efifb_probe(struct platform_device *dev)
 	unsigned int size_total;
 	int request_succeeded = 0;
 
-	printk(KERN_INFO "efifb: probing for efifb\n");
-
 	if (!screen_info.lfb_depth)
 		screen_info.lfb_depth = 32;
 	if (!screen_info.pages)
 		screen_info.pages = 1;
+	if (!screen_info.lfb_base) {
+		printk(KERN_DEBUG "efifb: invalid framebuffer address\n");
+		return -ENODEV;
+	}
+	printk(KERN_INFO "efifb: probing for efifb\n");
 
 	/* just assume they're all unset if any are */
 	if (!screen_info.blue_size) {
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 2ac32e6b5953..d412a1ddc12f 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1097,8 +1097,11 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
 			return -EINVAL;
 		con2fb.framebuffer = -1;
 		event.data = &con2fb;
+		if (!lock_fb_info(info))
+			return -ENODEV;
 		event.info = info;
 		fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event);
+		unlock_fb_info(info);
 		ret = copy_to_user(argp, &con2fb, sizeof(con2fb)) ? -EFAULT : 0;
 		break;
 	case FBIOPUT_CON2FBMAP:
@@ -1115,8 +1118,11 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
 			break;
 		}
 		event.data = &con2fb;
+		if (!lock_fb_info(info))
+			return -ENODEV;
 		event.info = info;
 		ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, &event);
+		unlock_fb_info(info);
 		break;
 	case FBIOBLANK:
 		if (!lock_fb_info(info))
@@ -1521,7 +1527,10 @@ register_framebuffer(struct fb_info *fb_info)
 	registered_fb[i] = fb_info;
 
 	event.info = fb_info;
+	if (!lock_fb_info(fb_info))
+		return -ENODEV;
 	fb_notifier_call_chain(FB_EVENT_FB_REGISTERED, &event);
+	unlock_fb_info(fb_info);
 	return 0;
 }
 
@@ -1555,8 +1564,12 @@ unregister_framebuffer(struct fb_info *fb_info)
 		goto done;
 	}
 
+
+	if (!lock_fb_info(fb_info))
+		return -ENODEV;
 	event.info = fb_info;
 	ret = fb_notifier_call_chain(FB_EVENT_FB_UNBIND, &event);
+	unlock_fb_info(fb_info);
 
 	if (ret) {
 		ret = -EINVAL;
@@ -1590,6 +1603,8 @@ void fb_set_suspend(struct fb_info *info, int state)
 {
 	struct fb_event event;
 
+	if (!lock_fb_info(info))
+		return;
 	event.info = info;
 	if (state) {
 		fb_notifier_call_chain(FB_EVENT_SUSPEND, &event);
@@ -1598,6 +1613,7 @@ void fb_set_suspend(struct fb_info *info, int state)
 		info->state = FBINFO_STATE_RUNNING;
 		fb_notifier_call_chain(FB_EVENT_RESUME, &event);
 	}
+	unlock_fb_info(info);
 }
 
 /**
@@ -1667,8 +1683,11 @@ int fb_new_modelist(struct fb_info *info)
 		err = 1;
 
 	if (!list_empty(&info->modelist)) {
+		if (!lock_fb_info(info))
+			return -ENODEV;
 		event.info = info;
 		err = fb_notifier_call_chain(FB_EVENT_NEW_MODELIST, &event);
+		unlock_fb_info(info);
 	}
 
 	return err;
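Together with the backlight, lcd and fbcon hunks earlier in this series, these changes settle on one locking rule: whoever raises the event holds the fb_info lock across fb_notifier_call_chain(), and notifier callbacks may assume it is held. The call-site shape now repeated through fbmem.c, condensed into a hypothetical helper for clarity (fbmem.c open-codes it at each site):

    static int fb_notify_locked(struct fb_info *info, unsigned long what,
    			    void *data)
    {
    	struct fb_event event;

    	if (!lock_fb_info(info))	/* fb dead or being torn down */
    		return -ENODEV;
    	event.info = info;
    	event.data = data;
    	fb_notifier_call_chain(what, &event);
    	unlock_fb_info(info);
    	return 0;
    }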
diff --git a/drivers/video/intelfb/intelfb.h b/drivers/video/intelfb/intelfb.h
index a50bea614804..40984551c927 100644
--- a/drivers/video/intelfb/intelfb.h
+++ b/drivers/video/intelfb/intelfb.h
@@ -53,6 +53,7 @@
 #define PCI_DEVICE_ID_INTEL_830M	0x3577
 #define PCI_DEVICE_ID_INTEL_845G	0x2562
 #define PCI_DEVICE_ID_INTEL_85XGM	0x3582
+#define PCI_DEVICE_ID_INTEL_854		0x358E
 #define PCI_DEVICE_ID_INTEL_865G	0x2572
 #define PCI_DEVICE_ID_INTEL_915G	0x2582
 #define PCI_DEVICE_ID_INTEL_915GM	0x2592
@@ -154,6 +155,7 @@ enum intel_chips {
 	INTEL_85XGM,
 	INTEL_852GM,
 	INTEL_852GME,
+	INTEL_854,
 	INTEL_855GM,
 	INTEL_855GME,
 	INTEL_865G,
diff --git a/drivers/video/intelfb/intelfb_i2c.c b/drivers/video/intelfb/intelfb_i2c.c
index b3065492bb20..487f2be47460 100644
--- a/drivers/video/intelfb/intelfb_i2c.c
+++ b/drivers/video/intelfb/intelfb_i2c.c
@@ -156,6 +156,7 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo)
 	switch(dinfo->chipset) {
 	case INTEL_830M:
 	case INTEL_845G:
+	case INTEL_854:
 	case INTEL_855GM:
 	case INTEL_865G:
 		dinfo->output[i].type = INTELFB_OUTPUT_DVO;
diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c
index 6d8e5415c809..ace14fe02fc4 100644
--- a/drivers/video/intelfb/intelfbdrv.c
+++ b/drivers/video/intelfb/intelfbdrv.c
@@ -182,6 +182,7 @@ static struct pci_device_id intelfb_pci_table[] __devinitdata = {
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_845G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_845G },
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_85XGM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_85XGM },
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_865G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_865G },
+	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_854, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_854 },
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915G },
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915GM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915GM },
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_945G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_945G },
diff --git a/drivers/video/intelfb/intelfbhw.c b/drivers/video/intelfb/intelfbhw.c
index 8b26b27c2db6..0689f97c5238 100644
--- a/drivers/video/intelfb/intelfbhw.c
+++ b/drivers/video/intelfb/intelfbhw.c
@@ -84,6 +84,11 @@ int intelfbhw_get_chipset(struct pci_dev *pdev, struct intelfb_info *dinfo)
 		dinfo->mobile = 0;
 		dinfo->pll_index = PLLS_I8xx;
 		return 0;
+	case PCI_DEVICE_ID_INTEL_854:
+		dinfo->mobile = 1;
+		dinfo->name = "Intel(R) 854";
+		dinfo->chipset = INTEL_854;
+		return 0;
 	case PCI_DEVICE_ID_INTEL_85XGM:
 		tmp = 0;
 		dinfo->mobile = 1;
diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c
index 4dcec48a1d78..c3fad34309ed 100644
--- a/drivers/video/s3fb.c
+++ b/drivers/video/s3fb.c
@@ -45,11 +45,11 @@ struct s3fb_info {
 static const struct svga_fb_format s3fb_formats[] = {
 	{ 0,  {0, 6, 0},  {0, 6, 0},  {0, 6, 0}, {0, 0, 0}, 0,
 		FB_TYPE_TEXT, FB_AUX_TEXT_SVGA_STEP4,	FB_VISUAL_PSEUDOCOLOR, 8, 16},
-	{ 4,  {0, 6, 0},  {0, 6, 0},  {0, 6, 0}, {0, 0, 0}, 0,
+	{ 4,  {0, 4, 0},  {0, 4, 0},  {0, 4, 0}, {0, 0, 0}, 0,
 		FB_TYPE_PACKED_PIXELS, 0,		FB_VISUAL_PSEUDOCOLOR, 8, 16},
-	{ 4,  {0, 6, 0},  {0, 6, 0},  {0, 6, 0}, {0, 0, 0}, 1,
+	{ 4,  {0, 4, 0},  {0, 4, 0},  {0, 4, 0}, {0, 0, 0}, 1,
 		FB_TYPE_INTERLEAVED_PLANES, 1,		FB_VISUAL_PSEUDOCOLOR, 8, 16},
-	{ 8,  {0, 6, 0},  {0, 6, 0},  {0, 6, 0}, {0, 0, 0}, 0,
+	{ 8,  {0, 8, 0},  {0, 8, 0},  {0, 8, 0}, {0, 0, 0}, 0,
 		FB_TYPE_PACKED_PIXELS, 0,		FB_VISUAL_PSEUDOCOLOR, 4, 8},
 	{16,  {10, 5, 0}, {5, 5, 0}, {0, 5, 0}, {0, 0, 0}, 0,
 		FB_TYPE_PACKED_PIXELS, 0,		FB_VISUAL_TRUECOLOR, 2, 4},
diff --git a/drivers/video/sa1100fb.c b/drivers/video/sa1100fb.c
index fad58cf9ef73..10ddad8e17d6 100644
--- a/drivers/video/sa1100fb.c
+++ b/drivers/video/sa1100fb.c
@@ -199,16 +199,20 @@
 extern void (*sa1100fb_backlight_power)(int on);
 extern void (*sa1100fb_lcd_power)(int on);
 
-/*
- * IMHO this looks wrong.  In 8BPP, length should be 8.
- */
-static struct sa1100fb_rgb rgb_8 = {
+static struct sa1100fb_rgb rgb_4 = {
 	.red	= { .offset = 0,  .length = 4, },
 	.green	= { .offset = 0,  .length = 4, },
 	.blue	= { .offset = 0,  .length = 4, },
 	.transp	= { .offset = 0,  .length = 0, },
 };
 
+static struct sa1100fb_rgb rgb_8 = {
+	.red	= { .offset = 0,  .length = 8, },
+	.green	= { .offset = 0,  .length = 8, },
+	.blue	= { .offset = 0,  .length = 8, },
+	.transp	= { .offset = 0,  .length = 0, },
+};
+
 static struct sa1100fb_rgb def_rgb_16 = {
 	.red	= { .offset = 11, .length = 5, },
 	.green	= { .offset = 5,  .length = 6, },
@@ -613,7 +617,7 @@ sa1100fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	DPRINTK("var->bits_per_pixel=%d\n", var->bits_per_pixel);
 	switch (var->bits_per_pixel) {
 	case 4:
-		rgbidx = RGB_8;
+		rgbidx = RGB_4;
 		break;
 	case 8:
 		rgbidx = RGB_8;
@@ -1382,6 +1386,7 @@ static struct sa1100fb_info * __init sa1100fb_init_fbinfo(struct device *dev)
 	fbi->fb.monspecs	= monspecs;
 	fbi->fb.pseudo_palette	= (fbi + 1);
 
+	fbi->rgb[RGB_4]		= &rgb_4;
 	fbi->rgb[RGB_8]		= &rgb_8;
 	fbi->rgb[RGB_16]	= &def_rgb_16;
 
diff --git a/drivers/video/sa1100fb.h b/drivers/video/sa1100fb.h
index 86831db9a042..1c3b459865d8 100644
--- a/drivers/video/sa1100fb.h
+++ b/drivers/video/sa1100fb.h
@@ -57,9 +57,10 @@ struct sa1100fb_lcd_reg {
 	unsigned long lccr3;
 };
 
-#define RGB_8	(0)
-#define RGB_16	(1)
-#define NR_RGB	2
+#define RGB_4	(0)
+#define RGB_8	(1)
+#define RGB_16	(2)
+#define NR_RGB	3
 
 struct sa1100fb_info {
 	struct fb_info		fb;
diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c
index 346d6458cf76..7e17ee95a97a 100644
--- a/drivers/video/sis/sis_main.c
+++ b/drivers/video/sis/sis_main.c
@@ -1129,7 +1129,7 @@ sisfb_bpp_to_var(struct sis_video_info *ivideo, struct fb_var_screeninfo *var)
 	switch(var->bits_per_pixel) {
 	case 8:
 		var->red.offset = var->green.offset = var->blue.offset = 0;
-		var->red.length = var->green.length = var->blue.length = 6;
+		var->red.length = var->green.length = var->blue.length = 8;
 		break;
 	case 16:
 		var->red.offset = 11;
diff --git a/drivers/video/skeletonfb.c b/drivers/video/skeletonfb.c
index a439159204a8..89158bc71da2 100644
--- a/drivers/video/skeletonfb.c
+++ b/drivers/video/skeletonfb.c
@@ -308,9 +308,11 @@ static int xxxfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	 *   color depth = SUM(var->{color}.length)
 	 *
 	 * Pseudocolor:
-	 *    var->{color}.offset is 0
-	 *    var->{color}.length contains width of DAC or the number of unique
-	 *			colors available (color depth)
+	 *    var->{color}.offset is 0 unless the palette index takes less than
+	 *                        bits_per_pixel bits and is stored in the upper
+	 *                        bits of the pixel value
+	 *    var->{color}.length is set so that 1 << length is the number of
+	 *                        available palette entries
 	 *    pseudo_palette is not used
 	 *    RAMDAC[X] is programmed to (red, green, blue)
 	 *    color depth = var->{color}.length
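A concrete instance of the corrected rule, which is also what the s3fb, sa1100fb and sisfb hunks elsewhere in this series implement: the bitfield length describes the palette index width, not the DAC width.

    /* Worked example: an 8 bpp pseudocolor mode, where the pixel value
     * is a palette index. */
    static void example_8bpp_pseudocolor(struct fb_var_screeninfo *var)
    {
    	var->bits_per_pixel = 8;
    	var->red.offset   = 0;
    	var->red.length   = 8;	/* 1 << 8 == 256 palette entries */
    	var->green.offset = 0;
    	var->green.length = 8;
    	var->blue.offset  = 0;
    	var->blue.length  = 8;	/* DAC precision is reported elsewhere */
    }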
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 0b370aebdbfd..421770b5e6ab 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -55,6 +55,7 @@ static u16 maxvf __devinitdata; /* maximum vertical frequency */
 static u16 maxhf __devinitdata; /* maximum horizontal frequency */
 static u16 vbemode __devinitdata; /* force use of a specific VBE mode */
 static char *mode_option __devinitdata;
+static u8 dac_width = 6;
 
 static struct uvesafb_ktask *uvfb_tasks[UVESAFB_TASKS_MAX];
 static DEFINE_MUTEX(uvfb_lock);
@@ -303,22 +304,10 @@ static void uvesafb_setup_var(struct fb_var_screeninfo *var,
 		var->blue.offset = 0;
 		var->transp.offset = 0;
 
-		/*
-		 * We're assuming that we can switch the DAC to 8 bits. If
-		 * this proves to be incorrect, we'll update the fields
-		 * later in set_par().
-		 */
-		if (par->vbe_ib.capabilities & VBE_CAP_CAN_SWITCH_DAC) {
-			var->red.length    = 8;
-			var->green.length  = 8;
-			var->blue.length   = 8;
-			var->transp.length = 0;
-		} else {
-			var->red.length    = 6;
-			var->green.length  = 6;
-			var->blue.length   = 6;
-			var->transp.length = 0;
-		}
+		var->red.length    = 8;
+		var->green.length  = 8;
+		var->blue.length   = 8;
+		var->transp.length = 0;
 	}
 }
 
@@ -1006,7 +995,7 @@ static int uvesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
 		struct fb_info *info)
 {
 	struct uvesafb_pal_entry entry;
-	int shift = 16 - info->var.green.length;
+	int shift = 16 - dac_width;
 	int err = 0;
 
 	if (regno >= info->cmap.len)
@@ -1055,7 +1044,7 @@ static int uvesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
 static int uvesafb_setcmap(struct fb_cmap *cmap, struct fb_info *info)
 {
 	struct uvesafb_pal_entry *entries;
-	int shift = 16 - info->var.green.length;
+	int shift = 16 - dac_width;
 	int i, err = 0;
 
 	if (info->var.bits_per_pixel == 8) {
@@ -1317,13 +1306,9 @@ setmode:
 		err = uvesafb_exec(task);
 		if (err || (task->t.regs.eax & 0xffff) != 0x004f ||
 		    ((task->t.regs.ebx & 0xff00) >> 8) != 8) {
-			/*
-			 * We've failed to set the DAC palette format -
-			 * time to correct var.
-			 */
-			info->var.red.length = 6;
-			info->var.green.length = 6;
-			info->var.blue.length = 6;
+			dac_width = 6;
+		} else {
+			dac_width = 8;
 		}
 	}
 
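The uvesafb changes above collapse the per-component palette lengths into one module-level dac_width, so both setcolreg paths compute the same shift. A standalone sketch of that scaling, not taken from the driver (values illustrative): the shift drops a 16-bit color component to whatever width the DAC accepted.

#include <stdio.h>

/* Sketch: mirror of the "shift = 16 - dac_width" scaling above.
 * dac_width would be 8 when the VBE DAC switch succeeds, 6 otherwise. */
static unsigned char scale_component(unsigned short c16, int dac_width)
{
        return (unsigned char)(c16 >> (16 - dac_width));
}

int main(void)
{
        unsigned short red = 0xffff;    /* full-intensity 16-bit component */

        printf("8-bit DAC: %u\n", scale_component(red, 8));     /* 255 */
        printf("6-bit DAC: %u\n", scale_component(red, 6));     /* 63 */
        return 0;
}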
diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c
index cc919ae46571..050d432c7d95 100644
--- a/drivers/video/vfb.c
+++ b/drivers/video/vfb.c
@@ -318,13 +318,16 @@ static int vfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
  *   {hardwarespecific} contains width of RAMDAC
  *   cmap[X] is programmed to (X << red.offset) | (X << green.offset) | (X << blue.offset)
  *   RAMDAC[X] is programmed to (red, green, blue)
  *
  * Pseudocolor:
- *    uses offset = 0 && length = RAMDAC register width.
- *    var->{color}.offset is 0
- *    var->{color}.length contains widht of DAC
+ *    var->{color}.offset is 0 unless the palette index takes less than
+ *    bits_per_pixel bits and is stored in the upper
+ *    bits of the pixel value
+ *    var->{color}.length is set so that 1 << length is the number of available
+ *    palette entries
  *    cmap is not used
  *    RAMDAC[X] is programmed to (red, green, blue)
+ *
  * Truecolor:
  *    does not use DAC. Usually 3 are present.
  *    var->{color}.offset contains start of bitfield
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 5f54c01c1568..bdfd584ad853 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -21,29 +21,41 @@ static void disable_hotplug_cpu(int cpu)
 	set_cpu_present(cpu, false);
 }
 
-static void vcpu_hotplug(unsigned int cpu)
+static int vcpu_online(unsigned int cpu)
 {
 	int err;
 	char dir[32], state[32];
 
-	if (!cpu_possible(cpu))
-		return;
-
 	sprintf(dir, "cpu/%u", cpu);
 	err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
 	if (err != 1) {
 		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
-		return;
+		return err;
 	}
 
-	if (strcmp(state, "online") == 0) {
+	if (strcmp(state, "online") == 0)
+		return 1;
+	else if (strcmp(state, "offline") == 0)
+		return 0;
+
+	printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu);
+	return -EINVAL;
+}
+static void vcpu_hotplug(unsigned int cpu)
+{
+	if (!cpu_possible(cpu))
+		return;
+
+	switch (vcpu_online(cpu)) {
+	case 1:
 		enable_hotplug_cpu(cpu);
-	} else if (strcmp(state, "offline") == 0) {
+		break;
+	case 0:
 		(void)cpu_down(cpu);
 		disable_hotplug_cpu(cpu);
-	} else {
-		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
-		       state, cpu);
-	}
+		break;
+	default:
+		break;
+	}
 }
 
49 61
@@ -64,12 +76,20 @@ static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
 static int setup_cpu_watcher(struct notifier_block *notifier,
 			      unsigned long event, void *data)
 {
+	int cpu;
 	static struct xenbus_watch cpu_watch = {
 		.node = "cpu",
 		.callback = handle_vcpu_hotplug_event};
 
 	(void)register_xenbus_watch(&cpu_watch);
 
+	for_each_possible_cpu(cpu) {
+		if (vcpu_online(cpu) == 0) {
+			(void)cpu_down(cpu);
+			cpu_clear(cpu, cpu_present_map);
+		}
+	}
+
 	return NOTIFY_DONE;
 }
 
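Factoring the xenstore parse into vcpu_online() yields a tri-state result (1 online, 0 offline, negative errno) that both the watch callback and setup_cpu_watcher() can dispatch on. A hypothetical userspace sketch of the same parse-and-dispatch shape (names illustrative, not kernel API):

#include <stdio.h>
#include <string.h>
#include <errno.h>

/* Sketch of the tri-state helper: 1 = online, 0 = offline,
 * negative errno on anything unexpected. */
static int parse_cpu_state(const char *state)
{
        if (strcmp(state, "online") == 0)
                return 1;
        if (strcmp(state, "offline") == 0)
                return 0;
        return -EINVAL;
}

int main(void)
{
        const char *samples[] = { "online", "offline", "broken" };

        for (int i = 0; i < 3; i++) {
                switch (parse_cpu_state(samples[i])) {
                case 1:
                        printf("%s -> bring cpu up\n", samples[i]);
                        break;
                case 0:
                        printf("%s -> take cpu down\n", samples[i]);
                        break;
                default:
                        printf("%s -> ignore\n", samples[i]);
                        break;
                }
        }
        return 0;
}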
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 0d61db1e7b49..4b5b84837ee1 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -62,14 +62,15 @@ static int xen_suspend(void *data)
 	gnttab_resume();
 	xen_mm_unpin_all();
 
-	sysdev_resume();
-
 	if (!*cancelled) {
 		xen_irq_resume();
 		xen_console_resume();
 		xen_timer_resume();
 	}
 
+	sysdev_resume();
+	device_power_up(PMSG_RESUME);
+
 	return 0;
 }
 
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b43b95563663..acf678831103 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -590,9 +590,8 @@ static int ext2_get_blocks(struct inode *inode,
 
 	if (depth == 0)
 		return (err);
-reread:
-	partial = ext2_get_branch(inode, depth, offsets, chain, &err);
 
+	partial = ext2_get_branch(inode, depth, offsets, chain, &err);
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		first_block = le32_to_cpu(chain[depth - 1].key);
@@ -602,15 +601,16 @@ reread:
 		while (count < maxblocks && count <= blocks_to_boundary) {
 			ext2_fsblk_t blk;
 
-			if (!verify_chain(chain, partial)) {
+			if (!verify_chain(chain, chain + depth - 1)) {
 				/*
 				 * Indirect block might be removed by
 				 * truncate while we were reading it.
 				 * Handling of that case: forget what we've
 				 * got now, go to reread.
 				 */
+				err = -EAGAIN;
 				count = 0;
-				goto changed;
+				break;
 			}
 			blk = le32_to_cpu(*(chain[depth-1].p + count));
 			if (blk == first_block + count)
@@ -618,7 +618,8 @@ reread:
 			else
 				break;
 		}
-		goto got_it;
+		if (err != -EAGAIN)
+			goto got_it;
 	}
 
 	/* Next simple case - plain lookup or failed read of indirect block */
@@ -626,6 +627,33 @@ reread:
 		goto cleanup;
 
 	mutex_lock(&ei->truncate_mutex);
+	/*
+	 * If the indirect block is missing while we are reading
+	 * the chain(ext3_get_branch() returns -EAGAIN err), or
+	 * if the chain has been changed after we grab the semaphore,
+	 * (either because another process truncated this branch, or
+	 * another get_block allocated this branch) re-grab the chain to see if
+	 * the request block has been allocated or not.
+	 *
+	 * Since we already block the truncate/other get_block
+	 * at this point, we will have the current copy of the chain when we
+	 * splice the branch into the tree.
+	 */
+	if (err == -EAGAIN || !verify_chain(chain, partial)) {
+		while (partial > chain) {
+			brelse(partial->bh);
+			partial--;
+		}
+		partial = ext2_get_branch(inode, depth, offsets, chain, &err);
+		if (!partial) {
+			count++;
+			mutex_unlock(&ei->truncate_mutex);
+			if (err)
+				goto cleanup;
+			clear_buffer_new(bh_result);
+			goto got_it;
+		}
+	}
 
 	/*
 	 * Okay, we need to do block allocation. Lazily initialize the block
@@ -683,12 +711,6 @@ cleanup:
 		partial--;
 	}
 	return err;
-changed:
-	while (partial > chain) {
-		brelse(partial->bh);
-		partial--;
-	}
-	goto reread;
 }
 
 int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
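The rewritten ext2_get_blocks() drops the reread: label: the optimistic, lockless chain walk now records -EAGAIN when it races with truncate, and the chain is revalidated once under truncate_mutex. A minimal standalone sketch of that optimistic-read/recheck-under-lock shape (pthread mutex standing in for truncate_mutex; all names illustrative):

#include <pthread.h>
#include <stdio.h>
#include <errno.h>

static pthread_mutex_t truncate_mutex = PTHREAD_MUTEX_INITIALIZER;
static int generation;          /* bumped by a concurrent "truncate" */

/* Fails with -EAGAIN if the structure changed under us. */
static int lookup(int snap)
{
        return (snap == generation) ? 0 : -EAGAIN;
}

static int get_block(void)
{
        int snap = generation;
        int err = lookup(snap);         /* optimistic, no lock held */

        if (err != -EAGAIN)
                return err;

        /* Raced with truncate: re-read once under the lock, where the
         * chain can no longer change while we splice into it. */
        pthread_mutex_lock(&truncate_mutex);
        err = lookup(generation);
        pthread_mutex_unlock(&truncate_mutex);
        return err;
}

int main(void)
{
        printf("first pass: %d\n", get_block());
        generation++;   /* simulate a truncate between walks */
        printf("after race: %d\n", get_block());
        return 0;
}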
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9435dda8f1e0..a1cbff2b4d99 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -70,6 +70,10 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
 		BUG();
 		return 0;
 	}
+
+	if (!tree)
+		return 0;
+
 	if (tree->node_size >= PAGE_CACHE_SIZE) {
 		nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT);
 		spin_lock(&tree->hash_lock);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 36ca2e1a4fa3..7b6165f25fbe 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -349,6 +349,7 @@ void hfs_mdb_put(struct super_block *sb)
 	if (HFS_SB(sb)->nls_disk)
 		unload_nls(HFS_SB(sb)->nls_disk);
 
+	free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
 	kfree(HFS_SB(sb));
 	sb->s_fs_info = NULL;
 }
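The added free_pages() releases the volume bitmap at unmount; the ternary picks the page-allocation order so that the buddy allocator hands back exactly what was allocated for an 8 kB bitmap. A tiny sketch of that order computation (assumption: the HFS bitmap is at most 8192 bytes):

#include <stdio.h>

/* Sketch: page-allocation order for an 8 kB bitmap.
 * Order 0 = one page, order 1 = two contiguous pages. */
static int bitmap_order(unsigned long page_size)
{
        return page_size < 8192 ? 1 : 0;
}

int main(void)
{
        printf("4 kB pages -> order %d (two pages)\n", bitmap_order(4096));
        printf("8 kB pages -> order %d (one page)\n", bitmap_order(8192));
        return 0;
}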
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index c7bd649bbbdc..3e9afc2a91d2 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -55,6 +55,25 @@
  *	need do nothing.
  * RevokeValid set, Revoked set:
  *	buffer has been revoked.
+ *
+ * Locking rules:
+ * We keep two hash tables of revoke records. One hashtable belongs to the
+ * running transaction (is pointed to by journal->j_revoke), the other one
+ * belongs to the committing transaction. Accesses to the second hash table
+ * happen only from the kjournald and no other thread touches this table. Also
+ * journal_switch_revoke_table() which switches which hashtable belongs to the
+ * running and which to the committing transaction is called only from
+ * kjournald. Therefore we need no locks when accessing the hashtable belonging
+ * to the committing transaction.
+ *
+ * All users operating on the hash table belonging to the running transaction
+ * have a handle to the transaction. Therefore they are safe from kjournald
+ * switching hash tables under them. For operations on the lists of entries in
+ * the hash table j_revoke_lock is used.
+ *
+ * Finally, also replay code uses the hash tables but at this moment noone else
+ * can touch them (filesystem isn't mounted yet) and hence no locking is
+ * needed.
  */
 
 #ifndef __KERNEL__
@@ -402,8 +421,6 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
  * the second time we would still have a pending revoke to cancel. So,
  * do not trust the Revoked bit on buffers unless RevokeValid is also
  * set.
- *
- * The caller must have the journal locked.
  */
 int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 {
@@ -481,10 +498,7 @@ void journal_switch_revoke_table(journal_t *journal)
 /*
  * Write revoke records to the journal for all entries in the current
  * revoke hash, deleting the entries as we go.
- *
- * Called with the journal lock held.
  */
-
 void journal_write_revoke_records(journal_t *journal,
 				  transaction_t *transaction)
 {
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c13f67300fe7..7ec89fc05b2b 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -153,23 +153,6 @@ xfs_find_bdev_for_inode(
 }
 
 /*
- * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend. If we are asked to wait,
- * flush the workqueue.
- */
-STATIC void
-xfs_finish_ioend(
-	xfs_ioend_t	*ioend,
-	int		wait)
-{
-	if (atomic_dec_and_test(&ioend->io_remaining)) {
-		queue_work(xfsdatad_workqueue, &ioend->io_work);
-		if (wait)
-			flush_workqueue(xfsdatad_workqueue);
-	}
-}
-
-/*
  * We're now finished for good with this ioend structure.
  * Update the page state via the associated buffer_heads,
  * release holds on the inode and bio, and finally free
@@ -310,6 +293,27 @@ xfs_end_bio_read(
 }
 
 /*
+ * Schedule IO completion handling on a xfsdatad if this was
+ * the final hold on this ioend. If we are asked to wait,
+ * flush the workqueue.
+ */
+STATIC void
+xfs_finish_ioend(
+	xfs_ioend_t	*ioend,
+	int		wait)
+{
+	if (atomic_dec_and_test(&ioend->io_remaining)) {
+		struct workqueue_struct *wq = xfsdatad_workqueue;
+		if (ioend->io_work.func == xfs_end_bio_unwritten)
+			wq = xfsconvertd_workqueue;
+
+		queue_work(wq, &ioend->io_work);
+		if (wait)
+			flush_workqueue(wq);
+	}
+}
+
+/*
  * Allocate and initialise an IO completion structure.
  * We need to track unwritten extent write completion here initially.
  * We'll need to extend this for updating the ondisk inode size later
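xfs_finish_ioend() moves below the bio-completion handlers so it can compare the stored work function and steer unwritten-extent conversions onto the new xfsconvertd queue, keeping them off xfsdatad. A standalone sketch of dispatching on a stored function pointer (names illustrative, not the XFS API):

#include <stdio.h>

struct work {
        void (*func)(struct work *);
};

static void end_write(struct work *w)     { (void)w; }
static void end_unwritten(struct work *w) { (void)w; }

/* Sketch: route a work item by the handler it carries, as
 * xfs_finish_ioend() does with io_work.func above. */
static const char *pick_queue(const struct work *w)
{
        return w->func == end_unwritten ? "xfsconvertd" : "xfsdatad";
}

int main(void)
{
        struct work a = { end_write }, b = { end_unwritten };

        printf("write ioend      -> %s\n", pick_queue(&a));
        printf("unwritten ioend  -> %s\n", pick_queue(&b));
        return 0;
}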
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 1dd528849755..221b3e66ceef 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -19,6 +19,7 @@
 #define __XFS_AOPS_H__
 
 extern struct workqueue_struct *xfsdatad_workqueue;
+extern struct workqueue_struct *xfsconvertd_workqueue;
 extern mempool_t *xfs_ioend_pool;
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index aa1016bb9134..e28800a9f2b5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -51,6 +51,7 @@ static struct shrinker xfs_buf_shake = {
 
 static struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsconvertd_workqueue;
 
 #ifdef XFS_BUF_TRACE
 void
@@ -1775,6 +1776,7 @@ xfs_flush_buftarg(
 	xfs_buf_t	*bp, *n;
 	int		pincount = 0;
 
+	xfs_buf_runall_queues(xfsconvertd_workqueue);
 	xfs_buf_runall_queues(xfsdatad_workqueue);
 	xfs_buf_runall_queues(xfslogd_workqueue);
 
@@ -1831,9 +1833,15 @@ xfs_buf_init(void)
 	if (!xfsdatad_workqueue)
 		goto out_destroy_xfslogd_workqueue;
 
+	xfsconvertd_workqueue = create_workqueue("xfsconvertd");
+	if (!xfsconvertd_workqueue)
+		goto out_destroy_xfsdatad_workqueue;
+
 	register_shrinker(&xfs_buf_shake);
 	return 0;
 
+ out_destroy_xfsdatad_workqueue:
+	destroy_workqueue(xfsdatad_workqueue);
  out_destroy_xfslogd_workqueue:
 	destroy_workqueue(xfslogd_workqueue);
  out_free_buf_zone:
@@ -1849,6 +1857,7 @@ void
 xfs_buf_terminate(void)
 {
 	unregister_shrinker(&xfs_buf_shake);
+	destroy_workqueue(xfsconvertd_workqueue);
 	destroy_workqueue(xfsdatad_workqueue);
 	destroy_workqueue(xfslogd_workqueue);
 	kmem_zone_destroy(xfs_buf_zone);
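The init path grows a third workqueue and a matching unwind label, following the usual kernel goto-cleanup ladder: each failure jumps to the label that tears down everything created before it, newest first. A compact sketch of that ladder (resource names illustrative; teardown on success omitted for brevity):

#include <stdio.h>
#include <stdlib.h>

static void *create(const char *name)
{
        printf("create %s\n", name);
        return malloc(1);
}

static void destroy(void *p, const char *name)
{
        printf("destroy %s\n", name);
        free(p);
}

/* Sketch: goto-based unwind, newest resource torn down first. */
static int init_all(void)
{
        void *logd, *datad, *convertd;

        logd = create("logd");
        if (!logd)
                goto out;
        datad = create("datad");
        if (!datad)
                goto out_destroy_logd;
        convertd = create("convertd");
        if (!convertd)
                goto out_destroy_datad;
        return 0;

 out_destroy_datad:
        destroy(datad, "datad");
 out_destroy_logd:
        destroy(logd, "logd");
 out:
        return -1;
}

int main(void)
{
        return init_all() ? 1 : 0;
}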
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 5aeb77776961..08be36d7326c 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -74,14 +74,14 @@ xfs_flush_pages(
 
 	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 		xfs_iflags_clear(ip, XFS_ITRUNCATED);
-		ret = filemap_fdatawrite(mapping);
-		if (flags & XFS_B_ASYNC)
-			return -ret;
-		ret2 = filemap_fdatawait(mapping);
-		if (!ret)
-			ret = ret2;
+		ret = -filemap_fdatawrite(mapping);
 	}
-	return -ret;
+	if (flags & XFS_B_ASYNC)
+		return ret;
+	ret2 = xfs_wait_on_pages(ip, first, last);
+	if (!ret)
+		ret = ret2;
+	return ret;
 }
 
 int
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 7e90daa0d1d1..9142192ccbe6 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -751,10 +751,26 @@ start:
 			goto relock;
 		}
 	} else {
+		int enospc = 0;
+		ssize_t ret2 = 0;
+
+write_retry:
 		xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
 				*offset, ioflags);
-		ret = generic_file_buffered_write(iocb, iovp, segs,
+		ret2 = generic_file_buffered_write(iocb, iovp, segs,
 				pos, offset, count, ret);
+		/*
+		 * if we just got an ENOSPC, flush the inode now we
+		 * aren't holding any page locks and retry *once*
+		 */
+		if (ret2 == -ENOSPC && !enospc) {
+			error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
+			if (error)
+				goto out_unlock_internal;
+			enospc = 1;
+			goto write_retry;
+		}
+		ret = ret2;
 	}
 
 	current->backing_dev_info = NULL;
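The buffered-write path above now retries exactly once after ENOSPC, flushing delalloc pages in between while no page locks are held. A standalone sketch of the retry-once idiom (stub flush; all names illustrative):

#include <stdio.h>
#include <errno.h>

static int space;       /* blocks actually free */

static void flush_delalloc(void) { space += 1; }        /* stub: reclaims space */

static int do_write(void)
{
        if (space <= 0)
                return -ENOSPC;
        space--;
        return 0;
}

/* Sketch: on ENOSPC flush once, retry once, then give up. */
static int buffered_write(void)
{
        int enospc = 0, ret;

write_retry:
        ret = do_write();
        if (ret == -ENOSPC && !enospc) {
                flush_delalloc();
                enospc = 1;
                goto write_retry;
        }
        return ret;
}

int main(void)
{
        printf("write: %d\n", buffered_write());        /* 0 after one retry */
        return 0;
}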
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a608e72fa405..f7ba76633c29 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -62,12 +62,6 @@ xfs_sync_inodes_ag(
 	uint32_t	first_index = 0;
 	int		error = 0;
 	int		last_error = 0;
-	int		fflag = XFS_B_ASYNC;
-
-	if (flags & SYNC_DELWRI)
-		fflag = XFS_B_DELWRI;
-	if (flags & SYNC_WAIT)
-		fflag = 0;		/* synchronous overrides all */
 
 	do {
 		struct inode	*inode;
@@ -128,11 +122,23 @@ xfs_sync_inodes_ag(
 		 * If we have to flush data or wait for I/O completion
 		 * we need to hold the iolock.
 		 */
-		if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
-			xfs_ilock(ip, XFS_IOLOCK_SHARED);
-			lock_flags |= XFS_IOLOCK_SHARED;
-			error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
-			if (flags & SYNC_IOWAIT)
+		if (flags & SYNC_DELWRI) {
+			if (VN_DIRTY(inode)) {
+				if (flags & SYNC_TRYLOCK) {
+					if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
+						lock_flags |= XFS_IOLOCK_SHARED;
+				} else {
+					xfs_ilock(ip, XFS_IOLOCK_SHARED);
+					lock_flags |= XFS_IOLOCK_SHARED;
+				}
+				if (lock_flags & XFS_IOLOCK_SHARED) {
+					error = xfs_flush_pages(ip, 0, -1,
+							(flags & SYNC_WAIT) ? 0
+							: XFS_B_ASYNC,
+							FI_NONE);
+				}
+			}
+			if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
 				xfs_ioend_wait(ip);
 		}
 		xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -398,15 +404,17 @@ STATIC void
 xfs_syncd_queue_work(
 	struct xfs_mount *mp,
 	void		*data,
-	void		(*syncer)(struct xfs_mount *, void *))
+	void		(*syncer)(struct xfs_mount *, void *),
+	struct completion *completion)
 {
-	struct bhv_vfs_sync_work *work;
+	struct xfs_sync_work *work;
 
-	work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
+	work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
 	INIT_LIST_HEAD(&work->w_list);
 	work->w_syncer = syncer;
 	work->w_data = data;
 	work->w_mount = mp;
+	work->w_completion = completion;
 	spin_lock(&mp->m_sync_lock);
 	list_add_tail(&work->w_list, &mp->m_sync_list);
 	spin_unlock(&mp->m_sync_lock);
@@ -420,49 +428,26 @@ xfs_syncd_queue_work(
  * heads, looking about for more room...
  */
 STATIC void
-xfs_flush_inode_work(
-	struct xfs_mount *mp,
-	void		*arg)
-{
-	struct inode	*inode = arg;
-	filemap_flush(inode->i_mapping);
-	iput(inode);
-}
-
-void
-xfs_flush_inode(
-	xfs_inode_t	*ip)
-{
-	struct inode	*inode = VFS_I(ip);
-
-	igrab(inode);
-	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
-	delay(msecs_to_jiffies(500));
-}
-
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
-xfs_flush_device_work(
+xfs_flush_inodes_work(
 	struct xfs_mount *mp,
 	void		*arg)
 {
 	struct inode	*inode = arg;
-	sync_blockdev(mp->m_super->s_bdev);
+	xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
+	xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
 	iput(inode);
 }
 
 void
-xfs_flush_device(
+xfs_flush_inodes(
 	xfs_inode_t	*ip)
 {
 	struct inode	*inode = VFS_I(ip);
+	DECLARE_COMPLETION_ONSTACK(completion);
 
 	igrab(inode);
-	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
-	delay(msecs_to_jiffies(500));
+	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
+	wait_for_completion(&completion);
 	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
 }
 
@@ -497,7 +482,7 @@ xfssyncd(
 {
 	struct xfs_mount	*mp = arg;
 	long			timeleft;
-	bhv_vfs_sync_work_t	*work, *n;
+	xfs_sync_work_t		*work, *n;
 	LIST_HEAD		(tmp);
 
 	set_freezable();
@@ -532,6 +517,8 @@ xfssyncd(
 			list_del(&work->w_list);
 			if (work == &mp->m_sync_work)
 				continue;
+			if (work->w_completion)
+				complete(work->w_completion);
 			kmem_free(work);
 		}
 	}
@@ -545,6 +532,7 @@ xfs_syncd_init(
 {
 	mp->m_sync_work.w_syncer = xfs_sync_worker;
 	mp->m_sync_work.w_mount = mp;
+	mp->m_sync_work.w_completion = NULL;
 	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
 	if (IS_ERR(mp->m_sync_task))
 		return -PTR_ERR(mp->m_sync_task);
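Queued flush work used to be "waited for" with a fixed 500 ms delay(); the reworked xfs_flush_inodes() passes a completion that xfssyncd signals when the item is done, so the caller blocks exactly as long as needed. A pthread sketch of the same handoff (illustrative; a condition variable standing in for the kernel completion API):

#include <pthread.h>
#include <stdio.h>

struct work {
        void (*fn)(void);
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
};

static void flush(void) { puts("flushing inodes"); }

static void *worker(void *arg)
{
        struct work *w = arg;

        w->fn();
        pthread_mutex_lock(&w->lock);
        w->done = 1;                    /* complete(w->w_completion) */
        pthread_cond_signal(&w->cond);
        pthread_mutex_unlock(&w->lock);
        return NULL;
}

int main(void)
{
        struct work w = { flush, PTHREAD_MUTEX_INITIALIZER,
                          PTHREAD_COND_INITIALIZER, 0 };
        pthread_t t;

        pthread_create(&t, NULL, worker, &w);
        pthread_mutex_lock(&w.lock);    /* wait_for_completion() */
        while (!w.done)
                pthread_cond_wait(&w.cond, &w.lock);
        pthread_mutex_unlock(&w.lock);
        pthread_join(t, NULL);
        puts("flush finished, caller continues");
        return 0;
}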
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 04f058c848ae..308d5bf6dfbd 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -21,18 +21,20 @@
 struct xfs_mount;
 struct xfs_perag;
 
-typedef struct bhv_vfs_sync_work {
+typedef struct xfs_sync_work {
 	struct list_head	w_list;
 	struct xfs_mount	*w_mount;
 	void			*w_data;	/* syncer routine argument */
 	void			(*w_syncer)(struct xfs_mount *, void *);
-} bhv_vfs_sync_work_t;
+	struct completion	*w_completion;
+} xfs_sync_work_t;
 
 #define SYNC_ATTR		0x0001	/* sync attributes */
 #define SYNC_DELWRI		0x0002	/* look at delayed writes */
 #define SYNC_WAIT		0x0004	/* wait for i/o to complete */
 #define SYNC_BDFLUSH		0x0008	/* BDFLUSH is calling -- don't block */
 #define SYNC_IOWAIT		0x0010	/* wait for all I/O to complete */
+#define SYNC_TRYLOCK		0x0020	/* only try to lock inodes */
 
 int xfs_syncd_init(struct xfs_mount *mp);
 void xfs_syncd_stop(struct xfs_mount *mp);
@@ -43,8 +45,7 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
 int xfs_quiesce_data(struct xfs_mount *mp);
 void xfs_quiesce_attr(struct xfs_mount *mp);
 
-void xfs_flush_inode(struct xfs_inode *ip);
-void xfs_flush_device(struct xfs_inode *ip);
+void xfs_flush_inodes(struct xfs_inode *ip);
 
 int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
 int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 478e587087fe..89b81eedce6a 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -69,15 +69,6 @@ xfs_inode_alloc(
 	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(completion_done(&ip->i_flush));
 
-	/*
-	 * initialise the VFS inode here to get failures
-	 * out of the way early.
-	 */
-	if (!inode_init_always(mp->m_super, VFS_I(ip))) {
-		kmem_zone_free(xfs_inode_zone, ip);
-		return NULL;
-	}
-
 	/* initialise the xfs inode */
 	ip->i_ino = ino;
 	ip->i_mount = mp;
@@ -113,6 +104,20 @@ xfs_inode_alloc(
 #ifdef XFS_DIR2_TRACE
 	ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
 #endif
+	/*
+	 * Now initialise the VFS inode. We do this after the xfs_inode
+	 * initialisation as internal failures will result in ->destroy_inode
+	 * being called and that will pass down through the reclaim path and
+	 * free the XFS inode. This path requires the XFS inode to already be
+	 * initialised. Hence if this call fails, the xfs_inode has already
+	 * been freed and we should not reference it at all in the error
+	 * handling.
+	 */
+	if (!inode_init_always(mp->m_super, VFS_I(ip)))
+		return NULL;
+
+	/* prevent anyone from using this yet */
+	VFS_I(ip)->i_state = I_NEW|I_LOCK;
 
 	return ip;
 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 08ce72316bfe..5aaa2d7ec155 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -338,38 +338,6 @@ xfs_iomap_eof_align_last_fsb(
 }
 
 STATIC int
-xfs_flush_space(
-	xfs_inode_t	*ip,
-	int		*fsynced,
-	int		*ioflags)
-{
-	switch (*fsynced) {
-	case 0:
-		if (ip->i_delayed_blks) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL);
-			xfs_flush_inode(ip);
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
-			*fsynced = 1;
-		} else {
-			*ioflags |= BMAPI_SYNC;
-			*fsynced = 2;
-		}
-		return 0;
-	case 1:
-		*fsynced = 2;
-		*ioflags |= BMAPI_SYNC;
-		return 0;
-	case 2:
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		xfs_flush_device(ip);
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		*fsynced = 3;
-		return 0;
-	}
-	return 1;
-}
-
-STATIC int
 xfs_cmn_err_fsblock_zero(
 	xfs_inode_t	*ip,
 	xfs_bmbt_irec_t	*imap)
@@ -538,15 +506,9 @@ error_out:
 }
 
 /*
- * If the caller is doing a write at the end of the file,
- * then extend the allocation out to the file system's write
- * iosize. We clean up any extra space left over when the
- * file is closed in xfs_inactive().
- *
- * For sync writes, we are flushing delayed allocate space to
- * try to make additional space available for allocation near
- * the filesystem full boundary - preallocation hurts in that
- * situation, of course.
+ * If the caller is doing a write at the end of the file, then extend the
+ * allocation out to the file system's write iosize. We clean up any extra
+ * space left over when the file is closed in xfs_inactive().
  */
 STATIC int
 xfs_iomap_eof_want_preallocate(
@@ -565,7 +527,7 @@ xfs_iomap_eof_want_preallocate(
 	int		n, error, imaps;
 
 	*prealloc = 0;
-	if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size)
+	if ((offset + count) <= ip->i_size)
 		return 0;
 
 	/*
@@ -611,7 +573,7 @@ xfs_iomap_write_delay(
 	xfs_extlen_t	extsz;
 	int		nimaps;
 	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
-	int		prealloc, fsynced = 0;
+	int		prealloc, flushed = 0;
 	int		error;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -627,12 +589,12 @@ xfs_iomap_write_delay(
 	extsz = xfs_get_extsz_hint(ip);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-retry:
 	error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
 				ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
 	if (error)
 		return error;
 
+retry:
 	if (prealloc) {
 		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
 		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
@@ -659,15 +621,22 @@ retry:
 
 	/*
 	 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
-	 * then we must have run out of space - flush delalloc, and retry..
+	 * then we must have run out of space - flush all other inodes with
+	 * delalloc blocks and retry without EOF preallocation.
 	 */
 	if (nimaps == 0) {
 		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
 					ip, offset, count);
-		if (xfs_flush_space(ip, &fsynced, &ioflag))
+		if (flushed)
 			return XFS_ERROR(ENOSPC);
 
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_flush_inodes(ip);
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+		flushed = 1;
 		error = 0;
+		prealloc = 0;
 		goto retry;
 	}
 
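With xfs_flush_space() gone, the delayed-allocate path keeps a single flushed flag: the first ENOSPC triggers one global inode flush and a retry with EOF preallocation disabled; a second failure is final. A standalone sketch of that shape (allocator stubbed; all names illustrative):

#include <stdio.h>
#include <errno.h>

static int free_blocks = 2;

static int allocate(int want)
{
        if (free_blocks < want)
                return -ENOSPC;
        free_blocks -= want;
        return 0;
}

/* Sketch: first failure flushes and retries without preallocation;
 * a second failure returns ENOSPC for good. */
static int write_delay(int blocks, int prealloc_blocks)
{
        int flushed = 0;
        int prealloc = prealloc_blocks;

retry:
        if (allocate(blocks + prealloc) == -ENOSPC) {
                if (flushed)
                        return -ENOSPC;
                /* xfs_flush_inodes() would reclaim delalloc space here */
                flushed = 1;
                prealloc = 0;   /* preallocation hurts when nearly full */
                goto retry;
        }
        return 0;
}

int main(void)
{
        printf("result: %d\n", write_delay(1, 8));      /* 0 on the retry */
        return 0;
}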
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index a1cc1322fc0f..fdcf7b82747f 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -40,8 +40,7 @@ typedef enum {
 	BMAPI_IGNSTATE = (1 << 4),	/* ignore unwritten state on read */
 	BMAPI_DIRECT = (1 << 5),	/* direct instead of buffered write */
 	BMAPI_MMAP = (1 << 6),		/* allocate for mmap write */
-	BMAPI_SYNC = (1 << 7),		/* sync write to flush delalloc space */
-	BMAPI_TRYLOCK = (1 << 8),	/* non-blocking request */
+	BMAPI_TRYLOCK = (1 << 7),	/* non-blocking request */
 } bmapi_flags_t;
 
 
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f76c6d7cea21..3750f04ede0b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -562,9 +562,8 @@ xfs_log_mount(
 	}
 
 	mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
-	if (!mp->m_log) {
-		cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!");
-		error = ENOMEM;
+	if (IS_ERR(mp->m_log)) {
+		error = -PTR_ERR(mp->m_log);
 		goto out;
 	}
 
@@ -1180,10 +1179,13 @@ xlog_alloc_log(xfs_mount_t *mp,
 	xfs_buf_t		*bp;
 	int			i;
 	int			iclogsize;
+	int			error = ENOMEM;
 
 	log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
-	if (!log)
-		return NULL;
+	if (!log) {
+		xlog_warn("XFS: Log allocation failed: No memory!");
+		goto out;
+	}
 
 	log->l_mp = mp;
 	log->l_targ = log_target;
@@ -1201,19 +1203,35 @@ xlog_alloc_log(xfs_mount_t *mp,
 	log->l_grant_reserve_cycle = 1;
 	log->l_grant_write_cycle = 1;
 
+	error = EFSCORRUPTED;
 	if (xfs_sb_version_hassector(&mp->m_sb)) {
 		log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
-		ASSERT(log->l_sectbb_log <= mp->m_sectbb_log);
+		if (log->l_sectbb_log < 0 ||
+		    log->l_sectbb_log > mp->m_sectbb_log) {
+			xlog_warn("XFS: Log sector size (0x%x) out of range.",
+						log->l_sectbb_log);
+			goto out_free_log;
+		}
+
 		/* for larger sector sizes, must have v2 or external log */
-		ASSERT(log->l_sectbb_log == 0 ||
-			log->l_logBBstart == 0 ||
-			xfs_sb_version_haslogv2(&mp->m_sb));
-		ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT);
+		if (log->l_sectbb_log != 0 &&
+		    (log->l_logBBstart != 0 &&
+		     !xfs_sb_version_haslogv2(&mp->m_sb))) {
+			xlog_warn("XFS: log sector size (0x%x) invalid "
+						"for configuration.", log->l_sectbb_log);
+			goto out_free_log;
+		}
+		if (mp->m_sb.sb_logsectlog < BBSHIFT) {
+			xlog_warn("XFS: Log sector log (0x%x) too small.",
+						mp->m_sb.sb_logsectlog);
+			goto out_free_log;
+		}
 	}
 	log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
 
 	xlog_get_iclog_buffer_size(mp, log);
 
+	error = ENOMEM;
 	bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
 	if (!bp)
 		goto out_free_log;
@@ -1313,7 +1331,8 @@ out_free_iclog:
 	xfs_buf_free(log->l_xbuf);
 out_free_log:
 	kmem_free(log);
-	return NULL;
+out:
+	return ERR_PTR(-error);
 }	/* xlog_alloc_log */
 
 
@@ -2541,18 +2560,19 @@ redo:
 			xlog_ins_ticketq(&log->l_reserve_headq, tic);
 			xlog_trace_loggrant(log, tic,
 					    "xlog_grant_log_space: sleep 2");
+			spin_unlock(&log->l_grant_lock);
+			xlog_grant_push_ail(log->l_mp, need_bytes);
+			spin_lock(&log->l_grant_lock);
+
 			XFS_STATS_INC(xs_sleep_logspace);
 			sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
 
-			if (XLOG_FORCED_SHUTDOWN(log)) {
-				spin_lock(&log->l_grant_lock);
+			spin_lock(&log->l_grant_lock);
+			if (XLOG_FORCED_SHUTDOWN(log))
 				goto error_return;
-			}
 
 			xlog_trace_loggrant(log, tic,
 					    "xlog_grant_log_space: wake 2");
-			xlog_grant_push_ail(log->l_mp, need_bytes);
-			spin_lock(&log->l_grant_lock);
 			goto redo;
 		} else if (tic->t_flags & XLOG_TIC_IN_Q)
 			xlog_del_ticketq(&log->l_reserve_headq, tic);
@@ -2631,7 +2651,7 @@ xlog_regrant_write_log_space(xlog_t *log,
 	 * for more free space, otherwise try to get some space for
 	 * this transaction.
 	 */
-
+	need_bytes = tic->t_unit_res;
 	if ((ntic = log->l_write_headq)) {
 		free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
 					     log->l_grant_write_bytes);
@@ -2651,26 +2671,25 @@ xlog_regrant_write_log_space(xlog_t *log,
 
 			xlog_trace_loggrant(log, tic,
 				"xlog_regrant_write_log_space: sleep 1");
+			spin_unlock(&log->l_grant_lock);
+			xlog_grant_push_ail(log->l_mp, need_bytes);
+			spin_lock(&log->l_grant_lock);
+
 			XFS_STATS_INC(xs_sleep_logspace);
 			sv_wait(&tic->t_wait, PINOD|PLTWAIT,
 				&log->l_grant_lock, s);
 
 			/* If we're shutting down, this tic is already
 			 * off the queue */
-			if (XLOG_FORCED_SHUTDOWN(log)) {
-				spin_lock(&log->l_grant_lock);
+			spin_lock(&log->l_grant_lock);
+			if (XLOG_FORCED_SHUTDOWN(log))
 				goto error_return;
-			}
 
 			xlog_trace_loggrant(log, tic,
 				"xlog_regrant_write_log_space: wake 1");
-			xlog_grant_push_ail(log->l_mp, tic->t_unit_res);
-			spin_lock(&log->l_grant_lock);
 		}
 	}
 
-	need_bytes = tic->t_unit_res;
-
 redo:
 	if (XLOG_FORCED_SHUTDOWN(log))
 		goto error_return;
@@ -2680,19 +2699,20 @@ redo:
 	if (free_bytes < need_bytes) {
 		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
 			xlog_ins_ticketq(&log->l_write_headq, tic);
+		spin_unlock(&log->l_grant_lock);
+		xlog_grant_push_ail(log->l_mp, need_bytes);
+		spin_lock(&log->l_grant_lock);
+
 		XFS_STATS_INC(xs_sleep_logspace);
 		sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
 
 		/* If we're shutting down, this tic is already off the queue */
-		if (XLOG_FORCED_SHUTDOWN(log)) {
-			spin_lock(&log->l_grant_lock);
+		spin_lock(&log->l_grant_lock);
+		if (XLOG_FORCED_SHUTDOWN(log))
 			goto error_return;
-		}
 
 		xlog_trace_loggrant(log, tic,
 			"xlog_regrant_write_log_space: wake 2");
-		xlog_grant_push_ail(log->l_mp, need_bytes);
-		spin_lock(&log->l_grant_lock);
 		goto redo;
 	} else if (tic->t_flags & XLOG_TIC_IN_Q)
 		xlog_del_ticketq(&log->l_write_headq, tic);
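xlog_alloc_log() now reports why it failed through ERR_PTR(), and xfs_log_mount() decodes that with IS_ERR()/PTR_ERR() instead of assuming ENOMEM. A minimal sketch of the encode/decode pair, with hand-rolled macros mirroring the kernel's as an illustration:

#include <stdio.h>
#include <errno.h>

/* Sketch versions of the kernel's error-pointer helpers: errno values
 * ride in the top 4095 bytes of the address space. */
#define ERR_PTR(err)    ((void *)(long)(err))
#define PTR_ERR(ptr)    ((long)(ptr))
#define IS_ERR(ptr)     ((unsigned long)(ptr) >= (unsigned long)-4095)

static int the_log;     /* stands in for a real xlog_t */

static void *alloc_log(int fail)
{
        if (fail)
                return ERR_PTR(-ENOMEM);        /* a distinct cause, not just NULL */
        return &the_log;
}

int main(void)
{
        for (int fail = 0; fail <= 1; fail++) {
                void *log = alloc_log(fail);

                if (IS_ERR(log))
                        printf("mount fails, error %ld\n", -PTR_ERR(log));
                else
                        printf("mount proceeds\n");
        }
        return 0;
}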
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7af44adffc8f..d6a64392f983 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -313,7 +313,7 @@ typedef struct xfs_mount {
 #endif
 	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
 	struct task_struct	*m_sync_task;	/* generalised sync thread */
-	bhv_vfs_sync_work_t	m_sync_work;	/* work item for VFS_SYNC */
+	xfs_sync_work_t		m_sync_work;	/* work item for VFS_SYNC */
 	struct list_head	m_sync_list;	/* sync thread work item list */
 	spinlock_t		m_sync_lock;	/* work item list lock */
 	int			m_sync_seq;	/* sync thread generation no. */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 7394c7af5de5..19cf90a9c762 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1457,6 +1457,13 @@ xfs_create(
 	error = xfs_trans_reserve(tp, resblks, log_res, 0,
 			XFS_TRANS_PERM_LOG_RES, log_count);
 	if (error == ENOSPC) {
+		/* flush outstanding delalloc blocks and retry */
+		xfs_flush_inodes(dp);
+		error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
+			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+	}
+	if (error == ENOSPC) {
+		/* No space at all so try a "no-allocation" reservation */
 		resblks = 0;
 		error = xfs_trans_reserve(tp, 0, log_res, 0,
 			XFS_TRANS_PERM_LOG_RES, log_count);
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index 35752dadd6df..c840719a8c59 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -201,7 +201,7 @@ typedef struct siginfo {
 #define TRAP_TRACE	(__SI_FAULT|2)	/* process trace trap */
 #define TRAP_BRANCH	(__SI_FAULT|3)	/* process taken branch trap */
 #define TRAP_HWBKPT	(__SI_FAULT|4)	/* hardware breakpoint/watchpoint */
-#define NSIGTRAP	2
+#define NSIGTRAP	4
 
 /*
  * SIGCHLD si_codes
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 2df74eb09563..9477af01a639 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -472,6 +472,7 @@
 	{0x8086, 0x2562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 	{0x8086, 0x3582, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 	{0x8086, 0x2572, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
+	{0x8086, 0x358e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 	{0, 0, 0}
 
 #define gamma_PCI_IDS \
@@ -533,4 +534,5 @@
 	{0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
+	{0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0, 0, 0}
diff --git a/include/linux/fb.h b/include/linux/fb.h
index f563c5013932..330c4b1bfcaa 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -173,8 +173,12 @@ struct fb_fix_screeninfo {
 /* Interpretation of offset for color fields: All offsets are from the right,
  * inside a "pixel" value, which is exactly 'bits_per_pixel' wide (means: you
  * can use the offset as right argument to <<). A pixel afterwards is a bit
- * stream and is written to video memory as that unmodified. This implies
- * big-endian byte order if bits_per_pixel is greater than 8.
+ * stream and is written to video memory as that unmodified.
+ *
+ * For pseudocolor: offset and length should be the same for all color
+ * components. Offset specifies the position of the least significant bit
+ * of the pallette index in a pixel value. Length indicates the number
+ * of available palette entries (i.e. # of entries = 1 << length).
  */
 struct fb_bitfield {
 	__u32 offset;			/* beginning of bitfield	*/
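The new comment pins down the pseudocolor convention: length is the log2 of the usable palette size and offset locates the index inside the pixel. A small sketch extracting a palette index with those fields (values illustrative):

#include <stdio.h>

/* Sketch: decode a pseudocolor palette index using the documented
 * offset/length convention (entries = 1 << length). */
static unsigned int palette_index(unsigned int pixel,
                                  unsigned int offset, unsigned int length)
{
        return (pixel >> offset) & ((1u << length) - 1);
}

int main(void)
{
        /* 4-bit palette index stored in the upper nibble of an 8 bpp pixel */
        unsigned int pixel = 0xA7;

        printf("entries: %u\n", 1u << 4);                       /* 16 */
        printf("index:   %u\n", palette_index(pixel, 4, 4));    /* 10 */
        return 0;
}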
diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h
index 671decbd2aeb..934e22d65801 100644
--- a/include/linux/fiemap.h
+++ b/include/linux/fiemap.h
@@ -11,6 +11,8 @@
 #ifndef _LINUX_FIEMAP_H
 #define _LINUX_FIEMAP_H
 
+#include <linux/types.h>
+
 struct fiemap_extent {
 	__u64 fe_logical; /* logical offset in bytes for the start of
 			* the extent from the beginning of the file */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index dcfb93337e9a..d87247d2641f 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -15,19 +15,6 @@
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
-#define INIT_KIOCTX(name, which_mm) \
-{							\
-	.users		= ATOMIC_INIT(1),		\
-	.dead		= 0,				\
-	.mm		= &which_mm,			\
-	.user_id	= 0,				\
-	.next		= NULL,				\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \
-	.ctx_lock	= __SPIN_LOCK_UNLOCKED(name.ctx_lock), \
-	.reqs_active	= 0U,				\
-	.max_reqs	= ~0U,				\
-}
-
 #define INIT_MM(name) \
 {							\
 	.mm_rb		= RB_ROOT,			\
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ee98cd570885..06ba90c211a5 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2514,6 +2514,8 @@
 #define PCI_DEVICE_ID_INTEL_IOAT_TBG3	0x3433
 #define PCI_DEVICE_ID_INTEL_82830_HB	0x3575
 #define PCI_DEVICE_ID_INTEL_82830_CGC	0x3577
+#define PCI_DEVICE_ID_INTEL_82854_HB	0x358c
+#define PCI_DEVICE_ID_INTEL_82854_IG	0x358e
 #define PCI_DEVICE_ID_INTEL_82855GM_HB	0x3580
 #define PCI_DEVICE_ID_INTEL_82855GM_IG	0x3582
 #define PCI_DEVICE_ID_INTEL_E7520_MCH	0x3590
diff --git a/include/linux/sht15.h b/include/linux/sht15.h
new file mode 100644
index 000000000000..046bce05ecab
--- /dev/null
+++ b/include/linux/sht15.h
@@ -0,0 +1,24 @@
+/*
+ * sht15.h - support for the SHT15 Temperature and Humidity Sensor
+ *
+ * Copyright (c) 2009 Jonathan Cameron
+ *
+ * Copyright (c) 2007 Wouter Horre
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/**
+ * struct sht15_platform_data - sht15 connectivity info
+ * @gpio_data:	no. of gpio to which bidirectional data line is connected
+ * @gpio_sck:	no. of gpio to which the data clock is connected.
+ * @supply_mv:	supply voltage in mv. Overridden by regulator if available.
+ **/
+struct sht15_platform_data {
+	int gpio_data;
+	int gpio_sck;
+	int supply_mv;
+};
+
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index b95842542590..625e9e4639c6 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -29,7 +29,7 @@
 /**
  * usb_serial_port: structure for the specific ports of a device.
  * @serial: pointer back to the struct usb_serial owner of this port.
- * @tty: pointer to the corresponding tty for this port.
+ * @port: pointer to the corresponding tty_port for this port.
  * @lock: spinlock to grab when updating portions of this structure.
  * @mutex: mutex used to synchronize serial_open() and serial_close()
  *	access for this port.
@@ -44,19 +44,22 @@
  * @interrupt_out_endpointAddress: endpoint address for the interrupt out pipe
  *	for this port.
  * @bulk_in_buffer: pointer to the bulk in buffer for this port.
+ * @bulk_in_size: the size of the bulk_in_buffer, in bytes.
  * @read_urb: pointer to the bulk in struct urb for this port.
  * @bulk_in_endpointAddress: endpoint address for the bulk in pipe for this
  *	port.
  * @bulk_out_buffer: pointer to the bulk out buffer for this port.
  * @bulk_out_size: the size of the bulk_out_buffer, in bytes.
  * @write_urb: pointer to the bulk out struct urb for this port.
+ * @write_urb_busy: port`s writing status
  * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this
  *	port.
  * @write_wait: a wait_queue_head_t used by the port.
  * @work: work queue entry for the line discipline waking up.
- * @open_count: number of times this port has been opened.
  * @throttled: nonzero if the read urb is inactive to throttle the device
  * @throttle_req: nonzero if the tty wants to throttle us
+ * @console: attached usb serial console
+ * @dev: pointer to the serial device
  *
  * This structure is used by the usb-serial core and drivers for the specific
  * ports of a device.
diff --git a/include/video/cyblafb.h b/include/video/cyblafb.h
deleted file mode 100644
index d3c1d4e2c8e3..000000000000
--- a/include/video/cyblafb.h
+++ /dev/null
@@ -1,175 +0,0 @@
-
-#ifndef CYBLAFB_DEBUG
-#define CYBLAFB_DEBUG 0
-#endif
-
-#if CYBLAFB_DEBUG
-#define debug(f,a...)	printk("%s:" f,  __func__ , ## a);
-#else
-#define debug(f,a...)
-#endif
-
-#define output(f, a...) printk("cyblafb: " f, ## a)
-
-#define Kb	(1024)
-#define Mb	(Kb*Kb)
-
-/* PCI IDS of supported cards temporarily here */
-
-#define CYBERBLADEi1	0x8500
-
-/* these defines are for 'lcd' variable */
-#define LCD_STRETCH	0
-#define LCD_CENTER	1
-#define LCD_BIOS	2
-
-/* display types */
-#define DISPLAY_CRT	0
-#define DISPLAY_FP	1
-
-#define ROP_S	0xCC
-
-#define point(x,y) ((y)<<16|(x))
-
-//
-// Attribute Regs, ARxx, 3c0/3c1
-//
-#define AR00	0x00
-#define AR01	0x01
-#define AR02	0x02
-#define AR03	0x03
-#define AR04	0x04
-#define AR05	0x05
-#define AR06	0x06
-#define AR07	0x07
-#define AR08	0x08
-#define AR09	0x09
-#define AR0A	0x0A
-#define AR0B	0x0B
-#define AR0C	0x0C
-#define AR0D	0x0D
-#define AR0E	0x0E
-#define AR0F	0x0F
-#define AR10	0x10
-#define AR12	0x12
-#define AR13	0x13
-
-//
-// Sequencer Regs, SRxx, 3c4/3c5
-//
-#define SR00	0x00
-#define SR01	0x01
-#define SR02	0x02
-#define SR03	0x03
-#define SR04	0x04
-#define SR0D	0x0D
-#define SR0E	0x0E
-#define SR11	0x11
-#define SR18	0x18
-#define SR19	0x19
-
-//
-//
-//
-#define CR00	0x00
-#define CR01	0x01
-#define CR02	0x02
-#define CR03	0x03
-#define CR04	0x04
-#define CR05	0x05
-#define CR06	0x06
-#define CR07	0x07
-#define CR08	0x08
-#define CR09	0x09
-#define CR0A	0x0A
-#define CR0B	0x0B
-#define CR0C	0x0C
-#define CR0D	0x0D
-#define CR0E	0x0E
-#define CR0F	0x0F
-#define CR10	0x10
-#define CR11	0x11
-#define CR12	0x12
-#define CR13	0x13
-#define CR14	0x14
-#define CR15	0x15
-#define CR16	0x16
-#define CR17	0x17
-#define CR18	0x18
-#define CR19	0x19
-#define CR1A	0x1A
-#define CR1B	0x1B
-#define CR1C	0x1C
-#define CR1D	0x1D
-#define CR1E	0x1E
-#define CR1F	0x1F
-#define CR20	0x20
-#define CR21	0x21
-#define CR27	0x27
-#define CR29	0x29
-#define CR2A	0x2A
-#define CR2B	0x2B
-#define CR2D	0x2D
-#define CR2F	0x2F
-#define CR36	0x36
-#define CR38	0x38
-#define CR39	0x39
-#define CR3A	0x3A
-#define CR55	0x55
-#define CR56	0x56
-#define CR57	0x57
-#define CR58	0x58
-
-//
-//
-//
-
-#define GR00	0x01
-#define GR01	0x01
-#define GR02	0x02
-#define GR03	0x03
-#define GR04	0x04
-#define GR05	0x05
-#define GR06	0x06
-#define GR07	0x07
-#define GR08	0x08
-#define GR0F	0x0F
-#define GR20	0x20
-#define GR23	0x23
-#define GR2F	0x2F
-#define GR30	0x30
-#define GR31	0x31
-#define GR33	0x33
-#define GR52	0x52
-#define GR53	0x53
-#define GR5D	0x5d
-
-
-//
-// Graphics Engine
-//
-#define GEBase	0x2100		// could be mapped elsewhere if we like it
-#define GE00	(GEBase+0x00)	// source 1, p 111
-#define GE04	(GEBase+0x04)	// source 2, p 111
-#define GE08	(GEBase+0x08)	// destination 1, p 111
-#define GE0C	(GEBase+0x0C)	// destination 2, p 112
-#define GE10	(GEBase+0x10)	// right view base & enable, p 112
-#define GE13	(GEBase+0x13)	// left view base & enable, p 112
-#define GE18	(GEBase+0x18)	// block write start address, p 112
-#define GE1C	(GEBase+0x1C)	// block write end address, p 112
-#define GE20	(GEBase+0x20)	// engine status, p 113
-#define GE24	(GEBase+0x24)	// reset all GE pointers
-#define GE44	(GEBase+0x44)	// command register, p 126
-#define GE48	(GEBase+0x48)	// raster operation, p 127
-#define GE60	(GEBase+0x60)	// foreground color, p 128
-#define GE64	(GEBase+0x64)	// background color, p 128
-#define GE6C	(GEBase+0x6C)	// Pattern and Style, p 129, ok
-#define GE9C	(GEBase+0x9C)	// pixel engine data port, p 125
-#define GEB8	(GEBase+0xB8)	// Destination Stride / Buffer Base 0, p 133
-#define GEBC	(GEBase+0xBC)	// Destination Stride / Buffer Base 1, p 133
-#define GEC0	(GEBase+0xC0)	// Destination Stride / Buffer Base 2, p 133
-#define GEC4	(GEBase+0xC4)	// Destination Stride / Buffer Base 3, p 133
-#define GEC8	(GEBase+0xC8)	// Source Stride / Buffer Base 0, p 133
-#define GECC	(GEBase+0xCC)	// Source Stride / Buffer Base 1, p 133
-#define GED0	(GEBase+0xD0)	// Source Stride / Buffer Base 2, p 133
-#define GED4	(GEBase+0xD4)	// Source Stride / Buffer Base 3, p 133
diff --git a/init/initramfs.c b/init/initramfs.c
index 80cd713f6cc5..9ee7b7810417 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -310,7 +310,8 @@ static int __init do_name(void)
 		if (wfd >= 0) {
 			sys_fchown(wfd, uid, gid);
 			sys_fchmod(wfd, mode);
-			sys_ftruncate(wfd, body_len);
+			if (body_len)
+				sys_ftruncate(wfd, body_len);
 			vcollected = kstrdup(collected, GFP_KERNEL);
 			state = CopyFile;
 		}
@@ -515,6 +516,7 @@ skip:
 	initrd_end = 0;
 }
 
+#ifdef CONFIG_BLK_DEV_RAM
 #define BUF_SIZE 1024
 static void __init clean_rootfs(void)
 {
@@ -561,6 +563,7 @@ static void __init clean_rootfs(void)
 	sys_close(fd);
 	kfree(buf);
 }
+#endif
 
 static int __init populate_rootfs(void)
 {
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 89f60ec8ee54..24ae46dfe45d 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -22,6 +22,7 @@
 #define MIN_MSGSIZEMAX	128		/* min value for msgsize_max */
 #define MAX_MSGSIZEMAX	(8192*128)	/* max value for msgsize_max */
 
+#ifdef CONFIG_PROC_SYSCTL
 static void *get_mq(ctl_table *table)
 {
 	char *which = table->data;
@@ -30,7 +31,6 @@ static void *get_mq(ctl_table *table)
 	return which;
 }
 
-#ifdef CONFIG_PROC_SYSCTL
 static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
 {
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 64191fa09b7e..dfcd83ceee3b 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -604,10 +604,11 @@ repeat:
 	ret = security_ptrace_traceme(current->parent);
 
 	/*
-	 * Set the ptrace bit in the process ptrace flags.
-	 * Then link us on our parent's ptraced list.
+	 * Check PF_EXITING to ensure ->real_parent has not passed
+	 * exit_ptrace(). Otherwise we don't report the error but
+	 * pretend ->real_parent untraces us right after return.
 	 */
-	if (!ret) {
+	if (!ret && !(current->real_parent->flags & PF_EXITING)) {
 		current->ptrace |= PT_PTRACED;
 		__ptrace_link(current, current->real_parent);
 	}
diff --git a/kernel/sys.c b/kernel/sys.c
index 51dbb55604e8..e7998cf31498 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -360,6 +360,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 		void __user *, arg)
 {
 	char buffer[256];
+	int ret = 0;
 
 	/* We only trust the superuser with rebooting the system. */
 	if (!capable(CAP_SYS_BOOT))
@@ -397,7 +398,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 		kernel_halt();
 		unlock_kernel();
 		do_exit(0);
-		break;
+		panic("cannot halt");
 
 	case LINUX_REBOOT_CMD_POWER_OFF:
 		kernel_power_off();
@@ -417,29 +418,22 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 
 #ifdef CONFIG_KEXEC
 	case LINUX_REBOOT_CMD_KEXEC:
-	{
-		int ret;
-		ret = kernel_kexec();
-		unlock_kernel();
-		return ret;
-	}
+		ret = kernel_kexec();
+		break;
 #endif
 
 #ifdef CONFIG_HIBERNATION
 	case LINUX_REBOOT_CMD_SW_SUSPEND:
-	{
-		int ret = hibernate();
-		unlock_kernel();
-		return ret;
-	}
+		ret = hibernate();
+		break;
 #endif
 
 	default:
-		unlock_kernel();
-		return -EINVAL;
+		ret = -EINVAL;
+		break;
 	}
 	unlock_kernel();
-	return 0;
+	return ret;
 }
 
 static void deferred_cad(struct work_struct *dummy)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4286b62b34a0..e3d2c7dd59b9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -902,16 +902,6 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
-#ifdef CONFIG_UNEVICTABLE_LRU
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "scan_unevictable_pages",
-		.data		= &scan_unevictable_pages,
-		.maxlen		= sizeof(scan_unevictable_pages),
-		.mode		= 0644,
-		.proc_handler	= &scan_unevictable_handler,
-	},
-#endif
 #ifdef CONFIG_SLOW_WORK
 	{
 		.ctl_name	= CTL_UNNUMBERED,
@@ -1302,6 +1292,16 @@ static struct ctl_table vm_table[] = {
 		.extra2		= &one,
 	},
 #endif
+#ifdef CONFIG_UNEVICTABLE_LRU
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "scan_unevictable_pages",
+		.data		= &scan_unevictable_pages,
+		.maxlen		= sizeof(scan_unevictable_pages),
+		.mode		= 0644,
+		.proc_handler	= &scan_unevictable_handler,
+	},
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/mm/Kconfig b/mm/Kconfig
index b53427ad30a3..57971d2ab848 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -213,6 +213,8 @@ config UNEVICTABLE_LRU
 	  will use one page flag and increase the code size a little,
 	  say Y unless you know what you are doing.
 
+	  See Documentation/vm/unevictable-lru.txt for more information.
+
 config HAVE_MLOCK
 	bool
 	default y if MMU=y
diff --git a/mm/filemap.c b/mm/filemap.c
index 2e2d38ebda4b..8bd498040f32 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -567,8 +567,8 @@ EXPORT_SYMBOL(wait_on_page_bit);
 
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
- * @page - Page defining the wait queue of interest
- * @waiter - Waiter to add to the queue
+ * @page: Page defining the wait queue of interest
+ * @waiter: Waiter to add to the queue
  *
  * Add an arbitrary @waiter to the wait queue for the nominated @page.
  */
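
The corrected kernel-doc above covers add_page_wait_queue(), which lets a caller queue its own waiter on a page instead of sleeping in lock_page(). A minimal sketch, assuming the wait-queue API of this kernel generation; monitor_wake() and the surrounding setup are hypothetical illustration, not part of this patch:

	/* Hypothetical waiter callback: runs when the page is woken, e.g. by unlock_page(). */
	static int monitor_wake(wait_queue_t *wait, unsigned mode, int sync, void *key)
	{
		list_del_init(&wait->task_list);	/* detach after the first wake-up */
		return 0;				/* no task to wake */
	}

	wait_queue_t monitor;				/* caller-owned waiter */

	init_waitqueue_func_entry(&monitor, monitor_wake);
	add_page_wait_queue(page, &monitor);		/* fire monitor_wake() on wake-up */
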
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2fc6d6c48238..e44fb0fbb80e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -932,7 +932,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	if (unlikely(!mem))
 		return 0;
 
-	VM_BUG_ON(mem_cgroup_is_obsolete(mem));
+	VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem));
 
 	while (1) {
 		int ret;
diff --git a/mm/shmem.c b/mm/shmem.c
index d94d2e9146bc..f9cb20ebb990 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/vfs.h>
 #include <linux/mount.h>
+#include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -43,7 +44,6 @@ static struct vfsmount *shm_mnt;
 #include <linux/exportfs.h>
 #include <linux/generic_acl.h>
 #include <linux/mman.h>
-#include <linux/pagemap.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
@@ -65,13 +65,28 @@ static struct vfsmount *shm_mnt;
 #include <asm/div64.h>
 #include <asm/pgtable.h>
 
+/*
+ * The maximum size of a shmem/tmpfs file is limited by the maximum size of
+ * its triple-indirect swap vector - see illustration at shmem_swp_entry().
+ *
+ * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
+ * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum
+ * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
+ * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
+ *
+ * We use / and * instead of shifts in the definitions below, so that the swap
+ * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
+ */
 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
-#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
-#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
+#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
-#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
-#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
+#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
+#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
 
+#define SHMEM_MAX_BYTES  min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
+#define SHMEM_MAX_INDEX  ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
+
+#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
 #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
 
 /* info->flags needs VM_flags to handle pagein/truncate races efficiently */
@@ -2581,7 +2596,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 #define shmem_get_inode(sb, mode, dev, flags)	ramfs_get_inode(sb, mode, dev)
 #define shmem_acct_size(flags, size)		0
 #define shmem_unacct_size(flags, size)		do {} while (0)
-#define SHMEM_MAX_BYTES				LLONG_MAX
+#define SHMEM_MAX_BYTES				MAX_LFS_FILESIZE
 
 #endif /* CONFIG_SHMEM */
 
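The size limits quoted in the new comment fall straight out of the macros. A worked example, assuming 4kB pages and taking SHMEM_NR_DIRECT as 16 (its usual value):

	/* 32-bit kernel: sizeof(unsigned long) == 4 */
	ENTRIES_PER_PAGE     = 4096 / 4                 = 1024
	ENTRIES_PER_PAGEPAGE = 1024 * 1024              = 1048576
	SHMSWP_MAX_INDEX     = 16 + (1048576/2) * 1025  = 537395216
	SHMSWP_MAX_BYTES     = 537395216 * 4096         ~ 2.2 * 10^12, just over 2TB

	/* 64-bit kernel: sizeof(unsigned long) == 8 halves ENTRIES_PER_PAGE to 512,
	 * shrinking every factor above and cutting the limit to about one eighth (~275GB). */
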
diff --git a/mm/util.c b/mm/util.c
index 2599e83eea17..55bef160b9f1 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -223,6 +223,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 }
 #endif
 
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start:	starting user address
+ * @nr_pages:	number of pages from start to pin
+ * @write:	whether pages will be written to
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
 int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 				int nr_pages, int write, struct page **pages)
 {
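
The kernel-doc added above spells out the get_user_pages_fast() contract; note the mixed return convention (a count on partial success, -errno only when nothing was pinned). A minimal caller sketch; user_addr is a hypothetical user-space address and the error handling is illustrative only:

	struct page *pages[16];
	int i, nr;

	/* Pin up to 16 pages for writing, starting at the page containing user_addr. */
	nr = get_user_pages_fast(user_addr & PAGE_MASK, 16, 1, pages);
	if (nr < 0)
		return nr;			/* nothing pinned: -errno */

	/* ... access the nr pinned pages, e.g. via kmap() ... */

	for (i = 0; i < nr; i++)
		put_page(pages[i]);		/* drop the references gup took */
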
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index a0affd9cfca8..d4d41b3efc7c 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2005-2009 NTT DATA CORPORATION
  *
- * Version: 2.2.0-pre 2009/02/01
+ * Version: 2.2.0 2009/04/01
  *
  */
 
@@ -1773,7 +1773,7 @@ void tomoyo_load_policy(const char *filename)
 	envp[2] = NULL;
 	call_usermodehelper(argv[0], argv, envp, 1);
 
-	printk(KERN_INFO "TOMOYO: 2.2.0-pre 2009/02/01\n");
+	printk(KERN_INFO "TOMOYO: 2.2.0 2009/04/01\n");
 	printk(KERN_INFO "Mandatory Access Control activated.\n");
 	tomoyo_policy_loaded = true;
 	{ /* Check all profiles currently assigned to domains are defined. */
@@ -1800,7 +1800,7 @@ void tomoyo_load_policy(const char *filename)
 static int tomoyo_read_version(struct tomoyo_io_buffer *head)
 {
 	if (!head->read_eof) {
-		tomoyo_io_printf(head, "2.2.0-pre");
+		tomoyo_io_printf(head, "2.2.0");
 		head->read_eof = true;
 	}
 	return 0;
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index e77e6a6de0f2..678f4ff16aa4 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2005-2009 NTT DATA CORPORATION
  *
- * Version: 2.2.0-pre 2009/02/01
+ * Version: 2.2.0 2009/04/01
  *
  */
 
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 2f2b449ffd2d..2d6748741a26 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2005-2009 NTT DATA CORPORATION
  *
- * Version: 2.2.0-pre 2009/02/01
+ * Version: 2.2.0 2009/04/01
  *
  */
 
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index 65f50c1c5ee9..2316da8ec5bc 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2005-2009 NTT DATA CORPORATION
  *
- * Version: 2.2.0-pre 2009/02/01
+ * Version: 2.2.0 2009/04/01
  *
  */
 
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 3bbe01a7a4b5..bf8e2b451687 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2005-2009 NTT DATA CORPORATION
  *
- * Version: 2.2.0-pre 2009/02/01
+ * Version: 2.2.0 2009/04/01
  *
  */
 
diff --git a/security/tomoyo/realpath.h b/security/tomoyo/realpath.h
index 7ec9fc9cbc07..78217a37960b 100644
--- a/security/tomoyo/realpath.h
+++ b/security/tomoyo/realpath.h
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2005-2009 NTT DATA CORPORATION
  *
- * Version: 2.2.0-pre 2009/02/01
+ * Version: 2.2.0 2009/04/01
  *
  */
 
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 3eeeae12c4dc..5b481912752a 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2005-2009 NTT DATA CORPORATION
  *
- * Version: 2.2.0-pre 2009/02/01
+ * Version: 2.2.0 2009/04/01
  *
  */
 
diff --git a/security/tomoyo/tomoyo.h b/security/tomoyo/tomoyo.h
index a0c8f6e0bea4..41c6ebafb9c5 100644
--- a/security/tomoyo/tomoyo.h
+++ b/security/tomoyo/tomoyo.h
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2005-2009 NTT DATA CORPORATION
  *
- * Version: 2.2.0-pre 2009/02/01
+ * Version: 2.2.0 2009/04/01
  *
  */
 
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 30829ee920c3..7ba8db5d4c42 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2260,11 +2260,11 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
 		gcap &= ~0x01;
 
 	/* allow 64bit DMA address if supported by H/W */
-	if ((gcap & 0x01) && !pci_set_dma_mask(pci, DMA_64BIT_MASK))
-		pci_set_consistent_dma_mask(pci, DMA_64BIT_MASK);
+	if ((gcap & 0x01) && !pci_set_dma_mask(pci, DMA_BIT_MASK(64)))
+		pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(64));
 	else {
-		pci_set_dma_mask(pci, DMA_32BIT_MASK);
-		pci_set_consistent_dma_mask(pci, DMA_32BIT_MASK);
+		pci_set_dma_mask(pci, DMA_BIT_MASK(32));
+		pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(32));
 	}
 
 	/* read number of streams from GCAP register instead of using