-rw-r--r--  CREDITS | 5
-rw-r--r--  Documentation/lguest/extract | 58
-rw-r--r--  Documentation/lguest/lguest.c | 620
-rw-r--r--  MAINTAINERS | 8
-rw-r--r--  arch/alpha/kernel/head.S | 1
-rw-r--r--  arch/alpha/kernel/pci.c | 10
-rw-r--r--  arch/alpha/kernel/pci_iommu.c | 4
-rw-r--r--  arch/alpha/kernel/smp.c | 6
-rw-r--r--  arch/alpha/kernel/vmlinux.lds.S | 1
-rw-r--r--  arch/i386/kernel/alternative.c | 14
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/Kconfig | 2
-rw-r--r--  arch/i386/mm/init.c | 14
-rw-r--r--  arch/ia64/kernel/cyclone.c | 14
-rw-r--r--  arch/ia64/kernel/time.c | 4
-rw-r--r--  arch/m32r/kernel/setup_mappi.c | 5
-rw-r--r--  arch/m68knommu/kernel/setup.c | 2
-rw-r--r--  arch/m68knommu/platform/5206/config.c | 2
-rw-r--r--  arch/m68knommu/platform/5206e/config.c | 2
-rw-r--r--  arch/m68knommu/platform/520x/config.c | 2
-rw-r--r--  arch/m68knommu/platform/523x/config.c | 2
-rw-r--r--  arch/m68knommu/platform/5249/config.c | 2
-rw-r--r--  arch/m68knommu/platform/5272/config.c | 2
-rw-r--r--  arch/m68knommu/platform/527x/config.c | 2
-rw-r--r--  arch/m68knommu/platform/528x/config.c | 2
-rw-r--r--  arch/m68knommu/platform/5307/config.c | 2
-rw-r--r--  arch/m68knommu/platform/5307/pit.c | 14
-rw-r--r--  arch/m68knommu/platform/5307/timers.c | 13
-rw-r--r--  arch/m68knommu/platform/532x/config.c | 2
-rw-r--r--  arch/m68knommu/platform/5407/config.c | 2
-rw-r--r--  arch/m68knommu/platform/68328/timers.c | 11
-rw-r--r--  arch/m68knommu/platform/68360/config.c | 14
-rw-r--r--  arch/sparc64/kernel/viohs.c | 2
-rw-r--r--  arch/x86_64/ia32/ia32_binfmt.c | 5
-rw-r--r--  arch/x86_64/ia32/syscall32.c | 8
-rw-r--r--  arch/x86_64/kernel/tce.c | 4
-rw-r--r--  arch/x86_64/kernel/tsc.c | 2
-rw-r--r--  arch/x86_64/mm/init.c | 10
-rw-r--r--  drivers/acpi/Kconfig | 3
-rw-r--r--  drivers/base/power/shutdown.c | 2
-rw-r--r--  drivers/block/lguest_blk.c | 169
-rw-r--r--  drivers/char/Kconfig | 1
-rw-r--r--  drivers/char/hpet.c | 2
-rw-r--r--  drivers/char/hvc_lguest.c | 80
-rw-r--r--  drivers/edac/Kconfig | 4
-rw-r--r--  drivers/edac/edac_mc.c | 64
-rw-r--r--  drivers/edac/edac_mc_sysfs.c | 19
-rw-r--r--  drivers/edac/edac_module.h | 8
-rw-r--r--  drivers/edac/edac_pci.c | 162
-rw-r--r--  drivers/edac/edac_pci_sysfs.c | 297
-rw-r--r--  drivers/edac/i3000_edac.c | 2
-rw-r--r--  drivers/i2c/chips/ds1682.c | 3
-rw-r--r--  drivers/ide/pci/scc_pata.c | 4
-rw-r--r--  drivers/ieee1394/raw1394.c | 2
-rw-r--r--  drivers/lguest/Makefile | 12
-rw-r--r--  drivers/lguest/README | 47
-rw-r--r--  drivers/lguest/core.c | 357
-rw-r--r--  drivers/lguest/hypercalls.c | 127
-rw-r--r--  drivers/lguest/interrupts_and_traps.c | 205
-rw-r--r--  drivers/lguest/io.c | 265
-rw-r--r--  drivers/lguest/lg.h | 44
-rw-r--r--  drivers/lguest/lguest.c | 490
-rw-r--r--  drivers/lguest/lguest_asm.S | 71
-rw-r--r--  drivers/lguest/lguest_bus.c | 75
-rw-r--r--  drivers/lguest/lguest_user.c | 166
-rw-r--r--  drivers/lguest/page_tables.c | 329
-rw-r--r--  drivers/lguest/segments.c | 126
-rw-r--r--  drivers/lguest/switcher.S | 284
-rw-r--r--  drivers/media/video/Kconfig | 4
-rw-r--r--  drivers/mtd/maps/Kconfig | 2
-rw-r--r--  drivers/net/ax88796.c | 2
-rw-r--r--  drivers/net/cxgb3/cxgb3_offload.c | 2
-rw-r--r--  drivers/net/lguest_net.c | 237
-rw-r--r--  drivers/net/lib8390.c | 9
-rw-r--r--  drivers/net/pppol2tp.c | 4
-rw-r--r--  drivers/pnp/card.c | 166
-rw-r--r--  drivers/pnp/core.c | 50
-rw-r--r--  drivers/pnp/driver.c | 70
-rw-r--r--  drivers/pnp/interface.c | 217
-rw-r--r--  drivers/pnp/isapnp/compat.c | 39
-rw-r--r--  drivers/pnp/isapnp/core.c | 332
-rw-r--r--  drivers/pnp/isapnp/proc.c | 21
-rw-r--r--  drivers/pnp/manager.c | 144
-rw-r--r--  drivers/pnp/pnpacpi/core.c | 98
-rw-r--r--  drivers/pnp/pnpacpi/rsparser.c | 441
-rw-r--r--  drivers/pnp/pnpbios/bioscalls.c | 339
-rw-r--r--  drivers/pnp/pnpbios/core.c | 257
-rw-r--r--  drivers/pnp/pnpbios/proc.c | 107
-rw-r--r--  drivers/pnp/pnpbios/rsparser.c | 349
-rw-r--r--  drivers/pnp/quirks.c | 80
-rw-r--r--  drivers/pnp/resource.c | 102
-rw-r--r--  drivers/pnp/support.c | 17
-rw-r--r--  drivers/pnp/system.c | 40
-rw-r--r--  drivers/rtc/Makefile | 42
-rw-r--r--  drivers/rtc/rtc-ds1307.c | 2
-rw-r--r--  drivers/rtc/rtc-stk17ta8.c | 6
-rw-r--r--  drivers/spi/spi_s3c24xx.c | 2
-rw-r--r--  drivers/video/chipsfb.c | 3
-rw-r--r--  drivers/video/tgafb.c | 2
-rw-r--r--  drivers/w1/masters/ds1wm.c | 2
-rw-r--r--  drivers/xen/xenbus/xenbus_xs.c | 2
-rw-r--r--  fs/ext2/super.c | 2
-rw-r--r--  fs/ext3/super.c | 2
-rw-r--r--  fs/ext4/super.c | 2
-rw-r--r--  fs/lockd/svclock.c | 6
-rw-r--r--  fs/nfsd/nfs4xdr.c | 2
-rw-r--r--  fs/reiserfs/stree.c | 5
-rw-r--r--  fs/signalfd.c | 2
-rw-r--r--  fs/timerfd.c | 6
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c | 4
-rw-r--r--  include/asm-alpha/bitops.h | 2
-rw-r--r--  include/asm-arm/unaligned.h | 22
-rw-r--r--  include/asm-m68k/raw_io.h | 8
-rw-r--r--  include/asm-m68knommu/hardirq.h | 2
-rw-r--r--  include/asm-m68knommu/machdep.h | 1
-rw-r--r--  include/asm-mips/edac.h | 35
-rw-r--r--  include/asm-powerpc/bug.h | 2
-rw-r--r--  include/asm-powerpc/page.h | 1
-rw-r--r--  include/asm-x86_64/uaccess.h | 4
-rw-r--r--  include/linux/compiler.h | 4
-rw-r--r--  include/linux/device.h | 3
-rw-r--r--  include/linux/lguest.h | 47
-rw-r--r--  include/linux/lguest_bus.h | 5
-rw-r--r--  include/linux/lguest_launcher.h | 60
-rw-r--r--  include/linux/mm.h | 2
-rw-r--r--  include/linux/netfilter/xt_connlimit.h | 4
-rw-r--r--  include/linux/pnp.h | 191
-rw-r--r--  include/linux/pnpbios.h | 60
-rw-r--r--  include/linux/suspend.h | 3
-rw-r--r--  include/net/netfilter/nf_conntrack_tuple.h | 4
-rw-r--r--  include/xen/page.h | 1
-rw-r--r--  init/initramfs.c | 2
-rw-r--r--  kernel/kmod.c | 8
-rw-r--r--  kernel/power/disk.c | 1
-rw-r--r--  kernel/sys.c | 3
-rw-r--r--  mm/migrate.c | 24
-rw-r--r--  mm/page_alloc.c | 6
-rw-r--r--  net/bridge/br_input.c | 6
-rw-r--r--  net/ipv4/netfilter/nf_nat_core.c | 3
-rw-r--r--  net/ipv4/netfilter/nf_nat_rule.c | 2
-rw-r--r--  net/ipv6/ip6_tunnel.c | 17
-rw-r--r--  net/key/af_key.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 3
-rw-r--r--  net/netfilter/nf_conntrack_expect.c | 8
-rw-r--r--  net/netfilter/nf_conntrack_helper.c | 2
-rw-r--r--  net/netfilter/xt_connlimit.c | 6
-rw-r--r--  net/netfilter/xt_u32.c | 11
-rw-r--r--  net/rxrpc/ar-connection.c | 2
-rw-r--r--  net/sunrpc/svcsock.c | 4
-rw-r--r--  net/tipc/msg.h | 6
149 files changed, 5842 insertions(+), 2251 deletions(-)
diff --git a/CREDITS b/CREDITS
index 10c214dc95e7..832436e1dd91 100644
--- a/CREDITS
+++ b/CREDITS
@@ -966,6 +966,7 @@ N: Pekka Enberg
 E: penberg@cs.helsinki.fi
 W: http://www.cs.helsinki.fi/u/penberg/
 D: Various kernel hacks, fixes, and cleanups.
+D: Slab allocators
 S: Finland
 
 N: David Engebretsen
@@ -1939,8 +1940,8 @@ D: for Menuconfig's lxdialog.
 N: Christoph Lameter
 E: christoph@lameter.com
 D: Digiboard PC/Xe and PC/Xi, Digiboard EPCA
-D: Early protocol filter for bridging code
-D: Bug fixes
+D: NUMA support, Slab allocators, Page migration
+D: Scalability, Time subsystem
 
 N: Paul Laufer
 E: paul@laufernet.com
diff --git a/Documentation/lguest/extract b/Documentation/lguest/extract
new file mode 100644
index 000000000000..7730bb6e4b94
--- /dev/null
+++ b/Documentation/lguest/extract
@@ -0,0 +1,58 @@
+#! /bin/sh
+
+set -e
+
+PREFIX=$1
+shift
+
+trap 'rm -r $TMPDIR' 0
+TMPDIR=`mktemp -d`
+
+exec 3>/dev/null
+for f; do
+    while IFS="
+" read -r LINE; do
+	case "$LINE" in
+	*$PREFIX:[0-9]*:\**)
+	    NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
+	    if [ -f $TMPDIR/$NUM ]; then
+		echo "$TMPDIR/$NUM already exists prior to $f"
+		exit 1
+	    fi
+	    exec 3>>$TMPDIR/$NUM
+	    echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM
+	    /bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3
+	    ;;
+	*$PREFIX:[0-9]*)
+	    NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
+	    if [ -f $TMPDIR/$NUM ]; then
+		echo "$TMPDIR/$NUM already exists prior to $f"
+		exit 1
+	    fi
+	    exec 3>>$TMPDIR/$NUM
+	    echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM
+	    /bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3
+	    ;;
+	*:\**)
+	    /bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3
+	    echo >&3
+	    exec 3>/dev/null
+	    ;;
+	*)
+	    /bin/echo "$LINE" >&3
+	    ;;
+	esac
+    done < $f
+    echo >&3
+    exec 3>/dev/null
+done
+
+LASTFILE=""
+for f in $TMPDIR/*; do
+    if [ "$LASTFILE" != $(cat $TMPDIR/.$(basename $f) ) ]; then
+	LASTFILE=$(cat $TMPDIR/.$(basename $f) )
+	echo "[ $LASTFILE ]"
+    fi
+    cat $f
+done
+
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 62a8133393e1..f7918401a007 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1,5 +1,10 @@
-/* Simple program to layout "physical" memory for new lguest guest.
- * Linked high to avoid likely physical memory. */
+/*P:100 This is the Launcher code, a simple program which lays out the
+ * "physical" memory for the new Guest by mapping the kernel image and the
+ * virtual devices, then reads repeatedly from /dev/lguest to run the Guest.
+ *
+ * The only trick: the Makefile links it at a high address so it will be clear
+ * of the guest memory region.  It means that each Guest cannot have more
+ * than about 2.5G of memory on a normally configured Host. :*/
 #define _LARGEFILE64_SOURCE
 #define _GNU_SOURCE
 #include <stdio.h>
@@ -29,12 +34,20 @@
 #include <termios.h>
 #include <getopt.h>
 #include <zlib.h>
+/*L:110 We can ignore the 28 include files we need for this program, but I do
+ * want to draw attention to the use of kernel-style types.
+ *
+ * As Linus said, "C is a Spartan language, and so should your naming be."  I
+ * like these abbreviations and the header we need uses them, so we define
+ * them here.
+ */
 typedef unsigned long long u64;
 typedef uint32_t u32;
 typedef uint16_t u16;
 typedef uint8_t u8;
 #include "../../include/linux/lguest_launcher.h"
 #include "../../include/asm-i386/e820.h"
+/*:*/
 
 #define PAGE_PRESENT 0x7	/* Present, RW, Execute */
 #define NET_PEERNUM 1
@@ -43,33 +56,52 @@ typedef uint8_t u8;
 #define SIOCBRADDIF	0x89a2	/* add interface to bridge */
 #endif
 
+/*L:120 verbose is both a global flag and a macro.  The C preprocessor allows
+ * this, and although I wouldn't recommend it, it works quite nicely here. */
 static bool verbose;
 #define verbose(args...) \
 	do { if (verbose) printf(args); } while(0)
+/*:*/
+
+/* The pipe to send commands to the waker process. */
 static int waker_fd;
+/* The top of guest physical memory. */
 static u32 top;
 
+/* This is our list of devices. */
 struct device_list
 {
+	/* Summary information about the devices in our list: ready to pass
+	 * to select() to ask which need servicing. */
 	fd_set infds;
 	int max_infd;
 
+	/* The descriptor page for the devices. */
 	struct lguest_device_desc *descs;
+
+	/* A singly linked list of devices. */
 	struct device *dev;
+	/* ... And an end pointer so we can easily append new devices. */
 	struct device **lastdev;
 };
 
+/* The device structure describes a single device. */
 struct device
 {
+	/* The linked-list pointer. */
 	struct device *next;
+	/* The descriptor for this device, as mapped into the Guest. */
 	struct lguest_device_desc *desc;
+	/* The memory page(s) of this device, if any.  Also mapped in Guest. */
 	void *mem;
 
-	/* Watch this fd if handle_input non-NULL. */
+	/* If handle_input is set, it wants to be called when this file
+	 * descriptor is ready. */
 	int fd;
 	bool (*handle_input)(int fd, struct device *me);
 
-	/* Watch DMA to this key if handle_input non-NULL. */
+	/* If handle_output is set, it wants to be called when the Guest sends
+	 * DMA to this key. */
 	unsigned long watch_key;
 	u32 (*handle_output)(int fd, const struct iovec *iov,
 			     unsigned int num, struct device *me);
@@ -78,6 +110,11 @@ struct device
 	void *priv;
 };
 
+/*L:130
+ * Loading the Kernel.
+ *
+ * We start with a couple of simple helper routines.  open_or_die() avoids
+ * error-checking code cluttering the callers: */
 static int open_or_die(const char *name, int flags)
 {
 	int fd = open(name, flags);
@@ -86,26 +123,38 @@ static int open_or_die(const char *name, int flags)
 	return fd;
 }
 
+/* map_zeroed_pages() takes a (page-aligned) address and a number of pages. */
 static void *map_zeroed_pages(unsigned long addr, unsigned int num)
 {
+	/* We cache the /dev/zero file descriptor so we only open it once. */
 	static int fd = -1;
 
 	if (fd == -1)
 		fd = open_or_die("/dev/zero", O_RDONLY);
 
+	/* We use a private mapping (ie. if we write to the page, it will be
+	 * copied), and obviously we insist that it be mapped where we ask. */
 	if (mmap((void *)addr, getpagesize() * num,
 		 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, fd, 0)
 	    != (void *)addr)
 		err(1, "Mmaping %u pages of /dev/zero @%p", num, (void *)addr);
+
+	/* Returning the address is just a courtesy: it can simplify callers. */
 	return (void *)addr;
 }
 
-/* Find magic string marking entry point, return entry point. */
+/* To find out where to start we look for the magic Guest string, which marks
+ * the code we see in lguest_asm.S.  This is a hack which we are currently
+ * plotting to replace with the normal Linux entry point. */
 static unsigned long entry_point(void *start, void *end,
 				 unsigned long page_offset)
 {
 	void *p;
 
+	/* The scan gives us the physical starting address.  We want the
+	 * virtual address in this case, and fortunately, we already figured
+	 * out the physical-virtual difference and passed it here in
+	 * "page_offset". */
 	for (p = start; p < end; p++)
 		if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0)
 			return (long)p + strlen("GenuineLguest") + page_offset;
@@ -113,7 +162,17 @@ static unsigned long entry_point(void *start, void *end,
 	err(1, "Is this image a genuine lguest?");
 }
 
-/* Returns the entry point */
+/* This routine takes an open vmlinux image, which is in ELF, and maps it into
+ * the Guest memory.  ELF = Executable and Linkable Format, the format used
+ * by all modern binaries on Linux, including the kernel.
+ *
+ * The ELF headers give *two* addresses: a physical address, and a virtual
+ * address.  The Guest kernel expects to be placed in memory at the physical
+ * address, and the page tables set up so it will correspond to that virtual
+ * address.  We return the difference between the virtual and physical
+ * addresses in the "page_offset" pointer.
+ *
+ * We return the starting address. */
 static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
 			     unsigned long *page_offset)
 {
@@ -122,40 +181,61 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
 	unsigned int i;
 	unsigned long start = -1UL, end = 0;
 
-	/* Sanity checks. */
+	/* Sanity checks on the main ELF header: an x86 executable with a
+	 * reasonable number of correctly-sized program headers. */
 	if (ehdr->e_type != ET_EXEC
 	    || ehdr->e_machine != EM_386
 	    || ehdr->e_phentsize != sizeof(Elf32_Phdr)
 	    || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr))
 		errx(1, "Malformed elf header");
 
+	/* An ELF executable contains an ELF header and a number of "program"
+	 * headers which indicate which parts ("segments") of the program to
+	 * load where. */
+
+	/* We read in all the program headers at once: */
 	if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0)
 		err(1, "Seeking to program headers");
 	if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
 		err(1, "Reading program headers");
 
+	/* We don't know page_offset yet. */
 	*page_offset = 0;
-	/* We map the loadable segments at virtual addresses corresponding
-	 * to their physical addresses (our virtual == guest physical). */
+
+	/* Try all the headers: there are usually only three.  A read-only
+	 * one, a read-write one, and a "note" section which isn't loadable. */
 	for (i = 0; i < ehdr->e_phnum; i++) {
+		/* If this isn't a loadable segment, we ignore it. */
 		if (phdr[i].p_type != PT_LOAD)
 			continue;
 
 		verbose("Section %i: size %i addr %p\n",
 			i, phdr[i].p_memsz, (void *)phdr[i].p_paddr);
 
-		/* We expect linear address space. */
+		/* We expect a simple linear address space: every segment must
+		 * have the same difference between virtual (p_vaddr) and
+		 * physical (p_paddr) address. */
 		if (!*page_offset)
 			*page_offset = phdr[i].p_vaddr - phdr[i].p_paddr;
 		else if (*page_offset != phdr[i].p_vaddr - phdr[i].p_paddr)
 			errx(1, "Page offset of section %i different", i);
 
+		/* We track the first and last address we mapped, so we can
+		 * tell entry_point() where to scan. */
 		if (phdr[i].p_paddr < start)
 			start = phdr[i].p_paddr;
 		if (phdr[i].p_paddr + phdr[i].p_filesz > end)
 			end = phdr[i].p_paddr + phdr[i].p_filesz;
 
-		/* We map everything private, writable. */
+		/* We map this section of the file at its physical address.
+		 * We map it read & write even if the header says this segment
+		 * is read-only.  The kernel really wants to be writable: it
+		 * patches its own instructions which would normally be
+		 * read-only.
+		 *
+		 * MAP_PRIVATE means that the page won't be copied until a
+		 * write is done to it.  This allows us to share much of the
+		 * kernel memory between Guests. */
 		addr = mmap((void *)phdr[i].p_paddr,
 			    phdr[i].p_filesz,
 			    PROT_READ|PROT_WRITE|PROT_EXEC,
@@ -169,7 +249,31 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
 	return entry_point((void *)start, (void *)end, *page_offset);
 }
 
-/* This is amazingly reliable. */
+/*L:170 Prepare to be SHOCKED and AMAZED.  And possibly a trifle nauseated.
+ *
+ * We know that CONFIG_PAGE_OFFSET sets what virtual address the kernel
+ * expects to be at.  We don't know what that option was, but we can figure
+ * it out approximately by looking at the addresses in the code.  I chose the
+ * common case of reading a memory location into the %eax register:
+ *
+ *  movl <some-address>, %eax
+ *
+ * This gets encoded as five bytes: "0xA1 <4-byte-address>".  For example,
+ * "0xA1 0x18 0x60 0x47 0xC0" reads the address 0xC0476018 into %eax.
+ *
+ * In this example we can guess that the kernel was compiled with
+ * CONFIG_PAGE_OFFSET set to 0xC0000000 (it's always a round number).  If the
+ * kernel were larger than 16MB, we might see 0xC1 addresses show up, but our
+ * kernel isn't that bloated yet.
+ *
+ * Unfortunately, x86 has variable-length instructions, so finding this
+ * particular instruction properly involves writing a disassembler.  Instead,
+ * we rely on statistics.  We look for "0xA1" and tally the different bytes
+ * which occur 4 bytes later (the "0xC0" in our example above).  When one of
+ * those bytes appears three times, we can be reasonably confident that it
+ * forms the start of CONFIG_PAGE_OFFSET.
+ *
+ * This is amazingly reliable. */
 static unsigned long intuit_page_offset(unsigned char *img, unsigned long len)
 {
 	unsigned int i, possibilities[256] = { 0 };
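
The diff stops short of the tally loop itself.  As a rough sketch of the
statistics described above (a hypothetical reconstruction with an
illustrative name, not the patch's own body):

	static unsigned long guess_page_offset(unsigned char *img,
					       unsigned long len)
	{
		unsigned int i, possibilities[256] = { 0 };

		for (i = 0; i + 4 < len; i++) {
			/* 0xA1 starts "movl <addr>, %eax"; the byte 4 further
			 * on is the top byte of that little-endian address. */
			if (img[i] == 0xA1 && ++possibilities[img[i+4]] >= 3)
				return (unsigned long)img[i+4] << 24;
		}
		errx(1, "could not determine page offset");
	}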
@@ -182,30 +286,52 @@ static unsigned long intuit_page_offset(unsigned char *img, unsigned long len)
 		errx(1, "could not determine page offset");
 }
 
+/*L:160 Unfortunately the entire ELF image isn't compressed: the segments
+ * which need loading are extracted and compressed raw.  This denies us the
+ * information we need to make a fully-general loader. */
 static unsigned long unpack_bzimage(int fd, unsigned long *page_offset)
 {
 	gzFile f;
 	int ret, len = 0;
+	/* A bzImage always gets loaded at physical address 1M.  This is
+	 * actually configurable as CONFIG_PHYSICAL_START, but as the comment
+	 * there says, "Don't change this unless you know what you are doing".
+	 * Indeed. */
 	void *img = (void *)0x100000;
 
+	/* gzdopen takes our file descriptor (carefully placed at the start
+	 * of the GZIP header we found) and returns a gzFile. */
 	f = gzdopen(fd, "rb");
+	/* We read it into memory in 64k chunks until we hit the end. */
 	while ((ret = gzread(f, img + len, 65536)) > 0)
 		len += ret;
 	if (ret < 0)
 		err(1, "reading image from bzImage");
 
 	verbose("Unpacked size %i addr %p\n", len, img);
 
+	/* Without the ELF header, we can't tell the virtual-physical gap.
+	 * This is CONFIG_PAGE_OFFSET, and people do actually change it.
+	 * Fortunately, I have a clever way of figuring it out from the code
+	 * itself. */
 	*page_offset = intuit_page_offset(img, len);
 
 	return entry_point(img, img + len, *page_offset);
 }
 
+/*L:150 A bzImage, unlike an ELF file, is not meant to be loaded.  You're
+ * supposed to jump into it and it will unpack itself.  We can't do that
+ * because the Guest can't run the unpacking code, and adding features to
+ * lguest kills puppies, so we don't want to.
+ *
+ * The bzImage is formed by putting the decompressing code in front of the
+ * compressed kernel code.  So we can simply scan through it looking for the
+ * first "gzip" header, and start decompressing from there. */
 static unsigned long load_bzimage(int fd, unsigned long *page_offset)
 {
 	unsigned char c;
 	int state = 0;
 
-	/* Ugly brute force search for gzip header. */
+	/* The GZIP header is 0x1F 0x8B <method> <flags>... <compressed-by>. */
 	while (read(fd, &c, 1) == 1) {
 		switch (state) {
 		case 0:
@@ -222,8 +348,10 @@ static unsigned long load_bzimage(int fd, unsigned long *page_offset)
 			state++;
 			break;
 		case 9:
+			/* Seek back to the start of the gzip header. */
 			lseek(fd, -10, SEEK_CUR);
-			if (c != 0x03) /* Compressed under UNIX. */
+			/* One final check: "compressed under UNIX". */
+			if (c != 0x03)
 				state = -1;
 			else
 				return unpack_bzimage(fd, page_offset);
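
Only fragments of the scanner appear in these hunks.  Assuming the byte
layout named in the comment above (0x1F 0x8B, then a method byte, with the
OS field at offset 9), the whole state machine amounts to something like
this hypothetical sketch:

	static int find_gzip_header(int fd)
	{
		unsigned char c;
		int state = 0;

		while (read(fd, &c, 1) == 1) {
			if (state == 0)
				state = (c == 0x1F);		/* first magic byte */
			else if (state == 1)
				state = (c == 0x8B) ? 2 : 0;	/* second magic byte */
			else if (++state == 10) {	/* c is now the OS byte */
				lseek(fd, -10, SEEK_CUR);
				if (c == 0x03)		/* compressed under UNIX */
					return 0;
				/* Resume scanning after the 0x1F we'll re-read. */
				state = -1;
			}
		}
		return -1;
	}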
@@ -232,25 +360,43 @@ static unsigned long load_bzimage(int fd, unsigned long *page_offset)
 	errx(1, "Could not find kernel in bzImage");
 }
 
+/*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels
+ * come wrapped up in the self-decompressing "bzImage" format.  With some
+ * funky coding, we can load those, too. */
 static unsigned long load_kernel(int fd, unsigned long *page_offset)
 {
 	Elf32_Ehdr hdr;
 
+	/* Read in the first few bytes. */
 	if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
 		err(1, "Reading kernel");
 
+	/* If it's an ELF file, it starts with "\177ELF". */
 	if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0)
 		return map_elf(fd, &hdr, page_offset);
 
+	/* Otherwise we assume it's a bzImage, and try to unpack it. */
 	return load_bzimage(fd, page_offset);
 }
 
+/* This is a trivial little helper to align pages.  Andi Kleen hated it
+ * because it calls getpagesize() twice: "it's dumb code."
+ *
+ * Kernel guys get really het up about optimization, even when it's not
+ * necessary.  I leave this code as a reaction against that. */
 static inline unsigned long page_align(unsigned long addr)
 {
+	/* Add upwards and truncate downwards. */
 	return ((addr + getpagesize()-1) & ~(getpagesize()-1));
 }
 
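A quick worked example of the mask arithmetic, assuming 4096-byte pages
(illustrative values only):

	page_align(4096) == (4096 + 4095) & ~4095 == 4096	/* already aligned */
	page_align(4097) == (4097 + 4095) & ~4095 == 8192	/* rounded up */
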
-/* initrd gets loaded at top of memory: return length. */
+/*L:180 An "initial ram disk" is a disk image loaded into memory along with
+ * the kernel which the kernel can use to boot from without needing any
+ * drivers.  Most distributions now use this as standard: the initrd contains
+ * the code to load the appropriate driver modules for the current machine.
+ *
+ * Importantly, James Morris works for RedHat, and Fedora uses initrds for
+ * its kernels.  He sent me this (and tells me when I break it). */
 static unsigned long load_initrd(const char *name, unsigned long mem)
 {
 	int ifd;
@@ -259,21 +405,35 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
 	void *iaddr;
 
 	ifd = open_or_die(name, O_RDONLY);
+	/* fstat() is needed to get the file size. */
 	if (fstat(ifd, &st) < 0)
 		err(1, "fstat() on initrd '%s'", name);
 
+	/* The length needs to be rounded up to a page size: mmap needs the
+	 * address to be page aligned. */
 	len = page_align(st.st_size);
+	/* We map the initrd at the top of memory. */
 	iaddr = mmap((void *)mem - len, st.st_size,
 		     PROT_READ|PROT_EXEC|PROT_WRITE,
 		     MAP_FIXED|MAP_PRIVATE, ifd, 0);
 	if (iaddr != (void *)mem - len)
 		err(1, "Mmaping initrd '%s' returned %p not %p",
 		    name, iaddr, (void *)mem - len);
+	/* Once a file is mapped, you can close the file descriptor.  It's a
+	 * little odd, but quite useful. */
 	close(ifd);
 	verbose("mapped initrd %s size=%lu @ %p\n", name, st.st_size, iaddr);
 
+	/* We return the initrd size. */
 	return len;
 }
 
+/* Once we know how much memory we have, and the address the Guest kernel
+ * expects, we can construct simple linear page tables which will get the
+ * Guest far enough into the boot to create its own.
+ *
+ * We lay them out of the way, just below the initrd (which is why we need
+ * to know its size). */
 static unsigned long setup_pagetables(unsigned long mem,
 				      unsigned long initrd_size,
 				      unsigned long page_offset)
@@ -282,23 +442,32 @@ static unsigned long setup_pagetables(unsigned long mem,
 	unsigned int mapped_pages, i, linear_pages;
 	unsigned int ptes_per_page = getpagesize()/sizeof(u32);
 
-	/* If we can map all of memory above page_offset, we do so. */
+	/* Ideally we map all physical memory starting at page_offset.
+	 * However, if page_offset is 0xC0000000 we can only map 1G of
+	 * physical memory (0xC0000000 + 1G overflows a 32-bit address). */
 	if (mem <= -page_offset)
 		mapped_pages = mem/getpagesize();
 	else
 		mapped_pages = -page_offset/getpagesize();
 
-	/* Each linear PTE page can map ptes_per_page pages. */
+	/* Each PTE page can map ptes_per_page pages: how many do we need? */
 	linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page;
 
-	/* We lay out top-level then linear mapping immediately below initrd */
+	/* We put the toplevel page directory page just below the initrd,
+	 * near the top of memory. */
 	pgdir = (void *)mem - initrd_size - getpagesize();
+
+	/* Now we use the next linear_pages pages as pte pages. */
 	linear = (void *)pgdir - linear_pages*getpagesize();
 
+	/* The linear mapping is easy: put every page's address into the
+	 * mapping in order.  PAGE_PRESENT contains the flags Present,
+	 * Writable and Executable. */
 	for (i = 0; i < mapped_pages; i++)
 		linear[i] = ((i * getpagesize()) | PAGE_PRESENT);
 
-	/* Now set up pgd so that this memory is at page_offset */
+	/* The top level points to the linear page table pages above.  The
+	 * entry representing page_offset points to the first one, and they
+	 * continue from there. */
 	for (i = 0; i < mapped_pages; i += ptes_per_page) {
 		pgdir[(i + page_offset/getpagesize())/ptes_per_page]
 			= (((u32)linear + i*sizeof(u32)) | PAGE_PRESENT);
@@ -307,9 +476,13 @@ static unsigned long setup_pagetables(unsigned long mem,
 	verbose("Linear mapping of %u pages in %u pte pages at %p\n",
 		mapped_pages, linear_pages, linear);
 
+	/* We return the top level (guest-physical) address: the kernel needs
+	 * to know where it is. */
 	return (unsigned long)pgdir;
 }
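
As a hypothetical illustration (not in the patch; it assumes guest physical
memory is mapped 1:1 into the Launcher, as it is here, and a 4MB-aligned
page_offset), a Guest virtual address would resolve through this two-level
layout like so:

	static u32 resolve_vaddr(u32 *pgdir, u32 vaddr)
	{
		unsigned int ptes_per_page = getpagesize()/sizeof(u32);
		/* The top-level entry for this address; flags live in the
		 * low bits. */
		u32 pde = pgdir[(vaddr/getpagesize())/ptes_per_page];
		/* Mask the flags off to get the address of the PTE page... */
		u32 *ptes = (u32 *)(pde & ~(getpagesize()-1));
		/* ...then index it with the next part of the page number. */
		u32 pte = ptes[(vaddr/getpagesize()) % ptes_per_page];
		/* Physical page address plus the offset within the page. */
		return (pte & ~(getpagesize()-1)) + vaddr % getpagesize();
	}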
 
+/* Simple routine to roll all the commandline arguments together with spaces
+ * between them. */
 static void concat(char *dst, char *args[])
 {
 	unsigned int i, len = 0;
@@ -323,6 +496,10 @@ static void concat(char *dst, char *args[])
 	dst[len] = '\0';
 }
 
+/* This is where we actually tell the kernel to initialize the Guest.  We
+ * saw the arguments it expects when we looked at initialize() in
+ * lguest_user.c: the top physical page to allow, the top level pagetable,
+ * the entry point and the page_offset constant for the Guest. */
 static int tell_kernel(u32 pgdir, u32 start, u32 page_offset)
 {
 	u32 args[] = { LHREQ_INITIALIZE,
@@ -332,8 +509,11 @@ static int tell_kernel(u32 pgdir, u32 start, u32 page_offset)
 	fd = open_or_die("/dev/lguest", O_RDWR);
 	if (write(fd, args, sizeof(args)) < 0)
 		err(1, "Writing to /dev/lguest");
+
+	/* We return the /dev/lguest file descriptor to control this Guest. */
 	return fd;
 }
+/*:*/
 
 static void set_fd(int fd, struct device_list *devices)
 {
@@ -342,61 +522,108 @@ static void set_fd(int fd, struct device_list *devices)
 		devices->max_infd = fd;
 }
 
-/* When input arrives, we tell the kernel to kick lguest out with -EAGAIN. */
+/*L:200
+ * The Waker.
+ *
+ * With a console and network devices, we can have lots of input which we
+ * need to process.  We could try to tell the kernel what file descriptors
+ * to watch, but handing a file descriptor mask through to the kernel is
+ * fairly icky.
+ *
+ * Instead, we fork off a process which watches the file descriptors and
+ * writes the LHREQ_BREAK command to the /dev/lguest file descriptor to tell
+ * the Host loop to stop running the Guest.  This causes it to return from
+ * the /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to
+ * reset the LHREQ_BREAK and wake us up again.
+ *
+ * This, of course, is merely a different *kind* of icky.
+ */
 static void wake_parent(int pipefd, int lguest_fd, struct device_list *devices)
 {
+	/* Add the pipe from the Launcher to the fdset in the device_list, so
+	 * we watch it, too. */
 	set_fd(pipefd, devices);
 
 	for (;;) {
 		fd_set rfds = devices->infds;
 		u32 args[] = { LHREQ_BREAK, 1 };
 
+		/* Wait until input is ready from one of the devices. */
 		select(devices->max_infd+1, &rfds, NULL, NULL, NULL);
+		/* Is it a message from the Launcher? */
 		if (FD_ISSET(pipefd, &rfds)) {
 			int ignorefd;
+			/* If read() returns 0, it means the Launcher has
+			 * exited.  We silently follow. */
 			if (read(pipefd, &ignorefd, sizeof(ignorefd)) == 0)
 				exit(0);
+			/* Otherwise it's telling us there's a problem with
+			 * one of the devices, and we should ignore that file
+			 * descriptor from now on. */
 			FD_CLR(ignorefd, &devices->infds);
-		} else
+		} else /* Send LHREQ_BREAK command. */
 			write(lguest_fd, args, sizeof(args));
 	}
 }
 
+/* This routine just sets up a pipe to the Waker process. */
 static int setup_waker(int lguest_fd, struct device_list *device_list)
 {
 	int pipefd[2], child;
 
+	/* We create a pipe to talk to the waker, and also so it knows when
+	 * the Launcher dies (and closes the pipe). */
 	pipe(pipefd);
 	child = fork();
 	if (child == -1)
 		err(1, "forking");
 
 	if (child == 0) {
+		/* Close the "writing" end of our copy of the pipe. */
 		close(pipefd[1]);
 		wake_parent(pipefd[0], lguest_fd, device_list);
 	}
+	/* Close the reading end of our copy of the pipe. */
 	close(pipefd[0]);
 
+	/* Here is the fd used to talk to the waker. */
 	return pipefd[1];
 }
 
+/*L:210
+ * Device Handling.
+ *
+ * When the Guest sends DMA to us, it sends us an array of addresses and
+ * sizes.  We need to make sure it's not trying to reach into the Launcher
+ * itself, so we have a convenient routine which checks it and exits with an
+ * error message if something funny is going on:
+ */
 static void *_check_pointer(unsigned long addr, unsigned int size,
 			    unsigned int line)
 {
+	/* We have to separately check addr and addr+size, because size could
+	 * be huge and addr + size might wrap around. */
 	if (addr >= top || addr + size >= top)
 		errx(1, "%s:%i: Invalid address %li", __FILE__, line, addr);
+	/* We return a pointer for the caller's convenience, now we know it's
+	 * safe to use. */
 	return (void *)addr;
 }
+/* A macro which transparently hands the line number to the real function. */
 #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
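
To see why both halves of that check matter, here is a hypothetical 32-bit
demonstration (illustrative numbers, not from the patch; it reuses the
file's u32 typedef):

	#include <assert.h>
	static void wraparound_demo(void)
	{
		u32 top = 0x10000000;		/* say, 256MB of guest memory */
		u32 addr = 0xFFFFF000, size = 0x2000;

		/* The 32-bit sum wraps to 0x1000, so a naive
		 * "addr + size < top" check passes... */
		assert(addr + size < top);
		/* ...while addr itself is far out of range: the separate
		 * check on addr catches it. */
		assert(addr >= top);
	}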
 
-/* Returns pointer to dma->used_len */
+/* The Guest has given us the address of a "struct lguest_dma".  We check
+ * it's OK and convert it to an iovec (which is a simple array of ptr/size
+ * pairs). */
 static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num)
 {
 	unsigned int i;
 	struct lguest_dma *udma;
 
+	/* First we make sure that the array memory itself is valid. */
 	udma = check_pointer(dma, sizeof(*udma));
+	/* Now we check each element. */
 	for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
+		/* A zero length ends the array. */
 		if (!udma->len[i])
 			break;
 
@@ -404,9 +631,15 @@ static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num)
 		iov[i].iov_len = udma->len[i];
 	}
 	*num = i;
+
+	/* We return the pointer to where the caller should write the amount
+	 * of the buffer used. */
 	return &udma->used_len;
 }
 
+/* This routine gets a DMA buffer from the Guest for a given key, and
+ * converts it to an iovec array.  It returns the interrupt the Guest wants
+ * when we're finished, and a pointer to the "used_len" field to fill in. */
 static u32 *get_dma_buffer(int fd, void *key,
 			   struct iovec iov[], unsigned int *num, u32 *irq)
 {
@@ -414,16 +647,21 @@ static u32 *get_dma_buffer(int fd, void *key,
 	unsigned long udma;
 	u32 *res;
 
+	/* Ask the kernel for a DMA buffer corresponding to this key. */
 	udma = write(fd, buf, sizeof(buf));
+	/* They haven't registered any, or they're all used? */
 	if (udma == (unsigned long)-1)
 		return NULL;
 
-	/* Kernel stashes irq in ->used_len. */
+	/* Convert it into our iovec array. */
 	res = dma2iov(udma, iov, num);
+	/* The kernel stashes the irq in ->used_len to get it out to us. */
 	*irq = *res;
+	/* Return a pointer to ((struct lguest_dma *)udma)->used_len. */
 	return res;
 }
 
+/* This is a convenient routine to send the Guest an interrupt. */
 static void trigger_irq(int fd, u32 irq)
 {
 	u32 buf[] = { LHREQ_IRQ, irq };
@@ -431,6 +669,10 @@ static void trigger_irq(int fd, u32 irq)
 		err(1, "Triggering irq %i", irq);
 }
 
+/* This simply sets up an iovec array where we can put data to be discarded.
+ * This happens when the Guest doesn't want or can't handle the input: we
+ * have to get rid of it somewhere, and if we bury it in the ceiling space
+ * it will start to smell after a week. */
 static void discard_iovec(struct iovec *iov, unsigned int *num)
 {
 	static char discard_buf[1024];
@@ -439,19 +681,24 @@ static void discard_iovec(struct iovec *iov, unsigned int *num)
 	iov->iov_len = sizeof(discard_buf);
 }
 
+/* Here are the input terminal settings we save, and the routine to restore
+ * them on exit so the user can see what they type next. */
 static struct termios orig_term;
 static void restore_term(void)
 {
 	tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
 }
 
+/* We associate some data with the console for our exit hack. */
 struct console_abort
 {
+	/* How many times have they hit ^C? */
 	int count;
+	/* When did they start? */
 	struct timeval start;
 };
 
-/* We DMA input to buffer bound at start of console page. */
+/* This is the routine which handles console input (ie. stdin). */
 static bool handle_console_input(int fd, struct device *dev)
 {
 	u32 irq = 0, *lenp;
@@ -460,24 +707,38 @@ static bool handle_console_input(int fd, struct device *dev)
 	struct iovec iov[LGUEST_MAX_DMA_SECTIONS];
 	struct console_abort *abort = dev->priv;
 
+	/* First we get the console buffer from the Guest.  The key is
+	 * dev->mem, which was set to 0 in setup_console(). */
 	lenp = get_dma_buffer(fd, dev->mem, iov, &num, &irq);
 	if (!lenp) {
+		/* If it's not ready for input, warn and set up to discard. */
 		warn("console: no dma buffer!");
 		discard_iovec(iov, &num);
 	}
 
+	/* This is why we convert to iovecs: the readv() call uses them, and
+	 * so it reads straight into the Guest's buffer. */
 	len = readv(dev->fd, iov, num);
 	if (len <= 0) {
+		/* This implies that the console is closed, is /dev/null, or
+		 * something went terribly wrong.  We still go through the
+		 * rest of the logic, though, especially the exit handling
+		 * below. */
 		warnx("Failed to get console input, ignoring console.");
 		len = 0;
 	}
 
+	/* If we read the data into the Guest, fill in the length and send
+	 * the interrupt. */
 	if (lenp) {
 		*lenp = len;
 		trigger_irq(fd, irq);
 	}
 
-	/* Three ^C within one second?  Exit. */
+	/* Three ^C within one second?  Exit.
+	 *
+	 * This is such a hack, but it works surprisingly well.  Each ^C has
+	 * to be in a buffer by itself, so they can't be too fast.  But we
+	 * check that we get three within about a second, so they can't be
+	 * too slow. */
 	if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) {
 		if (!abort->count++)
 			gettimeofday(&abort->start, NULL);
@@ -485,43 +746,60 @@ static bool handle_console_input(int fd, struct device *dev)
 			struct timeval now;
 			gettimeofday(&now, NULL);
 			if (now.tv_sec <= abort->start.tv_sec+1) {
-				/* Make sure waker is not blocked in BREAK */
 				u32 args[] = { LHREQ_BREAK, 0 };
+				/* Close the fd so the Waker will know it has
+				 * to exit. */
 				close(waker_fd);
+				/* Just in case the Waker is blocked in BREAK,
+				 * send unbreak now. */
 				write(fd, args, sizeof(args));
 				exit(2);
 			}
 			abort->count = 0;
 		}
 	} else
+		/* Any other key resets the abort counter. */
 		abort->count = 0;
 
+	/* Now, if we didn't read anything, put the input terminal back and
+	 * return failure (meaning, don't call us again). */
 	if (!len) {
 		restore_term();
 		return false;
 	}
+	/* Everything went OK! */
 	return true;
 }
 
+/* Handling console output is much simpler than input. */
 static u32 handle_console_output(int fd, const struct iovec *iov,
 				 unsigned num, struct device *dev)
 {
+	/* Whatever the Guest sends, write it to standard output.  Return
+	 * the number of bytes written. */
 	return writev(STDOUT_FILENO, iov, num);
 }
 
+/* Guest->Host network output is also pretty easy. */
 static u32 handle_tun_output(int fd, const struct iovec *iov,
 			     unsigned num, struct device *dev)
 {
-	/* Now we've seen output, we should warn if we can't get buffers. */
+	/* We put a flag in the "priv" pointer of the network device, and set
+	 * it as soon as we see output.  We'll see why in handle_tun_input(). */
 	*(bool *)dev->priv = true;
+	/* Whatever packet the Guest sent us, write it out to the tun
+	 * device. */
 	return writev(dev->fd, iov, num);
 }
 
+/* This matches the peer_key() in lguest_net.c.  The key for any given slot
+ * is the address of the network device's page plus 4 * the slot number. */
 static unsigned long peer_offset(unsigned int peernum)
 {
 	return 4 * peernum;
 }
 
+/* This is where we handle a packet coming in from the tun device. */
 static bool handle_tun_input(int fd, struct device *dev)
 {
 	u32 irq = 0, *lenp;
@@ -529,17 +807,28 @@ static bool handle_tun_input(int fd, struct device *dev)
 	unsigned num;
 	struct iovec iov[LGUEST_MAX_DMA_SECTIONS];
 
+	/* First we get a buffer the Guest has bound to its key. */
 	lenp = get_dma_buffer(fd, dev->mem+peer_offset(NET_PEERNUM), iov, &num,
 			      &irq);
 	if (!lenp) {
+		/* Now, it's expected that if we try to send a packet too
+		 * early, the Guest won't be ready yet.  This is why we set a
+		 * flag when the Guest sends its first packet.  If it's sent
+		 * a packet we assume it should be ready to receive them.
+		 *
+		 * Actually, this is what the status bits in the descriptor
+		 * are for: we should *use* them.  FIXME! */
 		if (*(bool *)dev->priv)
 			warn("network: no dma buffer!");
 		discard_iovec(iov, &num);
 	}
 
+	/* Read the packet from the device directly into the Guest's buffer. */
 	len = readv(dev->fd, iov, num);
 	if (len <= 0)
 		err(1, "reading network");
+
+	/* Write the used_len, and trigger the interrupt for the Guest. */
 	if (lenp) {
 		*lenp = len;
 		trigger_irq(fd, irq);
@@ -547,9 +836,13 @@ static bool handle_tun_input(int fd, struct device *dev)
 	verbose("tun input packet len %i [%02x %02x] (%s)\n", len,
 		((u8 *)iov[0].iov_base)[0], ((u8 *)iov[0].iov_base)[1],
 		lenp ? "sent" : "discarded");
+	/* All good. */
 	return true;
 }
 
+/* The last device-handling routine is block output: the Guest has sent a
+ * DMA to the block device.  It will have placed the command it wants in the
+ * "struct lguest_block_page". */
 static u32 handle_block_output(int fd, const struct iovec *iov,
 			       unsigned num, struct device *dev)
 {
@@ -559,36 +852,64 @@ static u32 handle_block_output(int fd, const struct iovec *iov,
 	struct iovec reply[LGUEST_MAX_DMA_SECTIONS];
 	off64_t device_len, off = (off64_t)p->sector * 512;
 
+	/* First we extract the device length from the dev->priv pointer. */
 	device_len = *(off64_t *)dev->priv;
 
+	/* We first check that the read or write is within the length of the
+	 * block file. */
 	if (off >= device_len)
 		err(1, "Bad offset %llu vs %llu", off, device_len);
+	/* Move to the right location in the block file.  This shouldn't
+	 * fail, but best to check. */
 	if (lseek64(dev->fd, off, SEEK_SET) != off)
 		err(1, "Bad seek to sector %i", p->sector);
 
 	verbose("Block: %s at offset %llu\n", p->type ? "WRITE" : "READ", off);
 
+	/* They were supposed to bind a reply buffer at the key equal to the
+	 * start of the block device memory.  We need this to tell them when
+	 * the request is finished. */
 	lenp = get_dma_buffer(fd, dev->mem, reply, &reply_num, &irq);
 	if (!lenp)
 		err(1, "Block request didn't give us a dma buffer");
 
 	if (p->type) {
+		/* A write request.  The DMA they sent contained the data, so
+		 * write it out. */
 		len = writev(dev->fd, iov, num);
+		/* Grr... Now we know how long the "struct lguest_dma" they
+		 * sent was, we make sure they didn't try to write over the
+		 * end of the block file (possibly extending it). */
 		if (off + len > device_len) {
+			/* Trim it back to the correct length. */
 			ftruncate(dev->fd, device_len);
+			/* Die, bad Guest, die. */
 			errx(1, "Write past end %llu+%u", off, len);
 		}
+		/* The reply length is 0: we just send back an empty DMA to
+		 * interrupt them and tell them the write is finished. */
 		*lenp = 0;
 	} else {
+		/* A read request.  They sent an empty DMA to start the
+		 * request, and we put the read contents into the reply
+		 * buffer. */
 		len = readv(dev->fd, reply, reply_num);
 		*lenp = len;
 	}
 
+	/* The result is 1 (done), or 2 if there was an error (a short read
+	 * or short write). */
 	p->result = 1 + (p->bytes != len);
+	/* Now tell them we've used their reply buffer. */
 	trigger_irq(fd, irq);
+
+	/* We're supposed to return the number of bytes of the output buffer
+	 * we used.  But the block device uses the "result" field instead,
+	 * so we don't bother. */
 	return 0;
 }
 
+/* This is the generic routine we call when the Guest sends some DMA out. */
 static void handle_output(int fd, unsigned long dma, unsigned long key,
 			  struct device_list *devices)
 {
@@ -597,30 +918,53 @@ static void handle_output(int fd, unsigned long dma, unsigned long key,
 	struct iovec iov[LGUEST_MAX_DMA_SECTIONS];
 	unsigned num = 0;
 
+	/* Convert the "struct lguest_dma" they're sending to a "struct
+	 * iovec". */
 	lenp = dma2iov(dma, iov, &num);
+
+	/* Check each device: if they expect output to this key, tell them
+	 * to handle it. */
 	for (i = devices->dev; i; i = i->next) {
 		if (i->handle_output && key == i->watch_key) {
+			/* We write the result straight into the used_len
+			 * field for them. */
 			*lenp = i->handle_output(fd, iov, num, i);
 			return;
 		}
 	}
 
+	/* This can happen: the kernel sends us any SEND_DMA which doesn't
+	 * match another Guest.  It could be that another Guest just left a
+	 * network, for example.  But it's unusual. */
 	warnx("Pending dma %p, key %p", (void *)dma, (void *)key);
 }
 
+/* This is called when the waker wakes us up: check for incoming file
+ * descriptors. */
 static void handle_input(int fd, struct device_list *devices)
 {
+	/* select() wants a zeroed timeval to mean "don't wait". */
 	struct timeval poll = { .tv_sec = 0, .tv_usec = 0 };
 
 	for (;;) {
 		struct device *i;
 		fd_set fds = devices->infds;
 
+		/* If nothing is ready, we're done. */
 		if (select(devices->max_infd+1, &fds, NULL, NULL, &poll) == 0)
 			break;
 
+		/* Otherwise, call the device(s) which have readable file
+		 * descriptors and a method of handling them. */
 		for (i = devices->dev; i; i = i->next) {
 			if (i->handle_input && FD_ISSET(i->fd, &fds)) {
+				/* If handle_input() returns false, it means
+				 * we should no longer service it.
+				 * handle_console_input() does this. */
 				if (!i->handle_input(fd, i)) {
+					/* Clear it from the set of input file
+					 * descriptors kept at the head of the
+					 * device list. */
 					FD_CLR(i->fd, &devices->infds);
 					/* Tell waker to ignore it too... */
 					write(waker_fd, &i->fd, sizeof(i->fd));
@@ -630,6 +974,15 @@ static void handle_input(int fd, struct device_list *devices)
630 } 974 }
631} 975}
632 976
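The zeroed timeval is the whole trick above: select() with a zero timeout polls and returns immediately instead of blocking. A minimal sketch of the same pattern, polling stdin instead of a real device list:

#include <stdio.h>
#include <sys/select.h>
#include <unistd.h>

/* Returns 1 if a read from stdin would not block right now. */
static int input_ready(void)
{
    struct timeval poll = { .tv_sec = 0, .tv_usec = 0 };
    fd_set fds;

    FD_ZERO(&fds);
    FD_SET(STDIN_FILENO, &fds);
    /* A zeroed timeval means "check and return at once". */
    return select(STDIN_FILENO + 1, &fds, NULL, NULL, &poll) > 0;
}

int main(void)
{
    printf("stdin %s readable\n", input_ready() ? "is" : "is not");
    return 0;
}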
977/*L:190
978 * Device Setup
979 *
980 * All devices need a descriptor so the Guest knows it exists, and a "struct
981 * device" so the Launcher can keep track of it. We have common helper
982 * routines to allocate them.
983 *
 984 * This routine allocates a new "struct lguest_device_desc" from the
 985 * descriptor table in the devices array just above the Guest's normal memory. */
633static struct lguest_device_desc * 986static struct lguest_device_desc *
634new_dev_desc(struct lguest_device_desc *descs, 987new_dev_desc(struct lguest_device_desc *descs,
635 u16 type, u16 features, u16 num_pages) 988 u16 type, u16 features, u16 num_pages)
@@ -641,6 +994,8 @@ new_dev_desc(struct lguest_device_desc *descs,
641 descs[i].type = type; 994 descs[i].type = type;
642 descs[i].features = features; 995 descs[i].features = features;
643 descs[i].num_pages = num_pages; 996 descs[i].num_pages = num_pages;
997 /* If they said the device needs memory, we allocate
998 * that now, bumping up the top of Guest memory. */
644 if (num_pages) { 999 if (num_pages) {
645 map_zeroed_pages(top, num_pages); 1000 map_zeroed_pages(top, num_pages);
646 descs[i].pfn = top/getpagesize(); 1001 descs[i].pfn = top/getpagesize();
@@ -652,6 +1007,9 @@ new_dev_desc(struct lguest_device_desc *descs,
652 errx(1, "too many devices"); 1007 errx(1, "too many devices");
653} 1008}
654 1009
1010/* This monster routine does all the creation and setup of a new device,
 1011 * including calling new_dev_desc() to allocate the descriptor and device
1012 * memory. */
655static struct device *new_device(struct device_list *devices, 1013static struct device *new_device(struct device_list *devices,
656 u16 type, u16 num_pages, u16 features, 1014 u16 type, u16 num_pages, u16 features,
657 int fd, 1015 int fd,
@@ -664,12 +1022,18 @@ static struct device *new_device(struct device_list *devices,
664{ 1022{
665 struct device *dev = malloc(sizeof(*dev)); 1023 struct device *dev = malloc(sizeof(*dev));
666 1024
667 /* Append to device list. */ 1025 /* Append to device list. Prepending to a single-linked list is
1026 * easier, but the user expects the devices to be arranged on the bus
1027 * in command-line order. The first network device on the command line
1028 * is eth0, the first block device /dev/lgba, etc. */
668 *devices->lastdev = dev; 1029 *devices->lastdev = dev;
669 dev->next = NULL; 1030 dev->next = NULL;
670 devices->lastdev = &dev->next; 1031 devices->lastdev = &dev->next;
671 1032
1033 /* Now we populate the fields one at a time. */
672 dev->fd = fd; 1034 dev->fd = fd;
1035 /* If we have an input handler for this file descriptor, then we add it
1036 * to the device_list's fdset and maxfd. */
673 if (handle_input) 1037 if (handle_input)
674 set_fd(dev->fd, devices); 1038 set_fd(dev->fd, devices);
675 dev->desc = new_dev_desc(devices->descs, type, features, num_pages); 1039 dev->desc = new_dev_desc(devices->descs, type, features, num_pages);
@@ -680,27 +1044,37 @@ static struct device *new_device(struct device_list *devices,
680 return dev; 1044 return dev;
681} 1045}
682 1046
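That lastdev field rewards a second look: keeping a pointer to the last "next" pointer makes appending two assignments with no list walk, while preserving insertion order. A sketch of the idiom with a hypothetical node type:

#include <stddef.h>

struct node {
    struct node *next;
    int value;
};

struct list {
    struct node *head;
    struct node **lastp;    /* &head, or &(last node)->next */
};

static void list_init(struct list *l)
{
    l->head = NULL;
    l->lastp = &l->head;
}

static void list_append(struct list *l, struct node *n)
{
    /* O(1) append; no special case for the empty list. */
    n->next = NULL;
    *l->lastp = n;
    l->lastp = &n->next;
}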
1047/* Our first setup routine is the console. It's a fairly simple device, but
1048 * UNIX tty handling makes it uglier than it could be. */
683static void setup_console(struct device_list *devices) 1049static void setup_console(struct device_list *devices)
684{ 1050{
685 struct device *dev; 1051 struct device *dev;
686 1052
1053 /* If we can save the initial standard input settings... */
687 if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { 1054 if (tcgetattr(STDIN_FILENO, &orig_term) == 0) {
688 struct termios term = orig_term; 1055 struct termios term = orig_term;
1056 /* Then we turn off echo, line buffering and ^C etc. We want a
1057 * raw input stream to the Guest. */
689 term.c_lflag &= ~(ISIG|ICANON|ECHO); 1058 term.c_lflag &= ~(ISIG|ICANON|ECHO);
690 tcsetattr(STDIN_FILENO, TCSANOW, &term); 1059 tcsetattr(STDIN_FILENO, TCSANOW, &term);
1060 /* If we exit gracefully, the original settings will be
1061 * restored so the user can see what they're typing. */
691 atexit(restore_term); 1062 atexit(restore_term);
692 } 1063 }
693 1064
694 /* We don't currently require a page for the console. */ 1065 /* We don't currently require any memory for the console, so we ask for
1066 * 0 pages. */
695 dev = new_device(devices, LGUEST_DEVICE_T_CONSOLE, 0, 0, 1067 dev = new_device(devices, LGUEST_DEVICE_T_CONSOLE, 0, 0,
696 STDIN_FILENO, handle_console_input, 1068 STDIN_FILENO, handle_console_input,
697 LGUEST_CONSOLE_DMA_KEY, handle_console_output); 1069 LGUEST_CONSOLE_DMA_KEY, handle_console_output);
1070 /* We store the console state in dev->priv, and initialize it. */
698 dev->priv = malloc(sizeof(struct console_abort)); 1071 dev->priv = malloc(sizeof(struct console_abort));
699 ((struct console_abort *)dev->priv)->count = 0; 1072 ((struct console_abort *)dev->priv)->count = 0;
700 verbose("device %p: console\n", 1073 verbose("device %p: console\n",
701 (void *)(dev->desc->pfn * getpagesize())); 1074 (void *)(dev->desc->pfn * getpagesize()));
702} 1075}
703 1076
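If the termios flags look like magic: clearing ISIG, ICANON and ECHO hands us each keystroke as it is typed, with no line editing and no ^C-to-signal translation, and atexit() is a tidy place to hang the undo. A standalone sketch of the same dance:

#include <stdlib.h>
#include <termios.h>
#include <unistd.h>

static struct termios saved_term;

static void restore_terminal(void)
{
    tcsetattr(STDIN_FILENO, TCSANOW, &saved_term);
}

static void make_terminal_raw(void)
{
    struct termios term;

    if (tcgetattr(STDIN_FILENO, &saved_term) != 0)
        return;        /* not a tty: nothing to do */
    term = saved_term;
    /* No signals from ^C, no line buffering, no local echo. */
    term.c_lflag &= ~(ISIG | ICANON | ECHO);
    tcsetattr(STDIN_FILENO, TCSANOW, &term);
    /* Restore the settings on any graceful exit. */
    atexit(restore_terminal);
}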
1077/* Setting up a block file is also fairly straightforward. */
704static void setup_block_file(const char *filename, struct device_list *devices) 1078static void setup_block_file(const char *filename, struct device_list *devices)
705{ 1079{
706 int fd; 1080 int fd;
@@ -708,20 +1082,47 @@ static void setup_block_file(const char *filename, struct device_list *devices)
708 off64_t *device_len; 1082 off64_t *device_len;
709 struct lguest_block_page *p; 1083 struct lguest_block_page *p;
710 1084
1085 /* We open with O_LARGEFILE because otherwise we get stuck at 2G. We
1086 * open with O_DIRECT because otherwise our benchmarks go much too
1087 * fast. */
711 fd = open_or_die(filename, O_RDWR|O_LARGEFILE|O_DIRECT); 1088 fd = open_or_die(filename, O_RDWR|O_LARGEFILE|O_DIRECT);
1089
1090 /* We want one page, and have no input handler (the block file never
1091 * has anything interesting to say to us). Our timing will be quite
1092 * random, so it should be a reasonable randomness source. */
712 dev = new_device(devices, LGUEST_DEVICE_T_BLOCK, 1, 1093 dev = new_device(devices, LGUEST_DEVICE_T_BLOCK, 1,
713 LGUEST_DEVICE_F_RANDOMNESS, 1094 LGUEST_DEVICE_F_RANDOMNESS,
714 fd, NULL, 0, handle_block_output); 1095 fd, NULL, 0, handle_block_output);
1096
1097 /* We store the device size in the private area */
715 device_len = dev->priv = malloc(sizeof(*device_len)); 1098 device_len = dev->priv = malloc(sizeof(*device_len));
1099 /* This is the safe way of establishing the size of our device: it
1100 * might be a normal file or an actual block device like /dev/hdb. */
716 *device_len = lseek64(fd, 0, SEEK_END); 1101 *device_len = lseek64(fd, 0, SEEK_END);
717 p = dev->mem;
718 1102
1103 /* The device memory is a "struct lguest_block_page". It's zeroed
1104 * already, we just need to put in the device size. Block devices
1105 * think in sectors (ie. 512 byte chunks), so we translate here. */
1106 p = dev->mem;
719 p->num_sectors = *device_len/512; 1107 p->num_sectors = *device_len/512;
720 verbose("device %p: block %i sectors\n", 1108 verbose("device %p: block %i sectors\n",
721 (void *)(dev->desc->pfn * getpagesize()), p->num_sectors); 1109 (void *)(dev->desc->pfn * getpagesize()), p->num_sectors);
722} 1110}
723 1111
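The lseek64(fd, 0, SEEK_END) idiom really is the safe way to size the backing store: stat() reports a zero st_size for block devices like /dev/hdb, but seeking to the end yields the true size for regular files and devices alike. A sketch with a hypothetical device_size() helper (_GNU_SOURCE for the 64-bit variants):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Works for regular files and block devices alike; -1 on error. */
static off64_t device_size(const char *path)
{
    int fd = open(path, O_RDONLY | O_LARGEFILE);
    off64_t len;

    if (fd < 0)
        return -1;
    len = lseek64(fd, 0, SEEK_END);
    close(fd);
    return len;
}

int main(int argc, char *argv[])
{
    if (argc == 2)
        printf("%lld bytes\n", (long long)device_size(argv[1]));
    return 0;
}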
724/* We use fnctl locks to reserve network slots (autocleanup!) */ 1112/*
1113 * Network Devices.
1114 *
1115 * Setting up network devices is quite a pain, because we have three types.
1116 * First, we have the inter-Guest network. This is a file which is mapped into
1117 * the address space of the Guests who are on the network. Because it is a
1118 * shared mapping, the same page underlies all the devices, and they can send
1119 * DMA to each other.
1120 *
1121 * Remember from our network driver, the Guest is told what slot in the page it
 1122 * is to use. We use exclusive fcntl locks to reserve a slot. If another
 1123 * Guest is using a slot, the lock will fail and we try another. Because fcntl
1124 * locks are cleaned up automatically when we die, this cleverly means that our
1125 * reservation on the slot will vanish if we crash. */
725static unsigned int find_slot(int netfd, const char *filename) 1126static unsigned int find_slot(int netfd, const char *filename)
726{ 1127{
727 struct flock fl; 1128 struct flock fl;
@@ -729,26 +1130,33 @@ static unsigned int find_slot(int netfd, const char *filename)
729 fl.l_type = F_WRLCK; 1130 fl.l_type = F_WRLCK;
730 fl.l_whence = SEEK_SET; 1131 fl.l_whence = SEEK_SET;
731 fl.l_len = 1; 1132 fl.l_len = 1;
 1133 /* Try a 1-byte lock at each possible slot position */
732 for (fl.l_start = 0; 1134 for (fl.l_start = 0;
733 fl.l_start < getpagesize()/sizeof(struct lguest_net); 1135 fl.l_start < getpagesize()/sizeof(struct lguest_net);
734 fl.l_start++) { 1136 fl.l_start++) {
1137 /* If we succeed, return the slot number. */
735 if (fcntl(netfd, F_SETLK, &fl) == 0) 1138 if (fcntl(netfd, F_SETLK, &fl) == 0)
736 return fl.l_start; 1139 return fl.l_start;
737 } 1140 }
738 errx(1, "No free slots in network file %s", filename); 1141 errx(1, "No free slots in network file %s", filename);
739} 1142}
740 1143
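This reservation scheme generalizes to anything needing crash-safe slot allocation: one byte of a shared file per slot, one exclusive lock per byte, and the kernel releases the lock however the process dies. A sketch with a hypothetical claim_slot() helper:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Claim one of nslots slots; returns the slot number, or -1 if all are
 * taken. The lock (and thus the reservation) vanishes when we exit. */
static int claim_slot(int fd, int nslots)
{
    struct flock fl;

    memset(&fl, 0, sizeof(fl));
    fl.l_type = F_WRLCK;
    fl.l_whence = SEEK_SET;
    fl.l_len = 1;        /* lock exactly one byte per slot */
    for (fl.l_start = 0; fl.l_start < nslots; fl.l_start++) {
        if (fcntl(fd, F_SETLK, &fl) == 0)
            return fl.l_start;
    }
    return -1;
}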
1144/* This function sets up the network file */
741static void setup_net_file(const char *filename, 1145static void setup_net_file(const char *filename,
742 struct device_list *devices) 1146 struct device_list *devices)
743{ 1147{
744 int netfd; 1148 int netfd;
745 struct device *dev; 1149 struct device *dev;
746 1150
1151 /* We don't use open_or_die() here: for friendliness we create the file
1152 * if it doesn't already exist. */
747 netfd = open(filename, O_RDWR, 0); 1153 netfd = open(filename, O_RDWR, 0);
748 if (netfd < 0) { 1154 if (netfd < 0) {
749 if (errno == ENOENT) { 1155 if (errno == ENOENT) {
750 netfd = open(filename, O_RDWR|O_CREAT, 0600); 1156 netfd = open(filename, O_RDWR|O_CREAT, 0600);
751 if (netfd >= 0) { 1157 if (netfd >= 0) {
1158 /* If we succeeded, initialize the file with a
1159 * blank page. */
752 char page[getpagesize()]; 1160 char page[getpagesize()];
753 memset(page, 0, sizeof(page)); 1161 memset(page, 0, sizeof(page));
754 write(netfd, page, sizeof(page)); 1162 write(netfd, page, sizeof(page));
@@ -758,11 +1166,15 @@ static void setup_net_file(const char *filename,
758 err(1, "cannot open net file '%s'", filename); 1166 err(1, "cannot open net file '%s'", filename);
759 } 1167 }
760 1168
1169 /* We need 1 page, and the features indicate the slot to use and that
1170 * no checksum is needed. We never touch this device again; it's
1171 * between the Guests on the network, so we don't register input or
1172 * output handlers. */
761 dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, 1173 dev = new_device(devices, LGUEST_DEVICE_T_NET, 1,
762 find_slot(netfd, filename)|LGUEST_NET_F_NOCSUM, 1174 find_slot(netfd, filename)|LGUEST_NET_F_NOCSUM,
763 -1, NULL, 0, NULL); 1175 -1, NULL, 0, NULL);
764 1176
765 /* We overwrite the /dev/zero mapping with the actual file. */ 1177 /* Map the shared file. */
766 if (mmap(dev->mem, getpagesize(), PROT_READ|PROT_WRITE, 1178 if (mmap(dev->mem, getpagesize(), PROT_READ|PROT_WRITE,
767 MAP_FIXED|MAP_SHARED, netfd, 0) != dev->mem) 1179 MAP_FIXED|MAP_SHARED, netfd, 0) != dev->mem)
768 err(1, "could not mmap '%s'", filename); 1180 err(1, "could not mmap '%s'", filename);
@@ -770,6 +1182,7 @@ static void setup_net_file(const char *filename,
770 (void *)(dev->desc->pfn * getpagesize()), filename, 1182 (void *)(dev->desc->pfn * getpagesize()), filename,
771 dev->desc->features & ~LGUEST_NET_F_NOCSUM); 1183 dev->desc->features & ~LGUEST_NET_F_NOCSUM);
772} 1184}
1185/*:*/
773 1186
774static u32 str2ip(const char *ipaddr) 1187static u32 str2ip(const char *ipaddr)
775{ 1188{
@@ -779,7 +1192,11 @@ static u32 str2ip(const char *ipaddr)
779 return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3]; 1192 return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3];
780} 1193}
781 1194
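str2ip() open-codes what the C library already provides; the only subtlety is byte order, since we want the address as a host-order u32. A hedged equivalent using inet_addr(), which returns network byte order (and INADDR_NONE on a malformed string):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* Dotted quad to host-byte-order u32, matching the shift-and-or above. */
static uint32_t parse_ip(const char *ipaddr)
{
    return ntohl(inet_addr(ipaddr));
}

int main(void)
{
    printf("0x%08x\n", parse_ip("192.168.19.1"));    /* 0xc0a81301 */
    return 0;
}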
782/* adapted from libbridge */ 1195/* This code is "adapted" from libbridge: it attaches the Host end of the
1196 * network device to the bridge device specified by the command line.
1197 *
1198 * This is yet another James Morris contribution (I'm an IP-level guy, so I
1199 * dislike bridging), and I just try not to break it. */
783static void add_to_bridge(int fd, const char *if_name, const char *br_name) 1200static void add_to_bridge(int fd, const char *if_name, const char *br_name)
784{ 1201{
785 int ifidx; 1202 int ifidx;
@@ -798,12 +1215,16 @@ static void add_to_bridge(int fd, const char *if_name, const char *br_name)
798 err(1, "can't add %s to bridge %s", if_name, br_name); 1215 err(1, "can't add %s to bridge %s", if_name, br_name);
799} 1216}
800 1217
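Stripped of the ifreq boilerplate, attaching an interface to a bridge is two steps: look up the interface index, then hand it to the bridge with the SIOCBRADDIF ioctl. A sketch with a hypothetical bridge_add_if() helper (fd is any socket, e.g. a PF_INET datagram socket; root required):

#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>

/* Attach if_name to bridge br_name; returns 0, or -1 with errno set. */
static int bridge_add_if(int fd, const char *if_name, const char *br_name)
{
    struct ifreq ifr;
    unsigned int ifidx = if_nametoindex(if_name);

    if (ifidx == 0)
        return -1;
    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, br_name, IFNAMSIZ - 1);
    ifr.ifr_ifindex = ifidx;
    return ioctl(fd, SIOCBRADDIF, &ifr);
}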
1218/* This sets up the Host end of the network device with an IP address, brings
 1219 * it up so packets will flow, then copies the MAC address into the hwaddr
1220 * pointer (in practice, the Host's slot in the network device's memory). */
801static void configure_device(int fd, const char *devname, u32 ipaddr, 1221static void configure_device(int fd, const char *devname, u32 ipaddr,
802 unsigned char hwaddr[6]) 1222 unsigned char hwaddr[6])
803{ 1223{
804 struct ifreq ifr; 1224 struct ifreq ifr;
805 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; 1225 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
806 1226
1227 /* Don't read these incantations. Just cut & paste them like I did! */
807 memset(&ifr, 0, sizeof(ifr)); 1228 memset(&ifr, 0, sizeof(ifr));
808 strcpy(ifr.ifr_name, devname); 1229 strcpy(ifr.ifr_name, devname);
809 sin->sin_family = AF_INET; 1230 sin->sin_family = AF_INET;
@@ -814,12 +1235,19 @@ static void configure_device(int fd, const char *devname, u32 ipaddr,
814 if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) 1235 if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0)
815 err(1, "Bringing interface %s up", devname); 1236 err(1, "Bringing interface %s up", devname);
816 1237
1238 /* SIOC stands for Socket I/O Control. G means Get (vs S for Set
1239 * above). IF means Interface, and HWADDR is hardware address.
1240 * Simple! */
817 if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) 1241 if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
818 err(1, "getting hw address for %s", devname); 1242 err(1, "getting hw address for %s", devname);
819
820 memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6); 1243 memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6);
821} 1244}
822 1245
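SIOCGIFHWADDR works on any interface, not just our tap device. A self-contained sketch that prints the MAC of an assumed interface name:

#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    struct ifreq ifr;
    unsigned char *mac;
    int fd = socket(PF_INET, SOCK_DGRAM, 0);    /* any socket will do */

    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);    /* assumed name */
    if (fd < 0 || ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) {
        perror("SIOCGIFHWADDR");
        return 1;
    }
    mac = (unsigned char *)ifr.ifr_hwaddr.sa_data;
    printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
           mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
    close(fd);
    return 0;
}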
1246/*L:195 The other kind of network is a Host<->Guest network. This can either
 1247 * use bridging or routing, but the principle is the same: it uses the "tun"
1248 * device to inject packets into the Host as if they came in from a normal
1249 * network card. We just shunt packets between the Guest and the tun
1250 * device. */
823static void setup_tun_net(const char *arg, struct device_list *devices) 1251static void setup_tun_net(const char *arg, struct device_list *devices)
824{ 1252{
825 struct device *dev; 1253 struct device *dev;
@@ -828,36 +1256,56 @@ static void setup_tun_net(const char *arg, struct device_list *devices)
828 u32 ip; 1256 u32 ip;
829 const char *br_name = NULL; 1257 const char *br_name = NULL;
830 1258
1259 /* We open the /dev/net/tun device and tell it we want a tap device. A
1260 * tap device is like a tun device, only somehow different. To tell
1261 * the truth, I completely blundered my way through this code, but it
1262 * works now! */
831 netfd = open_or_die("/dev/net/tun", O_RDWR); 1263 netfd = open_or_die("/dev/net/tun", O_RDWR);
832 memset(&ifr, 0, sizeof(ifr)); 1264 memset(&ifr, 0, sizeof(ifr));
833 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 1265 ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
834 strcpy(ifr.ifr_name, "tap%d"); 1266 strcpy(ifr.ifr_name, "tap%d");
835 if (ioctl(netfd, TUNSETIFF, &ifr) != 0) 1267 if (ioctl(netfd, TUNSETIFF, &ifr) != 0)
836 err(1, "configuring /dev/net/tun"); 1268 err(1, "configuring /dev/net/tun");
1269 /* We don't need checksums calculated for packets coming in this
1270 * device: trust us! */
837 ioctl(netfd, TUNSETNOCSUM, 1); 1271 ioctl(netfd, TUNSETNOCSUM, 1);
838 1272
839 /* You will be peer 1: we should create enough jitter to randomize */ 1273 /* We create the net device with 1 page, using the features field of
1274 * the descriptor to tell the Guest it is in slot 1 (NET_PEERNUM), and
1275 * that the device has fairly random timing. We do *not* specify
1276 * LGUEST_NET_F_NOCSUM: these packets can reach the real world.
1277 *
 1278 * We will put our MAC address in slot 0 for the Guest to see, so
1279 * it will send packets to us using the key "peer_offset(0)": */
840 dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, 1280 dev = new_device(devices, LGUEST_DEVICE_T_NET, 1,
841 NET_PEERNUM|LGUEST_DEVICE_F_RANDOMNESS, netfd, 1281 NET_PEERNUM|LGUEST_DEVICE_F_RANDOMNESS, netfd,
842 handle_tun_input, peer_offset(0), handle_tun_output); 1282 handle_tun_input, peer_offset(0), handle_tun_output);
1283
1284 /* We keep a flag which says whether we've seen packets come out from
1285 * this network device. */
843 dev->priv = malloc(sizeof(bool)); 1286 dev->priv = malloc(sizeof(bool));
844 *(bool *)dev->priv = false; 1287 *(bool *)dev->priv = false;
845 1288
1289 /* We need a socket to perform the magic network ioctls to bring up the
1290 * tap interface, connect to the bridge etc. Any socket will do! */
846 ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 1291 ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
847 if (ipfd < 0) 1292 if (ipfd < 0)
848 err(1, "opening IP socket"); 1293 err(1, "opening IP socket");
849 1294
1295 /* If the command line was --tunnet=bridge:<name> do bridging. */
850 if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { 1296 if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) {
851 ip = INADDR_ANY; 1297 ip = INADDR_ANY;
852 br_name = arg + strlen(BRIDGE_PFX); 1298 br_name = arg + strlen(BRIDGE_PFX);
853 add_to_bridge(ipfd, ifr.ifr_name, br_name); 1299 add_to_bridge(ipfd, ifr.ifr_name, br_name);
854 } else 1300 } else /* It is an IP address to set up the device with */
855 ip = str2ip(arg); 1301 ip = str2ip(arg);
856 1302
857 /* We are peer 0, ie. first slot. */ 1303 /* We are peer 0, ie. first slot, so we hand dev->mem to this routine
1304 * to write the MAC address at the start of the device memory. */
858 configure_device(ipfd, ifr.ifr_name, ip, dev->mem); 1305 configure_device(ipfd, ifr.ifr_name, ip, dev->mem);
859 1306
860 /* Set "promisc" bit: we want every single packet. */ 1307 /* Set "promisc" bit: we want every single packet if we're going to
1308 * bridge to other machines (and otherwise it doesn't matter). */
861 *((u8 *)dev->mem) |= 0x1; 1309 *((u8 *)dev->mem) |= 0x1;
862 1310
863 close(ipfd); 1311 close(ipfd);
@@ -868,7 +1316,10 @@ static void setup_tun_net(const char *arg, struct device_list *devices)
868 if (br_name) 1316 if (br_name)
869 verbose("attached to bridge: %s\n", br_name); 1317 verbose("attached to bridge: %s\n", br_name);
870} 1318}
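For the record, the tap handshake boils down to one open() and one ioctl(): IFF_TAP asks for whole ethernet frames (IFF_TUN would give raw IP packets) and IFF_NO_PI drops the extra packet-info header. A sketch with a hypothetical open_tap() helper:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/if.h>
#include <linux/if_tun.h>

/* Open a tap device; on success, name_out (IFNAMSIZ bytes) receives the
 * name the kernel chose, e.g. "tap0", and frames flow through the fd. */
static int open_tap(char *name_out)
{
    struct ifreq ifr;
    int fd = open("/dev/net/tun", O_RDWR);

    if (fd < 0)
        return -1;
    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
    strcpy(ifr.ifr_name, "tap%d");    /* the kernel picks the number */
    if (ioctl(fd, TUNSETIFF, &ifr) != 0) {
        close(fd);
        return -1;
    }
    memcpy(name_out, ifr.ifr_name, IFNAMSIZ);
    return fd;
}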
1319/* That's the end of device setup. */
871 1320
1321/*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves
1322 * its input and output, and finally, lays it to rest. */
872static void __attribute__((noreturn)) 1323static void __attribute__((noreturn))
873run_guest(int lguest_fd, struct device_list *device_list) 1324run_guest(int lguest_fd, struct device_list *device_list)
874{ 1325{
@@ -880,20 +1331,37 @@ run_guest(int lguest_fd, struct device_list *device_list)
880 /* We read from the /dev/lguest device to run the Guest. */ 1331 /* We read from the /dev/lguest device to run the Guest. */
881 readval = read(lguest_fd, arr, sizeof(arr)); 1332 readval = read(lguest_fd, arr, sizeof(arr));
882 1333
1334 /* The read can only really return sizeof(arr) (the Guest did a
1335 * SEND_DMA to us), or an error. */
1336
1337 /* For a successful read, arr[0] is the address of the "struct
1338 * lguest_dma", and arr[1] is the key the Guest sent to. */
883 if (readval == sizeof(arr)) { 1339 if (readval == sizeof(arr)) {
884 handle_output(lguest_fd, arr[0], arr[1], device_list); 1340 handle_output(lguest_fd, arr[0], arr[1], device_list);
885 continue; 1341 continue;
1342 /* ENOENT means the Guest died. Reading tells us why. */
886 } else if (errno == ENOENT) { 1343 } else if (errno == ENOENT) {
887 char reason[1024] = { 0 }; 1344 char reason[1024] = { 0 };
888 read(lguest_fd, reason, sizeof(reason)-1); 1345 read(lguest_fd, reason, sizeof(reason)-1);
889 errx(1, "%s", reason); 1346 errx(1, "%s", reason);
1347 /* EAGAIN means the waker wanted us to look at some input.
1348 * Anything else means a bug or incompatible change. */
890 } else if (errno != EAGAIN) 1349 } else if (errno != EAGAIN)
891 err(1, "Running guest failed"); 1350 err(1, "Running guest failed");
1351
1352 /* Service input, then unset the BREAK which releases
1353 * the Waker. */
892 handle_input(lguest_fd, device_list); 1354 handle_input(lguest_fd, device_list);
893 if (write(lguest_fd, args, sizeof(args)) < 0) 1355 if (write(lguest_fd, args, sizeof(args)) < 0)
894 err(1, "Resetting break"); 1356 err(1, "Resetting break");
895 } 1357 }
896} 1358}
1359/*
1360 * This is the end of the Launcher.
1361 *
1362 * But wait! We've seen I/O from the Launcher, and we've seen I/O from the
1363 * Drivers. If we were to see the Host kernel I/O code, our understanding
1364 * would be complete... :*/
897 1365
898static struct option opts[] = { 1366static struct option opts[] = {
899 { "verbose", 0, NULL, 'v' }, 1367 { "verbose", 0, NULL, 'v' },
@@ -911,20 +1379,49 @@ static void usage(void)
911 "<mem-in-mb> vmlinux [args...]"); 1379 "<mem-in-mb> vmlinux [args...]");
912} 1380}
913 1381
1382/*L:100 The Launcher code itself takes us out into userspace, that scary place
1383 * where pointers run wild and free! Unfortunately, like most userspace
 1384 * programs, it's quite boring (which is why everyone likes to hack on the
1385 * kernel!). Perhaps if you make up an Lguest Drinking Game at this point, it
1386 * will get you through this section. Or, maybe not.
1387 *
1388 * The Launcher binary sits up high, usually starting at address 0xB8000000.
1389 * Everything below this is the "physical" memory for the Guest. For example,
1390 * if the Guest were to write a "1" at physical address 0, we would see a "1"
1391 * in the Launcher at "(int *)0". Guest physical == Launcher virtual.
1392 *
1393 * This can be tough to get your head around, but usually it just means that we
 1394 * don't need to do any conversion when the Guest gives us its "physical"
1395 * addresses.
1396 */
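The identity mapping is built with MAP_FIXED, which insists on the exact address we ask for; that matters because a Guest physical address must land at the same Launcher virtual address. A sketch of the kind of call map_zeroed_pages() makes, substituting an anonymous mapping for the original's /dev/zero (equivalent for our purposes; mapping page zero additionally needs vm.mmap_min_addr to permit it on modern kernels):

#include <sys/mman.h>
#include <unistd.h>

/* Map 'pages' zero-filled pages at exactly 'addr', or return NULL. */
static void *map_guest_pages(unsigned long addr, unsigned int pages)
{
    void *mem = mmap((void *)addr, pages * getpagesize(),
                     PROT_READ | PROT_WRITE,
                     MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    return mem == MAP_FAILED ? NULL : mem;
}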
914int main(int argc, char *argv[]) 1397int main(int argc, char *argv[])
915{ 1398{
1399 /* Memory, top-level pagetable, code startpoint, PAGE_OFFSET and size
1400 * of the (optional) initrd. */
916 unsigned long mem = 0, pgdir, start, page_offset, initrd_size = 0; 1401 unsigned long mem = 0, pgdir, start, page_offset, initrd_size = 0;
1402 /* A temporary and the /dev/lguest file descriptor. */
917 int i, c, lguest_fd; 1403 int i, c, lguest_fd;
1404 /* The list of Guest devices, based on command line arguments. */
918 struct device_list device_list; 1405 struct device_list device_list;
1406 /* The boot information for the Guest: at guest-physical address 0. */
919 void *boot = (void *)0; 1407 void *boot = (void *)0;
1408 /* If they specify an initrd file to load. */
920 const char *initrd_name = NULL; 1409 const char *initrd_name = NULL;
921 1410
1411 /* First we initialize the device list. Since console and network
 1412 * devices receive input from file descriptors, we keep an fdset
1413 * (infds) and the maximum fd number (max_infd) with the head of the
1414 * list. We also keep a pointer to the last device, for easy appending
1415 * to the list. */
922 device_list.max_infd = -1; 1416 device_list.max_infd = -1;
923 device_list.dev = NULL; 1417 device_list.dev = NULL;
924 device_list.lastdev = &device_list.dev; 1418 device_list.lastdev = &device_list.dev;
925 FD_ZERO(&device_list.infds); 1419 FD_ZERO(&device_list.infds);
926 1420
 927 /* We need to know how much memory so we can allocate devices. */ 1421 /* We need to know how much memory we have so we can set up the device
1422 * descriptor and memory pages for the devices as we parse the command
1423 * line. So we quickly look through the arguments to find the amount
1424 * of memory now. */
928 for (i = 1; i < argc; i++) { 1425 for (i = 1; i < argc; i++) {
929 if (argv[i][0] != '-') { 1426 if (argv[i][0] != '-') {
930 mem = top = atoi(argv[i]) * 1024 * 1024; 1427 mem = top = atoi(argv[i]) * 1024 * 1024;
@@ -933,6 +1430,8 @@ int main(int argc, char *argv[])
933 break; 1430 break;
934 } 1431 }
935 } 1432 }
1433
 1434 /* The options are fairly straightforward */
936 while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { 1435 while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) {
937 switch (c) { 1436 switch (c) {
938 case 'v': 1437 case 'v':
@@ -955,42 +1454,71 @@ int main(int argc, char *argv[])
955 usage(); 1454 usage();
956 } 1455 }
957 } 1456 }
1457 /* After the other arguments we expect memory and kernel image name,
1458 * followed by command line arguments for the kernel. */
958 if (optind + 2 > argc) 1459 if (optind + 2 > argc)
959 usage(); 1460 usage();
960 1461
961 /* We need a console device */ 1462 /* We always have a console device */
962 setup_console(&device_list); 1463 setup_console(&device_list);
963 1464
 964 /* First we map /dev/zero over all of guest-physical memory. */ 1465 /* We start by mapping anonymous pages over the whole guest-physical
1466 * memory range. This fills it with 0, and ensures that the Guest
1467 * won't be killed when it tries to access it. */
965 map_zeroed_pages(0, mem / getpagesize()); 1468 map_zeroed_pages(0, mem / getpagesize());
966 1469
967 /* Now we load the kernel */ 1470 /* Now we load the kernel */
968 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY), 1471 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY),
969 &page_offset); 1472 &page_offset);
970 1473
971 /* Map the initrd image if requested */ 1474 /* Map the initrd image if requested (at top of physical memory) */
972 if (initrd_name) { 1475 if (initrd_name) {
973 initrd_size = load_initrd(initrd_name, mem); 1476 initrd_size = load_initrd(initrd_name, mem);
1477 /* These are the location in the Linux boot header where the
1478 * start and size of the initrd are expected to be found. */
974 *(unsigned long *)(boot+0x218) = mem - initrd_size; 1479 *(unsigned long *)(boot+0x218) = mem - initrd_size;
975 *(unsigned long *)(boot+0x21c) = initrd_size; 1480 *(unsigned long *)(boot+0x21c) = initrd_size;
1481 /* The bootloader type 0xFF means "unknown"; that's OK. */
976 *(unsigned char *)(boot+0x210) = 0xFF; 1482 *(unsigned char *)(boot+0x210) = 0xFF;
977 } 1483 }
978 1484
979 /* Set up the initial linar pagetables. */ 1485 /* Set up the initial linear pagetables, starting below the initrd. */
980 pgdir = setup_pagetables(mem, initrd_size, page_offset); 1486 pgdir = setup_pagetables(mem, initrd_size, page_offset);
981 1487
982 /* E820 memory map: ours is a simple, single region. */ 1488 /* The Linux boot header contains an "E820" memory map: ours is a
1489 * simple, single region. */
983 *(char*)(boot+E820NR) = 1; 1490 *(char*)(boot+E820NR) = 1;
984 *((struct e820entry *)(boot+E820MAP)) 1491 *((struct e820entry *)(boot+E820MAP))
985 = ((struct e820entry) { 0, mem, E820_RAM }); 1492 = ((struct e820entry) { 0, mem, E820_RAM });
986 /* Command line pointer and command line (at 4096) */ 1493 /* The boot header contains a command line pointer: we put the command
1494 * line after the boot header (at address 4096) */
987 *(void **)(boot + 0x228) = boot + 4096; 1495 *(void **)(boot + 0x228) = boot + 4096;
988 concat(boot + 4096, argv+optind+2); 1496 concat(boot + 4096, argv+optind+2);
989 /* Paravirt type: 1 == lguest */ 1497
1498 /* The guest type value of "1" tells the Guest it's under lguest. */
990 *(int *)(boot + 0x23c) = 1; 1499 *(int *)(boot + 0x23c) = 1;
991 1500
1501 /* We tell the kernel to initialize the Guest: this returns the open
1502 * /dev/lguest file descriptor. */
992 lguest_fd = tell_kernel(pgdir, start, page_offset); 1503 lguest_fd = tell_kernel(pgdir, start, page_offset);
1504
1505 /* We fork off a child process, which wakes the Launcher whenever one
1506 * of the input file descriptors needs attention. Otherwise we would
1507 * run the Guest until it tries to output something. */
993 waker_fd = setup_waker(lguest_fd, &device_list); 1508 waker_fd = setup_waker(lguest_fd, &device_list);
994 1509
1510 /* Finally, run the Guest. This doesn't return. */
995 run_guest(lguest_fd, &device_list); 1511 run_guest(lguest_fd, &device_list);
996} 1512}
1513/*:*/
1514
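Those magic offsets poked into the boot page all come from the i386 boot protocol of this era: 0x210 for the loader type, 0x218 and 0x21c for the initrd address and size, 0x228 for the command-line pointer, and an E820 memory map whose count byte and entries live at E820NR and E820MAP. A sketch of writing the one-entry map, assuming the 2.6.22-era zero-page offsets (E820NR = 0x1e8, E820MAP = 0x2d0):

#include <stdint.h>
#include <string.h>

#define E820NR   0x1e8    /* count of e820 entries (one byte) */
#define E820MAP  0x2d0    /* the entries themselves */
#define E820_RAM 1

struct e820entry {
    uint64_t addr;    /* start of memory segment */
    uint64_t size;    /* size of memory segment */
    uint32_t type;    /* type of memory segment */
} __attribute__((packed));

/* Describe all of guest memory as a single usable-RAM region. */
static void write_e820(char *boot, uint64_t mem)
{
    struct e820entry entry = { 0, mem, E820_RAM };

    *(uint8_t *)(boot + E820NR) = 1;
    memcpy(boot + E820MAP, &entry, sizeof(entry));
}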
1515/*M:999
1516 * Mastery is done: you now know everything I do.
1517 *
1518 * But surely you have seen code, features and bugs in your wanderings which
1519 * you now yearn to attack? That is the real game, and I look forward to you
1520 * patching and forking lguest into the Your-Name-Here-visor.
1521 *
1522 * Farewell, and good coding!
1523 * Rusty Russell.
1524 */
diff --git a/MAINTAINERS b/MAINTAINERS
index 01f222e51871..babd00b0c65c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3339,6 +3339,14 @@ M: thomas@winischhofer.net
3339W: http://www.winischhofer.at/linuxsisusbvga.shtml 3339W: http://www.winischhofer.at/linuxsisusbvga.shtml
3340S: Maintained 3340S: Maintained
3341 3341
3342SLAB ALLOCATOR
3343P: Christoph Lameter
3344M: clameter@sgi.com
3345P: Pekka Enberg
3346M: penberg@cs.helsinki.fi
3347L: linux-mm@kvack.org
3348S: Maintained
3349
3342SMC91x ETHERNET DRIVER 3350SMC91x ETHERNET DRIVER
3343P: Nicolas Pitre 3351P: Nicolas Pitre
3344M: nico@cam.org 3352M: nico@cam.org
diff --git a/arch/alpha/kernel/head.S b/arch/alpha/kernel/head.S
index e27d23c74ba8..7ac1f1372c36 100644
--- a/arch/alpha/kernel/head.S
+++ b/arch/alpha/kernel/head.S
@@ -10,6 +10,7 @@
10#include <asm/system.h> 10#include <asm/system.h>
11#include <asm/asm-offsets.h> 11#include <asm/asm-offsets.h>
12 12
13.section .text.head, "ax"
13.globl swapper_pg_dir 14.globl swapper_pg_dir
14.globl _stext 15.globl _stext
15swapper_pg_dir=SWAPPER_PGD 16swapper_pg_dir=SWAPPER_PGD
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index ab642a4f08de..9dc1cee43265 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -195,7 +195,7 @@ pcibios_init(void)
195 195
196subsys_initcall(pcibios_init); 196subsys_initcall(pcibios_init);
197 197
198char * __init 198char * __devinit
199pcibios_setup(char *str) 199pcibios_setup(char *str)
200{ 200{
201 return str; 201 return str;
@@ -204,7 +204,7 @@ pcibios_setup(char *str)
204#ifdef ALPHA_RESTORE_SRM_SETUP 204#ifdef ALPHA_RESTORE_SRM_SETUP
205static struct pdev_srm_saved_conf *srm_saved_configs; 205static struct pdev_srm_saved_conf *srm_saved_configs;
206 206
207void __init 207void __devinit
208pdev_save_srm_config(struct pci_dev *dev) 208pdev_save_srm_config(struct pci_dev *dev)
209{ 209{
210 struct pdev_srm_saved_conf *tmp; 210 struct pdev_srm_saved_conf *tmp;
@@ -247,14 +247,14 @@ pci_restore_srm_config(void)
247} 247}
248#endif 248#endif
249 249
250void __init 250void __devinit
251pcibios_fixup_resource(struct resource *res, struct resource *root) 251pcibios_fixup_resource(struct resource *res, struct resource *root)
252{ 252{
253 res->start += root->start; 253 res->start += root->start;
254 res->end += root->start; 254 res->end += root->start;
255} 255}
256 256
257void __init 257void __devinit
258pcibios_fixup_device_resources(struct pci_dev *dev, struct pci_bus *bus) 258pcibios_fixup_device_resources(struct pci_dev *dev, struct pci_bus *bus)
259{ 259{
260 /* Update device resources. */ 260 /* Update device resources. */
@@ -273,7 +273,7 @@ pcibios_fixup_device_resources(struct pci_dev *dev, struct pci_bus *bus)
273 } 273 }
274} 274}
275 275
276void __init 276void __devinit
277pcibios_fixup_bus(struct pci_bus *bus) 277pcibios_fixup_bus(struct pci_bus *bus)
278{ 278{
279 /* Propagate hose info into the subordinate devices. */ 279 /* Propagate hose info into the subordinate devices. */
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 6b07f89a72c7..e1c470752ebc 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -58,7 +58,7 @@ size_for_memory(unsigned long max)
58 return max; 58 return max;
59} 59}
60 60
61struct pci_iommu_arena * 61struct pci_iommu_arena * __init
62iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base, 62iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
63 unsigned long window_size, unsigned long align) 63 unsigned long window_size, unsigned long align)
64{ 64{
@@ -117,7 +117,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
117 return arena; 117 return arena;
118} 118}
119 119
120struct pci_iommu_arena * 120struct pci_iommu_arena * __init
121iommu_arena_new(struct pci_controller *hose, dma_addr_t base, 121iommu_arena_new(struct pci_controller *hose, dma_addr_t base,
122 unsigned long window_size, unsigned long align) 122 unsigned long window_size, unsigned long align)
123{ 123{
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index b28731437c31..0804b6abe203 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -358,7 +358,7 @@ secondary_cpu_start(int cpuid, struct task_struct *idle)
358/* 358/*
359 * Bring one cpu online. 359 * Bring one cpu online.
360 */ 360 */
361static int __devinit 361static int __cpuinit
362smp_boot_one_cpu(int cpuid) 362smp_boot_one_cpu(int cpuid)
363{ 363{
364 struct task_struct *idle; 364 struct task_struct *idle;
@@ -487,7 +487,7 @@ smp_prepare_boot_cpu(void)
487{ 487{
488} 488}
489 489
490int __devinit 490int __cpuinit
491__cpu_up(unsigned int cpu) 491__cpu_up(unsigned int cpu)
492{ 492{
493 smp_boot_one_cpu(cpu); 493 smp_boot_one_cpu(cpu);
@@ -541,7 +541,7 @@ smp_percpu_timer_interrupt(struct pt_regs *regs)
541 set_irq_regs(old_regs); 541 set_irq_regs(old_regs);
542} 542}
543 543
544int __init 544int
545setup_profiling_timer(unsigned int multiplier) 545setup_profiling_timer(unsigned int multiplier)
546{ 546{
547 return -EINVAL; 547 return -EINVAL;
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index fe13daa5cb2c..7af07d3ad5f0 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -15,6 +15,7 @@ SECTIONS
15 15
16 _text = .; /* Text and read-only data */ 16 _text = .; /* Text and read-only data */
17 .text : { 17 .text : {
18 *(.text.head)
18 TEXT_TEXT 19 TEXT_TEXT
19 SCHED_TEXT 20 SCHED_TEXT
20 LOCK_TEXT 21 LOCK_TEXT
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index c3750c2c4113..c85598acb8fd 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -430,22 +430,12 @@ void __init alternative_instructions(void)
430 * And on the local CPU you need to be protected against NMI or MCE handlers 430 * And on the local CPU you need to be protected against NMI or MCE handlers
431 * seeing an inconsistent instruction while you patch. 431 * seeing an inconsistent instruction while you patch.
432 */ 432 */
433void __kprobes text_poke(void *oaddr, unsigned char *opcode, int len) 433void __kprobes text_poke(void *addr, unsigned char *opcode, int len)
434{ 434{
435 u8 *addr = oaddr;
436 if (!pte_write(*lookup_address((unsigned long)addr))) {
437 struct page *p[2] = { virt_to_page(addr), virt_to_page(addr+PAGE_SIZE) };
438 addr = vmap(p, 2, VM_MAP, PAGE_KERNEL);
439 if (!addr)
440 return;
441 addr += ((unsigned long)oaddr) % PAGE_SIZE;
442 }
443 memcpy(addr, opcode, len); 435 memcpy(addr, opcode, len);
444 sync_core(); 436 sync_core();
445 /* Not strictly needed, but can speed CPU recovery up. Ignore cross cacheline 437 /* Not strictly needed, but can speed CPU recovery up. Ignore cross cacheline
446 case. */ 438 case. */
447 if (cpu_has_clflush) 439 if (cpu_has_clflush)
448 asm("clflush (%0) " :: "r" (oaddr) : "memory"); 440 asm("clflush (%0) " :: "r" (addr) : "memory");
449 if (addr != oaddr)
450 vunmap(addr);
451} 441}
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig
index 094118ba00da..d8c6f132dc7a 100644
--- a/arch/i386/kernel/cpu/cpufreq/Kconfig
+++ b/arch/i386/kernel/cpu/cpufreq/Kconfig
@@ -92,7 +92,7 @@ config X86_POWERNOW_K8
92config X86_POWERNOW_K8_ACPI 92config X86_POWERNOW_K8_ACPI
93 bool "ACPI Support" 93 bool "ACPI Support"
94 select ACPI_PROCESSOR 94 select ACPI_PROCESSOR
95 depends on X86_POWERNOW_K8 95 depends on ACPI && X86_POWERNOW_K8
96 default y 96 default y
97 help 97 help
98 This provides access to the K8s Processor Performance States via ACPI. 98 This provides access to the K8s Processor Performance States via ACPI.
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 1b1a1e66d099..4c4809f13cb1 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -800,9 +800,17 @@ void mark_rodata_ro(void)
800 unsigned long start = PFN_ALIGN(_text); 800 unsigned long start = PFN_ALIGN(_text);
801 unsigned long size = PFN_ALIGN(_etext) - start; 801 unsigned long size = PFN_ALIGN(_etext) - start;
802 802
803 change_page_attr(virt_to_page(start), 803#ifndef CONFIG_KPROBES
804 size >> PAGE_SHIFT, PAGE_KERNEL_RX); 804#ifdef CONFIG_HOTPLUG_CPU
805 printk("Write protecting the kernel text: %luk\n", size >> 10); 805 /* It must still be possible to apply SMP alternatives. */
806 if (num_possible_cpus() <= 1)
807#endif
808 {
809 change_page_attr(virt_to_page(start),
810 size >> PAGE_SHIFT, PAGE_KERNEL_RX);
811 printk("Write protecting the kernel text: %luk\n", size >> 10);
812 }
813#endif
806 start += size; 814 start += size;
807 size = (unsigned long)__end_rodata - start; 815 size = (unsigned long)__end_rodata - start;
808 change_page_attr(virt_to_page(start), 816 change_page_attr(virt_to_page(start),
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index 2fd96d9062a1..790ef0d87e12 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -38,11 +38,11 @@ static struct clocksource clocksource_cyclone = {
38 38
39int __init init_cyclone_clock(void) 39int __init init_cyclone_clock(void)
40{ 40{
41 u64* reg; 41 u64 __iomem *reg;
42 u64 base; /* saved cyclone base address */ 42 u64 base; /* saved cyclone base address */
43 u64 offset; /* offset from pageaddr to cyclone_timer register */ 43 u64 offset; /* offset from pageaddr to cyclone_timer register */
44 int i; 44 int i;
45 u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ 45 u32 __iomem *cyclone_timer; /* Cyclone MPMC0 register */
46 46
47 if (!use_cyclone) 47 if (!use_cyclone)
48 return 0; 48 return 0;
@@ -51,7 +51,7 @@ int __init init_cyclone_clock(void)
51 51
52 /* find base address */ 52 /* find base address */
53 offset = (CYCLONE_CBAR_ADDR); 53 offset = (CYCLONE_CBAR_ADDR);
54 reg = (u64*)ioremap_nocache(offset, sizeof(u64)); 54 reg = ioremap_nocache(offset, sizeof(u64));
55 if(!reg){ 55 if(!reg){
56 printk(KERN_ERR "Summit chipset: Could not find valid CBAR" 56 printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
57 " register.\n"); 57 " register.\n");
@@ -69,7 +69,7 @@ int __init init_cyclone_clock(void)
69 69
70 /* setup PMCC */ 70 /* setup PMCC */
71 offset = (base + CYCLONE_PMCC_OFFSET); 71 offset = (base + CYCLONE_PMCC_OFFSET);
72 reg = (u64*)ioremap_nocache(offset, sizeof(u64)); 72 reg = ioremap_nocache(offset, sizeof(u64));
73 if(!reg){ 73 if(!reg){
74 printk(KERN_ERR "Summit chipset: Could not find valid PMCC" 74 printk(KERN_ERR "Summit chipset: Could not find valid PMCC"
75 " register.\n"); 75 " register.\n");
@@ -81,7 +81,7 @@ int __init init_cyclone_clock(void)
81 81
82 /* setup MPCS */ 82 /* setup MPCS */
83 offset = (base + CYCLONE_MPCS_OFFSET); 83 offset = (base + CYCLONE_MPCS_OFFSET);
84 reg = (u64*)ioremap_nocache(offset, sizeof(u64)); 84 reg = ioremap_nocache(offset, sizeof(u64));
85 if(!reg){ 85 if(!reg){
86 printk(KERN_ERR "Summit chipset: Could not find valid MPCS" 86 printk(KERN_ERR "Summit chipset: Could not find valid MPCS"
87 " register.\n"); 87 " register.\n");
@@ -93,7 +93,7 @@ int __init init_cyclone_clock(void)
93 93
94 /* map in cyclone_timer */ 94 /* map in cyclone_timer */
95 offset = (base + CYCLONE_MPMC_OFFSET); 95 offset = (base + CYCLONE_MPMC_OFFSET);
96 cyclone_timer = (u32*)ioremap_nocache(offset, sizeof(u32)); 96 cyclone_timer = ioremap_nocache(offset, sizeof(u32));
97 if(!cyclone_timer){ 97 if(!cyclone_timer){
98 printk(KERN_ERR "Summit chipset: Could not find valid MPMC" 98 printk(KERN_ERR "Summit chipset: Could not find valid MPMC"
99 " register.\n"); 99 " register.\n");
@@ -110,7 +110,7 @@ int __init init_cyclone_clock(void)
110 printk(KERN_ERR "Summit chipset: Counter not counting!" 110 printk(KERN_ERR "Summit chipset: Counter not counting!"
111 " DISABLED\n"); 111 " DISABLED\n");
112 iounmap(cyclone_timer); 112 iounmap(cyclone_timer);
113 cyclone_timer = 0; 113 cyclone_timer = NULL;
114 use_cyclone = 0; 114 use_cyclone = 0;
115 return -ENODEV; 115 return -ENODEV;
116 } 116 }
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 627785c48ea9..6c0e9e2e1b82 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -52,7 +52,7 @@ static struct clocksource clocksource_itc = {
52 .name = "itc", 52 .name = "itc",
53 .rating = 350, 53 .rating = 350,
54 .read = itc_get_cycles, 54 .read = itc_get_cycles,
55 .mask = 0xffffffffffffffff, 55 .mask = CLOCKSOURCE_MASK(64),
56 .mult = 0, /*to be calculated*/ 56 .mult = 0, /*to be calculated*/
57 .shift = 16, 57 .shift = 16,
58 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 58 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -255,7 +255,7 @@ ia64_init_itm (void)
255 } 255 }
256} 256}
257 257
258static cycle_t itc_get_cycles() 258static cycle_t itc_get_cycles(void)
259{ 259{
260 u64 lcycle, now, ret; 260 u64 lcycle, now, ret;
261 261
diff --git a/arch/m32r/kernel/setup_mappi.c b/arch/m32r/kernel/setup_mappi.c
index 6b2d77da0683..fe73c9ec611f 100644
--- a/arch/m32r/kernel/setup_mappi.c
+++ b/arch/m32r/kernel/setup_mappi.c
@@ -45,7 +45,8 @@ static void mask_and_ack_mappi(unsigned int irq)
45 45
46static void end_mappi_irq(unsigned int irq) 46static void end_mappi_irq(unsigned int irq)
47{ 47{
48 enable_mappi_irq(irq); 48 if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
49 enable_mappi_irq(irq);
49} 50}
50 51
51static unsigned int startup_mappi_irq(unsigned int irq) 52static unsigned int startup_mappi_irq(unsigned int irq)
@@ -88,7 +89,7 @@ void __init init_IRQ(void)
88 irq_desc[M32R_IRQ_INT0].chip = &mappi_irq_type; 89 irq_desc[M32R_IRQ_INT0].chip = &mappi_irq_type;
89 irq_desc[M32R_IRQ_INT0].action = NULL; 90 irq_desc[M32R_IRQ_INT0].action = NULL;
90 irq_desc[M32R_IRQ_INT0].depth = 1; 91 irq_desc[M32R_IRQ_INT0].depth = 1;
91 icu_data[M32R_IRQ_INT0].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10; 92 icu_data[M32R_IRQ_INT0].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD11;
92 disable_mappi_irq(M32R_IRQ_INT0); 93 disable_mappi_irq(M32R_IRQ_INT0);
93#endif /* CONFIG_M32R_NE2000 */ 94#endif /* CONFIG_M32R_NE2000 */
94 95
diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c
index a5ac0d40fbec..3f86ade3a22a 100644
--- a/arch/m68knommu/kernel/setup.c
+++ b/arch/m68knommu/kernel/setup.c
@@ -42,8 +42,6 @@ EXPORT_SYMBOL(memory_end);
42 42
43char __initdata command_line[COMMAND_LINE_SIZE]; 43char __initdata command_line[COMMAND_LINE_SIZE];
44 44
45void (*mach_trap_init)(void);
46
47/* machine dependent timer functions */ 45/* machine dependent timer functions */
48void (*mach_sched_init)(irq_handler_t handler); 46void (*mach_sched_init)(irq_handler_t handler);
49void (*mach_tick)(void); 47void (*mach_tick)(void);
diff --git a/arch/m68knommu/platform/5206/config.c b/arch/m68knommu/platform/5206/config.c
index d265ed4e5afc..d0f2dc5cb5a1 100644
--- a/arch/m68knommu/platform/5206/config.c
+++ b/arch/m68knommu/platform/5206/config.c
@@ -28,7 +28,6 @@
28void coldfire_tick(void); 28void coldfire_tick(void);
29void coldfire_timer_init(irq_handler_t handler); 29void coldfire_timer_init(irq_handler_t handler);
30unsigned long coldfire_timer_offset(void); 30unsigned long coldfire_timer_offset(void);
31void coldfire_trap_init(void);
32void coldfire_reset(void); 31void coldfire_reset(void);
33 32
34/***************************************************************************/ 33/***************************************************************************/
@@ -101,7 +100,6 @@ void config_BSP(char *commandp, int size)
101 mach_sched_init = coldfire_timer_init; 100 mach_sched_init = coldfire_timer_init;
102 mach_tick = coldfire_tick; 101 mach_tick = coldfire_tick;
103 mach_gettimeoffset = coldfire_timer_offset; 102 mach_gettimeoffset = coldfire_timer_offset;
104 mach_trap_init = coldfire_trap_init;
105 mach_reset = coldfire_reset; 103 mach_reset = coldfire_reset;
106} 104}
107 105
diff --git a/arch/m68knommu/platform/5206e/config.c b/arch/m68knommu/platform/5206e/config.c
index 7fa5e8254c31..4ab614f1ecda 100644
--- a/arch/m68knommu/platform/5206e/config.c
+++ b/arch/m68knommu/platform/5206e/config.c
@@ -27,7 +27,6 @@
27void coldfire_tick(void); 27void coldfire_tick(void);
28void coldfire_timer_init(irq_handler_t handler); 28void coldfire_timer_init(irq_handler_t handler);
29unsigned long coldfire_timer_offset(void); 29unsigned long coldfire_timer_offset(void);
30void coldfire_trap_init(void);
31void coldfire_reset(void); 30void coldfire_reset(void);
32 31
33/***************************************************************************/ 32/***************************************************************************/
@@ -107,7 +106,6 @@ void config_BSP(char *commandp, int size)
107 mach_sched_init = coldfire_timer_init; 106 mach_sched_init = coldfire_timer_init;
108 mach_tick = coldfire_tick; 107 mach_tick = coldfire_tick;
109 mach_gettimeoffset = coldfire_timer_offset; 108 mach_gettimeoffset = coldfire_timer_offset;
110 mach_trap_init = coldfire_trap_init;
111 mach_reset = coldfire_reset; 109 mach_reset = coldfire_reset;
112} 110}
113 111
diff --git a/arch/m68knommu/platform/520x/config.c b/arch/m68knommu/platform/520x/config.c
index 85830f9882f3..a2c95bebd004 100644
--- a/arch/m68knommu/platform/520x/config.c
+++ b/arch/m68knommu/platform/520x/config.c
@@ -30,7 +30,6 @@ unsigned int dma_device_address[MAX_M68K_DMA_CHANNELS];
30void coldfire_pit_tick(void); 30void coldfire_pit_tick(void);
31void coldfire_pit_init(irq_handler_t handler); 31void coldfire_pit_init(irq_handler_t handler);
32unsigned long coldfire_pit_offset(void); 32unsigned long coldfire_pit_offset(void);
33void coldfire_trap_init(void);
34void coldfire_reset(void); 33void coldfire_reset(void);
35 34
36/***************************************************************************/ 35/***************************************************************************/
@@ -51,7 +50,6 @@ void config_BSP(char *commandp, int size)
51 mach_sched_init = coldfire_pit_init; 50 mach_sched_init = coldfire_pit_init;
52 mach_tick = coldfire_pit_tick; 51 mach_tick = coldfire_pit_tick;
53 mach_gettimeoffset = coldfire_pit_offset; 52 mach_gettimeoffset = coldfire_pit_offset;
54 mach_trap_init = coldfire_trap_init;
55 mach_reset = coldfire_reset; 53 mach_reset = coldfire_reset;
56} 54}
57 55
diff --git a/arch/m68knommu/platform/523x/config.c b/arch/m68knommu/platform/523x/config.c
index c0157e110035..0a3af05a434b 100644
--- a/arch/m68knommu/platform/523x/config.c
+++ b/arch/m68knommu/platform/523x/config.c
@@ -29,7 +29,6 @@
29void coldfire_pit_tick(void); 29void coldfire_pit_tick(void);
30void coldfire_pit_init(irq_handler_t handler); 30void coldfire_pit_init(irq_handler_t handler);
31unsigned long coldfire_pit_offset(void); 31unsigned long coldfire_pit_offset(void);
32void coldfire_trap_init(void);
33void coldfire_reset(void); 32void coldfire_reset(void);
34 33
35/***************************************************************************/ 34/***************************************************************************/
@@ -66,7 +65,6 @@ void config_BSP(char *commandp, int size)
66 mach_sched_init = coldfire_pit_init; 65 mach_sched_init = coldfire_pit_init;
67 mach_tick = coldfire_pit_tick; 66 mach_tick = coldfire_pit_tick;
68 mach_gettimeoffset = coldfire_pit_offset; 67 mach_gettimeoffset = coldfire_pit_offset;
69 mach_trap_init = coldfire_trap_init;
70 mach_reset = coldfire_reset; 68 mach_reset = coldfire_reset;
71} 69}
72 70
diff --git a/arch/m68knommu/platform/5249/config.c b/arch/m68knommu/platform/5249/config.c
index 4cdeb719512d..dc2c362590c2 100644
--- a/arch/m68knommu/platform/5249/config.c
+++ b/arch/m68knommu/platform/5249/config.c
@@ -27,7 +27,6 @@
27void coldfire_tick(void); 27void coldfire_tick(void);
28void coldfire_timer_init(irq_handler_t handler); 28void coldfire_timer_init(irq_handler_t handler);
29unsigned long coldfire_timer_offset(void); 29unsigned long coldfire_timer_offset(void);
30void coldfire_trap_init(void);
31void coldfire_reset(void); 30void coldfire_reset(void);
32 31
33/***************************************************************************/ 32/***************************************************************************/
@@ -99,7 +98,6 @@ void config_BSP(char *commandp, int size)
99 mach_sched_init = coldfire_timer_init; 98 mach_sched_init = coldfire_timer_init;
100 mach_tick = coldfire_tick; 99 mach_tick = coldfire_tick;
101 mach_gettimeoffset = coldfire_timer_offset; 100 mach_gettimeoffset = coldfire_timer_offset;
102 mach_trap_init = coldfire_trap_init;
103 mach_reset = coldfire_reset; 101 mach_reset = coldfire_reset;
104} 102}
105 103
diff --git a/arch/m68knommu/platform/5272/config.c b/arch/m68knommu/platform/5272/config.c
index 609b10e4b9b9..1365a8300d5d 100644
--- a/arch/m68knommu/platform/5272/config.c
+++ b/arch/m68knommu/platform/5272/config.c
@@ -28,7 +28,6 @@
28void coldfire_tick(void); 28void coldfire_tick(void);
29void coldfire_timer_init(irq_handler_t handler); 29void coldfire_timer_init(irq_handler_t handler);
30unsigned long coldfire_timer_offset(void); 30unsigned long coldfire_timer_offset(void);
31void coldfire_trap_init(void);
32void coldfire_reset(void); 31void coldfire_reset(void);
33 32
34extern unsigned int mcf_timervector; 33extern unsigned int mcf_timervector;
@@ -132,7 +131,6 @@ void config_BSP(char *commandp, int size)
132 mach_sched_init = coldfire_timer_init; 131 mach_sched_init = coldfire_timer_init;
133 mach_tick = coldfire_tick; 132 mach_tick = coldfire_tick;
134 mach_gettimeoffset = coldfire_timer_offset; 133 mach_gettimeoffset = coldfire_timer_offset;
135 mach_trap_init = coldfire_trap_init;
136 mach_reset = coldfire_reset; 134 mach_reset = coldfire_reset;
137} 135}
138 136
diff --git a/arch/m68knommu/platform/527x/config.c b/arch/m68knommu/platform/527x/config.c
index 126dac066482..1b820441419a 100644
--- a/arch/m68knommu/platform/527x/config.c
+++ b/arch/m68knommu/platform/527x/config.c
@@ -29,7 +29,6 @@
29void coldfire_pit_tick(void); 29void coldfire_pit_tick(void);
30void coldfire_pit_init(irq_handler_t handler); 30void coldfire_pit_init(irq_handler_t handler);
31unsigned long coldfire_pit_offset(void); 31unsigned long coldfire_pit_offset(void);
32void coldfire_trap_init(void);
33void coldfire_reset(void); 32void coldfire_reset(void);
34 33
35/***************************************************************************/ 34/***************************************************************************/
@@ -66,7 +65,6 @@ void config_BSP(char *commandp, int size)
66 mach_sched_init = coldfire_pit_init; 65 mach_sched_init = coldfire_pit_init;
67 mach_tick = coldfire_pit_tick; 66 mach_tick = coldfire_pit_tick;
68 mach_gettimeoffset = coldfire_pit_offset; 67 mach_gettimeoffset = coldfire_pit_offset;
69 mach_trap_init = coldfire_trap_init;
70 mach_reset = coldfire_reset; 68 mach_reset = coldfire_reset;
71} 69}
72 70
diff --git a/arch/m68knommu/platform/528x/config.c b/arch/m68knommu/platform/528x/config.c
index aab1ef0c1f78..a089e9513699 100644
--- a/arch/m68knommu/platform/528x/config.c
+++ b/arch/m68knommu/platform/528x/config.c
@@ -29,7 +29,6 @@
29void coldfire_pit_tick(void); 29void coldfire_pit_tick(void);
30void coldfire_pit_init(irq_handler_t handler); 30void coldfire_pit_init(irq_handler_t handler);
31unsigned long coldfire_pit_offset(void); 31unsigned long coldfire_pit_offset(void);
32void coldfire_trap_init(void);
33void coldfire_reset(void); 32void coldfire_reset(void);
34 33
35/***************************************************************************/ 34/***************************************************************************/
@@ -66,7 +65,6 @@ void config_BSP(char *commandp, int size)
66 mach_sched_init = coldfire_pit_init; 65 mach_sched_init = coldfire_pit_init;
67 mach_tick = coldfire_pit_tick; 66 mach_tick = coldfire_pit_tick;
68 mach_gettimeoffset = coldfire_pit_offset; 67 mach_gettimeoffset = coldfire_pit_offset;
69 mach_trap_init = coldfire_trap_init;
70 mach_reset = coldfire_reset; 68 mach_reset = coldfire_reset;
71} 69}
72 70
diff --git a/arch/m68knommu/platform/5307/config.c b/arch/m68knommu/platform/5307/config.c
index 1f10e941b87c..e3461619fd65 100644
--- a/arch/m68knommu/platform/5307/config.c
+++ b/arch/m68knommu/platform/5307/config.c
@@ -29,7 +29,6 @@
29void coldfire_tick(void); 29void coldfire_tick(void);
30void coldfire_timer_init(irq_handler_t handler); 30void coldfire_timer_init(irq_handler_t handler);
31unsigned long coldfire_timer_offset(void); 31unsigned long coldfire_timer_offset(void);
32void coldfire_trap_init(void);
33void coldfire_reset(void); 32void coldfire_reset(void);
34 33
35extern unsigned int mcf_timervector; 34extern unsigned int mcf_timervector;
@@ -126,7 +125,6 @@ void config_BSP(char *commandp, int size)
126 mach_sched_init = coldfire_timer_init; 125 mach_sched_init = coldfire_timer_init;
127 mach_tick = coldfire_tick; 126 mach_tick = coldfire_tick;
128 mach_gettimeoffset = coldfire_timer_offset; 127 mach_gettimeoffset = coldfire_timer_offset;
129 mach_trap_init = coldfire_trap_init;
130 mach_reset = coldfire_reset; 128 mach_reset = coldfire_reset;
131 129
132#ifdef MCF_BDM_DISABLE 130#ifdef MCF_BDM_DISABLE
diff --git a/arch/m68knommu/platform/5307/pit.c b/arch/m68knommu/platform/5307/pit.c
index aa15beeb36ca..e53c446d10e4 100644
--- a/arch/m68knommu/platform/5307/pit.c
+++ b/arch/m68knommu/platform/5307/pit.c
@@ -5,9 +5,8 @@
  * hardware timer only exists in the Freescale ColdFire
  * 5270/5271, 5282 and other CPUs.
  *
- * Copyright (C) 1999-2006, Greg Ungerer (gerg@snapgear.com)
+ * Copyright (C) 1999-2007, Greg Ungerer (gerg@snapgear.com)
  * Copyright (C) 2001-2004, SnapGear Inc. (www.snapgear.com)
- *
  */
 
 /***************************************************************************/
@@ -17,8 +16,8 @@
 #include <linux/param.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <asm/io.h>
-#include <asm/irq.h>
 #include <asm/coldfire.h>
 #include <asm/mcfpit.h>
 #include <asm/mcfsim.h>
@@ -43,13 +42,18 @@ void coldfire_pit_tick(void)
 
 /***************************************************************************/
 
+static struct irqaction coldfire_pit_irq = {
+	.name	= "timer",
+	.flags	= IRQF_DISABLED | IRQF_TIMER,
+};
+
 void coldfire_pit_init(irq_handler_t handler)
 {
 	volatile unsigned char *icrp;
 	volatile unsigned long *imrp;
 
-	request_irq(MCFINT_VECBASE + MCFINT_PIT1, handler, IRQF_DISABLED,
-		"ColdFire Timer", NULL);
+	coldfire_pit_irq.handler = handler;
+	setup_irq(MCFINT_VECBASE + MCFINT_PIT1, &coldfire_pit_irq);
 
 	icrp = (volatile unsigned char *) (MCF_IPSBAR + MCFICM_INTC0 +
 		MCFINTC_ICR0 + MCFINT_PIT1);
diff --git a/arch/m68knommu/platform/5307/timers.c b/arch/m68knommu/platform/5307/timers.c
index fb66eadd5896..64bd0ff9029e 100644
--- a/arch/m68knommu/platform/5307/timers.c
+++ b/arch/m68knommu/platform/5307/timers.c
@@ -3,7 +3,7 @@
 /*
  * timers.c -- generic ColdFire hardware timer support.
  *
- * Copyright (C) 1999-2006, Greg Ungerer (gerg@snapgear.com)
+ * Copyright (C) 1999-2007, Greg Ungerer (gerg@snapgear.com)
  */
 
 /***************************************************************************/
@@ -13,8 +13,8 @@
 #include <linux/param.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/irq.h>
 #include <asm/io.h>
-#include <asm/irq.h>
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
@@ -62,17 +62,24 @@ void coldfire_tick(void)
 
 /***************************************************************************/
 
+static struct irqaction coldfire_timer_irq = {
+	.name	= "timer",
+	.flags	= IRQF_DISABLED | IRQF_TIMER,
+};
+
 static int ticks_per_intr;
 
 void coldfire_timer_init(irq_handler_t handler)
 {
+	coldfire_timer_irq.handler = handler;
+	setup_irq(mcf_timervector, &coldfire_timer_irq);
+
 	__raw_writew(MCFTIMER_TMR_DISABLE, TA(MCFTIMER_TMR));
 	ticks_per_intr = (MCF_BUSCLK / 16) / HZ;
 	__raw_writetrr(ticks_per_intr - 1, TA(MCFTIMER_TRR));
 	__raw_writew(MCFTIMER_TMR_ENORI | MCFTIMER_TMR_CLK16 |
 		MCFTIMER_TMR_RESTART | MCFTIMER_TMR_ENABLE, TA(MCFTIMER_TMR));
 
-	request_irq(mcf_timervector, handler, IRQF_DISABLED, "timer", NULL);
 	mcf_settimericr(1, mcf_timerlevel);
 
 #ifdef CONFIG_HIGHPROFILE
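
The m68knommu hunks above and below all repeat one conversion: each boot-time
timer handler is now attached with setup_irq() on a static struct irqaction
instead of request_irq(). A minimal sketch of the pattern, with
EXAMPLE_TIMER_VECTOR standing in for each platform's real vector (a
hypothetical name, not from the patch); the usual motivation is that
setup_irq() takes a caller-provided irqaction and so works before the
allocator that request_irq() relies on:

#include <linux/interrupt.h>
#include <linux/irq.h>

static struct irqaction example_timer_irq = {
	.name	= "timer",
	.flags	= IRQF_DISABLED | IRQF_TIMER,
};

void example_timer_init(irq_handler_t handler)
{
	/* Record the handler, then install the static action directly. */
	example_timer_irq.handler = handler;
	setup_irq(EXAMPLE_TIMER_VECTOR, &example_timer_irq);
}
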
diff --git a/arch/m68knommu/platform/532x/config.c b/arch/m68knommu/platform/532x/config.c
index dc39c466e33f..b32c6425f821 100644
--- a/arch/m68knommu/platform/532x/config.c
+++ b/arch/m68knommu/platform/532x/config.c
@@ -37,7 +37,6 @@
 void coldfire_tick(void);
 void coldfire_timer_init(irq_handler_t handler);
 unsigned long coldfire_timer_offset(void);
-void coldfire_trap_init(void);
 void coldfire_reset(void);
 
 extern unsigned int mcf_timervector;
@@ -108,7 +107,6 @@ void config_BSP(char *commandp, int size)
 	mach_sched_init = coldfire_timer_init;
 	mach_tick = coldfire_tick;
 	mach_gettimeoffset = coldfire_timer_offset;
-	mach_trap_init = coldfire_trap_init;
 	mach_reset = coldfire_reset;
 
 #ifdef MCF_BDM_DISABLE
diff --git a/arch/m68knommu/platform/5407/config.c b/arch/m68knommu/platform/5407/config.c
index fde417fdd650..e692536817d8 100644
--- a/arch/m68knommu/platform/5407/config.c
+++ b/arch/m68knommu/platform/5407/config.c
@@ -28,7 +28,6 @@
 void coldfire_tick(void);
 void coldfire_timer_init(irq_handler_t handler);
 unsigned long coldfire_timer_offset(void);
-void coldfire_trap_init(void);
 void coldfire_reset(void);
 
 extern unsigned int mcf_timervector;
@@ -112,7 +111,6 @@ void config_BSP(char *commandp, int size)
 	mach_sched_init = coldfire_timer_init;
 	mach_tick = coldfire_tick;
 	mach_gettimeoffset = coldfire_timer_offset;
-	mach_trap_init = coldfire_trap_init;
 	mach_reset = coldfire_reset;
 }
 
diff --git a/arch/m68knommu/platform/68328/timers.c b/arch/m68knommu/platform/68328/timers.c
index ef067f4c3cd4..0396476f955d 100644
--- a/arch/m68knommu/platform/68328/timers.c
+++ b/arch/m68knommu/platform/68328/timers.c
@@ -18,10 +18,10 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <asm/setup.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
-#include <asm/irq.h>
 #include <asm/machdep.h>
 #include <asm/MC68VZ328.h>
 
@@ -53,14 +53,19 @@
 
 /***************************************************************************/
 
+static struct irqaction m68328_timer_irq = {
+	.name	= "timer",
+	.flags	= IRQF_DISABLED | IRQF_TIMER,
+};
+
 void m68328_timer_init(irq_handler_t timer_routine)
 {
 	/* disable timer 1 */
 	TCTL = 0;
 
 	/* set ISR */
-	if (request_irq(TMR_IRQ_NUM, timer_routine, IRQ_FLG_LOCK, "timer", NULL))
-		panic("Unable to attach timer interrupt\n");
+	m68328_timer_irq.handler = timer_routine;
+	setup_irq(TMR_IRQ_NUM, &m68328_timer_irq);
 
 	/* Restart mode, Enable int, Set clock source */
 	TCTL = TCTL_OM | TCTL_IRQEN | CLOCK_SOURCE;
diff --git a/arch/m68knommu/platform/68360/config.c b/arch/m68knommu/platform/68360/config.c
index 4ff13bd51ffd..155b72fe2607 100644
--- a/arch/m68knommu/platform/68360/config.c
+++ b/arch/m68knommu/platform/68360/config.c
@@ -17,11 +17,11 @@
 #include <linux/tty.h>
 #include <linux/console.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 
 #include <asm/setup.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
-#include <asm/irq.h>
 #include <asm/machdep.h>
 #include <asm/m68360.h>
 
@@ -51,11 +51,15 @@ extern unsigned long int system_clock; //In kernel setup.c
 
 extern void config_M68360_irq(void);
 
+static struct irqaction m68360_timer_irq = {
+	.name	= "timer",
+	.flags	= IRQF_DISABLED | IRQF_TIMER,
+};
+
 void BSP_sched_init(irq_handler_t timer_routine)
 {
 	unsigned char prescaler;
 	unsigned short tgcr_save;
-	int return_value;
 
 #if 0
 	/* Restart mode, Enable int, 32KHz, Enable timer */
@@ -86,10 +90,8 @@ void BSP_sched_init(irq_handler_t timer_routine)
 	pquicc->timer_ter1 = 0x0003; /* clear timer events */
 
 	/* enable timer 1 interrupt in CIMR */
-//	request_irq(IRQ_MACHSPEC | CPMVEC_TIMER1, timer_routine, IRQ_FLG_LOCK, "timer", NULL);
-	//return_value = request_irq( CPMVEC_TIMER1, timer_routine, IRQ_FLG_LOCK, "timer", NULL);
-	return_value = request_irq(CPMVEC_TIMER1 , timer_routine, IRQ_FLG_LOCK,
-		"Timer", NULL);
+	m68360_timer_irq.handler = timer_routine;
+	setup_irq(CPMVEC_TIMER1, &m68360_timer_irq);
 
 	/* Start timer 1: */
 	tgcr_save = (pquicc->timer_tgcr & 0xfff0) | 0x0001;
diff --git a/arch/sparc64/kernel/viohs.c b/arch/sparc64/kernel/viohs.c
index 09126fc338ba..708fa1705fbe 100644
--- a/arch/sparc64/kernel/viohs.c
+++ b/arch/sparc64/kernel/viohs.c
@@ -702,7 +702,7 @@ u32 vio_send_sid(struct vio_driver_state *vio)
 }
 EXPORT_SYMBOL(vio_send_sid);
 
-extern int vio_ldc_alloc(struct vio_driver_state *vio,
+int vio_ldc_alloc(struct vio_driver_state *vio,
 			 struct ldc_channel_config *base_cfg,
 			 void *event_arg)
 {
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index b70f3e7cf06c..dffd2ac72747 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -41,8 +41,9 @@ int sysctl_vsyscall32 = 1;
 #undef ARCH_DLINFO
 #define ARCH_DLINFO do {  \
 	if (sysctl_vsyscall32) { \
+		current->mm->context.vdso = (void *)VSYSCALL32_BASE;	\
 		NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \
 		NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL32_BASE); \
 	} \
 } while(0)
 
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index fc4419ff0355..15013bac181c 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -49,14 +49,6 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
 	return ret;
 }
 
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
-	if (vma->vm_start == VSYSCALL32_BASE &&
-	    vma->vm_mm && vma->vm_mm->task_size == IA32_PAGE_OFFSET)
-		return "[vdso]";
-	return NULL;
-}
-
 static int __init init_syscall32(void)
 {
 	char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
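
The two ia32 hunks above are halves of one change: ARCH_DLINFO now records the
vDSO base in mm->context.vdso, so the per-arch arch_vma_name() can be deleted
in favour of a lookup keyed on that pointer. A hypothetical sketch of such a
lookup (example_vma_name is illustrative, not from this patch):

const char *example_vma_name(struct vm_area_struct *vma)
{
	/* The exec-time assignment in ARCH_DLINFO makes this comparison
	 * possible without the task_size heuristic removed above. */
	if (vma->vm_mm &&
	    vma->vm_start == (unsigned long)vma->vm_mm->context.vdso)
		return "[vdso]";
	return NULL;
}
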
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
index 3aeae2fa2e24..821527e7faa3 100644
--- a/arch/x86_64/kernel/tce.c
+++ b/arch/x86_64/kernel/tce.c
@@ -165,7 +165,7 @@ done:
 	return ret;
 }
 
-void* alloc_tce_table(void)
+void * __init alloc_tce_table(void)
 {
 	unsigned int size;
 
@@ -175,7 +175,7 @@ void* alloc_tce_table(void)
 	return __alloc_bootmem_low(size, size, 0);
 }
 
-void free_tce_table(void *tbl)
+void __init free_tce_table(void *tbl)
 {
 	unsigned int size;
 
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c
index 9b76b03d0600..2a59bde663f2 100644
--- a/arch/x86_64/kernel/tsc.c
+++ b/arch/x86_64/kernel/tsc.c
@@ -118,8 +118,6 @@ core_initcall(cpufreq_tsc);
 
 #endif
 
-static int tsc_unstable = 0;
-
 /*
  * Make an educated guess if the TSC is trustworthy and synchronized
  * over all CPUs.
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 38f5d6368006..458893b376f8 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -600,6 +600,16 @@ void mark_rodata_ro(void)
 {
 	unsigned long start = (unsigned long)_stext, end;
 
+#ifdef CONFIG_HOTPLUG_CPU
+	/* It must still be possible to apply SMP alternatives. */
+	if (num_possible_cpus() > 1)
+		start = (unsigned long)_etext;
+#endif
+
+#ifdef CONFIG_KPROBES
+	start = (unsigned long)__start_rodata;
+#endif
+
 	end = (unsigned long)__end_rodata;
 	start = (start + PAGE_SIZE - 1) & PAGE_MASK;
 	end &= PAGE_MASK;
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 251344cb29ae..22b401b2e088 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -11,9 +11,6 @@ menuconfig ACPI
 	depends on PCI
 	depends on PM
 	select PNP
-	# for sleep
-	select HOTPLUG_CPU if X86 && SMP
-	select SUSPEND_SMP if X86 && SMP
 	default y
 	---help---
 	  Advanced Configuration and Power Interface (ACPI) support for
diff --git a/drivers/base/power/shutdown.c b/drivers/base/power/shutdown.c
index a47ee1b70d20..56e8eaaac012 100644
--- a/drivers/base/power/shutdown.c
+++ b/drivers/base/power/shutdown.c
@@ -44,7 +44,5 @@ void device_shutdown(void)
 			dev->driver->shutdown(dev);
 		}
 	}
-
-	sysdev_shutdown();
 }
 
diff --git a/drivers/block/lguest_blk.c b/drivers/block/lguest_blk.c
index 5b79d0724171..93e3c4001bf5 100644
--- a/drivers/block/lguest_blk.c
+++ b/drivers/block/lguest_blk.c
@@ -1,6 +1,12 @@
-/* A simple block driver for lguest.
+/*D:400
+ * The Guest block driver
  *
- * Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
+ * This is a simple block driver, which appears as /dev/lgba, lgbb, lgbc etc.
+ * The mechanism is simple: we place the information about the request in the
+ * device page, then use SEND_DMA (containing the data for a write, or an empty
+ * "ping" DMA for a read).
+ :*/
+/* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -25,27 +31,50 @@
 
 static char next_block_index = 'a';
 
+/*D:420 Here is the structure which holds all the information we need about
+ * each Guest block device.
+ *
+ * I'm sure at this stage, you're wondering "hey, where was the adventure I was
+ * promised?" and thinking "Rusty sucks, I shall say nasty things about him on
+ * my blog".  I think Real adventures have boring bits, too, and you're in the
+ * middle of one.  But it gets better.  Just not quite yet. */
 struct blockdev
 {
+	/* The block queue infrastructure wants a spinlock: it is held while it
+	 * calls our block request function.  We grab it in our interrupt
+	 * handler so the responses don't mess with new requests. */
 	spinlock_t lock;
 
-	/* The disk structure for the kernel. */
+	/* The disk structure registered with kernel. */
 	struct gendisk *disk;
 
-	/* The major number for this disk. */
+	/* The major device number for this disk, and the interrupt.  We only
+	 * really keep them here for completeness; we'd need them if we
+	 * supported device unplugging. */
 	int major;
 	int irq;
 
+	/* The physical address of this device's memory page */
 	unsigned long phys_addr;
-	/* The mapped block page. */
+	/* The mapped memory page for convenient acces. */
 	struct lguest_block_page *lb_page;
 
-	/* We only have a single request outstanding at a time. */
+	/* We only have a single request outstanding at a time: this is it. */
 	struct lguest_dma dma;
 	struct request *req;
 };
 
-/* Jens gave me this nice helper to end all chunks of a request. */
+/*D:495 We originally used end_request() throughout the driver, but it turns
+ * out that end_request() is deprecated, and doesn't actually end the request
+ * (which seems like a good reason to deprecate it!).  It simply ends the first
+ * bio.  So if we had 3 bios in a "struct request" we would do all 3,
+ * end_request(), do 2, end_request(), do 1 and end_request(): twice as much
+ * work as we needed to do.
+ *
+ * This reinforced to me that I do not understand the block layer.
+ *
+ * Nonetheless, Jens Axboe gave me this nice helper to end all chunks of a
+ * request.  This improved disk speed by 130%. */
 static void end_entire_request(struct request *req, int uptodate)
 {
 	if (end_that_request_first(req, uptodate, req->hard_nr_sectors))
@@ -55,30 +84,62 @@ static void end_entire_request(struct request *req, int uptodate)
 	end_that_request_last(req, uptodate);
 }
 
+/* I'm told there are only two stories in the world worth telling: love and
+ * hate.  So there used to be a love scene here like this:
+ *
+ *  Launcher: We could make beautiful I/O together, you and I.
+ *  Guest: My, that's a big disk!
+ *
+ * Unfortunately, it was just too raunchy for our otherwise-gentle tale. */
+
+/*D:490 This is the interrupt handler, called when a block read or write has
+ * been completed for us. */
 static irqreturn_t lgb_irq(int irq, void *_bd)
 {
+	/* We handed our "struct blockdev" as the argument to request_irq(), so
+	 * it is passed through to us here.  This tells us which device we're
+	 * dealing with in case we have more than one. */
 	struct blockdev *bd = _bd;
 	unsigned long flags;
 
+	/* We weren't doing anything?  Strange, but could happen if we shared
+	 * interrupts (we don't!). */
 	if (!bd->req) {
 		pr_debug("No work!\n");
 		return IRQ_NONE;
 	}
 
+	/* Not done yet?  That's equally strange. */
 	if (!bd->lb_page->result) {
 		pr_debug("No result!\n");
 		return IRQ_NONE;
 	}
 
+	/* We have to grab the lock before ending the request. */
 	spin_lock_irqsave(&bd->lock, flags);
+	/* "result" is 1 for success, 2 for failure: end_entire_request() wants
+	 * to know whether this succeeded or not. */
 	end_entire_request(bd->req, bd->lb_page->result == 1);
+	/* Clear out request, it's done. */
 	bd->req = NULL;
+	/* Reset incoming DMA for next time. */
 	bd->dma.used_len = 0;
+	/* Ready for more reads or writes */
 	blk_start_queue(bd->disk->queue);
 	spin_unlock_irqrestore(&bd->lock, flags);
+
+	/* The interrupt was for us, we dealt with it. */
 	return IRQ_HANDLED;
 }
 
+/*D:480 The block layer's "struct request" contains a number of "struct bio"s,
+ * each of which contains "struct bio_vec"s, each of which contains a page, an
+ * offset and a length.
+ *
+ * Fortunately there are iterators to help us walk through the "struct
+ * request".  Even more fortunately, there were plenty of places to steal the
+ * code from.  We pack the "struct request" into our "struct lguest_dma" and
+ * return the total length. */
 static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma)
 {
 	unsigned int i = 0, idx, len = 0;
@@ -87,8 +148,13 @@ static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma)
 	rq_for_each_bio(bio, req) {
 		struct bio_vec *bvec;
 		bio_for_each_segment(bvec, bio, idx) {
+			/* We told the block layer not to give us too many. */
 			BUG_ON(i == LGUEST_MAX_DMA_SECTIONS);
+			/* If we had a zero-length segment, it would look like
+			 * the end of the data referred to by the "struct
+			 * lguest_dma", so make sure that doesn't happen. */
 			BUG_ON(!bvec->bv_len);
+			/* Convert page & offset to a physical address */
 			dma->addr[i] = page_to_phys(bvec->bv_page)
 				+ bvec->bv_offset;
 			dma->len[i] = bvec->bv_len;
@@ -96,26 +162,39 @@ static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma)
 			i++;
 		}
 	}
+	/* If the array isn't full, we mark the end with a 0 length */
 	if (i < LGUEST_MAX_DMA_SECTIONS)
 		dma->len[i] = 0;
 	return len;
 }
 
+/* This creates an empty DMA, useful for prodding the Host without sending data
+ * (ie. when we want to do a read) */
 static void empty_dma(struct lguest_dma *dma)
 {
 	dma->len[0] = 0;
 }
 
+/*D:470 Setting up a request is fairly easy: */
 static void setup_req(struct blockdev *bd,
 		      int type, struct request *req, struct lguest_dma *dma)
 {
+	/* The type is 1 (write) or 0 (read). */
 	bd->lb_page->type = type;
+	/* The sector on disk where the read or write starts. */
 	bd->lb_page->sector = req->sector;
+	/* The result is initialized to 0 (unfinished). */
 	bd->lb_page->result = 0;
+	/* The current request (so we can end it in the interrupt handler). */
 	bd->req = req;
+	/* The number of bytes: returned as a side-effect of req_to_dma(),
+	 * which packs the block layer's "struct request" into our "struct
+	 * lguest_dma" */
 	bd->lb_page->bytes = req_to_dma(req, dma);
 }
 
+/*D:450 Write is pretty straightforward: we pack the request into a "struct
+ * lguest_dma", then use SEND_DMA to send the request. */
 static void do_write(struct blockdev *bd, struct request *req)
 {
 	struct lguest_dma send;
@@ -126,6 +205,9 @@ static void do_write(struct blockdev *bd, struct request *req)
 	lguest_send_dma(bd->phys_addr, &send);
 }
 
+/* Read is similar to write, except we pack the request into our receive
+ * "struct lguest_dma" and send through an empty DMA just to tell the Host that
+ * there's a request pending. */
 static void do_read(struct blockdev *bd, struct request *req)
 {
 	struct lguest_dma ping;
@@ -137,21 +219,30 @@ static void do_read(struct blockdev *bd, struct request *req)
 	lguest_send_dma(bd->phys_addr, &ping);
 }
 
+/*D:440 This where requests come in: we get handed the request queue and are
+ * expected to pull a "struct request" off it until we've finished them or
+ * we're waiting for a reply: */
 static void do_lgb_request(struct request_queue *q)
 {
 	struct blockdev *bd;
 	struct request *req;
 
 again:
+	/* This sometimes returns NULL even on the very first time around.  I
+	 * wonder if it's something to do with letting elves handle the request
+	 * queue... */
 	req = elv_next_request(q);
 	if (!req)
 		return;
 
+	/* We attached the struct blockdev to the disk: get it back */
 	bd = req->rq_disk->private_data;
-	/* Sometimes we get repeated requests after blk_stop_queue. */
+	/* Sometimes we get repeated requests after blk_stop_queue(), but we
+	 * can only handle one at a time. */
 	if (bd->req)
 		return;
 
+	/* We only do reads and writes: no tricky business! */
 	if (!blk_fs_request(req)) {
 		pr_debug("Got non-command 0x%08x\n", req->cmd_type);
 		req->errors++;
@@ -164,20 +255,31 @@ again:
 	else
 		do_read(bd, req);
 
-	/* Wait for interrupt to tell us it's done. */
+	/* We've put out the request, so stop any more coming in until we get
+	 * an interrupt, which takes us to lgb_irq() to re-enable the queue. */
 	blk_stop_queue(q);
 }
 
+/*D:430 This is the "struct block_device_operations" we attach to the disk at
+ * the end of lguestblk_probe().  It doesn't seem to want much. */
 static struct block_device_operations lguestblk_fops = {
 	.owner = THIS_MODULE,
 };
 
+/*D:425 Setting up a disk device seems to involve a lot of code.  I'm not sure
+ * quite why.  I do know that the IDE code sent two or three of the maintainers
+ * insane, perhaps this is the fringe of the same disease?
+ *
+ * As in the console code, the probe function gets handed the generic
+ * lguest_device from lguest_bus.c: */
 static int lguestblk_probe(struct lguest_device *lgdev)
 {
 	struct blockdev *bd;
 	int err;
 	int irqflags = IRQF_SHARED;
 
+	/* First we allocate our own "struct blockdev" and initialize the easy
+	 * fields. */
 	bd = kmalloc(sizeof(*bd), GFP_KERNEL);
 	if (!bd)
 		return -ENOMEM;
@@ -187,59 +289,100 @@ static int lguestblk_probe(struct lguest_device *lgdev)
 	bd->req = NULL;
 	bd->dma.used_len = 0;
 	bd->dma.len[0] = 0;
+	/* The descriptor in the lguest_devices array provided by the Host
+	 * gives the Guest the physical page number of the device's page. */
 	bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT);
 
+	/* We use lguest_map() to get a pointer to the device page */
 	bd->lb_page = lguest_map(bd->phys_addr, 1);
 	if (!bd->lb_page) {
 		err = -ENOMEM;
 		goto out_free_bd;
 	}
 
+	/* We need a major device number: 0 means "assign one dynamically". */
 	bd->major = register_blkdev(0, "lguestblk");
 	if (bd->major < 0) {
 		err = bd->major;
 		goto out_unmap;
 	}
 
+	/* This allocates a "struct gendisk" where we pack all the information
+	 * about the disk which the rest of Linux sees.  We ask for one minor
+	 * number; I do wonder if we should be asking for more. */
 	bd->disk = alloc_disk(1);
 	if (!bd->disk) {
 		err = -ENOMEM;
 		goto out_unregister_blkdev;
 	}
 
+	/* Every disk needs a queue for requests to come in: we set up the
+	 * queue with a callback function (the core of our driver) and the lock
+	 * to use. */
 	bd->disk->queue = blk_init_queue(do_lgb_request, &bd->lock);
 	if (!bd->disk->queue) {
 		err = -ENOMEM;
 		goto out_put_disk;
 	}
 
-	/* We can only handle a certain number of sg entries */
+	/* We can only handle a certain number of pointers in our SEND_DMA
+	 * call, so we set that with blk_queue_max_hw_segments().  This is not
+	 * to be confused with blk_queue_max_phys_segments() of course!  I
+	 * know, who could possibly confuse the two?
+	 *
+	 * Well, it's simple to tell them apart: this one seems to work and the
+	 * other one didn't. */
 	blk_queue_max_hw_segments(bd->disk->queue, LGUEST_MAX_DMA_SECTIONS);
-	/* Buffers must not cross page boundaries */
+
+	/* Due to technical limitations of our Host (and simple coding) we
+	 * can't have a single buffer which crosses a page boundary.  Tell it
+	 * here.  This means that our maximum request size is 16
+	 * (LGUEST_MAX_DMA_SECTIONS) pages. */
 	blk_queue_segment_boundary(bd->disk->queue, PAGE_SIZE-1);
 
+	/* We name our disk: this becomes the device name when udev does its
+	 * magic thing and creates the device node, such as /dev/lgba.
+	 * next_block_index is a global which starts at 'a'.  Unfortunately
+	 * this simple increment logic means that the 27th disk will be called
+	 * "/dev/lgb{".  In that case, I recommend having at least 29 disks, so
+	 * your /dev directory will be balanced. */
 	sprintf(bd->disk->disk_name, "lgb%c", next_block_index++);
+
+	/* We look to the device descriptor again to see if this device's
+	 * interrupts are expected to be random.  If they are, we tell the irq
+	 * subsystem.  At the moment this bit is always set. */
 	if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS)
 		irqflags |= IRQF_SAMPLE_RANDOM;
+
+	/* Now we have the name and irqflags, we can request the interrupt; we
+	 * give it the "struct blockdev" we have set up to pass to lgb_irq()
+	 * when there is an interrupt. */
 	err = request_irq(bd->irq, lgb_irq, irqflags, bd->disk->disk_name, bd);
 	if (err)
 		goto out_cleanup_queue;
 
+	/* We bind our one-entry DMA pool to the key for this block device so
+	 * the Host can reply to our requests.  The key is equal to the
+	 * physical address of the device's page, which is conveniently
+	 * unique. */
 	err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq);
 	if (err)
 		goto out_free_irq;
 
+	/* We finish our disk initialization and add the disk to the system. */
 	bd->disk->major = bd->major;
 	bd->disk->first_minor = 0;
 	bd->disk->private_data = bd;
 	bd->disk->fops = &lguestblk_fops;
-	/* This is initialized to the disk size by the other end. */
+	/* This is initialized to the disk size by the Launcher. */
 	set_capacity(bd->disk, bd->lb_page->num_sectors);
 	add_disk(bd->disk);
 
 	printk(KERN_INFO "%s: device %i at major %d\n",
 	       bd->disk->disk_name, lgdev->index, bd->major);
 
+	/* We don't need to keep the "struct blockdev" around, but if we ever
+	 * implemented device removal, we'd need this. */
 	lgdev->private = bd;
 	return 0;
 
@@ -258,6 +401,8 @@ out_free_bd:
 	return err;
 }
 
+/*D:410 The boilerplate code for registering the lguest block driver is just
+ * like the console: */
 static struct lguest_driver lguestblk_drv = {
 	.name = "lguestblk",
 	.owner = THIS_MODULE,
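
Taken together, the new comments describe a complete write round trip:
setup_req() fills the device page and packs the bios into a "struct
lguest_dma", SEND_DMA kicks the Host, and lgb_irq() later ends the request and
restarts the queue. A condensed sketch using only the calls shown in the hunks
above (example_write is illustrative; the driver's own do_write is equivalent):

static void example_write(struct blockdev *bd, struct request *req)
{
	struct lguest_dma send;

	/* Describe the request in the device page; type 1 means write. */
	setup_req(bd, 1, req, &send);
	/* Kick the Host; completion arrives later via lgb_irq(). */
	lguest_send_dma(bd->phys_addr, &send);
}
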
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index acdbcdc3e457..b391776e5bf3 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -130,6 +130,7 @@ config ROCKETPORT
 config CYCLADES
 	tristate "Cyclades async mux support"
 	depends on SERIAL_NONSTANDARD && (PCI || ISA)
+	select FW_LOADER
 	---help---
 	  This driver supports Cyclades Z and Y multiserial boards.
 	  You would need something like this to connect more than two modems to
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 9a2694e5f8b9..77bf4aa217a8 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -73,7 +73,7 @@ static struct clocksource clocksource_hpet = {
 	.name		= "hpet",
 	.rating		= 250,
 	.read		= read_hpet,
-	.mask		= 0xffffffffffffffff,
+	.mask		= CLOCKSOURCE_MASK(64),
 	.mult		= 0, /*to be caluclated*/
 	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
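
The hpet hunk replaces a 64-bit literal with CLOCKSOURCE_MASK(64), which
yields the same value while staying correct for narrower counters. At the time
the macro was defined roughly along these lines (a sketch from memory, not
part of this patch):

#define EXAMPLE_CLOCKSOURCE_MASK(bits) \
	(cycle_t)((bits) < 64 ? ((1ULL << (bits)) - 1) : -1)
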
diff --git a/drivers/char/hvc_lguest.c b/drivers/char/hvc_lguest.c
index e7b889e404a7..feeccbaec438 100644
--- a/drivers/char/hvc_lguest.c
+++ b/drivers/char/hvc_lguest.c
@@ -1,6 +1,22 @@
-/* Simple console for lguest.
+/*D:300
+ * The Guest console driver
  *
- * Copyright (C) 2006 Rusty Russell, IBM Corporation
+ * This is a trivial console driver: we use lguest's DMA mechanism to send
+ * bytes out, and register a DMA buffer to receive bytes in.  It is assumed to
+ * be present and available from the very beginning of boot.
+ *
+ * Writing console drivers is one of the few remaining Dark Arts in Linux.
+ * Fortunately for us, the path of virtual consoles has been well-trodden by
+ * the PowerPC folks, who wrote "hvc_console.c" to generically support any
+ * virtual console.  We use that infrastructure which only requires us to write
+ * the basic put_chars and get_chars functions and call the right register
+ * functions.
+ :*/
+
+/*M:002 The console can be flooded: while the Guest is processing input the
+ * Host can send more.  Buffering in the Host could alleviate this, but it is a
+ * difficult problem in general. :*/
+/* Copyright (C) 2006 Rusty Russell, IBM Corporation
  *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -21,49 +37,81 @@
 #include <linux/lguest_bus.h>
 #include "hvc_console.h"
 
+/*D:340 This is our single console input buffer, with associated "struct
+ * lguest_dma" referring to it.  Note the 0-terminated length array, and the
+ * use of physical address for the buffer itself. */
 static char inbuf[256];
 static struct lguest_dma cons_input = { .used_len = 0,
 					.addr[0] = __pa(inbuf),
 					.len[0] = sizeof(inbuf),
 					.len[1] = 0 };
 
+/*D:310 The put_chars() callback is pretty straightforward.
+ *
+ * First we put the pointer and length in a "struct lguest_dma": we only have
+ * one pointer, so we set the second length to 0.  Then we use SEND_DMA to send
+ * the data to (Host) buffers attached to the console key.  Usually a device's
+ * key is a physical address within the device's memory, but because the
+ * console device doesn't have any associated physical memory, we use the
+ * LGUEST_CONSOLE_DMA_KEY constant (aka 0). */
 static int put_chars(u32 vtermno, const char *buf, int count)
 {
 	struct lguest_dma dma;
 
-	/* FIXME: what if it's over a page boundary? */
+	/* FIXME: DMA buffers in a "struct lguest_dma" are not allowed
+	 * to go over page boundaries.  This never seems to happen,
+	 * but if it did we'd need to fix this code. */
 	dma.len[0] = count;
 	dma.len[1] = 0;
 	dma.addr[0] = __pa(buf);
 
 	lguest_send_dma(LGUEST_CONSOLE_DMA_KEY, &dma);
+	/* We're expected to return the amount of data we wrote: all of it. */
 	return count;
 }
 
+/*D:350 get_chars() is the callback from the hvc_console infrastructure when
+ * an interrupt is received.
+ *
+ * Firstly we see if our buffer has been filled: if not, we return.  The rest
+ * of the code deals with the fact that the hvc_console() infrastructure only
+ * asks us for 16 bytes at a time.  We keep a "cons_offset" variable for
+ * partially-read buffers. */
 static int get_chars(u32 vtermno, char *buf, int count)
 {
 	static int cons_offset;
 
+	/* Nothing left to see here... */
 	if (!cons_input.used_len)
 		return 0;
 
+	/* You want more than we have to give?  Well, try wanting less! */
 	if (cons_input.used_len - cons_offset < count)
 		count = cons_input.used_len - cons_offset;
 
+	/* Copy across to their buffer and increment offset. */
 	memcpy(buf, inbuf + cons_offset, count);
 	cons_offset += count;
+
+	/* Finished?  Zero offset, and reset cons_input so Host will use it
+	 * again. */
 	if (cons_offset == cons_input.used_len) {
 		cons_offset = 0;
 		cons_input.used_len = 0;
 	}
 	return count;
 }
+/*:*/
 
 static struct hv_ops lguest_cons = {
 	.get_chars = get_chars,
 	.put_chars = put_chars,
 };
 
+/*D:320 Console drivers are initialized very early so boot messages can go
+ * out.  At this stage, the console is output-only.  Our driver checks we're a
+ * Guest, and if so hands hvc_instantiate() the console number (0), priority
+ * (0), and the struct hv_ops containing the put_chars() function. */
 static int __init cons_init(void)
 {
 	if (strcmp(paravirt_ops.name, "lguest") != 0)
@@ -73,21 +121,46 @@ static int __init cons_init(void)
 }
 console_initcall(cons_init);
 
+/*D:370 To set up and manage our virtual console, we call hvc_alloc() and
+ * stash the result in the private pointer of the "struct lguest_device".
+ * Since we never remove the console device we never need this pointer again,
+ * but using ->private is considered good form, and you never know who's going
+ * to copy your driver.
+ *
+ * Once the console is set up, we bind our input buffer ready for input. */
 static int lguestcons_probe(struct lguest_device *lgdev)
 {
 	int err;
 
+	/* The first argument of hvc_alloc() is the virtual console number, so
+	 * we use zero.  The second argument is the interrupt number.
+	 *
+	 * The third argument is a "struct hv_ops" containing the put_chars()
+	 * and get_chars() pointers.  The final argument is the output buffer
+	 * size: we use 256 and expect the Host to have room for us to send
+	 * that much. */
 	lgdev->private = hvc_alloc(0, lgdev_irq(lgdev), &lguest_cons, 256);
 	if (IS_ERR(lgdev->private))
 		return PTR_ERR(lgdev->private);
 
+	/* We bind a single DMA buffer at key LGUEST_CONSOLE_DMA_KEY.
+	 * "cons_input" is that statically-initialized global DMA buffer we saw
+	 * above, and we also give the interrupt we want. */
 	err = lguest_bind_dma(LGUEST_CONSOLE_DMA_KEY, &cons_input, 1,
 			      lgdev_irq(lgdev));
 	if (err)
 		printk("lguest console: failed to bind buffer.\n");
 	return err;
 }
+/* Note the use of lgdev_irq() for the interrupt number.  We tell hvc_alloc()
+ * to expect input when this interrupt is triggered, and then tell
+ * lguest_bind_dma() that is the interrupt to send us when input comes in. */
 
+/*D:360 From now on the console driver follows standard Guest driver form:
+ * register_lguest_driver() registers the device type and probe function, and
+ * the probe function sets up the device.
+ *
+ * The standard "struct lguest_driver": */
 static struct lguest_driver lguestcons_drv = {
 	.name = "lguestcons",
 	.owner = THIS_MODULE,
@@ -95,6 +168,7 @@ static struct lguest_driver lguestcons_drv = {
 	.probe = lguestcons_probe,
 };
 
+/* The standard init function */
 static int __init hvc_lguest_init(void)
 {
 	return register_lguest_driver(&lguestcons_drv);
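
For reference, the early registration that the D:320 comment describes reduces
to a call like this (a sketch; example_cons_init is hypothetical, but
hvc_instantiate() is the hvc_console entry point the comment names):

static int __init example_cons_init(void)
{
	if (strcmp(paravirt_ops.name, "lguest") != 0)
		return 0;
	/* console number 0, priority 0, ops supplying put_chars() */
	return hvc_instantiate(0, 0, &lguest_cons);
}
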
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 1724c41d2414..98b6b4fb4257 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -8,7 +8,7 @@ menuconfig EDAC
 	bool "EDAC - error detection and reporting (EXPERIMENTAL)"
 	depends on HAS_IOMEM
 	depends on EXPERIMENTAL
-	depends on X86 || MIPS || PPC
+	depends on X86 || PPC
 	help
 	  EDAC is designed to report errors in the core system.
 	  These are low-level errors that are reported in the CPU or
@@ -126,7 +126,7 @@ config EDAC_I5000
 config EDAC_PASEMI
 	tristate "PA Semi PWRficient"
 	depends on EDAC_MM_EDAC && PCI
-	depends on PPC
+	depends on PPC_PASEMI
 	help
 	  Support for error detection and correction on PA Semi
 	  PWRficient.
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 4471be362599..063a1bffe38b 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -214,6 +214,13 @@ void edac_mc_free(struct mem_ctl_info *mci)
 }
 EXPORT_SYMBOL_GPL(edac_mc_free);
 
+
+/*
+ * find_mci_by_dev
+ *
+ *	scan list of controllers looking for the one that manages
+ *	the 'dev' device
+ */
 static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 {
 	struct mem_ctl_info *mci;
@@ -268,12 +275,6 @@ static void edac_mc_workq_function(struct work_struct *work_req)
 	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
 		mci->edac_check(mci);
 
-	/*
-	 * FIXME: temp place holder for PCI checks,
-	 * goes away when we break out PCI
-	 */
-	edac_pci_do_parity_check();
-
 	mutex_unlock(&mem_ctls_mutex);
 
 	/* Reschedule */
@@ -314,36 +315,55 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 {
 	int status;
 
-	/* if not running POLL, leave now */
-	if (mci->op_state == OP_RUNNING_POLL) {
-		status = cancel_delayed_work(&mci->work);
-		if (status == 0) {
-			debugf0("%s() not canceled, flush the queue\n",
-				__func__);
+	status = cancel_delayed_work(&mci->work);
+	if (status == 0) {
+		debugf0("%s() not canceled, flush the queue\n",
+			__func__);
 
-			/* workq instance might be running, wait for it */
-			flush_workqueue(edac_workqueue);
-		}
+		/* workq instance might be running, wait for it */
+		flush_workqueue(edac_workqueue);
 	}
 }
 
 /*
- * edac_reset_delay_period
+ * edac_mc_reset_delay_period(unsigned long value)
+ *
+ *	user space has updated our poll period value, need to
+ *	reset our workq delays
 */
-static void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value)
+void edac_mc_reset_delay_period(int value)
 {
-	/* cancel the current workq request */
-	edac_mc_workq_teardown(mci);
+	struct mem_ctl_info *mci;
+	struct list_head *item;
 
-	/* lock the list of devices for the new setup */
 	mutex_lock(&mem_ctls_mutex);
 
-	/* restart the workq request, with new delay value */
-	edac_mc_workq_setup(mci, value);
+	/* scan the list and turn off all workq timers, doing so under lock
+	 */
+	list_for_each(item, &mc_devices) {
+		mci = list_entry(item, struct mem_ctl_info, link);
+
+		if (mci->op_state == OP_RUNNING_POLL)
+			cancel_delayed_work(&mci->work);
+	}
+
+	mutex_unlock(&mem_ctls_mutex);
+
+
+	/* re-walk the list, and reset the poll delay */
+	mutex_lock(&mem_ctls_mutex);
+
+	list_for_each(item, &mc_devices) {
+		mci = list_entry(item, struct mem_ctl_info, link);
+
+		edac_mc_workq_setup(mci, (unsigned long) value);
+	}
 
 	mutex_unlock(&mem_ctls_mutex);
 }
 
+
+
 /* Return 0 on success, 1 on failure.
  * Before calling this function, caller must
  * assign a unique value to mci->mc_idx.
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index cd090b0677a7..4a0576bd06fc 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -122,6 +122,23 @@ static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
 	return count;
 }
 
+/*
+ * mc poll_msec time value
+ */
+static ssize_t poll_msec_int_store(void *ptr, const char *buffer, size_t count)
+{
+	int *value = (int *)ptr;
+
+	if (isdigit(*buffer)) {
+		*value = simple_strtoul(buffer, NULL, 0);
+
+		/* notify edac_mc engine to reset the poll period */
+		edac_mc_reset_delay_period(*value);
+	}
+
+	return count;
+}
+
 
 /* EDAC sysfs CSROW data structures and methods
 */
@@ -704,7 +721,7 @@ MEMCTRL_ATTR(edac_mc_log_ce,
 	S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
 
 MEMCTRL_ATTR(edac_mc_poll_msec,
-	S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
+	S_IRUGO | S_IWUSR, memctrl_int_show, poll_msec_int_store);
 
 /* Base Attributes of the memory ECC object */
 static struct memctrl_dev_attribute *memctrl_attr[] = {
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index a2134dfc3cc6..cbc419c8ebc1 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -52,6 +52,8 @@ extern void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
 extern void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev);
 extern void edac_device_reset_delay_period(struct edac_device_ctl_info
 					   *edac_dev, unsigned long value);
+extern void edac_mc_reset_delay_period(int value);
+
 extern void *edac_align_ptr(void *ptr, unsigned size);
 
 /*
@@ -64,6 +66,10 @@ extern int edac_sysfs_pci_setup(void);
 extern void edac_sysfs_pci_teardown(void);
 extern int edac_pci_get_check_errors(void);
 extern int edac_pci_get_poll_msec(void);
+extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
+extern void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg);
+extern void edac_pci_handle_npe(struct edac_pci_ctl_info *pci,
+				const char *msg);
 #else /* CONFIG_PCI */
 /* pre-process these away */
 #define edac_pci_do_parity_check()
@@ -72,6 +78,8 @@ extern int edac_pci_get_poll_msec(void);
 #define edac_sysfs_pci_teardown()
 #define edac_pci_get_check_errors()
 #define edac_pci_get_poll_msec()
+#define edac_pci_handle_pe()
+#define edac_pci_handle_npe()
 #endif /* CONFIG_PCI */
 
 #endif /* __EDAC_MODULE_H__ */
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index d9cd5e048cee..5dee9f50414b 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -31,20 +31,12 @@
 static DEFINE_MUTEX(edac_pci_ctls_mutex);
 static struct list_head edac_pci_list = LIST_HEAD_INIT(edac_pci_list);
 
-static inline void edac_lock_pci_list(void)
-{
-	mutex_lock(&edac_pci_ctls_mutex);
-}
-
-static inline void edac_unlock_pci_list(void)
-{
-	mutex_unlock(&edac_pci_ctls_mutex);
-}
-
 /*
- * The alloc() and free() functions for the 'edac_pci' control info
- * structure. The chip driver will allocate one of these for each
- * edac_pci it is going to control/register with the EDAC CORE.
+ * edac_pci_alloc_ctl_info
+ *
+ *	The alloc() function for the 'edac_pci' control info
+ *	structure. The chip driver will allocate one of these for each
+ *	edac_pci it is going to control/register with the EDAC CORE.
 */
 struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
 	const char *edac_pci_name)
@@ -53,47 +45,59 @@ struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
 	void *pvt;
 	unsigned int size;
 
+	debugf1("%s()\n", __func__);
+
 	pci = (struct edac_pci_ctl_info *)0;
 	pvt = edac_align_ptr(&pci[1], sz_pvt);
 	size = ((unsigned long)pvt) + sz_pvt;
 
-	if ((pci = kzalloc(size, GFP_KERNEL)) == NULL)
+	/* Alloc the needed control struct memory */
+	pci = kzalloc(size, GFP_KERNEL);
+	if (pci == NULL)
 		return NULL;
 
+	/* Now much private space */
 	pvt = sz_pvt ? ((char *)pci) + ((unsigned long)pvt) : NULL;
 
 	pci->pvt_info = pvt;
-
 	pci->op_state = OP_ALLOC;
 
 	snprintf(pci->name, strlen(edac_pci_name) + 1, "%s", edac_pci_name);
 
 	return pci;
 }
-
 EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info);
 
 /*
  * edac_pci_free_ctl_info()
- *	frees the memory allocated by edac_pci_alloc_ctl_info() function
+ *
+ *	Last action on the pci control structure.
+ *
+ *	call the remove sysfs informaton, which will unregister
+ *	this control struct's kobj. When that kobj's ref count
+ *	goes to zero, its release function will be call and then
+ *	kfree() the memory.
 */
 void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci)
 {
-	kfree(pci);
-}
+	debugf1("%s()\n", __func__);
 
+	edac_pci_remove_sysfs(pci);
+}
 EXPORT_SYMBOL_GPL(edac_pci_free_ctl_info);
 
 /*
 * find_edac_pci_by_dev()
 * 	scans the edac_pci list for a specific 'struct device *'
+ *
+ *	return NULL if not found, or return control struct pointer
 */
 static struct edac_pci_ctl_info *find_edac_pci_by_dev(struct device *dev)
 {
 	struct edac_pci_ctl_info *pci;
 	struct list_head *item;
 
-	debugf3("%s()\n", __func__);
+	debugf1("%s()\n", __func__);
 
 	list_for_each(item, &edac_pci_list) {
 		pci = list_entry(item, struct edac_pci_ctl_info, link);
@@ -118,10 +122,13 @@ static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci)
 	struct list_head *item, *insert_before;
 	struct edac_pci_ctl_info *rover;
 
+	debugf1("%s()\n", __func__);
+
 	insert_before = &edac_pci_list;
 
 	/* Determine if already on the list */
-	if (unlikely((rover = find_edac_pci_by_dev(pci->dev)) != NULL))
+	rover = find_edac_pci_by_dev(pci->dev);
+	if (unlikely(rover != NULL))
 		goto fail0;
 
 	/* Insert in ascending order by 'pci_idx', so find position */
@@ -157,6 +164,8 @@ fail1:
157 164
158/* 165/*
159 * complete_edac_pci_list_del 166 * complete_edac_pci_list_del
167 *
168 * RCU completion callback to indicate item is deleted
160 */ 169 */
161static void complete_edac_pci_list_del(struct rcu_head *head) 170static void complete_edac_pci_list_del(struct rcu_head *head)
162{ 171{
@@ -169,6 +178,8 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
169 178
170/* 179/*
171 * del_edac_pci_from_global_list 180 * del_edac_pci_from_global_list
181 *
182 * remove the PCI control struct from the global list
172 */ 183 */
173static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci) 184static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci)
174{ 185{
@@ -207,35 +218,52 @@ struct edac_pci_ctl_info *edac_pci_find(int idx)
207 218
208 return NULL; 219 return NULL;
209} 220}
210
211EXPORT_SYMBOL_GPL(edac_pci_find); 221EXPORT_SYMBOL_GPL(edac_pci_find);
212 222
213/* 223/*
214 * edac_pci_workq_function() 224 * edac_pci_workq_function()
215 * performs the operation scheduled by a workq request 225 *
226 * periodic function that performs the operation
227 * scheduled by a workq request, for a given PCI control struct
216 */ 228 */
217static void edac_pci_workq_function(struct work_struct *work_req) 229static void edac_pci_workq_function(struct work_struct *work_req)
218{ 230{
219 struct delayed_work *d_work = (struct delayed_work *)work_req; 231 struct delayed_work *d_work = (struct delayed_work *)work_req;
220 struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work); 232 struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work);
233 int msec;
234 unsigned long delay;
221 235
222 edac_lock_pci_list(); 236 debugf3("%s() checking\n", __func__);
223 237
224 if ((pci->op_state == OP_RUNNING_POLL) && 238 mutex_lock(&edac_pci_ctls_mutex);
225 (pci->edac_check != NULL) && (edac_pci_get_check_errors()))
226 pci->edac_check(pci);
227 239
228 edac_unlock_pci_list(); 240 if (pci->op_state == OP_RUNNING_POLL) {
241 /* we might be in POLL mode, but there may NOT be a poll func
242 */
243 if ((pci->edac_check != NULL) && edac_pci_get_check_errors())
244 pci->edac_check(pci);
245
246 /* if we are on a one second period, then use round */
247 msec = edac_pci_get_poll_msec();
248 if (msec == 1000)
249 delay = round_jiffies(msecs_to_jiffies(msec));
250 else
251 delay = msecs_to_jiffies(msec);
252
253 /* Reschedule only if we are in POLL mode */
254 queue_delayed_work(edac_workqueue, &pci->work, delay);
255 }
229 256
230 /* Reschedule */ 257 mutex_unlock(&edac_pci_ctls_mutex);
231 queue_delayed_work(edac_workqueue, &pci->work,
232 msecs_to_jiffies(edac_pci_get_poll_msec()));
233} 258}
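The round_jiffies() call matters only for the common one-second case: it aligns the expiry to a whole second so that the many drivers polling at 1000ms all fire on the same tick and the CPU wakes once rather than many times. A hedged sketch of the same delay selection in isolation:

	#include <linux/jiffies.h>
	#include <linux/timer.h>

	/* Pick the requeue delay for a periodic poller (sketch). */
	static unsigned long poll_delay(int msec)
	{
		/* Batch the common 1-second pollers onto the same tick. */
		if (msec == 1000)
			return round_jiffies(msecs_to_jiffies(msec));

		return msecs_to_jiffies(msec);
	}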
234 259
235/* 260/*
236 * edac_pci_workq_setup() 261 * edac_pci_workq_setup()
237 * initialize a workq item for this edac_pci instance 262 * initialize a workq item for this edac_pci instance
238 * passing in the new delay period in msec 263 * passing in the new delay period in msec
264 *
265 * locking model:
266 * called when 'edac_pci_ctls_mutex' is locked
239 */ 267 */
240static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci, 268static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci,
241 unsigned int msec) 269 unsigned int msec)
@@ -255,6 +283,8 @@ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci)
255{ 283{
256 int status; 284 int status;
257 285
286 debugf0("%s()\n", __func__);
287
258 status = cancel_delayed_work(&pci->work); 288 status = cancel_delayed_work(&pci->work);
259 if (status == 0) 289 if (status == 0)
260 flush_workqueue(edac_workqueue); 290 flush_workqueue(edac_workqueue);
@@ -262,19 +292,25 @@ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci)
262 292
263/* 293/*
264 * edac_pci_reset_delay_period 294 * edac_pci_reset_delay_period
295 *
296 * called with a new period value for the workq period
297 * a) stop current workq timer
298 * b) restart workq timer with new value
265 */ 299 */
266void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci, 300void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
267 unsigned long value) 301 unsigned long value)
268{ 302{
269 edac_lock_pci_list(); 303 debugf0("%s()\n", __func__);
270 304
271 edac_pci_workq_teardown(pci); 305 edac_pci_workq_teardown(pci);
272 306
307 /* need to lock for the setup */
308 mutex_lock(&edac_pci_ctls_mutex);
309
273 edac_pci_workq_setup(pci, value); 310 edac_pci_workq_setup(pci, value);
274 311
275 edac_unlock_pci_list(); 312 mutex_unlock(&edac_pci_ctls_mutex);
276} 313}
277
278EXPORT_SYMBOL_GPL(edac_pci_reset_delay_period); 314EXPORT_SYMBOL_GPL(edac_pci_reset_delay_period);
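Note why the teardown happens before the mutex is taken: the workq function above acquires edac_pci_ctls_mutex itself, and edac_pci_workq_teardown() may flush the workqueue, i.e. wait for that function to finish. Holding the mutex across the flush would therefore deadlock. A minimal sketch of the hazard, with hypothetical names:

	#include <linux/mutex.h>
	#include <linux/workqueue.h>

	static DEFINE_MUTEX(ctls_mutex);	/* hypothetical */
	static struct workqueue_struct *wq;	/* hypothetical */

	static void work_fn(struct work_struct *w)
	{
		mutex_lock(&ctls_mutex);	/* the work takes the mutex */
		/* ... poll ... */
		mutex_unlock(&ctls_mutex);
	}

	static void bad_reset(void)
	{
		mutex_lock(&ctls_mutex);
		/* DEADLOCK: the flush waits for work_fn(), which waits
		 * for the mutex we are holding. Hence the real code
		 * tears down the work item before taking the mutex. */
		flush_workqueue(wq);
		mutex_unlock(&ctls_mutex);
	}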
279 315
280/* 316/*
@@ -294,14 +330,13 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
294 debugf0("%s()\n", __func__); 330 debugf0("%s()\n", __func__);
295 331
296 pci->pci_idx = edac_idx; 332 pci->pci_idx = edac_idx;
333 pci->start_time = jiffies;
297 334
298 edac_lock_pci_list(); 335 mutex_lock(&edac_pci_ctls_mutex);
299 336
300 if (add_edac_pci_to_global_list(pci)) 337 if (add_edac_pci_to_global_list(pci))
301 goto fail0; 338 goto fail0;
302 339
303 pci->start_time = jiffies;
304
305 if (edac_pci_create_sysfs(pci)) { 340 if (edac_pci_create_sysfs(pci)) {
306 edac_pci_printk(pci, KERN_WARNING, 341 edac_pci_printk(pci, KERN_WARNING,
307 "failed to create sysfs pci\n"); 342 "failed to create sysfs pci\n");
@@ -323,16 +358,16 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
323 pci->ctl_name, 358 pci->ctl_name,
324 dev_name(pci), edac_op_state_to_string(pci->op_state)); 359 dev_name(pci), edac_op_state_to_string(pci->op_state));
325 360
326 edac_unlock_pci_list(); 361 mutex_unlock(&edac_pci_ctls_mutex);
327 return 0; 362 return 0;
328 363
364 /* error unwind stack */
329fail1: 365fail1:
330 del_edac_pci_from_global_list(pci); 366 del_edac_pci_from_global_list(pci);
331fail0: 367fail0:
332 edac_unlock_pci_list(); 368 mutex_unlock(&edac_pci_ctls_mutex);
333 return 1; 369 return 1;
334} 370}
335
336EXPORT_SYMBOL_GPL(edac_pci_add_device); 371EXPORT_SYMBOL_GPL(edac_pci_add_device);
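The "error unwind stack" label names a common kernel idiom worth spelling out: acquisitions happen in order, and the failure labels sit in reverse order so that jumping to failN undoes exactly the steps that succeeded. A generic sketch with hypothetical step_a()/step_b() helpers:

	int step_a(void);	/* hypothetical acquisition steps */
	int step_b(void);
	void undo_a(void);

	static int setup(void)
	{
		int err;

		err = step_a();
		if (err)
			goto fail0;	/* nothing acquired yet */

		err = step_b();
		if (err)
			goto fail1;	/* undo step_a only */

		return 0;

		/* error unwind stack */
	fail1:
		undo_a();
	fail0:
		return err;
	}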
337 372
338/* 373/*
@@ -354,22 +389,25 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
354 389
355 debugf0("%s()\n", __func__); 390 debugf0("%s()\n", __func__);
356 391
357 edac_lock_pci_list(); 392 mutex_lock(&edac_pci_ctls_mutex);
358 393
359 if ((pci = find_edac_pci_by_dev(dev)) == NULL) { 394 /* ensure the control struct is on the global list
360 edac_unlock_pci_list(); 395 * if not, then leave
396 */
397 pci = find_edac_pci_by_dev(dev);
398 if (pci == NULL) {
399 mutex_unlock(&edac_pci_ctls_mutex);
361 return NULL; 400 return NULL;
362 } 401 }
363 402
364 pci->op_state = OP_OFFLINE; 403 pci->op_state = OP_OFFLINE;
365 404
366 edac_pci_workq_teardown(pci);
367
368 edac_pci_remove_sysfs(pci);
369
370 del_edac_pci_from_global_list(pci); 405 del_edac_pci_from_global_list(pci);
371 406
372 edac_unlock_pci_list(); 407 mutex_unlock(&edac_pci_ctls_mutex);
408
409 /* stop the workq timer */
410 edac_pci_workq_teardown(pci);
373 411
374 edac_printk(KERN_INFO, EDAC_PCI, 412 edac_printk(KERN_INFO, EDAC_PCI,
375 "Removed device %d for %s %s: DEV %s\n", 413 "Removed device %d for %s %s: DEV %s\n",
@@ -377,14 +415,20 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
377 415
378 return pci; 416 return pci;
379} 417}
380
381EXPORT_SYMBOL_GPL(edac_pci_del_device); 418EXPORT_SYMBOL_GPL(edac_pci_del_device);
382 419
420/*
421 * edac_pci_generic_check
422 *
423 * a Generic parity check API
424 */
383void edac_pci_generic_check(struct edac_pci_ctl_info *pci) 425void edac_pci_generic_check(struct edac_pci_ctl_info *pci)
384{ 426{
427 debugf4("%s()\n", __func__);
385 edac_pci_do_parity_check(); 428 edac_pci_do_parity_check();
386} 429}
387 430
431/* free running instance index counter */
388static int edac_pci_idx; 432static int edac_pci_idx;
389#define EDAC_PCI_GENCTL_NAME "EDAC PCI controller" 433#define EDAC_PCI_GENCTL_NAME "EDAC PCI controller"
390 434
@@ -392,6 +436,17 @@ struct edac_pci_gen_data {
392 int edac_idx; 436 int edac_idx;
393}; 437};
394 438
439/*
440 * edac_pci_create_generic_ctl
441 *
442 * A generic constructor for a PCI parity polling device
443 * Some systems have more than one domain of PCI busses.
444 * For systems with one domain, this API will
445 * provide a generic poller.
446 *
447 * This routine calls the edac_pci_alloc_ctl_info() for
448 * the generic device, with default values
449 */
395struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev, 450struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
396 const char *mod_name) 451 const char *mod_name)
397{ 452{
@@ -421,13 +476,18 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
421 476
422 return pci; 477 return pci;
423} 478}
424
425EXPORT_SYMBOL_GPL(edac_pci_create_generic_ctl); 479EXPORT_SYMBOL_GPL(edac_pci_create_generic_ctl);
426 480
481/*
482 * edac_pci_release_generic_ctl
483 *
484 * The release function of a generic EDAC PCI polling device
485 */
427void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci) 486void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci)
428{ 487{
488 debugf0("%s() pci mod=%s\n", __func__, pci->mod_name);
489
429 edac_pci_del_device(pci->dev); 490 edac_pci_del_device(pci->dev);
430 edac_pci_free_ctl_info(pci); 491 edac_pci_free_ctl_info(pci);
431} 492}
432
433EXPORT_SYMBOL_GPL(edac_pci_release_generic_ctl); 493EXPORT_SYMBOL_GPL(edac_pci_release_generic_ctl);
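A chip driver on a single-domain system would typically pair the two exported calls above in its probe and remove paths. A hedged sketch; the hook names and module string are hypothetical, only the two EDAC calls are the real API:

	#include <linux/pci.h>
	/* assumes the EDAC core headers, e.g. "edac_core.h" */

	static struct edac_pci_ctl_info *pci_ctl;

	static int my_chip_setup_pci(struct pci_dev *pdev)
	{
		pci_ctl = edac_pci_create_generic_ctl(&pdev->dev, "my_edac");
		if (!pci_ctl)
			return -ENODEV;	/* no generic poller available */
		return 0;
	}

	static void my_chip_teardown_pci(void)
	{
		if (pci_ctl)
			edac_pci_release_generic_ctl(pci_ctl);
	}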
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index fac94cae2c3d..69f5dddabddf 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -13,22 +13,25 @@
13#include "edac_core.h" 13#include "edac_core.h"
14#include "edac_module.h" 14#include "edac_module.h"
15 15
16/* Turn off this whole feature if PCI is not configured */
16#ifdef CONFIG_PCI 17#ifdef CONFIG_PCI
17 18
18#define EDAC_PCI_SYMLINK "device" 19#define EDAC_PCI_SYMLINK "device"
19 20
20static int check_pci_errors; /* default YES check PCI parity */ 21/* data variables exported via sysfs */
21static int edac_pci_panic_on_pe; /* default no panic on PCI Parity */ 22static int check_pci_errors; /* default NO check PCI parity */
22static int edac_pci_log_pe = 1; /* log PCI parity errors */ 23static int edac_pci_panic_on_pe; /* default NO panic on PCI Parity */
24static int edac_pci_log_pe = 1; /* log PCI parity errors */
23static int edac_pci_log_npe = 1; /* log PCI non-parity errors */ 25 static int edac_pci_log_npe = 1; /* log PCI non-parity errors */
26static int edac_pci_poll_msec = 1000; /* one second workq period */
27
24static atomic_t pci_parity_count = ATOMIC_INIT(0); 28static atomic_t pci_parity_count = ATOMIC_INIT(0);
25static atomic_t pci_nonparity_count = ATOMIC_INIT(0); 29static atomic_t pci_nonparity_count = ATOMIC_INIT(0);
26static int edac_pci_poll_msec = 1000;
27 30
28static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */ 31static struct kobject edac_pci_top_main_kobj;
29static struct completion edac_pci_kobj_complete;
30static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0); 32static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0);
31 33
34/* getter functions for the data variables */
32int edac_pci_get_check_errors(void) 35int edac_pci_get_check_errors(void)
33{ 36{
34 return check_pci_errors; 37 return check_pci_errors;
@@ -74,17 +77,22 @@ static void edac_pci_instance_release(struct kobject *kobj)
74{ 77{
75 struct edac_pci_ctl_info *pci; 78 struct edac_pci_ctl_info *pci;
76 79
77 debugf1("%s()\n", __func__); 80 debugf0("%s()\n", __func__);
78 81
82 /* Form pointer to containing struct, the pci control struct */
79 pci = to_instance(kobj); 83 pci = to_instance(kobj);
80 complete(&pci->kobj_complete); 84
85 /* decrement reference count on top main kobj */
86 kobject_put(&edac_pci_top_main_kobj);
87
88 kfree(pci); /* Free the control struct */
81} 89}
82 90
83/* instance specific attribute structure */ 91/* instance specific attribute structure */
84struct instance_attribute { 92struct instance_attribute {
85 struct attribute attr; 93 struct attribute attr;
86 ssize_t(*show) (struct edac_pci_ctl_info *, char *); 94 ssize_t(*show) (struct edac_pci_ctl_info *, char *);
87 ssize_t(*store) (struct edac_pci_ctl_info *, const char *, size_t); 95 ssize_t(*store) (struct edac_pci_ctl_info *, const char *, size_t);
88}; 96};
89 97
90/* Function to 'show' fields from the edac_pci 'instance' structure */ 98/* Function to 'show' fields from the edac_pci 'instance' structure */
@@ -112,6 +120,7 @@ static ssize_t edac_pci_instance_store(struct kobject *kobj,
112 return -EIO; 120 return -EIO;
113} 121}
114 122
123/* fs_ops table */
115static struct sysfs_ops pci_instance_ops = { 124static struct sysfs_ops pci_instance_ops = {
116 .show = edac_pci_instance_show, 125 .show = edac_pci_instance_show,
117 .store = edac_pci_instance_store 126 .store = edac_pci_instance_store
@@ -134,48 +143,82 @@ static struct instance_attribute *pci_instance_attr[] = {
134 NULL 143 NULL
135}; 144};
136 145
137/* the ktype for pci instance */ 146/* the ktype for a pci instance */
138static struct kobj_type ktype_pci_instance = { 147static struct kobj_type ktype_pci_instance = {
139 .release = edac_pci_instance_release, 148 .release = edac_pci_instance_release,
140 .sysfs_ops = &pci_instance_ops, 149 .sysfs_ops = &pci_instance_ops,
141 .default_attrs = (struct attribute **)pci_instance_attr, 150 .default_attrs = (struct attribute **)pci_instance_attr,
142}; 151};
143 152
153/*
154 * edac_pci_create_instance_kobj
155 *
156 * construct one EDAC PCI instance's kobject for use
157 */
144static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx) 158static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
145{ 159{
160 struct kobject *main_kobj;
146 int err; 161 int err;
147 162
148 pci->kobj.parent = &edac_pci_kobj; 163 debugf0("%s()\n", __func__);
164
165 /* Set the parent and the instance's ktype */
166 pci->kobj.parent = &edac_pci_top_main_kobj;
149 pci->kobj.ktype = &ktype_pci_instance; 167 pci->kobj.ktype = &ktype_pci_instance;
150 168
151 err = kobject_set_name(&pci->kobj, "pci%d", idx); 169 err = kobject_set_name(&pci->kobj, "pci%d", idx);
152 if (err) 170 if (err)
153 return err; 171 return err;
154 172
173 /* First bump the ref count on the top main kobj, which will
174 * track the number of PCI instances we have, and thus nest
175 * properly, keeping the module loaded
176 */
177 main_kobj = kobject_get(&edac_pci_top_main_kobj);
178 if (!main_kobj) {
179 err = -ENODEV;
180 goto error_out;
181 }
182
183 /* And now register this new kobject under the main kobj */
155 err = kobject_register(&pci->kobj); 184 err = kobject_register(&pci->kobj);
156 if (err != 0) { 185 if (err != 0) {
157 debugf2("%s() failed to register instance pci%d\n", 186 debugf2("%s() failed to register instance pci%d\n",
158 __func__, idx); 187 __func__, idx);
159 return err; 188 kobject_put(&edac_pci_top_main_kobj);
189 goto error_out;
160 } 190 }
161 191
162 debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx); 192 debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx);
163 193
164 return 0; 194 return 0;
195
196 /* Error unwind stack */
197error_out:
198 return err;
165} 199}
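The kobject_get() on the main kobject before kobject_register() is what makes the nesting work: the parent cannot be released until every instance has dropped its reference in edac_pci_instance_release(). A stripped-down sketch of the pattern (2.6.22-era kobject API, hypothetical names):

	#include <linux/kobject.h>

	static int register_child(struct kobject *child,
				  struct kobject *parent)
	{
		int err;

		/* Pin the parent first: its release function can now
		 * only run after every child has been released. */
		if (!kobject_get(parent))
			return -ENODEV;

		child->parent = parent;
		err = kobject_register(child);
		if (err)
			kobject_put(parent);	/* undo the pin */

		return err;
	}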
166 200
167static void 201/*
168edac_pci_delete_instance_kobj(struct edac_pci_ctl_info *pci, int idx) 202 * edac_pci_unregister_sysfs_instance_kobj
203 *
204 * unregister the kobj for the EDAC PCI instance
205 */
206void edac_pci_unregister_sysfs_instance_kobj(struct edac_pci_ctl_info *pci)
169{ 207{
170 init_completion(&pci->kobj_complete); 208 debugf0("%s()\n", __func__);
209
210 /* Unregister the instance kobject and allow its release
211 * function to release the main reference count and then
212 * kfree the memory
213 */
171 kobject_unregister(&pci->kobj); 214 kobject_unregister(&pci->kobj);
172 wait_for_completion(&pci->kobj_complete);
173} 215}
174 216
175/***************************** EDAC PCI sysfs root **********************/ 217/***************************** EDAC PCI sysfs root **********************/
176#define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj) 218#define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj)
177#define to_edacpci_attr(a) container_of(a, struct edac_pci_attr, attr) 219#define to_edacpci_attr(a) container_of(a, struct edac_pci_attr, attr)
178 220
221/* simple show/store functions for attributes */
179static ssize_t edac_pci_int_show(void *ptr, char *buffer) 222static ssize_t edac_pci_int_show(void *ptr, char *buffer)
180{ 223{
181 int *value = ptr; 224 int *value = ptr;
@@ -267,118 +310,189 @@ static struct edac_pci_dev_attribute *edac_pci_attr[] = {
267 NULL, 310 NULL,
268}; 311};
269 312
270/* No memory to release */ 313/*
271static void edac_pci_release(struct kobject *kobj) 314 * edac_pci_release_main_kobj
315 *
316 * This release function is called when the reference count to the
317 * passed kobj goes to zero.
318 *
319 * This kobj is the 'main' kobject that EDAC PCI instances
320 * link to, and which thus provides proper nesting counts
321 */
322static void edac_pci_release_main_kobj(struct kobject *kobj)
272{ 323{
273 struct edac_pci_ctl_info *pci;
274 324
275 pci = to_edacpci(kobj); 325 debugf0("%s() here to module_put(THIS_MODULE)\n", __func__);
276 326
277 debugf1("%s()\n", __func__); 327 /* last reference to top EDAC PCI kobject has been removed,
278 complete(&pci->kobj_complete); 328 * NOW release our ref count on the core module
329 */
330 module_put(THIS_MODULE);
279} 331}
280 332
281static struct kobj_type ktype_edac_pci = { 333/* ktype struct for the EDAC PCI main kobj */
282 .release = edac_pci_release, 334static struct kobj_type ktype_edac_pci_main_kobj = {
335 .release = edac_pci_release_main_kobj,
283 .sysfs_ops = &edac_pci_sysfs_ops, 336 .sysfs_ops = &edac_pci_sysfs_ops,
284 .default_attrs = (struct attribute **)edac_pci_attr, 337 .default_attrs = (struct attribute **)edac_pci_attr,
285}; 338};
286 339
287/** 340/**
288 * edac_sysfs_pci_setup() 341 * edac_pci_main_kobj_setup()
289 * 342 *
290 * setup the sysfs for EDAC PCI attributes 343 * setup the sysfs for EDAC PCI attributes
291 * assumes edac_class has already been initialized 344 * assumes edac_class has already been initialized
292 */ 345 */
293int edac_pci_register_main_kobj(void) 346int edac_pci_main_kobj_setup(void)
294{ 347{
295 int err; 348 int err;
296 struct sysdev_class *edac_class; 349 struct sysdev_class *edac_class;
297 350
298 debugf1("%s()\n", __func__); 351 debugf0("%s()\n", __func__);
352
353 /* check and count if we have already created the main kobject */
354 if (atomic_inc_return(&edac_pci_sysfs_refcount) != 1)
355 return 0;
299 356
357 /* First time, so create the main kobject and its
358 * controls and attributes
359 */
300 edac_class = edac_get_edac_class(); 360 edac_class = edac_get_edac_class();
301 if (edac_class == NULL) { 361 if (edac_class == NULL) {
302 debugf1("%s() no edac_class\n", __func__); 362 debugf1("%s() no edac_class\n", __func__);
303 return -ENODEV; 363 err = -ENODEV;
364 goto decrement_count_fail;
304 } 365 }
305 366
306 edac_pci_kobj.ktype = &ktype_edac_pci; 367 /* Need the kobject hook ups, and name setting */
368 edac_pci_top_main_kobj.ktype = &ktype_edac_pci_main_kobj;
369 edac_pci_top_main_kobj.parent = &edac_class->kset.kobj;
307 370
308 edac_pci_kobj.parent = &edac_class->kset.kobj; 371 err = kobject_set_name(&edac_pci_top_main_kobj, "pci");
309
310 err = kobject_set_name(&edac_pci_kobj, "pci");
311 if (err) 372 if (err)
312 return err; 373 goto decrement_count_fail;
374
375 /* Bump the reference count on this module to ensure the
376 * module isn't unloaded until we deconstruct the top
377 * level main kobj for EDAC PCI
378 */
379 if (!try_module_get(THIS_MODULE)) {
380 debugf1("%s() try_module_get() failed\n", __func__);
381 err = -ENODEV;
382 goto decrement_count_fail;
383 }
313 384
314 /* Instantiate the pci object */ 385 /* Instantiate the pci object */
315 /* FIXME: maybe new sysdev_create_subdir() */ 386 /* FIXME: maybe new sysdev_create_subdir() */
316 err = kobject_register(&edac_pci_kobj); 387 err = kobject_register(&edac_pci_top_main_kobj);
317
318 if (err) { 388 if (err) {
319 debugf1("Failed to register '.../edac/pci'\n"); 389 debugf1("Failed to register '.../edac/pci'\n");
320 return err; 390 goto kobject_register_fail;
321 } 391 }
322 392
393 /* At this point, to 'release' the top level kobject
394 * for EDAC PCI, edac_pci_main_kobj_teardown()
395 * must be used so that resources are cleaned up properly
396 */
323 debugf1("Registered '.../edac/pci' kobject\n"); 397 debugf1("Registered '.../edac/pci' kobject\n");
324 398
325 return 0; 399 return 0;
400
401 /* Error unwind stack */
402kobject_register_fail:
403 module_put(THIS_MODULE);
404
405decrement_count_fail:
406 /* if we are on this error exit path, nothing to tear down */
407 atomic_dec(&edac_pci_sysfs_refcount);
408
409 return err;
326} 410}
327 411
328/* 412/*
329 * edac_pci_unregister_main_kobj() 413 * edac_pci_main_kobj_teardown()
330 * 414 *
331 * perform the sysfs teardown for the PCI attributes 415 * if no longer linked (needed) remove the top level EDAC PCI
416 * kobject with its controls and attributes
332 */ 417 */
333void edac_pci_unregister_main_kobj(void) 418static void edac_pci_main_kobj_teardown(void)
334{ 419{
335 debugf0("%s()\n", __func__); 420 debugf0("%s()\n", __func__);
336 init_completion(&edac_pci_kobj_complete); 421
337 kobject_unregister(&edac_pci_kobj); 422 /* Decrement the count and only if no more controller instances
338 wait_for_completion(&edac_pci_kobj_complete); 423 * are connected perform the unregisteration of the top level
424 * main kobj
425 */
426 if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
427 debugf0("%s() called kobject_unregister on main kobj\n",
428 __func__);
429 kobject_unregister(&edac_pci_top_main_kobj);
430 }
339} 431}
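The atomic_inc_return()/atomic_dec_return() pair implements the classic "first caller creates, last caller destroys" pattern for a shared object. A generic sketch, assuming hypothetical create()/destroy() one-time operations:

	#include <asm/atomic.h>

	int create(void);	/* hypothetical one-time constructor */
	void destroy(void);	/* hypothetical one-time destructor */

	static atomic_t refcount = ATOMIC_INIT(0);

	static int get_shared(void)
	{
		/* Only the caller that moves the count 0 -> 1 builds it. */
		if (atomic_inc_return(&refcount) != 1)
			return 0;

		if (create() == 0)
			return 0;

		atomic_dec(&refcount);	/* creation failed: undo */
		return -ENODEV;
	}

	static void put_shared(void)
	{
		/* Only the caller that moves the count back to 0 tears
		 * down the shared object. */
		if (atomic_dec_return(&refcount) == 0)
			destroy();
	}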
340 432
433/*
434 *
435 * edac_pci_create_sysfs
436 *
437 * Create the controls/attributes for the specified EDAC PCI device
438 */
341int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci) 439int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci)
342{ 440{
343 int err; 441 int err;
344 struct kobject *edac_kobj = &pci->kobj; 442 struct kobject *edac_kobj = &pci->kobj;
345 443
346 if (atomic_inc_return(&edac_pci_sysfs_refcount) == 1) { 444 debugf0("%s() idx=%d\n", __func__, pci->pci_idx);
347 err = edac_pci_register_main_kobj();
348 if (err) {
349 atomic_dec(&edac_pci_sysfs_refcount);
350 return err;
351 }
352 }
353 445
354 err = edac_pci_create_instance_kobj(pci, pci->pci_idx); 446 /* create the top main EDAC PCI kobject, IF needed */
355 if (err) { 447 err = edac_pci_main_kobj_setup();
356 if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) 448 if (err)
357 edac_pci_unregister_main_kobj(); 449 return err;
358 }
359 450
360 debugf0("%s() idx=%d\n", __func__, pci->pci_idx); 451 /* Create this instance's kobject under the MAIN kobject */
452 err = edac_pci_create_instance_kobj(pci, pci->pci_idx);
453 if (err)
454 goto unregister_cleanup;
361 455
362 err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK); 456 err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK);
363 if (err) { 457 if (err) {
364 debugf0("%s() sysfs_create_link() returned err= %d\n", 458 debugf0("%s() sysfs_create_link() returned err= %d\n",
365 __func__, err); 459 __func__, err);
366 return err; 460 goto symlink_fail;
367 } 461 }
368 462
369 return 0; 463 return 0;
464
465 /* Error unwind stack */
466symlink_fail:
467 edac_pci_unregister_sysfs_instance_kobj(pci);
468
469unregister_cleanup:
470 edac_pci_main_kobj_teardown();
471
472 return err;
370} 473}
371 474
475/*
476 * edac_pci_remove_sysfs
477 *
478 * remove the controls and attributes for this EDAC PCI device
479 */
372void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci) 480void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci)
373{ 481{
374 debugf0("%s()\n", __func__); 482 debugf0("%s() index=%d\n", __func__, pci->pci_idx);
375
376 edac_pci_delete_instance_kobj(pci, pci->pci_idx);
377 483
484 /* Remove the symlink */
378 sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK); 485 sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK);
379 486
380 if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) 487 /* remove this PCI instance's sysfs entries */
381 edac_pci_unregister_main_kobj(); 488 edac_pci_unregister_sysfs_instance_kobj(pci);
489
490 /* Call the main unregister function, which will determine
491 * if this 'pci' is the last instance.
492 * If it is, the main kobject will be unregistered as a result
493 */
494 debugf0("%s() calling edac_pci_main_kobj_teardown()\n", __func__);
495 edac_pci_main_kobj_teardown();
382} 496}
383 497
384/************************ PCI error handling *************************/ 498/************************ PCI error handling *************************/
@@ -414,13 +528,14 @@ static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
414 return status; 528 return status;
415} 529}
416 530
417typedef void (*pci_parity_check_fn_t) (struct pci_dev * dev);
418 531
419/* Clear any PCI parity errors logged by this device. */ 532/* Clear any PCI parity errors logged by this device. */
420static void edac_pci_dev_parity_clear(struct pci_dev *dev) 533static void edac_pci_dev_parity_clear(struct pci_dev *dev)
421{ 534{
422 u8 header_type; 535 u8 header_type;
423 536
537 debugf0("%s()\n", __func__);
538
424 get_pci_parity_status(dev, 0); 539 get_pci_parity_status(dev, 0);
425 540
426 /* read the device TYPE, looking for bridges */ 541 /* read the device TYPE, looking for bridges */
@@ -433,17 +548,28 @@ static void edac_pci_dev_parity_clear(struct pci_dev *dev)
433/* 548/*
434 * PCI Parity polling 549 * PCI Parity polling
435 * 550 *
551 * Function to retrieve the current parity status
552 * and decode it
553 *
436 */ 554 */
437static void edac_pci_dev_parity_test(struct pci_dev *dev) 555static void edac_pci_dev_parity_test(struct pci_dev *dev)
438{ 556{
557 unsigned long flags;
439 u16 status; 558 u16 status;
440 u8 header_type; 559 u8 header_type;
441 560
442 /* read the STATUS register on this device 561 /* stop any interrupts until we can acquire the status */
443 */ 562 local_irq_save(flags);
563
564 /* read the STATUS register on this device */
444 status = get_pci_parity_status(dev, 0); 565 status = get_pci_parity_status(dev, 0);
445 566
446 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id); 567 /* read the device TYPE, looking for bridges */
568 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
569
570 local_irq_restore(flags);
571
572 debugf4("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
447 573
448 /* check the status reg for errors */ 574 /* check the status reg for errors */
449 if (status) { 575 if (status) {
@@ -471,16 +597,14 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev)
471 } 597 }
472 } 598 }
473 599
474 /* read the device TYPE, looking for bridges */
475 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
476 600
477 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id); 601 debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id);
478 602
479 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { 603 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
480 /* On bridges, need to examine secondary status register */ 604 /* On bridges, need to examine secondary status register */
481 status = get_pci_parity_status(dev, 1); 605 status = get_pci_parity_status(dev, 1);
482 606
483 debugf2("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id); 607 debugf4("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
484 608
485 /* check the secondary status reg for errors */ 609 /* check the secondary status reg for errors */
486 if (status) { 610 if (status) {
@@ -510,9 +634,12 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev)
510 } 634 }
511} 635}
512 636
637/* reduce some complexity in definition of the iterator */
638typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
639
513/* 640/*
514 * pci_dev parity list iterator 641 * pci_dev parity list iterator
515 * Scan the PCI device list for one iteration, looking for SERRORs 642 * Scan the PCI device list for one pass, looking for SERRORs
516 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices 643 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
517 */ 644 */
518static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) 645static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
@@ -535,22 +662,22 @@ static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
535 */ 662 */
536void edac_pci_do_parity_check(void) 663void edac_pci_do_parity_check(void)
537{ 664{
538 unsigned long flags;
539 int before_count; 665 int before_count;
540 666
541 debugf3("%s()\n", __func__); 667 debugf3("%s()\n", __func__);
542 668
669 /* if policy has PCI check off, leave now */
543 if (!check_pci_errors) 670 if (!check_pci_errors)
544 return; 671 return;
545 672
546 before_count = atomic_read(&pci_parity_count); 673 before_count = atomic_read(&pci_parity_count);
547 674
548 /* scan all PCI devices looking for a Parity Error on devices and 675 /* scan all PCI devices looking for a Parity Error on devices and
549 * bridges 676 * bridges.
677 * The iterator calls pci_get_device() which might sleep, thus
678 * we cannot disable interrupts in this scan.
550 */ 679 */
551 local_irq_save(flags);
552 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); 680 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
553 local_irq_restore(flags);
554 681
555 /* Only if operator has selected panic on PCI Error */ 682 /* Only if operator has selected panic on PCI Error */
556 if (edac_pci_get_panic_on_pe()) { 683 if (edac_pci_get_panic_on_pe()) {
@@ -560,6 +687,12 @@ void edac_pci_do_parity_check(void)
560 } 687 }
561} 688}
562 689
690/*
691 * edac_pci_clear_parity_errors
692 *
693 * function to perform an iteration over the PCI devices
694 * and clear their current status
695 */
563void edac_pci_clear_parity_errors(void) 696void edac_pci_clear_parity_errors(void)
564{ 697{
565 /* Clear any PCI bus parity errors that devices initially have logged 698 /* Clear any PCI bus parity errors that devices initially have logged
@@ -567,6 +700,12 @@ void edac_pci_clear_parity_errors(void)
567 */ 700 */
568 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); 701 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
569} 702}
703
704/*
705 * edac_pci_handle_pe
706 *
707 * Called to handle a PARITY ERROR event
708 */
570void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg) 709void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg)
571{ 710{
572 711
@@ -584,9 +723,14 @@ void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg)
584 */ 723 */
585 edac_pci_do_parity_check(); 724 edac_pci_do_parity_check();
586} 725}
587
588EXPORT_SYMBOL_GPL(edac_pci_handle_pe); 726EXPORT_SYMBOL_GPL(edac_pci_handle_pe);
589 727
728
729/*
730 * edac_pci_handle_npe
731 *
732 * Called to handle a NON-PARITY ERROR event
733 */
590void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg) 734void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg)
591{ 735{
592 736
@@ -604,7 +748,6 @@ void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg)
604 */ 748 */
605 edac_pci_do_parity_check(); 749 edac_pci_do_parity_check();
606} 750}
607
608EXPORT_SYMBOL_GPL(edac_pci_handle_npe); 751EXPORT_SYMBOL_GPL(edac_pci_handle_npe);
609 752
610/* 753/*
diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c
index 0ecfdc432f87..e895f9f887ab 100644
--- a/drivers/edac/i3000_edac.c
+++ b/drivers/edac/i3000_edac.c
@@ -275,7 +275,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
275 unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2]; 275 unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2];
276 unsigned char *c0drb = drb, *c1drb = &drb[I3000_RANKS_PER_CHANNEL]; 276 unsigned char *c0drb = drb, *c1drb = &drb[I3000_RANKS_PER_CHANNEL];
277 unsigned long mchbar; 277 unsigned long mchbar;
278 void *window; 278 void __iomem *window;
279 279
280 debugf0("MC: %s()\n", __func__); 280 debugf0("MC: %s()\n", __func__);
281 281
diff --git a/drivers/i2c/chips/ds1682.c b/drivers/i2c/chips/ds1682.c
index 5879f0f25495..9e94542c18a2 100644
--- a/drivers/i2c/chips/ds1682.c
+++ b/drivers/i2c/chips/ds1682.c
@@ -75,7 +75,8 @@ static ssize_t ds1682_show(struct device *dev, struct device_attribute *attr,
75 /* Special case: the 32 bit regs are time values with 1/4s 75 /* Special case: the 32 bit regs are time values with 1/4s
76 * resolution, scale them up to milliseconds */ 76 * resolution, scale them up to milliseconds */
77 if (sattr->nr == 4) 77 if (sattr->nr == 4)
78 return sprintf(buf, "%llu\n", ((u64) le32_to_cpu(val)) * 250); 78 return sprintf(buf, "%llu\n",
79 ((unsigned long long)le32_to_cpu(val)) * 250);
79 80
80 /* Format the output string and return # of bytes */ 81 /* Format the output string and return # of bytes */
81 return sprintf(buf, "%li\n", (long)le32_to_cpu(val)); 82 return sprintf(buf, "%li\n", (long)le32_to_cpu(val));
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index f668d235e6be..bf19ddfa6cda 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -551,8 +551,8 @@ static int setup_mmio_scc (struct pci_dev *dev, const char *name)
551 unsigned long dma_base = pci_resource_start(dev, 1); 551 unsigned long dma_base = pci_resource_start(dev, 1);
552 unsigned long ctl_size = pci_resource_len(dev, 0); 552 unsigned long ctl_size = pci_resource_len(dev, 0);
553 unsigned long dma_size = pci_resource_len(dev, 1); 553 unsigned long dma_size = pci_resource_len(dev, 1);
554 void *ctl_addr; 554 void __iomem *ctl_addr;
555 void *dma_addr; 555 void __iomem *dma_addr;
556 int i; 556 int i;
557 557
558 for (i = 0; i < MAX_HWIFS; i++) { 558 for (i = 0; i < MAX_HWIFS; i++) {
diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c
index 336e5ff4cfcf..cadf0479cce5 100644
--- a/drivers/ieee1394/raw1394.c
+++ b/drivers/ieee1394/raw1394.c
@@ -2677,7 +2677,7 @@ static long raw1394_iso_xmit_recv_packets32(struct file *file, unsigned int cmd,
2677 struct raw1394_iso_packets32 __user *arg) 2677 struct raw1394_iso_packets32 __user *arg)
2678{ 2678{
2679 compat_uptr_t infos32; 2679 compat_uptr_t infos32;
2680 void *infos; 2680 void __user *infos;
2681 long err = -EFAULT; 2681 long err = -EFAULT;
2682 struct raw1394_iso_packets __user *dst = compat_alloc_user_space(sizeof(struct raw1394_iso_packets)); 2682 struct raw1394_iso_packets __user *dst = compat_alloc_user_space(sizeof(struct raw1394_iso_packets));
2683 2683
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile
index 55382c7d799c..e5047471c334 100644
--- a/drivers/lguest/Makefile
+++ b/drivers/lguest/Makefile
@@ -5,3 +5,15 @@ obj-$(CONFIG_LGUEST_GUEST) += lguest.o lguest_asm.o lguest_bus.o
5obj-$(CONFIG_LGUEST) += lg.o 5obj-$(CONFIG_LGUEST) += lg.o
6lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \ 6lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \
7 segments.o io.o lguest_user.o switcher.o 7 segments.o io.o lguest_user.o switcher.o
8
9Preparation Preparation!: PREFIX=P
10Guest: PREFIX=G
11Drivers: PREFIX=D
12Launcher: PREFIX=L
13Host: PREFIX=H
14Switcher: PREFIX=S
15Mastery: PREFIX=M
16Beer:
17 @for f in Preparation Guest Drivers Launcher Host Switcher Mastery; do echo "{==- $$f -==}"; make -s $$f; done; echo "{==-==}"
18Preparation Preparation! Guest Drivers Launcher Host Switcher Mastery:
19 @sh ../../Documentation/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'`
diff --git a/drivers/lguest/README b/drivers/lguest/README
new file mode 100644
index 000000000000..b7db39a64c66
--- /dev/null
+++ b/drivers/lguest/README
@@ -0,0 +1,47 @@
1Welcome, friend reader, to lguest.
2
3Lguest is an adventure, with you, the reader, as Hero. I can't think of many
45000-line projects which offer both such capability and glimpses of future
5potential; it is an exciting time to be delving into the source!
6
7But be warned; this is an arduous journey of several hours or more! And as we
8know, all true Heroes are driven by a Noble Goal. Thus I offer a Beer (or
9equivalent) to anyone I meet who has completed this documentation.
10
11So get comfortable and keep your wits about you (both quick and humorous).
12Along your way to the Noble Goal, you will also gain masterly insight into
13lguest, and hypervisors and x86 virtualization in general.
14
15Our Quest is in seven parts: (best read with C highlighting turned on)
16
17I) Preparation
18 - In which our potential hero is flown quickly over the landscape for a
19 taste of its scope. Suitable for the armchair coders and other such
20 persons of faint constitution.
21
22II) Guest
23 - Where we encounter the first tantalising wisps of code, and come to
24 understand the details of the life of a Guest kernel.
25
26III) Drivers
27 - Whereby the Guest finds its voice and becomes useful, and our
28 understanding of the Guest is completed.
29
30IV) Launcher
31 - Where we trace back to the creation of the Guest, and thus begin our
32 understanding of the Host.
33
34V) Host
35 - Where we master the Host code, through a long and tortuous journey.
36 Indeed, it is here that our hero is tested in the Bit of Despair.
37
38VI) Switcher
39 - Where our understanding of the intertwined nature of Guests and Hosts
40 is completed.
41
42VII) Mastery
43 - Where our fully fledged hero grapples with the Great Question:
44 "What next?"
45
46make Preparation!
47Rusty Russell.
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index ce909ec57499..0a46e8837d9a 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -1,5 +1,8 @@
1/* World's simplest hypervisor, to test paravirt_ops and show 1/*P:400 This contains run_guest() which actually calls into the Host<->Guest
2 * unbelievers that virtualization is the future. Plus, it's fun! */ 2 * Switcher and analyzes the return, such as determining if the Guest wants the
3 * Host to do something. This file also contains useful helper routines, and a
4 * couple of non-obvious setup and teardown pieces which were implemented after
5 * days of debugging pain. :*/
3#include <linux/module.h> 6#include <linux/module.h>
4#include <linux/stringify.h> 7#include <linux/stringify.h>
5#include <linux/stddef.h> 8#include <linux/stddef.h>
@@ -61,11 +64,33 @@ static struct lguest_pages *lguest_pages(unsigned int cpu)
61 (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]); 64 (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
62} 65}
63 66
67/*H:010 We need to set up the Switcher at a high virtual address. Remember the
68 * Switcher is a few hundred bytes of assembler code which actually changes the
69 * CPU to run the Guest, and then changes back to the Host when a trap or
70 * interrupt happens.
71 *
72 * The Switcher code must be at the same virtual address in the Guest as the
73 * Host since it will be running as the switchover occurs.
74 *
75 * Trying to map memory at a particular address is an unusual thing to do, so
76 * it's not a simple one-liner. We also set up the per-cpu parts of the
77 * Switcher here.
78 */
64static __init int map_switcher(void) 79static __init int map_switcher(void)
65{ 80{
66 int i, err; 81 int i, err;
67 struct page **pagep; 82 struct page **pagep;
68 83
84 /*
85 * Map the Switcher in to high memory.
86 *
87 * It turns out that if we choose the address 0xFFC00000 (4MB under the
88 * top virtual address), it makes setting up the page tables really
89 * easy.
90 */
91
92 /* We allocate an array of "struct page"s. map_vm_area() wants the
93 * pages in this form, rather than just an array of pointers. */
69 switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, 94 switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES,
70 GFP_KERNEL); 95 GFP_KERNEL);
71 if (!switcher_page) { 96 if (!switcher_page) {
@@ -73,6 +98,8 @@ static __init int map_switcher(void)
73 goto out; 98 goto out;
74 } 99 }
75 100
101 /* Now we actually allocate the pages. The Guest will see these pages,
102 * so we make sure they're zeroed. */
76 for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { 103 for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
77 unsigned long addr = get_zeroed_page(GFP_KERNEL); 104 unsigned long addr = get_zeroed_page(GFP_KERNEL);
78 if (!addr) { 105 if (!addr) {
@@ -82,6 +109,9 @@ static __init int map_switcher(void)
82 switcher_page[i] = virt_to_page(addr); 109 switcher_page[i] = virt_to_page(addr);
83 } 110 }
84 111
112 /* Now we reserve the "virtual memory area" we want: 0xFFC00000
113 * (SWITCHER_ADDR). We might not get it in theory, but in practice
114 * it's worked so far. */
85 switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, 115 switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,
86 VM_ALLOC, SWITCHER_ADDR, VMALLOC_END); 116 VM_ALLOC, SWITCHER_ADDR, VMALLOC_END);
87 if (!switcher_vma) { 117 if (!switcher_vma) {
@@ -90,49 +120,105 @@ static __init int map_switcher(void)
90 goto free_pages; 120 goto free_pages;
91 } 121 }
92 122
123 /* This code actually sets up the pages we've allocated to appear at
124 * SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the
125 * kind of pages we're mapping (kernel pages), and a pointer to our
126 * array of struct pages. It increments that pointer, but we don't
127 * care. */
93 pagep = switcher_page; 128 pagep = switcher_page;
94 err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep); 129 err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep);
95 if (err) { 130 if (err) {
96 printk("lguest: map_vm_area failed: %i\n", err); 131 printk("lguest: map_vm_area failed: %i\n", err);
97 goto free_vma; 132 goto free_vma;
98 } 133 }
134
135 /* Now the switcher is mapped at the right address, we can't fail!
136 * Copy in the compiled-in Switcher code (from switcher.S). */
99 memcpy(switcher_vma->addr, start_switcher_text, 137 memcpy(switcher_vma->addr, start_switcher_text,
100 end_switcher_text - start_switcher_text); 138 end_switcher_text - start_switcher_text);
101 139
102 /* Fix up IDT entries to point into copied text. */ 140 /* Most of the switcher.S doesn't care that it's been moved; on Intel,
141 * jumps are relative, and it doesn't access any references to external
142 * code or data.
143 *
144 * The only exception is the interrupt handlers in switcher.S: their
145 * addresses are placed in a table (default_idt_entries), so we need to
146 * update the table with the new addresses. switcher_offset() is a
147 * convenience function which returns the distance between the builtin
148 * switcher code and the high-mapped copy we just made. */
103 for (i = 0; i < IDT_ENTRIES; i++) 149 for (i = 0; i < IDT_ENTRIES; i++)
104 default_idt_entries[i] += switcher_offset(); 150 default_idt_entries[i] += switcher_offset();
105 151
152 /*
153 * Set up the Switcher's per-cpu areas.
154 *
155 * Each CPU gets two pages of its own within the high-mapped region
156 * (aka. "struct lguest_pages"). Much of this can be initialized now,
157 * but some depends on what Guest we are running (which is set up in
158 * copy_in_guest_info()).
159 */
106 for_each_possible_cpu(i) { 160 for_each_possible_cpu(i) {
161 /* lguest_pages() returns this CPU's two pages. */
107 struct lguest_pages *pages = lguest_pages(i); 162 struct lguest_pages *pages = lguest_pages(i);
163 /* This is a convenience pointer to make the code fit one
164 * statement to a line. */
108 struct lguest_ro_state *state = &pages->state; 165 struct lguest_ro_state *state = &pages->state;
109 166
110 /* These fields are static: rest done in copy_in_guest_info */ 167 /* The Global Descriptor Table: the Host has a different one
168 * for each CPU. We keep a descriptor for the GDT which says
169 * where it is and how big it is (the size is actually the last
170 * byte, not the size, hence the "-1"). */
111 state->host_gdt_desc.size = GDT_SIZE-1; 171 state->host_gdt_desc.size = GDT_SIZE-1;
112 state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); 172 state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
173
174 /* All CPUs on the Host use the same Interrupt Descriptor
175 * Table, so we just use store_idt(), which gets this CPU's IDT
176 * descriptor. */
113 store_idt(&state->host_idt_desc); 177 store_idt(&state->host_idt_desc);
178
179 /* The descriptors for the Guest's GDT and IDT can be filled
180 * out now, too. We copy the GDT & IDT into ->guest_gdt and
181 * ->guest_idt before actually running the Guest. */
114 state->guest_idt_desc.size = sizeof(state->guest_idt)-1; 182 state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
115 state->guest_idt_desc.address = (long)&state->guest_idt; 183 state->guest_idt_desc.address = (long)&state->guest_idt;
116 state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; 184 state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
117 state->guest_gdt_desc.address = (long)&state->guest_gdt; 185 state->guest_gdt_desc.address = (long)&state->guest_gdt;
186
187 /* We know where we want the stack to be when the Guest enters
188 * the switcher: in pages->regs. The stack grows upwards, so
189 * we start it at the end of that structure. */
118 state->guest_tss.esp0 = (long)(&pages->regs + 1); 190 state->guest_tss.esp0 = (long)(&pages->regs + 1);
191 /* And this is the GDT entry to use for the stack: we keep a
192 * couple of special LGUEST entries. */
119 state->guest_tss.ss0 = LGUEST_DS; 193 state->guest_tss.ss0 = LGUEST_DS;
120 /* No I/O for you! */ 194
195 /* x86 can have a fine-grained bitmap which indicates what I/O
196 * ports the process can use. We set it to the end of our
197 * structure, meaning "none". */
121 state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); 198 state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);
199
200 /* Some GDT entries are the same across all Guests, so we can
201 * set them up now. */
122 setup_default_gdt_entries(state); 202 setup_default_gdt_entries(state);
203 /* Most IDT entries are the same for all Guests, too.*/
123 setup_default_idt_entries(state, default_idt_entries); 204 setup_default_idt_entries(state, default_idt_entries);
124 205
125 /* Setup LGUEST segments on all cpus */ 206 /* The Host needs to be able to use the LGUEST segments on this
207 * CPU, too, so put them in the Host GDT. */
126 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; 208 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
127 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; 209 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
128 } 210 }
129 211
130 /* Initialize entry point into switcher. */ 212 /* In the Switcher, we want the %cs segment register to use the
213 * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
214 * it will be undisturbed when we switch. To change %cs and jump we
215 * need this structure to feed to Intel's "lcall" instruction. */
131 lguest_entry.offset = (long)switch_to_guest + switcher_offset(); 216 lguest_entry.offset = (long)switch_to_guest + switcher_offset();
132 lguest_entry.segment = LGUEST_CS; 217 lguest_entry.segment = LGUEST_CS;
133 218
134 printk(KERN_INFO "lguest: mapped switcher at %p\n", 219 printk(KERN_INFO "lguest: mapped switcher at %p\n",
135 switcher_vma->addr); 220 switcher_vma->addr);
221 /* And we succeeded... */
136 return 0; 222 return 0;
137 223
138free_vma: 224free_vma:
@@ -146,35 +232,58 @@ free_some_pages:
146out: 232out:
147 return err; 233 return err;
148} 234}
235/*:*/
149 236
237/* Cleaning up the mapping when the module is unloaded is almost...
238 * too easy. */
150static void unmap_switcher(void) 239static void unmap_switcher(void)
151{ 240{
152 unsigned int i; 241 unsigned int i;
153 242
243 /* vunmap() undoes *both* map_vm_area() and __get_vm_area(). */
154 vunmap(switcher_vma->addr); 244 vunmap(switcher_vma->addr);
245 /* Now we just need to free the pages we copied the switcher into */
155 for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) 246 for (i = 0; i < TOTAL_SWITCHER_PAGES; i++)
156 __free_pages(switcher_page[i], 0); 247 __free_pages(switcher_page[i], 0);
157} 248}
158 249
159/* IN/OUT insns: enough to get us past boot-time probing. */ 250/*H:130 Our Guest is usually so well behaved; it never tries to do things it
251 * isn't allowed to. Unfortunately, "struct paravirt_ops" isn't quite
252 * complete, because it doesn't contain replacements for the Intel I/O
253 * instructions. As a result, the Guest sometimes fumbles across one during
254 * the boot process as it probes for various things which are usually attached
255 * to a PC.
256 *
257 * When the Guest uses one of these instructions, we get trap #13 (General
258 * Protection Fault) and come here. We see if it's one of those troublesome
259 * instructions and skip over it. We return true if we did. */
160static int emulate_insn(struct lguest *lg) 260static int emulate_insn(struct lguest *lg)
161{ 261{
162 u8 insn; 262 u8 insn;
163 unsigned int insnlen = 0, in = 0, shift = 0; 263 unsigned int insnlen = 0, in = 0, shift = 0;
264 /* The eip contains the *virtual* address of the Guest's instruction:
265 * guest_pa just subtracts the Guest's page_offset. */
164 unsigned long physaddr = guest_pa(lg, lg->regs->eip); 266 unsigned long physaddr = guest_pa(lg, lg->regs->eip);
165 267
166 /* This only works for addresses in linear mapping... */ 268 /* The guest_pa() function only works for Guest kernel addresses, but
269 * that's all we're trying to do anyway. */
167 if (lg->regs->eip < lg->page_offset) 270 if (lg->regs->eip < lg->page_offset)
168 return 0; 271 return 0;
272
273 /* Decoding x86 instructions is icky. */
169 lgread(lg, &insn, physaddr, 1); 274 lgread(lg, &insn, physaddr, 1);
170 275
171 /* Operand size prefix means it's actually for ax. */ 276 /* 0x66 is an "operand prefix". It means it's using the upper 16 bits
277 of the eax register. */
172 if (insn == 0x66) { 278 if (insn == 0x66) {
173 shift = 16; 279 shift = 16;
280 /* The instruction is 1 byte so far, read the next byte. */
174 insnlen = 1; 281 insnlen = 1;
175 lgread(lg, &insn, physaddr + insnlen, 1); 282 lgread(lg, &insn, physaddr + insnlen, 1);
176 } 283 }
177 284
285 /* We can ignore the lower bit for the moment and decode the 4 opcodes
286 * we need to emulate. */
178 switch (insn & 0xFE) { 287 switch (insn & 0xFE) {
179 case 0xE4: /* in <next byte>,%al */ 288 case 0xE4: /* in <next byte>,%al */
180 insnlen += 2; 289 insnlen += 2;
@@ -191,9 +300,13 @@ static int emulate_insn(struct lguest *lg)
191 insnlen += 1; 300 insnlen += 1;
192 break; 301 break;
193 default: 302 default:
303 /* OK, we don't know what this is, can't emulate. */
194 return 0; 304 return 0;
195 } 305 }
196 306
307 /* If it was an "IN" instruction, they expect the result to be read
308 * into %eax, so we change %eax. We always return all-ones, which
309 * traditionally means "there's nothing there". */
197 if (in) { 310 if (in) {
198 /* Lower bit tells us whether it's a 16 or 32 bit access */ 311 /* Lower bit tells us whether it's a 16 or 32 bit access */
199 if (insn & 0x1) 312 if (insn & 0x1)
@@ -201,28 +314,46 @@ static int emulate_insn(struct lguest *lg)
201 else 314 else
202 lg->regs->eax |= (0xFFFF << shift); 315 lg->regs->eax |= (0xFFFF << shift);
203 } 316 }
317 /* Finally, we've "done" the instruction, so move past it. */
204 lg->regs->eip += insnlen; 318 lg->regs->eip += insnlen;
319 /* Success! */
205 return 1; 320 return 1;
206} 321}
207 322/*:*/
323
324/*L:305
325 * Dealing With Guest Memory.
326 *
327 * When the Guest gives us (what it thinks is) a physical address, we can use
328 * the normal copy_from_user() & copy_to_user() on that address: remember,
329 * Guest physical == Launcher virtual.
330 *
331 * But we can't trust the Guest: it might be trying to access the Launcher
332 * code. We have to check that the range is below the pfn_limit the Launcher
333 * gave us. We have to make sure that addr + len doesn't give us a false
334 * positive by overflowing, too. */
208int lguest_address_ok(const struct lguest *lg, 335int lguest_address_ok(const struct lguest *lg,
209 unsigned long addr, unsigned long len) 336 unsigned long addr, unsigned long len)
210{ 337{
211 return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); 338 return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr);
212} 339}
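The "addr + len >= addr" clause is the overflow guard: a malicious Guest could pass an addr near ULONG_MAX so that addr + len wraps around to a small value and sails past the pfn_limit test. A userspace illustration of the same check, assuming a 4096-byte page:

	#include <assert.h>
	#include <limits.h>

	#define PAGE_SIZE 4096UL

	static int address_ok(unsigned long pfn_limit,
			      unsigned long addr, unsigned long len)
	{
		return (addr + len) / PAGE_SIZE < pfn_limit
			&& (addr + len >= addr);
	}

	int main(void)
	{
		/* Wraps: addr + len overflows to 4095, which would pass
		 * the limit test alone, but the second clause catches it. */
		assert(!address_ok(100, ULONG_MAX - 4000, 8096));

		/* A sane in-range access passes both clauses. */
		assert(address_ok(100, 8192, 100));
		return 0;
	}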
213 340
214/* Just like get_user, but don't let guest access lguest binary. */ 341/* This is a convenient routine to get a 32-bit value from the Guest (a very
342 * common operation). Here we can see how useful the kill_lguest() routine we
343 * met in the Launcher can be: we return a random value (0) instead of needing
344 * to return an error. */
215u32 lgread_u32(struct lguest *lg, unsigned long addr) 345u32 lgread_u32(struct lguest *lg, unsigned long addr)
216{ 346{
217 u32 val = 0; 347 u32 val = 0;
218 348
219 /* Don't let them access lguest binary */ 349 /* Don't let them access lguest binary. */
220 if (!lguest_address_ok(lg, addr, sizeof(val)) 350 if (!lguest_address_ok(lg, addr, sizeof(val))
221 || get_user(val, (u32 __user *)addr) != 0) 351 || get_user(val, (u32 __user *)addr) != 0)
222 kill_guest(lg, "bad read address %#lx", addr); 352 kill_guest(lg, "bad read address %#lx", addr);
223 return val; 353 return val;
224} 354}
225 355
356/* Same thing for writing a value. */
226void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val) 357void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val)
227{ 358{
228 if (!lguest_address_ok(lg, addr, sizeof(val)) 359 if (!lguest_address_ok(lg, addr, sizeof(val))
@@ -230,6 +361,9 @@ void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val)
230 kill_guest(lg, "bad write address %#lx", addr); 361 kill_guest(lg, "bad write address %#lx", addr);
231} 362}
232 363
364/* This routine is more generic, and copies a range of Guest bytes into a
365 * buffer. If the copy_from_user() fails, we fill the buffer with zeroes, so
366 * the caller doesn't end up using uninitialized kernel memory. */
233void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes) 367void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
234{ 368{
235 if (!lguest_address_ok(lg, addr, bytes) 369 if (!lguest_address_ok(lg, addr, bytes)
@@ -240,6 +374,7 @@ void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
240 } 374 }
241} 375}
242 376
377/* Similarly, our generic routine to copy into a range of Guest bytes. */
243void lgwrite(struct lguest *lg, unsigned long addr, const void *b, 378void lgwrite(struct lguest *lg, unsigned long addr, const void *b,
244 unsigned bytes) 379 unsigned bytes)
245{ 380{
@@ -247,6 +382,7 @@ void lgwrite(struct lguest *lg, unsigned long addr, const void *b,
247 || copy_to_user((void __user *)addr, b, bytes) != 0) 382 || copy_to_user((void __user *)addr, b, bytes) != 0)
248 kill_guest(lg, "bad write address %#lx len %u", addr, bytes); 383 kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
249} 384}
385/* (end of memory access helper routines) :*/
250 386
251static void set_ts(void) 387static void set_ts(void)
252{ 388{
@@ -257,54 +393,108 @@ static void set_ts(void)
257 write_cr0(cr0|8); 393 write_cr0(cr0|8);
258} 394}
259 395
396/*S:010
397 * We are getting close to the Switcher.
398 *
399 * Remember that each CPU has two pages which are visible to the Guest when it
400 * runs on that CPU. This has to contain the state for that Guest: we copy the
401 * state in just before we run the Guest.
402 *
403 * Each Guest has "changed" flags which indicate what has changed in the Guest
404 * since it last ran. We saw this set in interrupts_and_traps.c and
405 * segments.c.
406 */
260static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages) 407static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
261{ 408{
409 /* Copying all this data can be quite expensive. We usually run the
410 * same Guest we ran last time (and that Guest hasn't run anywhere else
411 * meanwhile). If that's not the case, we pretend everything in the
412 * Guest has changed. */
262 if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) { 413 if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {
263 __get_cpu_var(last_guest) = lg; 414 __get_cpu_var(last_guest) = lg;
264 lg->last_pages = pages; 415 lg->last_pages = pages;
265 lg->changed = CHANGED_ALL; 416 lg->changed = CHANGED_ALL;
266 } 417 }
267 418
268 /* These are pretty cheap, so we do them unconditionally. */ 419 /* These copies are pretty cheap, so we do them unconditionally: */
420 /* Save the current Host top-level page directory. */
269 pages->state.host_cr3 = __pa(current->mm->pgd); 421 pages->state.host_cr3 = __pa(current->mm->pgd);
422 /* Set up the Guest's page tables to see this CPU's pages (and no
423 * other CPU's pages). */
270 map_switcher_in_guest(lg, pages); 424 map_switcher_in_guest(lg, pages);
425 /* Set up the two "TSS" members which tell the CPU what stack to use
 426 * for traps which go directly into the Guest (ie. traps at privilege
427 * level 1). */
271 pages->state.guest_tss.esp1 = lg->esp1; 428 pages->state.guest_tss.esp1 = lg->esp1;
272 pages->state.guest_tss.ss1 = lg->ss1; 429 pages->state.guest_tss.ss1 = lg->ss1;
273 430
274 /* Copy direct trap entries. */ 431 /* Copy direct-to-Guest trap entries. */
275 if (lg->changed & CHANGED_IDT) 432 if (lg->changed & CHANGED_IDT)
276 copy_traps(lg, pages->state.guest_idt, default_idt_entries); 433 copy_traps(lg, pages->state.guest_idt, default_idt_entries);
277 434
278 /* Copy all GDT entries but the TSS. */ 435 /* Copy all GDT entries which the Guest can change. */
279 if (lg->changed & CHANGED_GDT) 436 if (lg->changed & CHANGED_GDT)
280 copy_gdt(lg, pages->state.guest_gdt); 437 copy_gdt(lg, pages->state.guest_gdt);
281 /* If only the TLS entries have changed, copy them. */ 438 /* If only the TLS entries have changed, copy them. */
282 else if (lg->changed & CHANGED_GDT_TLS) 439 else if (lg->changed & CHANGED_GDT_TLS)
283 copy_gdt_tls(lg, pages->state.guest_gdt); 440 copy_gdt_tls(lg, pages->state.guest_gdt);
284 441
442 /* Mark the Guest as unchanged for next time. */
285 lg->changed = 0; 443 lg->changed = 0;
286} 444}
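
For reference, the "changed" bits tested here are defined in lg.h; a sketch of their shape (the exact values are an assumption, so check lg.h for the real ones):

/* Sketch, values assumed: */
#define CHANGED_IDT		1
#define CHANGED_GDT		2
#define CHANGED_GDT_TLS		4	/* only the TLS entries moved */
#define CHANGED_ALL		3	/* IDT plus full GDT: a full GDT copy
					 * already includes the TLS entries. */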
287 445
446/* Finally: the code to actually call into the Switcher to run the Guest. */
288static void run_guest_once(struct lguest *lg, struct lguest_pages *pages) 447static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
289{ 448{
449 /* This is a dummy value we need for GCC's sake. */
290 unsigned int clobber; 450 unsigned int clobber;
291 451
452 /* Copy the guest-specific information into this CPU's "struct
453 * lguest_pages". */
292 copy_in_guest_info(lg, pages); 454 copy_in_guest_info(lg, pages);
293 455
294 /* Put eflags on stack, lcall does rest: suitable for iret return. */ 456 /* Now: we push the "eflags" register on the stack, then do an "lcall".
457 * This is how we change from using the kernel code segment to using
458 * the dedicated lguest code segment, as well as jumping into the
459 * Switcher.
460 *
461 * The lcall also pushes the old code segment (KERNEL_CS) onto the
462 * stack, then the address of this call. This stack layout happens to
463 * exactly match the stack of an interrupt... */
295 asm volatile("pushf; lcall *lguest_entry" 464 asm volatile("pushf; lcall *lguest_entry"
465 /* This is how we tell GCC that %eax ("a") and %ebx ("b")
466 * are changed by this routine. The "=" means output. */
296 : "=a"(clobber), "=b"(clobber) 467 : "=a"(clobber), "=b"(clobber)
468 /* %eax contains the pages pointer. ("0" refers to the
469 * 0-th argument above, ie "a"). %ebx contains the
470 * physical address of the Guest's top-level page
471 * directory. */
297 : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir)) 472 : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
473 /* We tell gcc that all these registers could change,
474 * which means we don't have to save and restore them in
475 * the Switcher. */
298 : "memory", "%edx", "%ecx", "%edi", "%esi"); 476 : "memory", "%edx", "%ecx", "%edi", "%esi");
299} 477}
478/*:*/
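
Since "pushf; lcall" is easy to misread, here is a sketch of the frame it leaves on the stack, lowest address (most recent push) first; the struct name is invented:

/* Sketch: this is exactly the frame the CPU builds for an interrupt,
 * which is why the Switcher's final "iret" drops us back onto the
 * instruction after the lcall. */
struct switcher_frame {
	unsigned long eip;	/* pushed by lcall: the return address */
	unsigned long cs;	/* pushed by lcall: the old KERNEL_CS */
	unsigned long eflags;	/* pushed by our explicit "pushf" */
};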
300 479
 480/*H:030 Let's jump straight to the main loop which runs the Guest.
481 * Remember, this is called by the Launcher reading /dev/lguest, and we keep
482 * going around and around until something interesting happens. */
301int run_guest(struct lguest *lg, unsigned long __user *user) 483int run_guest(struct lguest *lg, unsigned long __user *user)
302{ 484{
485 /* We stop running once the Guest is dead. */
303 while (!lg->dead) { 486 while (!lg->dead) {
487 /* We need to initialize this, otherwise gcc complains. It's
488 * not (yet) clever enough to see that it's initialized when we
489 * need it. */
304 unsigned int cr2 = 0; /* Damn gcc */ 490 unsigned int cr2 = 0; /* Damn gcc */
305 491
306 /* Hypercalls first: we might have been out to userspace */ 492 /* First we run any hypercalls the Guest wants done: either in
493 * the hypercall ring in "struct lguest_data", or directly by
494 * using int 31 (LGUEST_TRAP_ENTRY). */
307 do_hypercalls(lg); 495 do_hypercalls(lg);
496 /* It's possible the Guest did a SEND_DMA hypercall to the
497 * Launcher, in which case we return from the read() now. */
308 if (lg->dma_is_pending) { 498 if (lg->dma_is_pending) {
309 if (put_user(lg->pending_dma, user) || 499 if (put_user(lg->pending_dma, user) ||
310 put_user(lg->pending_key, user+1)) 500 put_user(lg->pending_key, user+1))
@@ -312,6 +502,7 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
312 return sizeof(unsigned long)*2; 502 return sizeof(unsigned long)*2;
313 } 503 }
314 504
505 /* Check for signals */
315 if (signal_pending(current)) 506 if (signal_pending(current))
316 return -ERESTARTSYS; 507 return -ERESTARTSYS;
317 508
@@ -319,77 +510,154 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
319 if (lg->break_out) 510 if (lg->break_out)
320 return -EAGAIN; 511 return -EAGAIN;
321 512
513 /* Check if there are any interrupts which can be delivered
 514 * now: if so, this sets up the handler to be executed when we
515 * next run the Guest. */
322 maybe_do_interrupt(lg); 516 maybe_do_interrupt(lg);
323 517
518 /* All long-lived kernel loops need to check with this horrible
519 * thing called the freezer. If the Host is trying to suspend,
520 * it stops us. */
324 try_to_freeze(); 521 try_to_freeze();
325 522
523 /* Just make absolutely sure the Guest is still alive. One of
524 * those hypercalls could have been fatal, for example. */
326 if (lg->dead) 525 if (lg->dead)
327 break; 526 break;
328 527
528 /* If the Guest asked to be stopped, we sleep. The Guest's
529 * clock timer or LHCALL_BREAK from the Waker will wake us. */
329 if (lg->halted) { 530 if (lg->halted) {
330 set_current_state(TASK_INTERRUPTIBLE); 531 set_current_state(TASK_INTERRUPTIBLE);
331 schedule(); 532 schedule();
332 continue; 533 continue;
333 } 534 }
334 535
536 /* OK, now we're ready to jump into the Guest. First we put up
537 * the "Do Not Disturb" sign: */
335 local_irq_disable(); 538 local_irq_disable();
336 539
337 /* Even if *we* don't want FPU trap, guest might... */ 540 /* Remember the awfully-named TS bit? If the Guest has asked
541 * to set it we set it now, so we can trap and pass that trap
542 * to the Guest if it uses the FPU. */
338 if (lg->ts) 543 if (lg->ts)
339 set_ts(); 544 set_ts();
340 545
341 /* Don't let Guest do SYSENTER: we can't handle it. */ 546 /* SYSENTER is an optimized way of doing system calls. We
547 * can't allow it because it always jumps to privilege level 0.
548 * A normal Guest won't try it because we don't advertise it in
549 * CPUID, but a malicious Guest (or malicious Guest userspace
550 * program) could, so we tell the CPU to disable it before
551 * running the Guest. */
342 if (boot_cpu_has(X86_FEATURE_SEP)) 552 if (boot_cpu_has(X86_FEATURE_SEP))
343 wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); 553 wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
344 554
555 /* Now we actually run the Guest. It will pop back out when
556 * something interesting happens, and we can examine its
557 * registers to see what it was doing. */
345 run_guest_once(lg, lguest_pages(raw_smp_processor_id())); 558 run_guest_once(lg, lguest_pages(raw_smp_processor_id()));
346 559
347 /* Save cr2 now if we page-faulted. */ 560 /* The "regs" pointer contains two extra entries which are not
561 * really registers: a trap number which says what interrupt or
562 * trap made the switcher code come back, and an error code
563 * which some traps set. */
564
565 /* If the Guest page faulted, then the cr2 register will tell
566 * us the bad virtual address. We have to grab this now,
567 * because once we re-enable interrupts an interrupt could
568 * fault and thus overwrite cr2, or we could even move off to a
569 * different CPU. */
348 if (lg->regs->trapnum == 14) 570 if (lg->regs->trapnum == 14)
349 cr2 = read_cr2(); 571 cr2 = read_cr2();
572 /* Similarly, if we took a trap because the Guest used the FPU,
573 * we have to restore the FPU it expects to see. */
350 else if (lg->regs->trapnum == 7) 574 else if (lg->regs->trapnum == 7)
351 math_state_restore(); 575 math_state_restore();
352 576
577 /* Restore SYSENTER if it's supposed to be on. */
353 if (boot_cpu_has(X86_FEATURE_SEP)) 578 if (boot_cpu_has(X86_FEATURE_SEP))
354 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 579 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
580
581 /* Now we're ready to be interrupted or moved to other CPUs */
355 local_irq_enable(); 582 local_irq_enable();
356 583
584 /* OK, so what happened? */
357 switch (lg->regs->trapnum) { 585 switch (lg->regs->trapnum) {
358 case 13: /* We've intercepted a GPF. */ 586 case 13: /* We've intercepted a GPF. */
587 /* Check if this was one of those annoying IN or OUT
588 * instructions which we need to emulate. If so, we
589 * just go back into the Guest after we've done it. */
359 if (lg->regs->errcode == 0) { 590 if (lg->regs->errcode == 0) {
360 if (emulate_insn(lg)) 591 if (emulate_insn(lg))
361 continue; 592 continue;
362 } 593 }
363 break; 594 break;
364 case 14: /* We've intercepted a page fault. */ 595 case 14: /* We've intercepted a page fault. */
596 /* The Guest accessed a virtual address that wasn't
597 * mapped. This happens a lot: we don't actually set
598 * up most of the page tables for the Guest at all when
599 * we start: as it runs it asks for more and more, and
600 * we set them up as required. In this case, we don't
601 * even tell the Guest that the fault happened.
602 *
603 * The errcode tells whether this was a read or a
604 * write, and whether kernel or userspace code. */
365 if (demand_page(lg, cr2, lg->regs->errcode)) 605 if (demand_page(lg, cr2, lg->regs->errcode))
366 continue; 606 continue;
367 607
368 /* If lguest_data is NULL, this won't hurt. */ 608 /* OK, it's really not there (or not OK): the Guest
609 * needs to know. We write out the cr2 value so it
610 * knows where the fault occurred.
611 *
612 * Note that if the Guest were really messed up, this
613 * could happen before it's done the INITIALIZE
614 * hypercall, so lg->lguest_data will be NULL, so
615 * &lg->lguest_data->cr2 will be address 8. Writing
616 * into that address won't hurt the Host at all,
617 * though. */
369 if (put_user(cr2, &lg->lguest_data->cr2)) 618 if (put_user(cr2, &lg->lguest_data->cr2))
370 kill_guest(lg, "Writing cr2"); 619 kill_guest(lg, "Writing cr2");
371 break; 620 break;
372 case 7: /* We've intercepted a Device Not Available fault. */ 621 case 7: /* We've intercepted a Device Not Available fault. */
373 /* If they don't want to know, just absorb it. */ 622 /* If the Guest doesn't want to know, we already
623 * restored the Floating Point Unit, so we just
624 * continue without telling it. */
374 if (!lg->ts) 625 if (!lg->ts)
375 continue; 626 continue;
376 break; 627 break;
377 case 32 ... 255: /* Real interrupt, fall thru */ 628 case 32 ... 255:
629 /* These values mean a real interrupt occurred, in
630 * which case the Host handler has already been run.
631 * We just do a friendly check if another process
632 * should now be run, then fall through to loop
633 * around: */
378 cond_resched(); 634 cond_resched();
379 case LGUEST_TRAP_ENTRY: /* Handled at top of loop */ 635 case LGUEST_TRAP_ENTRY: /* Handled at top of loop */
380 continue; 636 continue;
381 } 637 }
382 638
639 /* If we get here, it's a trap the Guest wants to know
640 * about. */
383 if (deliver_trap(lg, lg->regs->trapnum)) 641 if (deliver_trap(lg, lg->regs->trapnum))
384 continue; 642 continue;
385 643
644 /* If the Guest doesn't have a handler (either it hasn't
645 * registered any yet, or it's one of the faults we don't let
646 * it handle), it dies with a cryptic error message. */
386 kill_guest(lg, "unhandled trap %li at %#lx (%#lx)", 647 kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
387 lg->regs->trapnum, lg->regs->eip, 648 lg->regs->trapnum, lg->regs->eip,
388 lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode); 649 lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode);
389 } 650 }
651 /* The Guest is dead => "No such file or directory" */
390 return -ENOENT; 652 return -ENOENT;
391} 653}
392 654
655/* Now we can look at each of the routines this calls, in increasing order of
656 * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
657 * deliver_trap() and demand_page(). After all those, we'll be ready to
658 * examine the Switcher, and our philosophical understanding of the Host/Guest
659 * duality will be complete. :*/
660
393int find_free_guest(void) 661int find_free_guest(void)
394{ 662{
395 unsigned int i; 663 unsigned int i;
@@ -407,55 +675,96 @@ static void adjust_pge(void *on)
407 write_cr4(read_cr4() & ~X86_CR4_PGE); 675 write_cr4(read_cr4() & ~X86_CR4_PGE);
408} 676}
409 677
678/*H:000
679 * Welcome to the Host!
680 *
681 * By this point your brain has been tickled by the Guest code and numbed by
682 * the Launcher code; prepare for it to be stretched by the Host code. This is
683 * the heart. Let's begin at the initialization routine for the Host's lg
684 * module.
685 */
410static int __init init(void) 686static int __init init(void)
411{ 687{
412 int err; 688 int err;
413 689
690 /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */
414 if (paravirt_enabled()) { 691 if (paravirt_enabled()) {
415 printk("lguest is afraid of %s\n", paravirt_ops.name); 692 printk("lguest is afraid of %s\n", paravirt_ops.name);
416 return -EPERM; 693 return -EPERM;
417 } 694 }
418 695
696 /* First we put the Switcher up in very high virtual memory. */
419 err = map_switcher(); 697 err = map_switcher();
420 if (err) 698 if (err)
421 return err; 699 return err;
422 700
701 /* Now we set up the pagetable implementation for the Guests. */
423 err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES); 702 err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES);
424 if (err) { 703 if (err) {
425 unmap_switcher(); 704 unmap_switcher();
426 return err; 705 return err;
427 } 706 }
707
708 /* The I/O subsystem needs some things initialized. */
428 lguest_io_init(); 709 lguest_io_init();
429 710
711 /* /dev/lguest needs to be registered. */
430 err = lguest_device_init(); 712 err = lguest_device_init();
431 if (err) { 713 if (err) {
432 free_pagetables(); 714 free_pagetables();
433 unmap_switcher(); 715 unmap_switcher();
434 return err; 716 return err;
435 } 717 }
718
719 /* Finally, we need to turn off "Page Global Enable". PGE is an
720 * optimization where page table entries are specially marked to show
721 * they never change. The Host kernel marks all the kernel pages this
722 * way because it's always present, even when userspace is running.
723 *
724 * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
725 * switch to the Guest kernel. If you don't disable this on all CPUs,
726 * you'll get really weird bugs that you'll chase for two days.
727 *
728 * I used to turn PGE off every time we switched to the Guest and back
 729 * on when we return, but that slowed the Switcher down noticeably. */
730
731 /* We don't need the complexity of CPUs coming and going while we're
732 * doing this. */
436 lock_cpu_hotplug(); 733 lock_cpu_hotplug();
437 if (cpu_has_pge) { /* We have a broader idea of "global". */ 734 if (cpu_has_pge) { /* We have a broader idea of "global". */
735 /* Remember that this was originally set (for cleanup). */
438 cpu_had_pge = 1; 736 cpu_had_pge = 1;
737 /* adjust_pge is a helper function which sets or unsets the PGE
738 * bit on its CPU, depending on the argument (0 == unset). */
439 on_each_cpu(adjust_pge, (void *)0, 0, 1); 739 on_each_cpu(adjust_pge, (void *)0, 0, 1);
740 /* Turn off the feature in the global feature set. */
440 clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); 741 clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
441 } 742 }
442 unlock_cpu_hotplug(); 743 unlock_cpu_hotplug();
744
745 /* All good! */
443 return 0; 746 return 0;
444} 747}
445 748
749/* Cleaning up is just the same code, backwards. With a little French. */
446static void __exit fini(void) 750static void __exit fini(void)
447{ 751{
448 lguest_device_remove(); 752 lguest_device_remove();
449 free_pagetables(); 753 free_pagetables();
450 unmap_switcher(); 754 unmap_switcher();
755
756 /* If we had PGE before we started, turn it back on now. */
451 lock_cpu_hotplug(); 757 lock_cpu_hotplug();
452 if (cpu_had_pge) { 758 if (cpu_had_pge) {
453 set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); 759 set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
760 /* adjust_pge's argument "1" means set PGE. */
454 on_each_cpu(adjust_pge, (void *)1, 0, 1); 761 on_each_cpu(adjust_pge, (void *)1, 0, 1);
455 } 762 }
456 unlock_cpu_hotplug(); 763 unlock_cpu_hotplug();
457} 764}
458 765
766/* The Host side of lguest can be a module. This is a nice way for people to
767 * play with it. */
459module_init(init); 768module_init(init);
460module_exit(fini); 769module_exit(fini);
461MODULE_LICENSE("GPL"); 770MODULE_LICENSE("GPL");
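
The hunk above only shows the tail of adjust_pge(); here is a sketch of the whole helper, assuming the "on" branch simply mirrors the "off" branch we can see:

/* Sketch: runs on each CPU via on_each_cpu(), setting or clearing the
 * global-pages bit in CR4 according to its argument. */
static void adjust_pge(void *on)
{
	if (on)
		write_cr4(read_cr4() | X86_CR4_PGE);
	else
		write_cr4(read_cr4() & ~X86_CR4_PGE);
}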
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index ea52ca451f74..7a5299f9679d 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -1,5 +1,10 @@
1/* Actual hypercalls, which allow guests to actually do something. 1/*P:500 Just as userspace programs request kernel operations through a system
2 Copyright (C) 2006 Rusty Russell IBM Corporation 2 * call, the Guest requests Host operations through a "hypercall". You might
3 * notice this nomenclature doesn't really follow any logic, but the name has
4 * been around for long enough that we're stuck with it. As you'd expect, this
 5 * code is basically one big switch statement. :*/
6
7/* Copyright (C) 2006 Rusty Russell IBM Corporation
3 8
4 This program is free software; you can redistribute it and/or modify 9 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by 10 it under the terms of the GNU General Public License as published by
@@ -23,37 +28,63 @@
23#include <irq_vectors.h> 28#include <irq_vectors.h>
24#include "lg.h" 29#include "lg.h"
25 30
31/*H:120 This is the core hypercall routine: where the Guest gets what it
32 * wants. Or gets killed. Or, in the case of LHCALL_CRASH, both.
33 *
34 * Remember from the Guest: %eax == which call to make, and the arguments are
35 * packed into %edx, %ebx and %ecx if needed. */
26static void do_hcall(struct lguest *lg, struct lguest_regs *regs) 36static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
27{ 37{
28 switch (regs->eax) { 38 switch (regs->eax) {
29 case LHCALL_FLUSH_ASYNC: 39 case LHCALL_FLUSH_ASYNC:
40 /* This call does nothing, except by breaking out of the Guest
41 * it makes us process all the asynchronous hypercalls. */
30 break; 42 break;
31 case LHCALL_LGUEST_INIT: 43 case LHCALL_LGUEST_INIT:
44 /* You can't get here unless you're already initialized. Don't
45 * do that. */
32 kill_guest(lg, "already have lguest_data"); 46 kill_guest(lg, "already have lguest_data");
33 break; 47 break;
34 case LHCALL_CRASH: { 48 case LHCALL_CRASH: {
49 /* Crash is such a trivial hypercall that we do it in four
50 * lines right here. */
35 char msg[128]; 51 char msg[128];
52 /* If the lgread fails, it will call kill_guest() itself; the
53 * kill_guest() with the message will be ignored. */
36 lgread(lg, msg, regs->edx, sizeof(msg)); 54 lgread(lg, msg, regs->edx, sizeof(msg));
37 msg[sizeof(msg)-1] = '\0'; 55 msg[sizeof(msg)-1] = '\0';
38 kill_guest(lg, "CRASH: %s", msg); 56 kill_guest(lg, "CRASH: %s", msg);
39 break; 57 break;
40 } 58 }
41 case LHCALL_FLUSH_TLB: 59 case LHCALL_FLUSH_TLB:
60 /* FLUSH_TLB comes in two flavors, depending on the
61 * argument: */
42 if (regs->edx) 62 if (regs->edx)
43 guest_pagetable_clear_all(lg); 63 guest_pagetable_clear_all(lg);
44 else 64 else
45 guest_pagetable_flush_user(lg); 65 guest_pagetable_flush_user(lg);
46 break; 66 break;
47 case LHCALL_GET_WALLCLOCK: { 67 case LHCALL_GET_WALLCLOCK: {
68 /* The Guest wants to know the real time in seconds since 1970,
69 * in good Unix tradition. */
48 struct timespec ts; 70 struct timespec ts;
49 ktime_get_real_ts(&ts); 71 ktime_get_real_ts(&ts);
50 regs->eax = ts.tv_sec; 72 regs->eax = ts.tv_sec;
51 break; 73 break;
52 } 74 }
53 case LHCALL_BIND_DMA: 75 case LHCALL_BIND_DMA:
76 /* BIND_DMA really wants four arguments, but it's the only call
77 * which does. So the Guest packs the number of buffers and
78 * the interrupt number into the final argument, and we decode
79 * it here. This can legitimately fail, since we currently
80 * place a limit on the number of DMA pools a Guest can have.
81 * So we return true or false from this call. */
54 regs->eax = bind_dma(lg, regs->edx, regs->ebx, 82 regs->eax = bind_dma(lg, regs->edx, regs->ebx,
55 regs->ecx >> 8, regs->ecx & 0xFF); 83 regs->ecx >> 8, regs->ecx & 0xFF);
56 break; 84 break;
85
86 /* All these calls simply pass the arguments through to the right
87 * routines. */
57 case LHCALL_SEND_DMA: 88 case LHCALL_SEND_DMA:
58 send_dma(lg, regs->edx, regs->ebx); 89 send_dma(lg, regs->edx, regs->ebx);
59 break; 90 break;
@@ -81,10 +112,13 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
81 case LHCALL_SET_CLOCKEVENT: 112 case LHCALL_SET_CLOCKEVENT:
82 guest_set_clockevent(lg, regs->edx); 113 guest_set_clockevent(lg, regs->edx);
83 break; 114 break;
115
84 case LHCALL_TS: 116 case LHCALL_TS:
117 /* This sets the TS flag, as we saw used in run_guest(). */
85 lg->ts = regs->edx; 118 lg->ts = regs->edx;
86 break; 119 break;
87 case LHCALL_HALT: 120 case LHCALL_HALT:
121 /* Similarly, this sets the halted flag for run_guest(). */
88 lg->halted = 1; 122 lg->halted = 1;
89 break; 123 break;
90 default: 124 default:
@@ -92,25 +126,42 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
92 } 126 }
93} 127}
94 128
95/* We always do queued calls before actual hypercall. */ 129/* Asynchronous hypercalls are easy: we just look in the array in the Guest's
130 * "struct lguest_data" and see if there are any new ones marked "ready".
131 *
132 * We are careful to do these in order: obviously we respect the order the
133 * Guest put them in the ring, but we also promise the Guest that they will
134 * happen before any normal hypercall (which is why we check this before
135 * checking for a normal hcall). */
96static void do_async_hcalls(struct lguest *lg) 136static void do_async_hcalls(struct lguest *lg)
97{ 137{
98 unsigned int i; 138 unsigned int i;
99 u8 st[LHCALL_RING_SIZE]; 139 u8 st[LHCALL_RING_SIZE];
100 140
141 /* For simplicity, we copy the entire call status array in at once. */
101 if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st))) 142 if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
102 return; 143 return;
103 144
145
146 /* We process "struct lguest_data"s hcalls[] ring once. */
104 for (i = 0; i < ARRAY_SIZE(st); i++) { 147 for (i = 0; i < ARRAY_SIZE(st); i++) {
105 struct lguest_regs regs; 148 struct lguest_regs regs;
149 /* We remember where we were up to from last time. This makes
150 * sure that the hypercalls are done in the order the Guest
151 * places them in the ring. */
106 unsigned int n = lg->next_hcall; 152 unsigned int n = lg->next_hcall;
107 153
154 /* 0xFF means there's no call here (yet). */
108 if (st[n] == 0xFF) 155 if (st[n] == 0xFF)
109 break; 156 break;
110 157
 158 /* OK, we have a hypercall. Increment the "next_hcall" cursor,
159 * and wrap back to 0 if we reach the end. */
111 if (++lg->next_hcall == LHCALL_RING_SIZE) 160 if (++lg->next_hcall == LHCALL_RING_SIZE)
112 lg->next_hcall = 0; 161 lg->next_hcall = 0;
113 162
163 /* We copy the hypercall arguments into a fake register
164 * structure. This makes life simple for do_hcall(). */
114 if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax) 165 if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax)
115 || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx) 166 || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx)
116 || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx) 167 || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx)
@@ -119,74 +170,126 @@ static void do_async_hcalls(struct lguest *lg)
119 break; 170 break;
120 } 171 }
121 172
173 /* Do the hypercall, same as a normal one. */
122 do_hcall(lg, &regs); 174 do_hcall(lg, &regs);
175
176 /* Mark the hypercall done. */
123 if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) { 177 if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
124 kill_guest(lg, "Writing result for async hypercall"); 178 kill_guest(lg, "Writing result for async hypercall");
125 break; 179 break;
126 } 180 }
127 181
182 /* Stop doing hypercalls if we've just done a DMA to the
183 * Launcher: it needs to service this first. */
128 if (lg->dma_is_pending) 184 if (lg->dma_is_pending)
129 break; 185 break;
130 } 186 }
131} 187}
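
It may help to picture the Guest's side of this ring. Here is a sketch of how a Guest could queue an asynchronous call using the same 0xFF-means-free convention; the helper name is invented and the fallback behaviour is an assumption:

/* Sketch: Guest-side enqueue matching the Host loop above (assumes a
 * uniprocessor Guest, so no locking is shown). */
static void example_async_hcall(u32 call, u32 edx, u32 ebx, u32 ecx)
{
	static unsigned int next;	/* Guest-side cursor */

	/* The Host wrote 0xFF into the status byte when it finished. */
	if (lguest_data.hcall_status[next] != 0xFF)
		return;		/* Ring full: a real Guest would fall back
				 * to a synchronous hypercall here. */
	lguest_data.hcalls[next].eax = call;
	lguest_data.hcalls[next].edx = edx;
	lguest_data.hcalls[next].ebx = ebx;
	lguest_data.hcalls[next].ecx = ecx;
	/* All the arguments must land before the slot is marked ready. */
	wmb();
	lguest_data.hcall_status[next] = 0;
	if (++next == LHCALL_RING_SIZE)
		next = 0;
}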
132 188
189/* Last of all, we look at what happens first of all. The very first time the
190 * Guest makes a hypercall, we end up here to set things up: */
133static void initialize(struct lguest *lg) 191static void initialize(struct lguest *lg)
134{ 192{
135 u32 tsc_speed; 193 u32 tsc_speed;
136 194
195 /* You can't do anything until you're initialized. The Guest knows the
196 * rules, so we're unforgiving here. */
137 if (lg->regs->eax != LHCALL_LGUEST_INIT) { 197 if (lg->regs->eax != LHCALL_LGUEST_INIT) {
138 kill_guest(lg, "hypercall %li before LGUEST_INIT", 198 kill_guest(lg, "hypercall %li before LGUEST_INIT",
139 lg->regs->eax); 199 lg->regs->eax);
140 return; 200 return;
141 } 201 }
142 202
143 /* We only tell the guest to use the TSC if it's reliable. */ 203 /* We insist that the Time Stamp Counter exists and doesn't change with
 204 * CPU frequency. Some devious chip manufacturers decided that TSC
205 * changes could be handled in software. I decided that time going
206 * backwards might be good for benchmarks, but it's bad for users.
207 *
208 * We also insist that the TSC be stable: the kernel detects unreliable
209 * TSCs for its own purposes, and we use that here. */
144 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) 210 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
145 tsc_speed = tsc_khz; 211 tsc_speed = tsc_khz;
146 else 212 else
147 tsc_speed = 0; 213 tsc_speed = 0;
148 214
215 /* The pointer to the Guest's "struct lguest_data" is the only
216 * argument. */
149 lg->lguest_data = (struct lguest_data __user *)lg->regs->edx; 217 lg->lguest_data = (struct lguest_data __user *)lg->regs->edx;
150 /* We check here so we can simply copy_to_user/from_user */ 218 /* If we check the address they gave is OK now, we can simply
219 * copy_to_user/from_user from now on rather than using lgread/lgwrite.
220 * I put this in to show that I'm not immune to writing stupid
221 * optimizations. */
151 if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) { 222 if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) {
152 kill_guest(lg, "bad guest page %p", lg->lguest_data); 223 kill_guest(lg, "bad guest page %p", lg->lguest_data);
153 return; 224 return;
154 } 225 }
226 /* The Guest tells us where we're not to deliver interrupts by putting
227 * the range of addresses into "struct lguest_data". */
155 if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) 228 if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
156 || get_user(lg->noirq_end, &lg->lguest_data->noirq_end) 229 || get_user(lg->noirq_end, &lg->lguest_data->noirq_end)
157 /* We reserve the top pgd entry. */ 230 /* We tell the Guest that it can't use the top 4MB of virtual
 231 * address space, which is where the Switcher lives. */
158 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) 232 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
159 || put_user(tsc_speed, &lg->lguest_data->tsc_khz) 233 || put_user(tsc_speed, &lg->lguest_data->tsc_khz)
234 /* We also give the Guest a unique id, as used in lguest_net.c. */
160 || put_user(lg->guestid, &lg->lguest_data->guestid)) 235 || put_user(lg->guestid, &lg->lguest_data->guestid))
161 kill_guest(lg, "bad guest page %p", lg->lguest_data); 236 kill_guest(lg, "bad guest page %p", lg->lguest_data);
162 237
163 /* This is the one case where the above accesses might have 238 /* This is the one case where the above accesses might have been the
164 * been the first write to a Guest page. This may have caused 239 * first write to a Guest page. This may have caused a copy-on-write
165 * a copy-on-write fault, but the Guest might be referring to 240 * fault, but the Guest might be referring to the old (read-only)
166 * the old (read-only) page. */ 241 * page. */
167 guest_pagetable_clear_all(lg); 242 guest_pagetable_clear_all(lg);
168} 243}
244/* Now we've examined the hypercall code; our Guest can make requests. There
245 * is one other way we can do things for the Guest, as we see in
246 * emulate_insn(). */
169 247
170/* Even if we go out to userspace and come back, we don't want to do 248/*H:110 Tricky point: we mark the hypercall as "done" once we've done it.
171 * the hypercall again. */ 249 * Normally we don't need to do this: the Guest will run again and update the
250 * trap number before we come back around the run_guest() loop to
251 * do_hypercalls().
252 *
253 * However, if we are signalled or the Guest sends DMA to the Launcher, that
254 * loop will exit without running the Guest. When it comes back it would try
255 * to re-run the hypercall. */
172static void clear_hcall(struct lguest *lg) 256static void clear_hcall(struct lguest *lg)
173{ 257{
174 lg->regs->trapnum = 255; 258 lg->regs->trapnum = 255;
175} 259}
176 260
261/*H:100
262 * Hypercalls
263 *
264 * Remember from the Guest, hypercalls come in two flavors: normal and
 265 * asynchronous. This file handles both types.
266 */
177void do_hypercalls(struct lguest *lg) 267void do_hypercalls(struct lguest *lg)
178{ 268{
269 /* Not initialized yet? */
179 if (unlikely(!lg->lguest_data)) { 270 if (unlikely(!lg->lguest_data)) {
271 /* Did the Guest make a hypercall? We might have come back for
272 * some other reason (an interrupt, a different trap). */
180 if (lg->regs->trapnum == LGUEST_TRAP_ENTRY) { 273 if (lg->regs->trapnum == LGUEST_TRAP_ENTRY) {
274 /* Set up the "struct lguest_data" */
181 initialize(lg); 275 initialize(lg);
276 /* The hypercall is done. */
182 clear_hcall(lg); 277 clear_hcall(lg);
183 } 278 }
184 return; 279 return;
185 } 280 }
186 281
282 /* The Guest has initialized.
283 *
284 * Look in the hypercall ring for the async hypercalls: */
187 do_async_hcalls(lg); 285 do_async_hcalls(lg);
286
287 /* If we stopped reading the hypercall ring because the Guest did a
288 * SEND_DMA to the Launcher, we want to return now. Otherwise if the
289 * Guest asked us to do a hypercall, we do it. */
188 if (!lg->dma_is_pending && lg->regs->trapnum == LGUEST_TRAP_ENTRY) { 290 if (!lg->dma_is_pending && lg->regs->trapnum == LGUEST_TRAP_ENTRY) {
189 do_hcall(lg, lg->regs); 291 do_hcall(lg, lg->regs);
292 /* The hypercall is done. */
190 clear_hcall(lg); 293 clear_hcall(lg);
191 } 294 }
192} 295}
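
For completeness, the other end of the wire: a sketch of how a Guest issues the synchronous flavour, following the register convention do_hcall() decodes (%eax selects the call; %edx, %ebx and %ecx carry arguments; any result comes back in %eax). The function name is invented; LGUEST_TRAP_ENTRY is real:

/* Sketch of a Guest-side synchronous hypercall. */
static unsigned long example_hcall(unsigned long call, unsigned long arg1,
				   unsigned long arg2, unsigned long arg3)
{
	asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
		     : "=a"(call)
		     : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
		     : "memory");
	return call;
}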
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index bee029bb2c7b..bd0091bf79ec 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -1,100 +1,160 @@
1/*P:800 Interrupts (traps) are complicated enough to earn their own file.
2 * There are three classes of interrupts:
3 *
4 * 1) Real hardware interrupts which occur while we're running the Guest,
5 * 2) Interrupts for virtual devices attached to the Guest, and
6 * 3) Traps and faults from the Guest.
7 *
8 * Real hardware interrupts must be delivered to the Host, not the Guest.
9 * Virtual interrupts must be delivered to the Guest, but we make them look
10 * just like real hardware would deliver them. Traps from the Guest can be set
11 * up to go directly back into the Guest, but sometimes the Host wants to see
12 * them first, so we also have a way of "reflecting" them into the Guest as if
13 * they had been delivered to it directly. :*/
1#include <linux/uaccess.h> 14#include <linux/uaccess.h>
2#include "lg.h" 15#include "lg.h"
3 16
 17/* The address of the interrupt handler is split into two parts: */
4static unsigned long idt_address(u32 lo, u32 hi) 18static unsigned long idt_address(u32 lo, u32 hi)
5{ 19{
6 return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); 20 return (lo & 0x0000FFFF) | (hi & 0xFFFF0000);
7} 21}
8 22
23/* The "type" of the interrupt handler is a 4 bit field: we only support a
24 * couple of types. */
9static int idt_type(u32 lo, u32 hi) 25static int idt_type(u32 lo, u32 hi)
10{ 26{
11 return (hi >> 8) & 0xF; 27 return (hi >> 8) & 0xF;
12} 28}
13 29
30/* An IDT entry can't be used unless the "present" bit is set. */
14static int idt_present(u32 lo, u32 hi) 31static int idt_present(u32 lo, u32 hi)
15{ 32{
16 return (hi & 0x8000); 33 return (hi & 0x8000);
17} 34}
18 35
36/* We need a helper to "push" a value onto the Guest's stack, since that's a
37 * big part of what delivering an interrupt does. */
19static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) 38static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
20{ 39{
 40 /* Stack grows downwards: move the pointer down, then write the value. */
21 *gstack -= 4; 41 *gstack -= 4;
22 lgwrite_u32(lg, *gstack, val); 42 lgwrite_u32(lg, *gstack, val);
23} 43}
24 44
45/*H:210 The set_guest_interrupt() routine actually delivers the interrupt or
46 * trap. The mechanics of delivering traps and interrupts to the Guest are the
47 * same, except some traps have an "error code" which gets pushed onto the
48 * stack as well: the caller tells us if this is one.
49 *
50 * "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
51 * interrupt or trap. It's split into two parts for traditional reasons: gcc
52 * on i386 used to be frightened by 64 bit numbers.
53 *
54 * We set up the stack just like the CPU does for a real interrupt, so it's
55 * identical for the Guest (and the standard "iret" instruction will undo
56 * it). */
25static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) 57static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
26{ 58{
27 unsigned long gstack; 59 unsigned long gstack;
28 u32 eflags, ss, irq_enable; 60 u32 eflags, ss, irq_enable;
29 61
30 /* If they want a ring change, we use new stack and push old ss/esp */ 62 /* There are two cases for interrupts: one where the Guest is already
63 * in the kernel, and a more complex one where the Guest is in
64 * userspace. We check the privilege level to find out. */
31 if ((lg->regs->ss&0x3) != GUEST_PL) { 65 if ((lg->regs->ss&0x3) != GUEST_PL) {
66 /* The Guest told us their kernel stack with the SET_STACK
 67 * hypercall: both the virtual address and the segment. */
32 gstack = guest_pa(lg, lg->esp1); 68 gstack = guest_pa(lg, lg->esp1);
33 ss = lg->ss1; 69 ss = lg->ss1;
70 /* We push the old stack segment and pointer onto the new
71 * stack: when the Guest does an "iret" back from the interrupt
72 * handler the CPU will notice they're dropping privilege
73 * levels and expect these here. */
34 push_guest_stack(lg, &gstack, lg->regs->ss); 74 push_guest_stack(lg, &gstack, lg->regs->ss);
35 push_guest_stack(lg, &gstack, lg->regs->esp); 75 push_guest_stack(lg, &gstack, lg->regs->esp);
36 } else { 76 } else {
77 /* We're staying on the same Guest (kernel) stack. */
37 gstack = guest_pa(lg, lg->regs->esp); 78 gstack = guest_pa(lg, lg->regs->esp);
38 ss = lg->regs->ss; 79 ss = lg->regs->ss;
39 } 80 }
40 81
41 /* We use IF bit in eflags to indicate whether irqs were enabled 82 /* Remember that we never let the Guest actually disable interrupts, so
42 (it's always 1, since irqs are enabled when guest is running). */ 83 * the "Interrupt Flag" bit is always set. We copy that bit from the
84 * Guest's "irq_enabled" field into the eflags word: the Guest copies
85 * it back in "lguest_iret". */
43 eflags = lg->regs->eflags; 86 eflags = lg->regs->eflags;
44 if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0 87 if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0
45 && !(irq_enable & X86_EFLAGS_IF)) 88 && !(irq_enable & X86_EFLAGS_IF))
46 eflags &= ~X86_EFLAGS_IF; 89 eflags &= ~X86_EFLAGS_IF;
47 90
91 /* An interrupt is expected to push three things on the stack: the old
92 * "eflags" word, the old code segment, and the old instruction
93 * pointer. */
48 push_guest_stack(lg, &gstack, eflags); 94 push_guest_stack(lg, &gstack, eflags);
49 push_guest_stack(lg, &gstack, lg->regs->cs); 95 push_guest_stack(lg, &gstack, lg->regs->cs);
50 push_guest_stack(lg, &gstack, lg->regs->eip); 96 push_guest_stack(lg, &gstack, lg->regs->eip);
51 97
98 /* For the six traps which supply an error code, we push that, too. */
52 if (has_err) 99 if (has_err)
53 push_guest_stack(lg, &gstack, lg->regs->errcode); 100 push_guest_stack(lg, &gstack, lg->regs->errcode);
54 101
55 /* Change the real stack so switcher returns to trap handler */ 102 /* Now we've pushed all the old state, we change the stack, the code
103 * segment and the address to execute. */
56 lg->regs->ss = ss; 104 lg->regs->ss = ss;
57 lg->regs->esp = gstack + lg->page_offset; 105 lg->regs->esp = gstack + lg->page_offset;
58 lg->regs->cs = (__KERNEL_CS|GUEST_PL); 106 lg->regs->cs = (__KERNEL_CS|GUEST_PL);
59 lg->regs->eip = idt_address(lo, hi); 107 lg->regs->eip = idt_address(lo, hi);
60 108
61 /* Disable interrupts for an interrupt gate. */ 109 /* There are two kinds of interrupt handlers: 0xE is an "interrupt
110 * gate" which expects interrupts to be disabled on entry. */
62 if (idt_type(lo, hi) == 0xE) 111 if (idt_type(lo, hi) == 0xE)
63 if (put_user(0, &lg->lguest_data->irq_enabled)) 112 if (put_user(0, &lg->lguest_data->irq_enabled))
64 kill_guest(lg, "Disabling interrupts"); 113 kill_guest(lg, "Disabling interrupts");
65} 114}
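
Putting those pushes together, a sketch of the frame the Guest's handler finds on its stack, lowest address first (the struct name is invented):

/* Sketch: this matches what real x86 hardware pushes for an interrupt
 * with a privilege change, so the Guest's "iret" undoes it exactly. */
struct guest_trap_frame {
	u32 errcode;	/* only for the six traps which supply one */
	u32 eip;	/* where the Guest was executing */
	u32 cs;		/* its old code segment */
	u32 eflags;	/* with IF copied from lguest_data.irq_enabled */
	u32 old_esp;	/* these last two appear only if we switched */
	u32 old_ss;	/* up from the Guest's userspace stack */
};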
66 115
116/*H:200
117 * Virtual Interrupts.
118 *
119 * maybe_do_interrupt() gets called before every entry to the Guest, to see if
120 * we should divert the Guest to running an interrupt handler. */
67void maybe_do_interrupt(struct lguest *lg) 121void maybe_do_interrupt(struct lguest *lg)
68{ 122{
69 unsigned int irq; 123 unsigned int irq;
70 DECLARE_BITMAP(blk, LGUEST_IRQS); 124 DECLARE_BITMAP(blk, LGUEST_IRQS);
71 struct desc_struct *idt; 125 struct desc_struct *idt;
72 126
127 /* If the Guest hasn't even initialized yet, we can do nothing. */
73 if (!lg->lguest_data) 128 if (!lg->lguest_data)
74 return; 129 return;
75 130
76 /* Mask out any interrupts they have blocked. */ 131 /* Take our "irqs_pending" array and remove any interrupts the Guest
132 * wants blocked: the result ends up in "blk". */
77 if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts, 133 if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts,
78 sizeof(blk))) 134 sizeof(blk)))
79 return; 135 return;
80 136
81 bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS); 137 bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS);
82 138
139 /* Find the first interrupt. */
83 irq = find_first_bit(blk, LGUEST_IRQS); 140 irq = find_first_bit(blk, LGUEST_IRQS);
141 /* None? Nothing to do */
84 if (irq >= LGUEST_IRQS) 142 if (irq >= LGUEST_IRQS)
85 return; 143 return;
86 144
145 /* They may be in the middle of an iret, where they asked us never to
146 * deliver interrupts. */
87 if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end) 147 if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end)
88 return; 148 return;
89 149
90 /* If they're halted, we re-enable interrupts. */ 150 /* If they're halted, interrupts restart them. */
91 if (lg->halted) { 151 if (lg->halted) {
92 /* Re-enable interrupts. */ 152 /* Re-enable interrupts. */
93 if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled)) 153 if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled))
94 kill_guest(lg, "Re-enabling interrupts"); 154 kill_guest(lg, "Re-enabling interrupts");
95 lg->halted = 0; 155 lg->halted = 0;
96 } else { 156 } else {
97 /* Maybe they have interrupts disabled? */ 157 /* Otherwise we check if they have interrupts disabled. */
98 u32 irq_enabled; 158 u32 irq_enabled;
99 if (get_user(irq_enabled, &lg->lguest_data->irq_enabled)) 159 if (get_user(irq_enabled, &lg->lguest_data->irq_enabled))
100 irq_enabled = 0; 160 irq_enabled = 0;
@@ -102,112 +162,211 @@ void maybe_do_interrupt(struct lguest *lg)
102 return; 162 return;
103 } 163 }
104 164
165 /* Look at the IDT entry the Guest gave us for this interrupt. The
166 * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
167 * over them. */
105 idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq]; 168 idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq];
169 /* If they don't have a handler (yet?), we just ignore it */
106 if (idt_present(idt->a, idt->b)) { 170 if (idt_present(idt->a, idt->b)) {
171 /* OK, mark it no longer pending and deliver it. */
107 clear_bit(irq, lg->irqs_pending); 172 clear_bit(irq, lg->irqs_pending);
173 /* set_guest_interrupt() takes the interrupt descriptor and a
174 * flag to say whether this interrupt pushes an error code onto
175 * the stack as well: virtual interrupts never do. */
108 set_guest_interrupt(lg, idt->a, idt->b, 0); 176 set_guest_interrupt(lg, idt->a, idt->b, 0);
109 } 177 }
110} 178}
111 179
 180/*H:220 Now we've got the routines to deliver interrupts; delivering traps
 181 * like page faults is easy. The only trick is that Intel decided that some
182 * traps should have error codes: */
112static int has_err(unsigned int trap) 183static int has_err(unsigned int trap)
113{ 184{
114 return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); 185 return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17);
115} 186}
116 187
188/* deliver_trap() returns true if it could deliver the trap. */
117int deliver_trap(struct lguest *lg, unsigned int num) 189int deliver_trap(struct lguest *lg, unsigned int num)
118{ 190{
119 u32 lo = lg->idt[num].a, hi = lg->idt[num].b; 191 u32 lo = lg->idt[num].a, hi = lg->idt[num].b;
120 192
193 /* Early on the Guest hasn't set the IDT entries (or maybe it put a
194 * bogus one in): if we fail here, the Guest will be killed. */
121 if (!idt_present(lo, hi)) 195 if (!idt_present(lo, hi))
122 return 0; 196 return 0;
123 set_guest_interrupt(lg, lo, hi, has_err(num)); 197 set_guest_interrupt(lg, lo, hi, has_err(num));
124 return 1; 198 return 1;
125} 199}
126 200
201/*H:250 Here's the hard part: returning to the Host every time a trap happens
202 * and then calling deliver_trap() and re-entering the Guest is slow.
203 * Particularly because Guest userspace system calls are traps (trap 128).
204 *
205 * So we'd like to set up the IDT to tell the CPU to deliver traps directly
206 * into the Guest. This is possible, but the complexities cause the size of
207 * this file to double! However, 150 lines of code is worth writing for taking
208 * system calls down from 1750ns to 270ns. Plus, if lguest didn't do it, all
209 * the other hypervisors would tease it.
210 *
211 * This routine determines if a trap can be delivered directly. */
127static int direct_trap(const struct lguest *lg, 212static int direct_trap(const struct lguest *lg,
128 const struct desc_struct *trap, 213 const struct desc_struct *trap,
129 unsigned int num) 214 unsigned int num)
130{ 215{
131 /* Hardware interrupts don't go to guest (except syscall). */ 216 /* Hardware interrupts don't go to the Guest at all (except system
217 * call). */
132 if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR) 218 if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR)
133 return 0; 219 return 0;
134 220
135 /* We intercept page fault (demand shadow paging & cr2 saving) 221 /* The Host needs to see page faults (for shadow paging and to save the
136 protection fault (in/out emulation) and device not 222 * fault address), general protection faults (in/out emulation) and
137 available (TS handling), and hypercall */ 223 * device not available (TS handling), and of course, the hypercall
224 * trap. */
138 if (num == 14 || num == 13 || num == 7 || num == LGUEST_TRAP_ENTRY) 225 if (num == 14 || num == 13 || num == 7 || num == LGUEST_TRAP_ENTRY)
139 return 0; 226 return 0;
140 227
141 /* Interrupt gates (0xE) or not present (0x0) can't go direct. */ 228 /* Only trap gates (type 15) can go direct to the Guest. Interrupt
229 * gates (type 14) disable interrupts as they are entered, which we
230 * never let the Guest do. Not present entries (type 0x0) also can't
231 * go direct, of course 8) */
142 return idt_type(trap->a, trap->b) == 0xF; 232 return idt_type(trap->a, trap->b) == 0xF;
143} 233}
144 234/*:*/
235
236/*M:005 The Guest has the ability to turn its interrupt gates into trap gates,
 237 * if it is careful. The Host will let trap gates go directly to the
238 * Guest, but the Guest needs the interrupts atomically disabled for an
239 * interrupt gate. It can do this by pointing the trap gate at instructions
240 * within noirq_start and noirq_end, where it can safely disable interrupts. */
241
242/*M:006 The Guests do not use the sysenter (fast system call) instruction,
243 * because it's hardcoded to enter privilege level 0 and so can't go direct.
244 * It's about twice as fast as the older "int 0x80" system call, so it might
245 * still be worthwhile to handle it in the Switcher and lcall down to the
246 * Guest. The sysenter semantics are hairy tho: search for that keyword in
247 * entry.S :*/
248
249/*H:260 When we make traps go directly into the Guest, we need to make sure
250 * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the
251 * CPU trying to deliver the trap will fault while trying to push the interrupt
252 * words on the stack: this is called a double fault, and it forces us to kill
253 * the Guest.
254 *
 255 * Which is deeply unfair, because (literally!) it wasn't the Guest's fault. */
145void pin_stack_pages(struct lguest *lg) 256void pin_stack_pages(struct lguest *lg)
146{ 257{
147 unsigned int i; 258 unsigned int i;
148 259
260 /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or
261 * two pages of stack space. */
149 for (i = 0; i < lg->stack_pages; i++) 262 for (i = 0; i < lg->stack_pages; i++)
 263 /* The stack grows *downwards*, hence the subtraction */
150 pin_page(lg, lg->esp1 - i * PAGE_SIZE); 264 pin_page(lg, lg->esp1 - i * PAGE_SIZE);
151} 265}
152 266
267/* Direct traps also mean that we need to know whenever the Guest wants to use
268 * a different kernel stack, so we can change the IDT entries to use that
269 * stack. The IDT entries expect a virtual address, so unlike most addresses
270 * the Guest gives us, the "esp" (stack pointer) value here is virtual, not
271 * physical.
272 *
273 * In Linux each process has its own kernel stack, so this happens a lot: we
274 * change stacks on each context switch. */
153void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages) 275void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages)
154{ 276{
155 /* You cannot have a stack segment with priv level 0. */ 277 /* You are not allowed to have a stack segment with privilege level 0: bad
278 * Guest! */
156 if ((seg & 0x3) != GUEST_PL) 279 if ((seg & 0x3) != GUEST_PL)
157 kill_guest(lg, "bad stack segment %i", seg); 280 kill_guest(lg, "bad stack segment %i", seg);
281 /* We only expect one or two stack pages. */
158 if (pages > 2) 282 if (pages > 2)
159 kill_guest(lg, "bad stack pages %u", pages); 283 kill_guest(lg, "bad stack pages %u", pages);
284 /* Save where the stack is, and how many pages */
160 lg->ss1 = seg; 285 lg->ss1 = seg;
161 lg->esp1 = esp; 286 lg->esp1 = esp;
162 lg->stack_pages = pages; 287 lg->stack_pages = pages;
288 /* Make sure the new stack pages are mapped */
163 pin_stack_pages(lg); 289 pin_stack_pages(lg);
164} 290}
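
For flavour, the Guest's end of this: each task switch could tell the Host about the new kernel stack with the (real) LHCALL_SET_STACK hypercall, something like the sketch below, reusing the example_hcall() wrapper sketched earlier; the hook name is invented:

/* Sketch: __KERNEL_DS|0x1 makes a data segment at GUEST_PL (1), which
 * passes the privilege check in guest_set_stack() above. */
static void example_load_esp0(struct thread_struct *thread)
{
	example_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->esp0,
		      THREAD_SIZE/PAGE_SIZE);
}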
165 291
166/* Set up trap in IDT. */ 292/* All this reference to mapping stacks leads us neatly into the other complex
293 * part of the Host: page table handling. */
294
295/*H:235 This is the routine which actually checks the Guest's IDT entry and
296 * transfers it into our entry in "struct lguest": */
167static void set_trap(struct lguest *lg, struct desc_struct *trap, 297static void set_trap(struct lguest *lg, struct desc_struct *trap,
168 unsigned int num, u32 lo, u32 hi) 298 unsigned int num, u32 lo, u32 hi)
169{ 299{
170 u8 type = idt_type(lo, hi); 300 u8 type = idt_type(lo, hi);
171 301
302 /* We zero-out a not-present entry */
172 if (!idt_present(lo, hi)) { 303 if (!idt_present(lo, hi)) {
173 trap->a = trap->b = 0; 304 trap->a = trap->b = 0;
174 return; 305 return;
175 } 306 }
176 307
308 /* We only support interrupt and trap gates. */
177 if (type != 0xE && type != 0xF) 309 if (type != 0xE && type != 0xF)
178 kill_guest(lg, "bad IDT type %i", type); 310 kill_guest(lg, "bad IDT type %i", type);
179 311
312 /* We only copy the handler address, present bit, privilege level and
313 * type. The privilege level controls where the trap can be triggered
314 * manually with an "int" instruction. This is usually GUEST_PL,
315 * except for system calls which userspace can use. */
180 trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); 316 trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF);
181 trap->b = (hi&0xFFFFEF00); 317 trap->b = (hi&0xFFFFEF00);
182} 318}
183 319
320/*H:230 While we're here, dealing with delivering traps and interrupts to the
321 * Guest, we might as well complete the picture: how the Guest tells us where
322 * it wants them to go. This would be simple, except making traps fast
323 * requires some tricks.
324 *
325 * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the
326 * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */
184void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) 327void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi)
185{ 328{
186 /* Guest never handles: NMI, doublefault, hypercall, spurious irq. */ 329 /* Guest never handles: NMI, doublefault, spurious interrupt or
 330 * hypercall. We ignore any attempt to set handlers for these. */
187 if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) 331 if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY)
188 return; 332 return;
189 333
334 /* Mark the IDT as changed: next time the Guest runs we'll know we have
335 * to copy this again. */
190 lg->changed |= CHANGED_IDT; 336 lg->changed |= CHANGED_IDT;
337
338 /* The IDT which we keep in "struct lguest" only contains 32 entries
339 * for the traps and LGUEST_IRQS (32) entries for interrupts. We
340 * ignore attempts to set handlers for higher interrupt numbers, except
341 * for the system call "interrupt" at 128: we have a special IDT entry
342 * for that. */
191 if (num < ARRAY_SIZE(lg->idt)) 343 if (num < ARRAY_SIZE(lg->idt))
192 set_trap(lg, &lg->idt[num], num, lo, hi); 344 set_trap(lg, &lg->idt[num], num, lo, hi);
193 else if (num == SYSCALL_VECTOR) 345 else if (num == SYSCALL_VECTOR)
194 set_trap(lg, &lg->syscall_idt, num, lo, hi); 346 set_trap(lg, &lg->syscall_idt, num, lo, hi);
195} 347}
196 348
349/* The default entry for each interrupt points into the Switcher routines which
350 * simply return to the Host. The run_guest() loop will then call
351 * deliver_trap() to bounce it back into the Guest. */
197static void default_idt_entry(struct desc_struct *idt, 352static void default_idt_entry(struct desc_struct *idt,
198 int trap, 353 int trap,
199 const unsigned long handler) 354 const unsigned long handler)
200{ 355{
356 /* A present interrupt gate. */
201 u32 flags = 0x8e00; 357 u32 flags = 0x8e00;
202 358
203 /* They can't "int" into any of them except hypercall. */ 359 /* Set the privilege level on the entry for the hypercall: this allows
360 * the Guest to use the "int" instruction to trigger it. */
204 if (trap == LGUEST_TRAP_ENTRY) 361 if (trap == LGUEST_TRAP_ENTRY)
205 flags |= (GUEST_PL << 13); 362 flags |= (GUEST_PL << 13);
206 363
364 /* Now pack it into the IDT entry in its weird format. */
207 idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF); 365 idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF);
208 idt->b = (handler&0xFFFF0000) | flags; 366 idt->b = (handler&0xFFFF0000) | flags;
209} 367}
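
The packing format is fiddly enough to deserve a round-trip check. This sketch compiles and runs in plain userspace; all of the values are made up:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t handler = 0xc0101234;	/* made-up handler address */
	uint32_t sel = 0x60;		/* made-up code segment selector */
	uint32_t flags = 0x8e00;	/* present (bit 15), type 0xE */

	/* Pack, the way default_idt_entry() does: */
	uint32_t a = (sel << 16) | (handler & 0x0000FFFF);
	uint32_t b = (handler & 0xFFFF0000) | flags;

	/* Unpack with the logic of idt_address()/idt_type()/idt_present(): */
	printf("address %#x type %#x present %d\n",
	       (a & 0x0000FFFF) | (b & 0xFFFF0000),
	       (b >> 8) & 0xF, !!(b & 0x8000));
	return 0;	/* prints: address 0xc0101234 type 0xe present 1 */
}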
210 368
369/* When the Guest first starts, we put default entries into the IDT. */
211void setup_default_idt_entries(struct lguest_ro_state *state, 370void setup_default_idt_entries(struct lguest_ro_state *state,
212 const unsigned long *def) 371 const unsigned long *def)
213{ 372{
@@ -217,19 +376,25 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
217 default_idt_entry(&state->guest_idt[i], i, def[i]); 376 default_idt_entry(&state->guest_idt[i], i, def[i]);
218} 377}
219 378
 379/*H:240 We don't use the IDT entries in the "struct lguest" directly; instead
380 * we copy them into the IDT which we've set up for Guests on this CPU, just
381 * before we run the Guest. This routine does that copy. */
220void copy_traps(const struct lguest *lg, struct desc_struct *idt, 382void copy_traps(const struct lguest *lg, struct desc_struct *idt,
221 const unsigned long *def) 383 const unsigned long *def)
222{ 384{
223 unsigned int i; 385 unsigned int i;
224 386
225 /* All hardware interrupts are same whatever the guest: only the 387 /* We can simply copy the direct traps, otherwise we use the default
226 * traps might be different. */ 388 * ones in the Switcher: they will return to the Host. */
227 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) { 389 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) {
228 if (direct_trap(lg, &lg->idt[i], i)) 390 if (direct_trap(lg, &lg->idt[i], i))
229 idt[i] = lg->idt[i]; 391 idt[i] = lg->idt[i];
230 else 392 else
231 default_idt_entry(&idt[i], i, def[i]); 393 default_idt_entry(&idt[i], i, def[i]);
232 } 394 }
395
396 /* Don't forget the system call trap! The IDT entries for other
 397 * interrupts never change, so no need to copy them. */
233 i = SYSCALL_VECTOR; 398 i = SYSCALL_VECTOR;
234 if (direct_trap(lg, &lg->syscall_idt, i)) 399 if (direct_trap(lg, &lg->syscall_idt, i))
235 idt[i] = lg->syscall_idt; 400 idt[i] = lg->syscall_idt;
diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c
index c8eb79266991..ea68613b43f6 100644
--- a/drivers/lguest/io.c
+++ b/drivers/lguest/io.c
@@ -1,5 +1,9 @@
1/* Simple I/O model for guests, based on shared memory. 1/*P:300 The I/O mechanism in lguest is simple yet flexible, allowing the Guest
2 * Copyright (C) 2006 Rusty Russell IBM Corporation 2 * to talk to the Launcher or directly to another Guest. It uses familiar
3 * concepts of DMA and interrupts, plus some neat code stolen from
4 * futexes... :*/
5
6/* Copyright (C) 2006 Rusty Russell IBM Corporation
3 * 7 *
4 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 9 * it under the terms of the GNU General Public License as published by
@@ -23,8 +27,36 @@
23#include <linux/uaccess.h> 27#include <linux/uaccess.h>
24#include "lg.h" 28#include "lg.h"
25 29
30/*L:300
31 * I/O
32 *
33 * Getting data in and out of the Guest is quite an art. There are numerous
34 * ways to do it, and they all suck differently. We try to keep things fairly
35 * close to "real" hardware so our Guest's drivers don't look like an alien
36 * visitation in the middle of the Linux code, and yet make sure that Guests
37 * can talk directly to other Guests, not just the Launcher.
38 *
39 * To do this, the Guest gives us a key when it binds or sends DMA buffers.
40 * The key corresponds to a "physical" address inside the Guest (ie. a virtual
41 * address inside the Launcher process). We don't, however, use this key
42 * directly.
43 *
44 * We want Guests which share memory to be able to DMA to each other: two
45 * Launchers can mmap the same file, then the Guests can communicate.
46 * Fortunately, the futex code provides us with a way to get a "union
47 * futex_key" corresponding to the memory lying at a virtual address: if the
48 * two processes share memory, the "union futex_key" for that memory will match
49 * even if the memory is mapped at different addresses in each. So we always
50 * convert the keys to "union futex_key"s to compare them.
51 *
52 * Before we dive into this though, we need to look at another set of helper
53 * routines used throughout the Host kernel code to access Guest memory.
54 :*/
26static struct list_head dma_hash[61]; 55static struct list_head dma_hash[61];
27 56
57/* An unfortunate side effect of the Linux double-linked list implementation is
58 * that there's no good way to statically initialize an array of linked
59 * lists. */
28void lguest_io_init(void) 60void lguest_io_init(void)
29{ 61{
30 unsigned int i; 62 unsigned int i;
@@ -56,6 +88,19 @@ kill:
56 return 0; 88 return 0;
57} 89}
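[As an aside, the static-initialization gripe in the comment above lguest_io_init() is easy to see in miniature. Here is a userspace sketch (not part of the patch; the names are illustrative) of why an array of circular list heads needs a run-time loop:]

    #include <stdio.h>

    /* A cut-down list_head: an empty list's head points at itself. */
    struct list_head { struct list_head *next, *prev; };

    #define NR_BUCKETS 61                   /* same size as dma_hash[] */
    static struct list_head buckets[NR_BUCKETS];

    static void init_buckets(void)
    {
            unsigned int i;

            /* Each element must point at itself; doing that statically
             * would mean spelling out all 61 entries by hand. */
            for (i = 0; i < NR_BUCKETS; i++)
                    buckets[i].next = buckets[i].prev = &buckets[i];
    }

    int main(void)
    {
            init_buckets();
            printf("bucket 0 empty? %s\n",
                   buckets[0].next == &buckets[0] ? "yes" : "no");
            return 0;
    }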
58 90
91/*L:330 This is our hash function, using the wonderful Jenkins hash.
92 *
93 * The futex key is a union with three parts: an unsigned long word, a pointer,
94 * and an int "offset". We could use jhash_2words() which takes three u32s.
95 * (Ok, the hash functions are great: the naming sucks though).
96 *
97 * It's nice to be portable to 64-bit platforms, so we use the more generic
98 * jhash2(), which takes an array of u32, the number of u32s, and an initial
99 * u32 to roll in. This is uglier, but breaks down to almost the same code on
100 * 32-bit platforms like this one.
101 *
102 * We want a position in the array, so we modulo ARRAY_SIZE(dma_hash) (ie. 61).
103 */
59static unsigned int hash(const union futex_key *key) 104static unsigned int hash(const union futex_key *key)
60{ 105{
61 return jhash2((u32*)&key->both.word, 106 return jhash2((u32*)&key->both.word,
@@ -64,6 +109,9 @@ static unsigned int hash(const union futex_key *key)
64 % ARRAY_SIZE(dma_hash); 109 % ARRAY_SIZE(dma_hash);
65} 110}
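[To make the bucket selection concrete, a small userspace sketch (not from the patch): the mixing function below is a stand-in, not the Jenkins jhash2() the kernel really uses, but the shape is the same: hash the three words of the key, then reduce modulo 61.]

    #include <stdio.h>

    #define NR_BUCKETS 61           /* matches ARRAY_SIZE(dma_hash) */

    /* Stand-in 32-bit mixer; the real code uses jhash2(). */
    static unsigned int mix32(unsigned int x)
    {
            x ^= x >> 16;
            x *= 0x45d9f3b;
            x ^= x >> 16;
            return x;
    }

    /* Hash the three u32s of a futex-key-like triple into a bucket. */
    static unsigned int bucket(unsigned int word, unsigned int ptr,
                               unsigned int offset)
    {
            return mix32(word ^ mix32(ptr ^ mix32(offset))) % NR_BUCKETS;
    }

    int main(void)
    {
            printf("bucket = %u\n", bucket(0x1000, 0x2000, 4));
            return 0;
    }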
66 111
112/* This is a convenience routine to compare two keys. It's a much bemoaned C
113 * weakness that it doesn't allow '==' on structures or unions, so we have to
114 * open-code it like this. */
67static inline int key_eq(const union futex_key *a, const union futex_key *b) 115static inline int key_eq(const union futex_key *a, const union futex_key *b)
68{ 116{
69 return (a->both.word == b->both.word 117 return (a->both.word == b->both.word
@@ -71,22 +119,36 @@ static inline int key_eq(const union futex_key *a, const union futex_key *b)
71 && a->both.offset == b->both.offset); 119 && a->both.offset == b->both.offset);
72} 120}
73 121
74/* Must hold read lock on dmainfo owner's current->mm->mmap_sem */ 122/*L:360 OK, when we need to actually free up a Guest's DMA array we do several
123 * things, so we have a convenient function to do it.
124 *
125 * The caller must hold a read lock on dmainfo owner's current->mm->mmap_sem
126 * for the drop_futex_key_refs(). */
75static void unlink_dma(struct lguest_dma_info *dmainfo) 127static void unlink_dma(struct lguest_dma_info *dmainfo)
76{ 128{
129 /* You locked this too, right? */
77 BUG_ON(!mutex_is_locked(&lguest_lock)); 130 BUG_ON(!mutex_is_locked(&lguest_lock));
131 /* This is how we know that the entry is free. */
78 dmainfo->interrupt = 0; 132 dmainfo->interrupt = 0;
133 /* Remove it from the hash table. */
79 list_del(&dmainfo->list); 134 list_del(&dmainfo->list);
135 /* Drop the references we were holding (to the inode or mm). */
80 drop_futex_key_refs(&dmainfo->key); 136 drop_futex_key_refs(&dmainfo->key);
81} 137}
82 138
139/*L:350 This is the routine which we call when the Guest asks to unregister a
140 * DMA array attached to a given key. Returns true if the array was found. */
83static int unbind_dma(struct lguest *lg, 141static int unbind_dma(struct lguest *lg,
84 const union futex_key *key, 142 const union futex_key *key,
85 unsigned long dmas) 143 unsigned long dmas)
86{ 144{
87 int i, ret = 0; 145 int i, ret = 0;
88 146
147 /* We don't bother with the hash table, just look through all this
148 * Guest's DMA arrays. */
89 for (i = 0; i < LGUEST_MAX_DMA; i++) { 149 for (i = 0; i < LGUEST_MAX_DMA; i++) {
150 /* In theory it could have more than one array on the same key,
151 * or one array on multiple keys, so we check both. */
90 if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) { 152 if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) {
91 unlink_dma(&lg->dma[i]); 153 unlink_dma(&lg->dma[i]);
92 ret = 1; 154 ret = 1;
@@ -96,51 +158,91 @@ static int unbind_dma(struct lguest *lg,
96 return ret; 158 return ret;
97} 159}
98 160
161/*L:340 BIND_DMA: this is the hypercall which sets up an array of "struct
162 * lguest_dma" for receiving I/O.
163 *
164 * The Guest wants to bind an array of "struct lguest_dma"s to a particular key
165 * to receive input. This only happens when the Guest is setting up a new
166 * device, so it doesn't have to be very fast.
167 *
168 * It returns 1 on a successful registration (it can fail if we hit the limit
169 * of registrations for this Guest).
170 */
99int bind_dma(struct lguest *lg, 171int bind_dma(struct lguest *lg,
100 unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt) 172 unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt)
101{ 173{
102 unsigned int i; 174 unsigned int i;
103 int ret = 0; 175 int ret = 0;
104 union futex_key key; 176 union futex_key key;
177 /* Futex code needs the mmap_sem. */
105 struct rw_semaphore *fshared = &current->mm->mmap_sem; 178 struct rw_semaphore *fshared = &current->mm->mmap_sem;
106 179
180 /* Invalid interrupt? (We could kill the Guest here). */
107 if (interrupt >= LGUEST_IRQS) 181 if (interrupt >= LGUEST_IRQS)
108 return 0; 182 return 0;
109 183
184 /* We need to grab the Big Lguest Lock, because other Guests may be
185 * trying to look through this Guest's DMAs to send something while
186 * we're doing this. */
110 mutex_lock(&lguest_lock); 187 mutex_lock(&lguest_lock);
111 down_read(fshared); 188 down_read(fshared);
112 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { 189 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
113 kill_guest(lg, "bad dma key %#lx", ukey); 190 kill_guest(lg, "bad dma key %#lx", ukey);
114 goto unlock; 191 goto unlock;
115 } 192 }
193
194 /* We want to keep this key valid once we drop mmap_sem, so we have to
195 * hold a reference. */
116 get_futex_key_refs(&key); 196 get_futex_key_refs(&key);
117 197
198 /* If the Guest specified an interrupt of 0, that means they want to
199 * unregister this array of "struct lguest_dma"s. */
118 if (interrupt == 0) 200 if (interrupt == 0)
119 ret = unbind_dma(lg, &key, dmas); 201 ret = unbind_dma(lg, &key, dmas);
120 else { 202 else {
203 /* Look through this Guest's dma array for an unused entry. */
121 for (i = 0; i < LGUEST_MAX_DMA; i++) { 204 for (i = 0; i < LGUEST_MAX_DMA; i++) {
205 /* If the interrupt is non-zero, the entry is already
206 * used. */
122 if (lg->dma[i].interrupt) 207 if (lg->dma[i].interrupt)
123 continue; 208 continue;
124 209
210 /* OK, a free one! Fill in our details. */
125 lg->dma[i].dmas = dmas; 211 lg->dma[i].dmas = dmas;
126 lg->dma[i].num_dmas = numdmas; 212 lg->dma[i].num_dmas = numdmas;
127 lg->dma[i].next_dma = 0; 213 lg->dma[i].next_dma = 0;
128 lg->dma[i].key = key; 214 lg->dma[i].key = key;
129 lg->dma[i].guestid = lg->guestid; 215 lg->dma[i].guestid = lg->guestid;
130 lg->dma[i].interrupt = interrupt; 216 lg->dma[i].interrupt = interrupt;
217
218 /* Now we add it to the hash table: the position
219 * depends on the futex key that we got. */
131 list_add(&lg->dma[i].list, &dma_hash[hash(&key)]); 220 list_add(&lg->dma[i].list, &dma_hash[hash(&key)]);
221 /* Success! */
132 ret = 1; 222 ret = 1;
133 goto unlock; 223 goto unlock;
134 } 224 }
135 } 225 }
226 /* If we didn't find a slot to put the key in, drop the reference
227 * again. */
136 drop_futex_key_refs(&key); 228 drop_futex_key_refs(&key);
137unlock: 229unlock:
230 /* Unlock and out. */
138 up_read(fshared); 231 up_read(fshared);
139 mutex_unlock(&lguest_lock); 232 mutex_unlock(&lguest_lock);
140 return ret; 233 return ret;
141} 234}
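[The slot-allocation convention above (interrupt 0 marks a free entry, and binding with interrupt 0 means unbind) can be modelled in a few lines of userspace C. This is a sketch of the bookkeeping only: keys here are plain integers, not futex keys, and the sizes are illustrative.]

    #include <stdio.h>

    #define MAX_DMA 4

    struct dma_info { unsigned long key, dmas; unsigned char interrupt; };
    static struct dma_info dma[MAX_DMA];    /* interrupt == 0: free slot */

    static int bind(unsigned long key, unsigned long dmas, unsigned char irq)
    {
            int i;

            for (i = 0; i < MAX_DMA; i++) {
                    if (dma[i].interrupt)
                            continue;               /* in use */
                    dma[i].key = key;
                    dma[i].dmas = dmas;
                    dma[i].interrupt = irq;
                    return 1;                       /* success */
            }
            return 0;                               /* table full */
    }

    static int unbind(unsigned long key, unsigned long dmas)
    {
            int i;

            for (i = 0; i < MAX_DMA; i++) {
                    if (dma[i].interrupt && dma[i].key == key
                        && dma[i].dmas == dmas) {
                            dma[i].interrupt = 0;   /* mark free again */
                            return 1;
                    }
            }
            return 0;
    }

    int main(void)
    {
            printf("bind: %d\n", bind(0x1000, 0xbeef, 9));
            printf("unbind: %d\n", unbind(0x1000, 0xbeef));
            return 0;
    }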
142 235
143/* lgread from another guest */ 236/*L:385 Note that our routines to access a different Guest's memory are called
237 * lgread_other() and lgwrite_other(): these names emphasize that they are only
238 * used when the Guest is *not* the current Guest.
239 *
240 * The interface for copying from another process's memory is called
241 * access_process_vm(), with a final argument of 0 for a read, and 1 for a
242 * write.
243 *
244 * We need lgread_other() to read the destination Guest's "struct lguest_dma"
245 * array. */
144static int lgread_other(struct lguest *lg, 246static int lgread_other(struct lguest *lg,
145 void *buf, u32 addr, unsigned bytes) 247 void *buf, u32 addr, unsigned bytes)
146{ 248{
@@ -153,7 +255,8 @@ static int lgread_other(struct lguest *lg,
153 return 1; 255 return 1;
154} 256}
155 257
156/* lgwrite to another guest */ 258/* "lgwrite()" to another Guest: used to update the destination "used_len" once
259 * we've transferred data into the buffer. */
157static int lgwrite_other(struct lguest *lg, u32 addr, 260static int lgwrite_other(struct lguest *lg, u32 addr,
158 const void *buf, unsigned bytes) 261 const void *buf, unsigned bytes)
159{ 262{
@@ -166,6 +269,15 @@ static int lgwrite_other(struct lguest *lg, u32 addr,
166 return 1; 269 return 1;
167} 270}
168 271
272/*L:400 This is the generic engine which copies a source "struct
273 * lguest_dma" from this Guest into another Guest's "struct lguest_dma". The
274 * destination Guest's pages have already been mapped, as contained in the
275 * pages array.
276 *
277 * If you're wondering if there's a nice "copy from one process to another"
278 * routine, so was I. But Linux isn't really set up to copy between two
279 * unrelated processes, so we have to write it ourselves.
280 */
169static u32 copy_data(struct lguest *srclg, 281static u32 copy_data(struct lguest *srclg,
170 const struct lguest_dma *src, 282 const struct lguest_dma *src,
171 const struct lguest_dma *dst, 283 const struct lguest_dma *dst,
@@ -174,33 +286,59 @@ static u32 copy_data(struct lguest *srclg,
174 unsigned int totlen, si, di, srcoff, dstoff; 286 unsigned int totlen, si, di, srcoff, dstoff;
175 void *maddr = NULL; 287 void *maddr = NULL;
176 288
289 /* We return the total length transferred. */
177 totlen = 0; 290 totlen = 0;
291
292 /* We keep indexes into the source and destination "struct lguest_dma",
293 * and an offset within each region. */
178 si = di = 0; 294 si = di = 0;
179 srcoff = dstoff = 0; 295 srcoff = dstoff = 0;
296
297 /* We loop until the source or destination is exhausted. */
180 while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si] 298 while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si]
181 && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) { 299 && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) {
300 /* We can only transfer the rest of the src buffer, or as much
301 * as will fit into the destination buffer. */
182 u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff); 302 u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff);
183 303
304 /* For systems using "highmem" we need to use kmap() to access
305 * the page we want. We often use the same page over and over,
306 * so rather than kmap() it on every loop, we set the maddr
307 * pointer to NULL when we need to move to the next
308 * destination page. */
184 if (!maddr) 309 if (!maddr)
185 maddr = kmap(pages[di]); 310 maddr = kmap(pages[di]);
186 311
187 /* FIXME: This is not completely portable, since 312 /* Copy directly from (this Guest's) source address to the
188 archs do different things for copy_to_user_page. */ 313 * destination Guest's kmap()ed buffer. Note that maddr points
314 * to the start of the page: we need to add the offset of the
315 * destination address and offset within the buffer. */
316
317 /* FIXME: This is not completely portable. I looked at
318 * copy_to_user_page(), and some archs seem to need special
319 * flushes. x86 is fine. */
189 if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, 320 if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE,
190 (void __user *)src->addr[si], len) != 0) { 321 (void __user *)src->addr[si], len) != 0) {
322 /* If a copy failed, it's the source's fault. */
191 kill_guest(srclg, "bad address in sending DMA"); 323 kill_guest(srclg, "bad address in sending DMA");
192 totlen = 0; 324 totlen = 0;
193 break; 325 break;
194 } 326 }
195 327
328 /* Increment the total and src & dst offsets */
196 totlen += len; 329 totlen += len;
197 srcoff += len; 330 srcoff += len;
198 dstoff += len; 331 dstoff += len;
332
333 /* Presumably we reached the end of the src or dest buffers: */
199 if (srcoff == src->len[si]) { 334 if (srcoff == src->len[si]) {
335 /* Move to the next buffer at offset 0 */
200 si++; 336 si++;
201 srcoff = 0; 337 srcoff = 0;
202 } 338 }
203 if (dstoff == dst->len[di]) { 339 if (dstoff == dst->len[di]) {
340 /* We need to unmap that destination page and reset
341 * maddr ready for the next one. */
204 kunmap(pages[di]); 342 kunmap(pages[di]);
205 maddr = NULL; 343 maddr = NULL;
206 di++; 344 di++;
@@ -208,13 +346,15 @@ static u32 copy_data(struct lguest *srclg,
208 } 346 }
209 } 347 }
210 348
349 /* If we still had a page mapped at the end, unmap now. */
211 if (maddr) 350 if (maddr)
212 kunmap(pages[di]); 351 kunmap(pages[di]);
213 352
214 return totlen; 353 return totlen;
215} 354}
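[The si/di/srcoff/dstoff bookkeeping is easier to see without the kmap() and copy_from_user() plumbing. Here is a self-contained userspace sketch (not from the patch; the struct and names are illustrative) of the same scatter-gather walk:]

    #include <stdio.h>
    #include <string.h>

    #define MAX_SECTIONS 4

    struct section { char *ptr; unsigned int len; };

    /* Walk two section lists with the same bookkeeping as copy_data(),
     * stopping when either side runs out. */
    static unsigned int sg_copy(const struct section *src,
                                const struct section *dst)
    {
            unsigned int si = 0, di = 0, srcoff = 0, dstoff = 0, totlen = 0;

            while (si < MAX_SECTIONS && src[si].len
                   && di < MAX_SECTIONS && dst[di].len) {
                    /* Rest of this src section, or room left in dst. */
                    unsigned int len = src[si].len - srcoff;

                    if (dst[di].len - dstoff < len)
                            len = dst[di].len - dstoff;
                    memcpy(dst[di].ptr + dstoff, src[si].ptr + srcoff, len);
                    totlen += len;
                    srcoff += len;
                    dstoff += len;
                    if (srcoff == src[si].len) { si++; srcoff = 0; }
                    if (dstoff == dst[di].len) { di++; dstoff = 0; }
            }
            return totlen;
    }

    int main(void)
    {
            char a[] = "hello ", b[] = "world";
            char out1[8] = { 0 }, out2[8] = { 0 };
            struct section src[MAX_SECTIONS] = { { a, 6 }, { b, 5 } };
            struct section dst[MAX_SECTIONS] = { { out1, 7 }, { out2, 7 } };

            printf("copied %u bytes: \"%s%s\"\n", sg_copy(src, dst),
                   out1, out2);
            return 0;
    }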
216 355
217/* Src is us, ie. current. */ 356/*L:390 This is how we transfer a "struct lguest_dma" from the source Guest
357 * (the current Guest which called SEND_DMA) to another Guest. */
218static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, 358static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src,
219 struct lguest *dstlg, const struct lguest_dma *dst) 359 struct lguest *dstlg, const struct lguest_dma *dst)
220{ 360{
@@ -222,23 +362,31 @@ static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src,
222 u32 ret; 362 u32 ret;
223 struct page *pages[LGUEST_MAX_DMA_SECTIONS]; 363 struct page *pages[LGUEST_MAX_DMA_SECTIONS];
224 364
365 /* We check that both source and destination "struct lguest_dma"s are
366 * within the bounds of the source and destination Guests. */
225 if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src)) 367 if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src))
226 return 0; 368 return 0;
227 369
228 /* First get the destination pages */ 370 /* We need to map the pages which correspond to each part of the
371 * destination buffer. */
229 for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { 372 for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
230 if (dst->len[i] == 0) 373 if (dst->len[i] == 0)
231 break; 374 break;
375 /* get_user_pages() is a complicated function, especially since
376 * we only want a single page. But it works, and returns the
377 * number of pages. Note that we're holding the destination's
378 * mmap_sem, as get_user_pages() requires. */
232 if (get_user_pages(dstlg->tsk, dstlg->mm, 379 if (get_user_pages(dstlg->tsk, dstlg->mm,
233 dst->addr[i], 1, 1, 1, pages+i, NULL) 380 dst->addr[i], 1, 1, 1, pages+i, NULL)
234 != 1) { 381 != 1) {
382 /* This means the destination gave us a bogus buffer. */
235 kill_guest(dstlg, "Error mapping DMA pages"); 383 kill_guest(dstlg, "Error mapping DMA pages");
236 ret = 0; 384 ret = 0;
237 goto drop_pages; 385 goto drop_pages;
238 } 386 }
239 } 387 }
240 388
241 /* Now copy until we run out of src or dst. */ 389 /* Now copy the data until we run out of src or dst. */
242 ret = copy_data(srclg, src, dst, pages); 390 ret = copy_data(srclg, src, dst, pages);
243 391
244drop_pages: 392drop_pages:
@@ -247,6 +395,11 @@ drop_pages:
247 return ret; 395 return ret;
248} 396}
249 397
398/*L:380 Transferring data from one Guest to another is not as simple as I'd
399 * like. Once we've found the "struct lguest_dma_info" bound to the same
400 * address as the send, we need to copy into it.
401 *
402 * This function returns true if the destination array was empty. */
250static int dma_transfer(struct lguest *srclg, 403static int dma_transfer(struct lguest *srclg,
251 unsigned long udma, 404 unsigned long udma,
252 struct lguest_dma_info *dst) 405 struct lguest_dma_info *dst)
@@ -255,15 +408,23 @@ static int dma_transfer(struct lguest *srclg,
255 struct lguest *dstlg; 408 struct lguest *dstlg;
256 u32 i, dma = 0; 409 u32 i, dma = 0;
257 410
411 /* From the "struct lguest_dma_info" we found in the hash, grab the
412 * Guest. */
258 dstlg = &lguests[dst->guestid]; 413 dstlg = &lguests[dst->guestid];
259 /* Get our dma list. */ 414 /* Read in the source "struct lguest_dma" handed to SEND_DMA. */
260 lgread(srclg, &src_dma, udma, sizeof(src_dma)); 415 lgread(srclg, &src_dma, udma, sizeof(src_dma));
261 416
262 /* We can't deadlock against them dmaing to us, because this 417 /* We need the destination's mmap_sem, and we already hold the source's
263 * is all under the lguest_lock. */ 418 * mmap_sem for the futex key lookup. Normally this would suggest that
419 * we could deadlock if the destination Guest was trying to send to
420 * this source Guest at the same time, which is another reason that all
421 * I/O is done under the big lguest_lock. */
264 down_read(&dstlg->mm->mmap_sem); 422 down_read(&dstlg->mm->mmap_sem);
265 423
424 /* Look through the destination DMA array for an available buffer. */
266 for (i = 0; i < dst->num_dmas; i++) { 425 for (i = 0; i < dst->num_dmas; i++) {
426 /* We keep a "next_dma" pointer which often helps us avoid
427 * looking at lots of previously-filled entries. */
267 dma = (dst->next_dma + i) % dst->num_dmas; 428 dma = (dst->next_dma + i) % dst->num_dmas;
268 if (!lgread_other(dstlg, &dst_dma, 429 if (!lgread_other(dstlg, &dst_dma,
269 dst->dmas + dma * sizeof(struct lguest_dma), 430 dst->dmas + dma * sizeof(struct lguest_dma),
@@ -273,30 +434,46 @@ static int dma_transfer(struct lguest *srclg,
273 if (!dst_dma.used_len) 434 if (!dst_dma.used_len)
274 break; 435 break;
275 } 436 }
437
438 /* If we found a buffer, we do the actual data copy. */
276 if (i != dst->num_dmas) { 439 if (i != dst->num_dmas) {
277 unsigned long used_lenp; 440 unsigned long used_lenp;
278 unsigned int ret; 441 unsigned int ret;
279 442
280 ret = do_dma(srclg, &src_dma, dstlg, &dst_dma); 443 ret = do_dma(srclg, &src_dma, dstlg, &dst_dma);
281 /* Put used length in src. */ 444 /* Put used length in the source "struct lguest_dma"'s used_len
445 * field. It's a little tricky to figure out where that is,
446 * though. */
282 lgwrite_u32(srclg, 447 lgwrite_u32(srclg,
283 udma+offsetof(struct lguest_dma, used_len), ret); 448 udma+offsetof(struct lguest_dma, used_len), ret);
449 /* Transferring 0 bytes is OK if the source buffer was empty. */
284 if (ret == 0 && src_dma.len[0] != 0) 450 if (ret == 0 && src_dma.len[0] != 0)
285 goto fail; 451 goto fail;
286 452
287 /* Make sure destination sees contents before length. */ 453 /* The destination Guest might be running on a different CPU:
454 * we have to make sure that it will see the "used_len" field
455 * change to non-zero *after* it sees the data we copied into
456 * the buffer. Hence a write memory barrier. */
288 wmb(); 457 wmb();
458 /* Figuring out where the destination's used_len field for this
459 * "struct lguest_dma" in the array is also a little ugly. */
289 used_lenp = dst->dmas 460 used_lenp = dst->dmas
290 + dma * sizeof(struct lguest_dma) 461 + dma * sizeof(struct lguest_dma)
291 + offsetof(struct lguest_dma, used_len); 462 + offsetof(struct lguest_dma, used_len);
292 lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret)); 463 lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret));
464 /* Move the cursor for next time. */
293 dst->next_dma++; 465 dst->next_dma++;
294 } 466 }
295 up_read(&dstlg->mm->mmap_sem); 467 up_read(&dstlg->mm->mmap_sem);
296 468
297 /* Do this last so dst doesn't simply sleep on lock. */ 469 /* We trigger the destination interrupt, even if the destination was
470 * empty and we didn't transfer anything: this gives them a chance to
471 * wake up and refill. */
298 set_bit(dst->interrupt, dstlg->irqs_pending); 472 set_bit(dst->interrupt, dstlg->irqs_pending);
473 /* Wake up the destination process. */
299 wake_up_process(dstlg->tsk); 474 wake_up_process(dstlg->tsk);
475 /* If we passed the last "struct lguest_dma", the receiver had no
476 * buffers left. */
300 return i == dst->num_dmas; 477 return i == dst->num_dmas;
301 478
302fail: 479fail:
@@ -304,6 +481,8 @@ fail:
304 return 0; 481 return 0;
305} 482}
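[The "little ugly" pointer arithmetic above is a standard C idiom: the address of field f in element i of an array at base is base + i*sizeof(element) + offsetof(element, f). A standalone sketch; the struct layout below is a stand-in, not the real "struct lguest_dma":]

    #include <stdio.h>
    #include <stddef.h>

    /* Stand-in layout for illustration only. */
    struct demo_dma {
            unsigned long addr[8];
            unsigned short len[8];
            unsigned int used_len;
    };

    int main(void)
    {
            unsigned long base = 0x100000, i = 3;
            unsigned long used_lenp = base
                    + i * sizeof(struct demo_dma)
                    + offsetof(struct demo_dma, used_len);

            printf("entry %lu's used_len lives at %#lx\n", i, used_lenp);
            return 0;
    }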
306 483
484/*L:370 This is the counterpart to the BIND_DMA hypercall: the SEND_DMA
485 * hypercall. We find out who's listening, and send to them. */
307void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) 486void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma)
308{ 487{
309 union futex_key key; 488 union futex_key key;
@@ -313,31 +492,43 @@ void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma)
313again: 492again:
314 mutex_lock(&lguest_lock); 493 mutex_lock(&lguest_lock);
315 down_read(fshared); 494 down_read(fshared);
495 /* Get the futex key for the address the Guest gave us. */
316 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { 496 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
317 kill_guest(lg, "bad sending DMA key"); 497 kill_guest(lg, "bad sending DMA key");
318 goto unlock; 498 goto unlock;
319 } 499 }
320 /* Shared mapping? Look for other guests... */ 500 /* Since the key must be a multiple of 4, the futex key uses the lower
501 * bit of the "offset" field (which would always be 0) to indicate a
502 * mapping which is shared with other processes (ie. Guests). */
321 if (key.shared.offset & 1) { 503 if (key.shared.offset & 1) {
322 struct lguest_dma_info *i; 504 struct lguest_dma_info *i;
505 /* Look through the hash for other Guests. */
323 list_for_each_entry(i, &dma_hash[hash(&key)], list) { 506 list_for_each_entry(i, &dma_hash[hash(&key)], list) {
507 /* Don't send to ourselves. */
324 if (i->guestid == lg->guestid) 508 if (i->guestid == lg->guestid)
325 continue; 509 continue;
326 if (!key_eq(&key, &i->key)) 510 if (!key_eq(&key, &i->key))
327 continue; 511 continue;
328 512
513 /* If dma_transfer() tells us the destination has no
514 * available buffers, we increment "empty". */
329 empty += dma_transfer(lg, udma, i); 515 empty += dma_transfer(lg, udma, i);
330 break; 516 break;
331 } 517 }
518 /* If the destination is empty, we release our locks and
519 * give the destination Guest a brief chance to restock. */
332 if (empty == 1) { 520 if (empty == 1) {
333 /* Give any recipients one chance to restock. */ 521 /* Give any recipients one chance to restock. */
334 up_read(&current->mm->mmap_sem); 522 up_read(&current->mm->mmap_sem);
335 mutex_unlock(&lguest_lock); 523 mutex_unlock(&lguest_lock);
524 /* Next time, we won't try again. */
336 empty++; 525 empty++;
337 goto again; 526 goto again;
338 } 527 }
339 } else { 528 } else {
340 /* Private mapping: tell our userspace. */ 529 /* Private mapping: Guest is sending to its Launcher. We set
530 * the "dma_is_pending" flag so that the main loop will exit
531 * and the Launcher's read() from /dev/lguest will return. */
341 lg->dma_is_pending = 1; 532 lg->dma_is_pending = 1;
342 lg->pending_dma = udma; 533 lg->pending_dma = udma;
343 lg->pending_key = ukey; 534 lg->pending_key = ukey;
@@ -346,6 +537,7 @@ unlock:
346 up_read(fshared); 537 up_read(fshared);
347 mutex_unlock(&lguest_lock); 538 mutex_unlock(&lguest_lock);
348} 539}
540/*:*/
349 541
350void release_all_dma(struct lguest *lg) 542void release_all_dma(struct lguest *lg)
351{ 543{
@@ -361,7 +553,18 @@ void release_all_dma(struct lguest *lg)
361 up_read(&lg->mm->mmap_sem); 553 up_read(&lg->mm->mmap_sem);
362} 554}
363 555
364/* Userspace wants a dma buffer from this guest. */ 556/*M:007 We only return a single DMA buffer to the Launcher, but it would be
557 * more efficient to return a pointer to the entire array of DMA buffers, which
558 * it could cache, choosing one whenever it wants.
559 *
560 * Currently the Launcher uses a write to /dev/lguest, and the return value is
561 * the address of the DMA structure with the interrupt number placed in
562 * dma->used_len. If we wanted to return the entire array, we would need to
563 * the address, array size and interrupt number: this seems to require an
564 * ioctl(). :*/
565
566/*L:320 This routine looks for a DMA buffer registered by the Guest on the
567 * given key (using the BIND_DMA hypercall). */
365unsigned long get_dma_buffer(struct lguest *lg, 568unsigned long get_dma_buffer(struct lguest *lg,
366 unsigned long ukey, unsigned long *interrupt) 569 unsigned long ukey, unsigned long *interrupt)
367{ 570{
@@ -370,15 +573,29 @@ unsigned long get_dma_buffer(struct lguest *lg,
370 struct lguest_dma_info *i; 573 struct lguest_dma_info *i;
371 struct rw_semaphore *fshared = &current->mm->mmap_sem; 574 struct rw_semaphore *fshared = &current->mm->mmap_sem;
372 575
576 /* Take the Big Lguest Lock to stop other Guests sending this Guest DMA
577 * at the same time. */
373 mutex_lock(&lguest_lock); 578 mutex_lock(&lguest_lock);
579 /* To match between Guests sharing the same underlying memory we steal
580 * code from the futex infrastructure. This requires that we hold the
581 * "mmap_sem" for our process (the Launcher), and pass it to the futex
582 * code. */
374 down_read(fshared); 583 down_read(fshared);
584
585 /* This can fail if it's not a valid address, or if the address is not
586 * divisible by 4 (the futex code needs that; we don't, really). */
375 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { 587 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
376 kill_guest(lg, "bad registered DMA buffer"); 588 kill_guest(lg, "bad registered DMA buffer");
377 goto unlock; 589 goto unlock;
378 } 590 }
591 /* Search the hash table for matching entries (the Launcher can only
592 * send to its own Guest for the moment, so the entry must be for this
593 * Guest). */
379 list_for_each_entry(i, &dma_hash[hash(&key)], list) { 594 list_for_each_entry(i, &dma_hash[hash(&key)], list) {
380 if (key_eq(&key, &i->key) && i->guestid == lg->guestid) { 595 if (key_eq(&key, &i->key) && i->guestid == lg->guestid) {
381 unsigned int j; 596 unsigned int j;
597 /* Look through the registered DMA array for an
598 * available buffer. */
382 for (j = 0; j < i->num_dmas; j++) { 599 for (j = 0; j < i->num_dmas; j++) {
383 struct lguest_dma dma; 600 struct lguest_dma dma;
384 601
@@ -387,6 +604,8 @@ unsigned long get_dma_buffer(struct lguest *lg,
387 if (dma.used_len == 0) 604 if (dma.used_len == 0)
388 break; 605 break;
389 } 606 }
607 /* Store the interrupt the Guest wants when the buffer
608 * is used. */
390 *interrupt = i->interrupt; 609 *interrupt = i->interrupt;
391 break; 610 break;
392 } 611 }
@@ -396,4 +615,12 @@ unlock:
396 mutex_unlock(&lguest_lock); 615 mutex_unlock(&lguest_lock);
397 return ret; 616 return ret;
398} 617}
618/*:*/
399 619
620/*L:410 This really does complete the Launcher. Not only have we now finished
621 * the longest chapter in our journey, but this also means we are over halfway
622 * through!
623 *
624 * Enough prevaricating around the bush: it is time for us to dive into the
625 * core of the Host, in "make Host".
626 */
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 3e2ddfbc816e..269116eee85f 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -58,9 +58,18 @@ struct lguest_dma_info
58 u8 interrupt; /* 0 when not registered */ 58 u8 interrupt; /* 0 when not registered */
59}; 59};
60 60
61/* We have separate types for the guest's ptes & pgds and the shadow ptes & 61/*H:310 The page-table code owes a great debt of gratitude to Andi Kleen. He
62 * pgds. Since this host might use three-level pagetables and the guest and 62 * reviewed the original code which used "u32" for all page table entries, and
63 * shadow pagetables don't, we can't use the normal pte_t/pgd_t. */ 63 * insisted that it would be far clearer with explicit typing. I thought it
64 * was overkill, but he was right: it is much clearer than it was before.
65 *
66 * We have separate types for the Guest's ptes & pgds and the shadow ptes &
67 * pgds. There's already a Linux type for these (pte_t and pgd_t) but they
68 * change depending on kernel config options (PAE). */
69
70/* Each entry is identical: lower 12 bits of flags and upper 20 bits for the
71 * "page frame number" (0 == first physical page, etc). They are different
72 * types so the compiler will warn us if we mix them improperly. */
64typedef union { 73typedef union {
65 struct { unsigned flags:12, pfn:20; }; 74 struct { unsigned flags:12, pfn:20; };
66 struct { unsigned long val; } raw; 75 struct { unsigned long val; } raw;
@@ -77,8 +86,12 @@ typedef union {
77 struct { unsigned flags:12, pfn:20; }; 86 struct { unsigned flags:12, pfn:20; };
78 struct { unsigned long val; } raw; 87 struct { unsigned long val; } raw;
79} gpte_t; 88} gpte_t;
89
90/* We have two convenient macros to convert a "raw" value as handed to us by
91 * the Guest into the correct Guest PGD or PTE type. */
80#define mkgpte(_val) ((gpte_t){.raw.val = _val}) 92#define mkgpte(_val) ((gpte_t){.raw.val = _val})
81#define mkgpgd(_val) ((gpgd_t){.raw.val = _val}) 93#define mkgpgd(_val) ((gpgd_t){.raw.val = _val})
94/*:*/
82 95
83struct pgdir 96struct pgdir
84{ 97{
@@ -244,6 +257,30 @@ unsigned long get_dma_buffer(struct lguest *lg, unsigned long key,
244/* hypercalls.c: */ 257/* hypercalls.c: */
245void do_hypercalls(struct lguest *lg); 258void do_hypercalls(struct lguest *lg);
246 259
260/*L:035
261 * Let's step aside for the moment, to study one important routine that's used
262 * widely in the Host code.
263 *
264 * There are many cases where the Guest does something invalid, like pass crap
265 * to a hypercall. Since only the Guest kernel can make hypercalls, it's quite
266 * acceptable to simply terminate the Guest and give the Launcher a nicely
267 * formatted reason. It's also simpler for the Guest itself, which doesn't
268 * need to check most hypercalls for "success"; if you're still running, it
269 * succeeded.
270 *
271 * Once this is called, the Guest will never run again, so most Host code can
272 * call this then continue as if nothing had happened. This means many
273 * functions don't have to explicitly return an error code, which keeps the
274 * code simple.
275 *
276 * It also means that this can be called more than once: only the first one is
277 * remembered. The only trick is that we still need to kill the Guest even if
278 * we can't allocate memory to store the reason. Linux has a neat way of
279 * packing error codes into invalid pointers, so we use that here.
280 *
281 * Like any macro which uses an "if", it is safely wrapped in a run-once "do {
282 * } while(0)".
283 */
247#define kill_guest(lg, fmt...) \ 284#define kill_guest(lg, fmt...) \
248do { \ 285do { \
249 if (!(lg)->dead) { \ 286 if (!(lg)->dead) { \
@@ -252,6 +289,7 @@ do { \
252 (lg)->dead = ERR_PTR(-ENOMEM); \ 289 (lg)->dead = ERR_PTR(-ENOMEM); \
253 } \ 290 } \
254} while(0) 291} while(0)
292/* (End of aside) :*/
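[The "error codes in invalid pointers" trick deserves a tiny illustration. This userspace sketch mimics the <linux/err.h> helpers; the constants and names follow the kernel's, but this is a re-implementation for demonstration, not the kernel's own code:]

    #include <stdio.h>

    #define MAX_ERRNO 4095  /* the top page of addresses is never valid */

    static void *ERR_PTR(long error) { return (void *)error; }
    static long PTR_ERR(const void *ptr) { return (long)ptr; }
    static int IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    int main(void)
    {
            void *dead = ERR_PTR(-12);      /* -ENOMEM */

            if (IS_ERR(dead))
                    printf("guest dead, errno %ld\n", -PTR_ERR(dead));
            return 0;
    }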
255 293
256static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) 294static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr)
257{ 295{
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
index 18dade06d4a9..6dfe568523a2 100644
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -1,6 +1,32 @@
1/* 1/*P:010
2 * Lguest specific paravirt-ops implementation 2 * A hypervisor allows multiple Operating Systems to run on a single machine.
3 * To quote David Wheeler: "Any problem in computer science can be solved with
4 * another layer of indirection."
5 *
6 * We keep things simple in two ways. First, we start with a normal Linux
7 * kernel and insert a module (lg.ko) which allows us to run other Linux
8 * kernels the same way we'd run processes. We call the first kernel the Host,
9 * and the others the Guests. The program which sets up and configures Guests
10 * (such as the example in Documentation/lguest/lguest.c) is called the
11 * Launcher.
12 *
13 * Secondly, we only run specially modified Guests, not normal kernels. When
14 * you set CONFIG_LGUEST to 'y' or 'm', this automatically sets
15 * CONFIG_LGUEST_GUEST=y, which compiles this file into the kernel so it knows
16 * how to be a Guest. This means that you can use the same kernel you boot
17 * normally (ie. as a Host) as a Guest.
3 * 18 *
19 * These Guests know that they cannot do privileged operations, such as disable
20 * interrupts, and that they have to ask the Host to do such things explicitly.
21 * This file consists of all the replacements for such low-level native
22 * hardware operations: these special Guest versions call the Host.
23 *
24 * So how does the kernel know it's a Guest? The Guest starts at a special
25 * entry point marked with a magic string, which sets up a few things then
26 * calls here. We replace the native functions in "struct paravirt_ops"
27 * with our Guest versions, then boot like normal. :*/
28
29/*
4 * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. 30 * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
5 * 31 *
6 * This program is free software; you can redistribute it and/or modify 32 * This program is free software; you can redistribute it and/or modify
@@ -40,6 +66,12 @@
40#include <asm/mce.h> 66#include <asm/mce.h>
41#include <asm/io.h> 67#include <asm/io.h>
42 68
69/*G:010 Welcome to the Guest!
70 *
71 * The Guest in our tale is a simple creature: identical to the Host but
72 * behaving in simplified but equivalent ways. In particular, the Guest is the
73 * same kernel as the Host (or at least, built from the same source code). :*/
74
43/* Declarations for definitions in lguest_guest.S */ 75/* Declarations for definitions in lguest_guest.S */
44extern char lguest_noirq_start[], lguest_noirq_end[]; 76extern char lguest_noirq_start[], lguest_noirq_end[];
45extern const char lgstart_cli[], lgend_cli[]; 77extern const char lgstart_cli[], lgend_cli[];
@@ -58,7 +90,26 @@ struct lguest_data lguest_data = {
58struct lguest_device_desc *lguest_devices; 90struct lguest_device_desc *lguest_devices;
59static cycle_t clock_base; 91static cycle_t clock_base;
60 92
61static enum paravirt_lazy_mode lazy_mode; 93/*G:035 Notice the lazy_hcall() above, rather than hcall(). This is our first
94 * real optimization trick!
95 *
96 * When lazy_mode is set, it means we're allowed to defer all hypercalls and do
97 * them as a batch when lazy_mode is eventually turned off. Because hypercalls
98 * are reasonably expensive, batching them up makes sense. For example, a
99 * large mmap might update dozens of page table entries: that code calls
100 * lguest_lazy_mode(PARAVIRT_LAZY_MMU), does the dozen updates, then calls
101 * lguest_lazy_mode(PARAVIRT_LAZY_NONE).
102 *
103 * So, when we're in lazy mode, we call async_hypercall() to store the call for
104 * future processing. When lazy mode is turned off we issue a hypercall to
105 * flush the stored calls.
106 *
107 * There's also a hack where "mode" is set to "PARAVIRT_LAZY_FLUSH" which
108 * indicates we're to flush any outstanding calls immediately. This is used
109 * when an interrupt handler does a kmap_atomic(): the page table changes must
110 * happen immediately even if we're in the middle of a batch. Usually we're
111 * not, though, so there's nothing to do. */
112static enum paravirt_lazy_mode lazy_mode; /* Note: not SMP-safe! */
62static void lguest_lazy_mode(enum paravirt_lazy_mode mode) 113static void lguest_lazy_mode(enum paravirt_lazy_mode mode)
63{ 114{
64 if (mode == PARAVIRT_LAZY_FLUSH) { 115 if (mode == PARAVIRT_LAZY_FLUSH) {
@@ -82,6 +133,16 @@ static void lazy_hcall(unsigned long call,
82 async_hcall(call, arg1, arg2, arg3); 133 async_hcall(call, arg1, arg2, arg3);
83} 134}
84 135
136/* async_hcall() is pretty simple: I'm quite proud of it really. We have a
137 * ring buffer of stored hypercalls which the Host will run through next time we
138 * do a normal hypercall. Each entry in the ring has 4 slots for the hypercall
139 * arguments, and a "hcall_status" word which is 0 if the call is ready to go,
140 * and 255 once the Host has finished with it.
141 *
142 * If we come around to a slot which hasn't been finished, then the table is
143 * full and we just make the hypercall directly. This has the nice side
144 * effect of causing the Host to run all the stored calls in the ring buffer
145 * which empties it for next time! */
85void async_hcall(unsigned long call, 146void async_hcall(unsigned long call,
86 unsigned long arg1, unsigned long arg2, unsigned long arg3) 147 unsigned long arg1, unsigned long arg2, unsigned long arg3)
87{ 148{
@@ -89,6 +150,9 @@ void async_hcall(unsigned long call,
89 static unsigned int next_call; 150 static unsigned int next_call;
90 unsigned long flags; 151 unsigned long flags;
91 152
153 /* Disable interrupts if not already disabled: we don't want an
154 * interrupt handler making a hypercall while we're already doing
155 * one! */
92 local_irq_save(flags); 156 local_irq_save(flags);
93 if (lguest_data.hcall_status[next_call] != 0xFF) { 157 if (lguest_data.hcall_status[next_call] != 0xFF) {
94 /* Table full, so do normal hcall which will flush table. */ 158 /* Table full, so do normal hcall which will flush table. */
@@ -98,7 +162,7 @@ void async_hcall(unsigned long call,
98 lguest_data.hcalls[next_call].edx = arg1; 162 lguest_data.hcalls[next_call].edx = arg1;
99 lguest_data.hcalls[next_call].ebx = arg2; 163 lguest_data.hcalls[next_call].ebx = arg2;
100 lguest_data.hcalls[next_call].ecx = arg3; 164 lguest_data.hcalls[next_call].ecx = arg3;
101 /* Make sure host sees arguments before "valid" flag. */ 165 /* Arguments must all be written before we mark it to go */
102 wmb(); 166 wmb();
103 lguest_data.hcall_status[next_call] = 0; 167 lguest_data.hcall_status[next_call] = 0;
104 if (++next_call == LHCALL_RING_SIZE) 168 if (++next_call == LHCALL_RING_SIZE)
@@ -106,9 +170,14 @@ void async_hcall(unsigned long call,
106 } 170 }
107 local_irq_restore(flags); 171 local_irq_restore(flags);
108} 172}
173/*:*/
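[The ring-buffer protocol described above is small enough to model in userspace. A sketch (names and the ring size are illustrative; do_call_now() stands in for a real synchronous hypercall, which as a side effect makes the Host drain the ring):]

    #include <stdio.h>

    #define RING_SIZE 4

    struct slot { unsigned long call, a1, a2, a3; };
    static struct slot ring[RING_SIZE];
    /* 0 = ready for the Host, 0xFF = Host finished with it. */
    static unsigned char status[RING_SIZE] = { 0xFF, 0xFF, 0xFF, 0xFF };
    static unsigned int next_call;

    static void do_call_now(unsigned long call)
    {
            int i;

            /* A real hypercall makes the Host run the ring first. */
            for (i = 0; i < RING_SIZE; i++)
                    status[i] = 0xFF;
            printf("direct call %lu (ring drained)\n", call);
    }

    static void async_call(unsigned long call, unsigned long a1,
                           unsigned long a2, unsigned long a3)
    {
            if (status[next_call] != 0xFF) {
                    do_call_now(call);      /* table full: fall back */
                    return;
            }
            ring[next_call].call = call;
            ring[next_call].a1 = a1;
            ring[next_call].a2 = a2;
            ring[next_call].a3 = a3;
            status[next_call] = 0;          /* mark ready, last of all */
            if (++next_call == RING_SIZE)
                    next_call = 0;
            printf("queued call %lu\n", call);
    }

    int main(void)
    {
            unsigned long i;

            for (i = 1; i <= 6; i++)
                    async_call(i, 0, 0, 0);
            return 0;
    }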
109 174
175/* Wrappers for the SEND_DMA and BIND_DMA hypercalls. This is mainly because
176 * Jeff Garzik complained that __pa() should never appear in drivers, and this
177 * helps remove most of them. But also, it wraps some ugliness. */
110void lguest_send_dma(unsigned long key, struct lguest_dma *dma) 178void lguest_send_dma(unsigned long key, struct lguest_dma *dma)
111{ 179{
180 /* The hcall might not write this if something goes wrong */
112 dma->used_len = 0; 181 dma->used_len = 0;
113 hcall(LHCALL_SEND_DMA, key, __pa(dma), 0); 182 hcall(LHCALL_SEND_DMA, key, __pa(dma), 0);
114} 183}
@@ -116,11 +185,16 @@ void lguest_send_dma(unsigned long key, struct lguest_dma *dma)
116int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas, 185int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas,
117 unsigned int num, u8 irq) 186 unsigned int num, u8 irq)
118{ 187{
188 /* This is the only hypercall which actually wants 5 arguments, and we
189 * only support 4. Fortunately the interrupt number is always less
190 * than 256, so we can pack it with the number of dmas in the final
191 * argument. */
119 if (!hcall(LHCALL_BIND_DMA, key, __pa(dmas), (num << 8) | irq)) 192 if (!hcall(LHCALL_BIND_DMA, key, __pa(dmas), (num << 8) | irq))
120 return -ENOMEM; 193 return -ENOMEM;
121 return 0; 194 return 0;
122} 195}
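[Packing two small values into one argument, as the comment above describes, and unpacking them on the other side, looks like this (a trivial standalone sketch with made-up values):]

    #include <stdio.h>

    int main(void)
    {
            unsigned int num = 16;          /* number of dmas */
            unsigned char irq = 9;          /* always < 256 */
            unsigned long packed = (num << 8) | irq;

            printf("num=%lu irq=%lu\n", packed >> 8, packed & 0xFF);
            return 0;
    }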
123 196
197/* Unbinding is the same hypercall as binding, but with 0 num & irq. */
124void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas) 198void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas)
125{ 199{
126 hcall(LHCALL_BIND_DMA, key, __pa(dmas), 0); 200 hcall(LHCALL_BIND_DMA, key, __pa(dmas), 0);
@@ -138,35 +212,73 @@ void lguest_unmap(void *addr)
138 iounmap((__force void __iomem *)addr); 212 iounmap((__force void __iomem *)addr);
139} 213}
140 214
215/*G:033
216 * Here are our first native-instruction replacements: four functions for
217 * interrupt control.
218 *
219 * The simplest way of implementing these would be to have "turn interrupts
220 * off" and "turn interrupts on" hypercalls. Unfortunately, this is too slow:
221 * these are by far the most commonly called functions of those we override.
222 *
223 * So instead we keep an "irq_enabled" field inside our "struct lguest_data",
224 * which the Guest can update with a single instruction. The Host knows to
225 * check there when it wants to deliver an interrupt.
226 */
227
228/* save_flags() is expected to return the processor state (ie. "eflags"). The
229 * eflags word contains all kinds of stuff, but in practice Linux only cares
230 * about the interrupt flag. Our "save_flags()" just returns that. */
141static unsigned long save_fl(void) 231static unsigned long save_fl(void)
142{ 232{
143 return lguest_data.irq_enabled; 233 return lguest_data.irq_enabled;
144} 234}
145 235
236/* "restore_flags" just sets the flags back to the value given. */
146static void restore_fl(unsigned long flags) 237static void restore_fl(unsigned long flags)
147{ 238{
148 /* FIXME: Check if interrupt pending... */
149 lguest_data.irq_enabled = flags; 239 lguest_data.irq_enabled = flags;
150} 240}
151 241
242/* Interrupts go off... */
152static void irq_disable(void) 243static void irq_disable(void)
153{ 244{
154 lguest_data.irq_enabled = 0; 245 lguest_data.irq_enabled = 0;
155} 246}
156 247
248/* Interrupts go on... */
157static void irq_enable(void) 249static void irq_enable(void)
158{ 250{
159 /* FIXME: Check if interrupt pending... */
160 lguest_data.irq_enabled = X86_EFLAGS_IF; 251 lguest_data.irq_enabled = X86_EFLAGS_IF;
161} 252}
162 253/*:*/
254/*M:003 Note that we don't check for outstanding interrupts when we re-enable
255 * them (or when we unmask an interrupt). This seems to work for the moment,
256 * since interrupts are rare and we'll just get the interrupt on the next timer
257 * tick, but when we turn on CONFIG_NO_HZ, we should revisit this. One way
258 * would be to put the "irq_enabled" field in a page by itself, and have the
259 * Host write-protect it when an interrupt comes in when irqs are disabled.
260 * There will then be a page fault as soon as interrupts are re-enabled. :*/
261
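[Since the four functions above are so central, here is the same idea as a runnable userspace sketch: "interrupt control" reduces to stores and loads on a word both sides can see. X86_EFLAGS_IF is the real constant (0x200); the shared struct is a stand-in for "struct lguest_data".]

    #include <stdio.h>

    #define X86_EFLAGS_IF 0x200UL

    /* Stand-in for the shared "struct lguest_data". */
    static struct { unsigned long irq_enabled; } lguest_data;

    static unsigned long save_fl(void) { return lguest_data.irq_enabled; }
    static void restore_fl(unsigned long f) { lguest_data.irq_enabled = f; }
    static void irq_disable(void) { lguest_data.irq_enabled = 0; }
    static void irq_enable(void) { lguest_data.irq_enabled = X86_EFLAGS_IF; }

    int main(void)
    {
            unsigned long flags;

            irq_enable();
            flags = save_fl();      /* remember current state */
            irq_disable();          /* critical section goes here */
            restore_fl(flags);      /* back to whatever it was */
            printf("irq_enabled = %#lx\n", lguest_data.irq_enabled);
            return 0;
    }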
262/*G:034
263 * The Interrupt Descriptor Table (IDT).
264 *
265 * The IDT tells the processor what to do when an interrupt comes in. Each
266 * entry in the table is a 64-bit descriptor: this holds the privilege level,
267 * address of the handler, and... well, who cares? The Guest just asks the
268 * Host to make the change anyway, because the Host controls the real IDT.
269 */
163static void lguest_write_idt_entry(struct desc_struct *dt, 270static void lguest_write_idt_entry(struct desc_struct *dt,
164 int entrynum, u32 low, u32 high) 271 int entrynum, u32 low, u32 high)
165{ 272{
273 /* Keep the local copy up to date. */
166 write_dt_entry(dt, entrynum, low, high); 274 write_dt_entry(dt, entrynum, low, high);
275 /* Tell Host about this new entry. */
167 hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high); 276 hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high);
168} 277}
169 278
279/* Changing to a different IDT is very rare: we keep the IDT up-to-date every
280 * time it is written, so we can simply loop through all entries and tell the
281 * Host about them. */
170static void lguest_load_idt(const struct Xgt_desc_struct *desc) 282static void lguest_load_idt(const struct Xgt_desc_struct *desc)
171{ 283{
172 unsigned int i; 284 unsigned int i;
@@ -176,12 +288,29 @@ static void lguest_load_idt(const struct Xgt_desc_struct *desc)
176 hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); 288 hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
177} 289}
178 290
291/*
292 * The Global Descriptor Table.
293 *
294 * The Intel architecture defines another table, called the Global Descriptor
295 * Table (GDT). You tell the CPU where it is (and its size) using the "lgdt"
296 * instruction, and then several other instructions refer to entries in the
297 * table. There are three entries which the Switcher needs, so the Host simply
298 * controls the entire thing and the Guest asks it to make changes using the
299 * LOAD_GDT hypercall.
300 *
301 * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY
302 * hypercall and use that repeatedly to load a new IDT. I don't think it
303 * really matters, but wouldn't it be nice if they were the same?
304 */
179static void lguest_load_gdt(const struct Xgt_desc_struct *desc) 305static void lguest_load_gdt(const struct Xgt_desc_struct *desc)
180{ 306{
181 BUG_ON((desc->size+1)/8 != GDT_ENTRIES); 307 BUG_ON((desc->size+1)/8 != GDT_ENTRIES);
182 hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0); 308 hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0);
183} 309}
184 310
311/* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
312 * then tell the Host to reload the entire thing. This operation is so rare
313 * that this naive implementation is reasonable. */
185static void lguest_write_gdt_entry(struct desc_struct *dt, 314static void lguest_write_gdt_entry(struct desc_struct *dt,
186 int entrynum, u32 low, u32 high) 315 int entrynum, u32 low, u32 high)
187{ 316{
@@ -189,19 +318,58 @@ static void lguest_write_gdt_entry(struct desc_struct *dt,
189 hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0); 318 hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0);
190} 319}
191 320
321/* OK, I lied. There are three "thread local storage" GDT entries which change
322 * on every context switch (these three entries are how glibc implements
323 * __thread variables). So we have a hypercall specifically for this case. */
192static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) 324static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
193{ 325{
194 lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); 326 lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
195} 327}
328/*:*/
196 329
330/*G:038 That's enough excitement for now, back to ploughing through each of
331 * the paravirt_ops (we're about 1/3 of the way through).
332 *
333 * This is the Local Descriptor Table, another weird Intel thingy. Linux only
334 * uses this for some strange applications like Wine. We don't do anything
335 * here, so they'll get an informative and friendly Segmentation Fault. */
197static void lguest_set_ldt(const void *addr, unsigned entries) 336static void lguest_set_ldt(const void *addr, unsigned entries)
198{ 337{
199} 338}
200 339
340/* This loads a GDT entry into the "Task Register": that entry points to a
341 * structure called the Task State Segment. Some comments scattered through the
342 * kernel code indicate that this was used for task switching in ages past, along
343 * with blood sacrifice and astrology.
344 *
345 * Now there's nothing interesting in here that we don't get told elsewhere.
346 * But the native version uses the "ltr" instruction, which makes the Host
347 * complain to the Guest about a Segmentation Fault and it'll oops. So we
348 * override the native version with a do-nothing version. */
201static void lguest_load_tr_desc(void) 349static void lguest_load_tr_desc(void)
202{ 350{
203} 351}
204 352
353/* The "cpuid" instruction is a way of querying both the CPU identity
354 * (manufacturer, model, etc) and its features. It was introduced before the
355 * Pentium in 1993 and keeps getting extended by both Intel and AMD. As you
356 * might imagine, after a decade and a half of this treatment, it is now a giant
357 * ball of hair. Its entry in the current Intel manual runs to 28 pages.
358 *
359 * This instruction even has its own Wikipedia entry. The Wikipedia entry
360 * has been translated into 4 languages. I am not making this up!
361 *
362 * We could get funky here and identify ourselves as "GenuineLguest", but
363 * instead we just use the real "cpuid" instruction. Then I pretty much turned
364 * off feature bits until the Guest booted. (Don't say that: you'll damage
365 * lguest sales!) Shut up, inner voice! (Hey, just pointing out that this is
366 * hardly future-proof.) No one's listening! They don't like you anyway,
367 * parenthetic weirdo!
368 *
369 * Replacing the cpuid so we can turn features off is great for the kernel, but
370 * anyone (including userspace) can just use the raw "cpuid" instruction and
371 * the Host won't even notice since it isn't privileged. So we try not to get
372 * too worked up about it. */
205static void lguest_cpuid(unsigned int *eax, unsigned int *ebx, 373static void lguest_cpuid(unsigned int *eax, unsigned int *ebx,
206 unsigned int *ecx, unsigned int *edx) 374 unsigned int *ecx, unsigned int *edx)
207{ 375{
@@ -214,21 +382,43 @@ static void lguest_cpuid(unsigned int *eax, unsigned int *ebx,
214 *ecx &= 0x00002201; 382 *ecx &= 0x00002201;
215 /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ 383 /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
216 *edx &= 0x07808101; 384 *edx &= 0x07808101;
217 /* Host wants to know when we flush kernel pages: set PGE. */ 385 /* The Host can do a nice optimization if it knows that the
386 * kernel mappings (addresses above 0xC0000000 or whatever
387 * PAGE_OFFSET is set to) haven't changed. But Linux calls
388 * flush_tlb_user() for both user and kernel mappings unless
389 * the Page Global Enable (PGE) feature bit is set. */
218 *edx |= 0x00002000; 390 *edx |= 0x00002000;
219 break; 391 break;
220 case 0x80000000: 392 case 0x80000000:
221 /* Futureproof this a little: if they ask how much extended 393 /* Futureproof this a little: if they ask how much extended
222 * processor information, limit it to known fields. */ 394 * processor information there is, limit it to known fields. */
223 if (*eax > 0x80000008) 395 if (*eax > 0x80000008)
224 *eax = 0x80000008; 396 *eax = 0x80000008;
225 break; 397 break;
226 } 398 }
227} 399}
228 400
401/* Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4.
402 * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother
403 * it. The Host needs to know when the Guest wants to change them, so we have
404 * a whole series of functions like read_cr0() and write_cr0().
405 *
406 * We start with CR0. CR0 allows you to turn on and off all kinds of basic
407 * features, but Linux only really cares about one: the horrifically-named Task
408 * Switched (TS) bit at bit 3 (ie. 8).
409 *
410 * What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if
411 * the floating point unit is used. Which allows us to restore FPU state
412 * lazily after a task switch, and Linux uses that gratefully, but wouldn't a
413 * name like "FPUTRAP bit" be a little less cryptic?
414 *
415 * We store cr0 (and cr3) locally, because the Host never changes it. The
416 * Guest sometimes wants to read it and we'd prefer not to bother the Host
417 * unnecessarily. */
229static unsigned long current_cr0, current_cr3; 418static unsigned long current_cr0, current_cr3;
230static void lguest_write_cr0(unsigned long val) 419static void lguest_write_cr0(unsigned long val)
231{ 420{
421 /* 8 == TS bit. */
232 lazy_hcall(LHCALL_TS, val & 8, 0, 0); 422 lazy_hcall(LHCALL_TS, val & 8, 0, 0);
233 current_cr0 = val; 423 current_cr0 = val;
234} 424}
@@ -238,17 +428,25 @@ static unsigned long lguest_read_cr0(void)
238 return current_cr0; 428 return current_cr0;
239} 429}
240 430
431/* Intel provided a special instruction to clear the TS bit for people too cool
432 * to use write_cr0() to do it. This "clts" instruction is faster, because all
433 * the vowels have been optimized out. */
241static void lguest_clts(void) 434static void lguest_clts(void)
242{ 435{
243 lazy_hcall(LHCALL_TS, 0, 0, 0); 436 lazy_hcall(LHCALL_TS, 0, 0, 0);
244 current_cr0 &= ~8U; 437 current_cr0 &= ~8U;
245} 438}
246 439
440/* CR2 is the virtual address of the last page fault, which the Guest only ever
441 * reads. The Host kindly writes this into our "struct lguest_data", so we
442 * just read it out of there. */
247static unsigned long lguest_read_cr2(void) 443static unsigned long lguest_read_cr2(void)
248{ 444{
249 return lguest_data.cr2; 445 return lguest_data.cr2;
250} 446}
251 447
448/* CR3 is the current toplevel pagetable page: the principle is the same as
449 * cr0. Keep a local copy, and tell the Host when it changes. */
252static void lguest_write_cr3(unsigned long cr3) 450static void lguest_write_cr3(unsigned long cr3)
253{ 451{
254 lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0); 452 lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
@@ -260,7 +458,7 @@ static unsigned long lguest_read_cr3(void)
260 return current_cr3; 458 return current_cr3;
261} 459}
262 460
263/* Used to enable/disable PGE, but we don't care. */ 461/* CR4 is used to enable and disable PGE, but we don't care. */
264static unsigned long lguest_read_cr4(void) 462static unsigned long lguest_read_cr4(void)
265{ 463{
266 return 0; 464 return 0;
@@ -270,6 +468,59 @@ static void lguest_write_cr4(unsigned long val)
270{ 468{
271} 469}
272 470
471/*
472 * Page Table Handling.
473 *
474 * Now would be a good time to take a rest and grab a coffee or similarly
475 * relaxing stimulant. The easy parts are behind us, and the trek gradually
476 * winds uphill from here.
477 *
478 * Quick refresher: memory is divided into "pages" of 4096 bytes each. The CPU
479 * maps virtual addresses to physical addresses using "page tables". We could
480 * use one huge index of 1 million entries: each address is 4 bytes, so that's
481 * 1024 pages just to hold the page tables. But since most virtual addresses
482 * are unused, we use a two level index which saves space. The CR3 register
483 * contains the physical address of the top level "page directory" page, which
484 * contains physical addresses of up to 1024 second-level pages. Each of these
485 * second level pages contains up to 1024 physical addresses of actual pages,
486 * or Page Table Entries (PTEs).
487 *
488 * Here's a diagram, where arrows indicate physical addresses:
489 *
490 * CR3 ---> +---------+
491 *          |         --------->+---------+
492 *          |         |         | PADDR1  |
493 *        Top-level   |         | PADDR2  |
494 *        (PMD) page  |         |         |
495 *          |         |         Lower-level |
496 *          |         |         (PTE) page  |
497 *          |         |         |           |
498 *           ....                ....
499 *
500 * So to convert a virtual address to a physical address, we look up the top
501 * level, which points us to the second level, which gives us the physical
502 * address of that page. If the top level entry was not present, or the second
503 * level entry was not present, then the virtual address is invalid (we
504 * say "the page was not mapped").
505 *
506 * Put another way, a 32-bit virtual address is divided up like so:
507 *
508 * 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
509 * |<---- 10 bits ---->|<---- 10 bits ---->|<------ 12 bits ------>|
510 *    Index into top      Index into second      Offset within page
511 *  page directory page     pagetable page
512 *
513 * The kernel spends a lot of time changing both the top-level page directory
514 * and lower-level pagetable pages. The Guest doesn't know physical addresses,
515 * so while it maintains these page tables exactly like normal, it also needs
516 * to keep the Host informed whenever it makes a change: the Host will create
517 * the real page tables based on the Guests'.
518 */
519
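[For the concrete-minded, here is that split as a few lines of standalone C (a sketch, not part of the patch); the address is the one spelled out in bits in the comment above, 0xC0100000:]

    #include <stdio.h>

    int main(void)
    {
            unsigned long vaddr = 0xC0100000UL;    /* from the diagram */
            unsigned int pgd_idx = vaddr >> 22;            /* top 10 bits */
            unsigned int pte_idx = (vaddr >> 12) & 0x3FF;  /* next 10 bits */
            unsigned int offset  = vaddr & 0xFFF;          /* low 12 bits */

            printf("pgd index %u, pte index %u, offset %#x\n",
                   pgd_idx, pte_idx, offset);
            return 0;
    }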
520/* The Guest calls this to set a second-level entry (pte), ie. to map a page
521 * into a process' address space. We set the entry then tell the Host the
522 * toplevel and address this corresponds to. The Guest uses one pagetable per
523 * process, so we need to tell the Host which one we're changing (mm->pgd). */
273static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, 524static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
274 pte_t *ptep, pte_t pteval) 525 pte_t *ptep, pte_t pteval)
275{ 526{
@@ -277,7 +528,9 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
277 lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low); 528 lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low);
278} 529}
279 530
280/* We only support two-level pagetables at the moment. */ 531/* The Guest calls this to set a top-level entry. Again, we set the entry then
532 * tell the Host which top-level page we changed, and the index of the entry we
533 * changed. */
281static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) 534static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
282{ 535{
283 *pmdp = pmdval; 536 *pmdp = pmdval;
@@ -285,7 +538,15 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
285 (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); 538 (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
286} 539}
287 540
288/* FIXME: Eliminate all callers of this. */ 541/* There are a couple of legacy places where the kernel sets a PTE, but we
542 * don't know the top level any more. This is useless for us, since we don't
543 * know which pagetable is changing or what address, so we just tell the Host
544 * to forget all of them. Fortunately, this is very rare.
545 *
546 * ... except in early boot when the kernel sets up the initial pagetables,
547 * which makes booting astonishingly slow. So we don't even tell the Host
548 * anything changed until we've done the first page table switch.
549 */
289static void lguest_set_pte(pte_t *ptep, pte_t pteval) 550static void lguest_set_pte(pte_t *ptep, pte_t pteval)
290{ 551{
291 *ptep = pteval; 552 *ptep = pteval;
@@ -294,22 +555,51 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
294 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); 555 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
295} 556}
296 557
558/* Unfortunately for Lguest, the paravirt_ops for page tables were based on
559 * native page table operations. On native hardware you can set a new page
560 * table entry whenever you want, but if you want to remove one you have to do
561 * a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
562 *
563 * So the lguest_set_pte_at() and lguest_set_pmd() functions above are only
564 * called when a valid entry is written, not when it's removed (ie. marked not
565 * present). Instead, this is where we come when the Guest wants to remove a
566 * page table entry: we tell the Host to set that entry to 0 (ie. the present
567 * bit is zero). */
297static void lguest_flush_tlb_single(unsigned long addr) 568static void lguest_flush_tlb_single(unsigned long addr)
298{ 569{
299 /* Simply set it to zero, and it will fault back in. */ 570 /* Simply set it to zero: if it was not, it will fault back in. */
300 lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0); 571 lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
301} 572}
302 573
574/* This is what happens after the Guest has removed a large number of entries.
575 * This tells the Host that any of the page table entries for userspace might
576 * have changed, ie. virtual addresses below PAGE_OFFSET. */
303static void lguest_flush_tlb_user(void) 577static void lguest_flush_tlb_user(void)
304{ 578{
305 lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0); 579 lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0);
306} 580}
307 581
582/* This is called when the kernel page tables have changed. That's not very
583 * common (unless the Guest is using highmem, which makes the Guest extremely
584 * slow), so it's worth separating this from the user flushing above. */
308static void lguest_flush_tlb_kernel(void) 585static void lguest_flush_tlb_kernel(void)
309{ 586{
310 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); 587 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
311} 588}
312 589
590/*
591 * The Unadvanced Programmable Interrupt Controller.
592 *
593 * This is an attempt to implement the simplest possible interrupt controller.
594 * I spent some time looking through routines like set_irq_chip_and_handler,
595 * set_irq_chip_and_handler_name, set_irq_chip_data and set_phasers_to_stun and
596 * I *think* this is as simple as it gets.
597 *
598 * We can tell the Host which interrupts we want blocked via the
599 * lguest_data.blocked_interrupts bitmap, so disabling (aka "masking") them is as
600 * simple as setting a bit. We don't actually "ack" interrupts as such, we
601 * just mask and unmask them. I wonder if we should be cleverer?
602 */
313static void disable_lguest_irq(unsigned int irq) 603static void disable_lguest_irq(unsigned int irq)
314{ 604{
315 set_bit(irq, lguest_data.blocked_interrupts); 605 set_bit(irq, lguest_data.blocked_interrupts);
@@ -318,9 +608,9 @@ static void disable_lguest_irq(unsigned int irq)
318static void enable_lguest_irq(unsigned int irq) 608static void enable_lguest_irq(unsigned int irq)
319{ 609{
320 clear_bit(irq, lguest_data.blocked_interrupts); 610 clear_bit(irq, lguest_data.blocked_interrupts);
321 /* FIXME: If it's pending? */
322} 611}
323 612
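
As a hedged sketch of the Host's side of this bargain (again editorial, not the patch's code): before injecting an interrupt the Host tests the same shared bitmap, which is why masking really is nothing more than a bit:

#include <stdio.h>

/* A toy model of the protocol: the Guest flips bits in a shared
 * bitmap, the Host tests them before delivery. */
static unsigned long blocked[2];   /* stand-in for blocked_interrupts */

static int can_deliver(unsigned int irq)
{
	return !(blocked[irq / (8 * sizeof(long))]
		 & (1UL << (irq % (8 * sizeof(long)))));
}

int main(void)
{
	blocked[0] |= 1UL << 3;   /* the Guest masks irq 3 */
	printf("irq 3: %s\n", can_deliver(3) ? "deliver" : "blocked");
	printf("irq 4: %s\n", can_deliver(4) ? "deliver" : "blocked");
	return 0;
}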
613/* This structure describes the lguest IRQ controller. */
324static struct irq_chip lguest_irq_controller = { 614static struct irq_chip lguest_irq_controller = {
325 .name = "lguest", 615 .name = "lguest",
326 .mask = disable_lguest_irq, 616 .mask = disable_lguest_irq,
@@ -328,6 +618,10 @@ static struct irq_chip lguest_irq_controller = {
328 .unmask = enable_lguest_irq, 618 .unmask = enable_lguest_irq,
329}; 619};
330 620
621/* This sets up the Interrupt Descriptor Table (IDT) entry for each hardware
622 * interrupt (except 128, which is used for system calls), and then tells the
623 * Linux infrastructure that each interrupt is controlled by our level-based
624 * lguest interrupt controller. */
331static void __init lguest_init_IRQ(void) 625static void __init lguest_init_IRQ(void)
332{ 626{
333 unsigned int i; 627 unsigned int i;
@@ -340,14 +634,24 @@ static void __init lguest_init_IRQ(void)
340 handle_level_irq); 634 handle_level_irq);
341 } 635 }
342 } 636 }
637 /* This call is required to set up for 4k stacks, where we have
638 * separate stacks for hard and soft interrupts. */
343 irq_ctx_init(smp_processor_id()); 639 irq_ctx_init(smp_processor_id());
344} 640}
345 641
642/*
643 * Time.
644 *
645 * It would be far better for everyone if the Guest had its own clock, but
646 * until then it must ask the Host for the time.
647 */
346static unsigned long lguest_get_wallclock(void) 648static unsigned long lguest_get_wallclock(void)
347{ 649{
348 return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); 650 return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0);
349} 651}
350 652
653/* If the Host tells us we can trust the TSC, we use that, otherwise we simply
654 * use the imprecise but reliable "jiffies" counter. */
351static cycle_t lguest_clock_read(void) 655static cycle_t lguest_clock_read(void)
352{ 656{
353 if (lguest_data.tsc_khz) 657 if (lguest_data.tsc_khz)
@@ -428,12 +732,19 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
428 local_irq_restore(flags); 732 local_irq_restore(flags);
429} 733}
430 734
735/* At some point in the boot process, we get asked to set up our timing
736 * infrastructure. The kernel doesn't expect timer interrupts before this, but
737 * we cleverly initialized the "blocked_interrupts" field of "struct
738 * lguest_data" so that timer interrupts were blocked until now. */
431static void lguest_time_init(void) 739static void lguest_time_init(void)
432{ 740{
741 /* Set up the timer interrupt (0) to go to our simple timer routine */
433 set_irq_handler(0, lguest_time_irq); 742 set_irq_handler(0, lguest_time_irq);
434 743
435 /* We use the TSC if the Host tells us we can, otherwise a dumb 744 /* Our clock structure looks like arch/i386/kernel/tsc.c if we can use
436 * jiffies-based clock. */ 745 * the TSC, otherwise it looks like kernel/time/jiffies.c. Either way,
746 * the "rating" is initialized so high that it's always chosen over any
747 * other clocksource. */
437 if (lguest_data.tsc_khz) { 748 if (lguest_data.tsc_khz) {
438 lguest_clock.shift = 22; 749 lguest_clock.shift = 22;
439 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, 750 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
@@ -449,13 +760,30 @@ static void lguest_time_init(void)
449 clock_base = lguest_clock_read(); 760 clock_base = lguest_clock_read();
450 clocksource_register(&lguest_clock); 761 clocksource_register(&lguest_clock);
451 762
452 /* We can't set cpumask in the initializer: damn C limitations! */ 763 /* We can't set cpumask in the initializer: damn C limitations! Set it
764 * here and register our timer device. */
453 lguest_clockevent.cpumask = cpumask_of_cpu(0); 765 lguest_clockevent.cpumask = cpumask_of_cpu(0);
454 clockevents_register_device(&lguest_clockevent); 766 clockevents_register_device(&lguest_clockevent);
455 767
768 /* Finally, we unblock the timer interrupt. */
456 enable_lguest_irq(0); 769 enable_lguest_irq(0);
457} 770}
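
An editorial aside on the mult/shift pair computed above: it lets the clocksource core convert cycles to nanoseconds with a single multiply and shift, ns = (cycles * mult) >> shift. A standalone sketch (the 1 GHz figure is illustrative; the shift of 22 mirrors the TSC case above):

#include <stdio.h>

int main(void)
{
	unsigned long long tsc_khz = 1000000;  /* say, a 1 GHz TSC */
	unsigned int shift = 22;
	/* clocksource_khz2mult(), in essence: (10^6 << shift) / khz,
	 * giving nanoseconds-per-cycle scaled up by 2^shift. */
	unsigned long long mult = (1000000ULL << shift) / tsc_khz;
	unsigned long long cycles = 3000000;   /* 3ms worth at 1 GHz */

	printf("%llu cycles = %llu ns\n", cycles, (cycles * mult) >> shift);
	return 0;
}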
458 771
772/*
773 * Miscellaneous bits and pieces.
774 *
775 * Here is an oddball collection of functions which the Guest needs for things
776 * to work. They're pretty simple.
777 */
778
779/* The Guest needs to tell the Host what stack it expects traps to use. For
780 * native hardware, this is part of the Task State Segment mentioned above in
781 * lguest_load_tr_desc(), but to help hypervisors there's this special call.
782 *
783 * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data
784 * segment), the privilege level (we're privilege level 1, the Host is 0 and
785 * will not tolerate us trying to use that), the stack pointer, and the number
786 * of pages in the stack. */
459static void lguest_load_esp0(struct tss_struct *tss, 787static void lguest_load_esp0(struct tss_struct *tss,
460 struct thread_struct *thread) 788 struct thread_struct *thread)
461{ 789{
@@ -463,15 +791,31 @@ static void lguest_load_esp0(struct tss_struct *tss,
463 THREAD_SIZE/PAGE_SIZE); 791 THREAD_SIZE/PAGE_SIZE);
464} 792}
465 793
794/* Let's just say, I wouldn't do debugging under a Guest. */
466static void lguest_set_debugreg(int regno, unsigned long value) 795static void lguest_set_debugreg(int regno, unsigned long value)
467{ 796{
468 /* FIXME: Implement */ 797 /* FIXME: Implement */
469} 798}
470 799
800/* There are times when the kernel wants to make sure that no memory writes are
801 * caught in the cache (that they've all reached real hardware devices). This
802 * doesn't matter for the Guest which has virtual hardware.
803 *
804 * On the Pentium 4 and above, cpuid() indicates that the Cache Line Flush
805 * (clflush) instruction is available and the kernel uses that. Otherwise, it
806 * uses the older "Write Back and Invalidate Cache" (wbinvd) instruction.
807 * Unlike clflush, wbinvd can only be run at privilege level 0. So we can
808 * ignore clflush, but replace wbinvd.
809 */
471static void lguest_wbinvd(void) 810static void lguest_wbinvd(void)
472{ 811{
473} 812}
474 813
814/* If the Guest expects to have an Advanced Programmable Interrupt Controller,
815 * we play dumb by ignoring writes and returning 0 for reads. So it's no
816 * longer Programmable nor Controlling anything, and I don't think 8 lines of
817 * code qualifies for Advanced. It will also never interrupt anything. It
818 * does, however, allow us to get through the Linux boot code. */
475#ifdef CONFIG_X86_LOCAL_APIC 819#ifdef CONFIG_X86_LOCAL_APIC
476static void lguest_apic_write(unsigned long reg, unsigned long v) 820static void lguest_apic_write(unsigned long reg, unsigned long v)
477{ 821{
@@ -483,19 +827,32 @@ static unsigned long lguest_apic_read(unsigned long reg)
483} 827}
484#endif 828#endif
485 829
830/* STOP! Until an interrupt comes in. */
486static void lguest_safe_halt(void) 831static void lguest_safe_halt(void)
487{ 832{
488 hcall(LHCALL_HALT, 0, 0, 0); 833 hcall(LHCALL_HALT, 0, 0, 0);
489} 834}
490 835
836/* Perhaps CRASH isn't the best name for this hypercall, but we use it to get a
837 * message out when we're crashing as well as elegant termination like powering
838 * off.
839 *
840 * Note that the Host always prefers that the Guest speak in physical addresses
841 * rather than virtual addresses, so we use __pa() here. */
491static void lguest_power_off(void) 842static void lguest_power_off(void)
492{ 843{
493 hcall(LHCALL_CRASH, __pa("Power down"), 0, 0); 844 hcall(LHCALL_CRASH, __pa("Power down"), 0, 0);
494} 845}
495 846
847/*
848 * Panicking.
849 *
850 * Don't. But if you did, this is what happens.
851 */
496static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) 852static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
497{ 853{
498 hcall(LHCALL_CRASH, __pa(p), 0, 0); 854 hcall(LHCALL_CRASH, __pa(p), 0, 0);
855 /* The hcall won't return, but to keep gcc happy, we're "done". */
499 return NOTIFY_DONE; 856 return NOTIFY_DONE;
500} 857}
501 858
@@ -503,15 +860,45 @@ static struct notifier_block paniced = {
503 .notifier_call = lguest_panic 860 .notifier_call = lguest_panic
504}; 861};
505 862
863/* Setting up memory is fairly easy. */
506static __init char *lguest_memory_setup(void) 864static __init char *lguest_memory_setup(void)
507{ 865{
508 /* We do this here because lockcheck barfs if before start_kernel */ 866 /* We do this here and not earlier because lockcheck barfs if we do it
867 * before start_kernel() */
509 atomic_notifier_chain_register(&panic_notifier_list, &paniced); 868 atomic_notifier_chain_register(&panic_notifier_list, &paniced);
510 869
870 /* The Linux bootloader header contains an "e820" memory map: the
871 * Launcher populated the first entry with our memory limit. */
511 add_memory_region(E820_MAP->addr, E820_MAP->size, E820_MAP->type); 872 add_memory_region(E820_MAP->addr, E820_MAP->size, E820_MAP->type);
873
874 /* This string is for the boot messages. */
512 return "LGUEST"; 875 return "LGUEST";
513} 876}
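
For reference (an aside, not the patch's code), the classic e820 entry that add_memory_region() consumes is just a base, a length and a type, with type 1 meaning usable RAM; the Launcher fills in exactly one:

/* The standard BIOS e820 entry layout, sketched. */
struct e820entry_sketch {
	unsigned long long addr;   /* start of memory segment */
	unsigned long long size;   /* size of memory segment */
	unsigned int type;         /* 1 == usable RAM */
} __attribute__((packed));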
514 877
878/*G:050
879 * Patching (Powerfully Placating Performance Pedants)
880 *
881 * We have already seen that "struct paravirt_ops" lets us replace simple
882 * native instructions with calls to the appropriate back end all throughout
883 * the kernel. This allows the same kernel to run as a Guest and as a native
884 * kernel, but it's slow because of all the indirect branches.
885 *
886 * Remember that David Wheeler quote about "Any problem in computer science can
887 * be solved with another layer of indirection"? The rest of that quote is
888 * "... But that usually will create another problem." This is the first of
889 * those problems.
890 *
891 * Our current solution is to allow the paravirt back end to optionally patch
892 * over the indirect calls to replace them with something more efficient. We
893 * patch the four most commonly called functions: disable interrupts, enable
894 * interrupts, restore interrupts and save interrupts. We usually have 10
895 * bytes to patch into: the Guest versions of these operations are small enough
896 * that we can fit comfortably.
897 *
898 * First we need assembly templates of each of the patchable Guest operations,
899 * and these are in lguest_asm.S. */
900
901/*G:060 We construct a table from the assembler templates: */
515static const struct lguest_insns 902static const struct lguest_insns
516{ 903{
517 const char *start, *end; 904 const char *start, *end;
@@ -521,35 +908,52 @@ static const struct lguest_insns
521 [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf }, 908 [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf },
522 [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf }, 909 [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf },
523}; 910};
911
912/* Now our patch routine is fairly simple (based on the native one in
913 * paravirt.c). If we have a replacement, we copy it in and return how much of
914 * the available space we used. */
524static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len) 915static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len)
525{ 916{
526 unsigned int insn_len; 917 unsigned int insn_len;
527 918
528 /* Don't touch it if we don't have a replacement */ 919 /* Don't do anything special if we don't have a replacement */
529 if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start) 920 if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)
530 return paravirt_patch_default(type, clobber, insns, len); 921 return paravirt_patch_default(type, clobber, insns, len);
531 922
532 insn_len = lguest_insns[type].end - lguest_insns[type].start; 923 insn_len = lguest_insns[type].end - lguest_insns[type].start;
533 924
534 /* Similarly if we can't fit replacement. */ 925 /* Similarly if we can't fit the replacement (shouldn't happen, but let's
926 * be thorough). */
535 if (len < insn_len) 927 if (len < insn_len)
536 return paravirt_patch_default(type, clobber, insns, len); 928 return paravirt_patch_default(type, clobber, insns, len);
537 929
930 /* Copy in our instructions. */
538 memcpy(insns, lguest_insns[type].start, insn_len); 931 memcpy(insns, lguest_insns[type].start, insn_len);
539 return insn_len; 932 return insn_len;
540} 933}
541 934
935/*G:030 Once we get to lguest_init(), we know we're a Guest. The paravirt_ops
936 * structure in the kernel provides a single point for (almost) every routine
937 * we have to override to avoid privileged instructions. */
542__init void lguest_init(void *boot) 938__init void lguest_init(void *boot)
543{ 939{
544 /* Copy boot parameters first. */ 940 /* Copy boot parameters first: the Launcher put the physical location
941 * in %esi, and head.S converted that to a virtual address and handed
942 * it to us. */
545 memcpy(&boot_params, boot, PARAM_SIZE); 943 memcpy(&boot_params, boot, PARAM_SIZE);
944 /* The boot parameters also tell us where the command-line is: save
945 * that, too. */
546 memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr), 946 memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr),
547 COMMAND_LINE_SIZE); 947 COMMAND_LINE_SIZE);
548 948
949 /* We're under lguest, paravirt is enabled, and we're running at
950 * privilege level 1, not 0 as normal. */
549 paravirt_ops.name = "lguest"; 951 paravirt_ops.name = "lguest";
550 paravirt_ops.paravirt_enabled = 1; 952 paravirt_ops.paravirt_enabled = 1;
551 paravirt_ops.kernel_rpl = 1; 953 paravirt_ops.kernel_rpl = 1;
552 954
955 /* We set up all the lguest overrides for sensitive operations. These
956 * are detailed with the operations themselves. */
553 paravirt_ops.save_fl = save_fl; 957 paravirt_ops.save_fl = save_fl;
554 paravirt_ops.restore_fl = restore_fl; 958 paravirt_ops.restore_fl = restore_fl;
555 paravirt_ops.irq_disable = irq_disable; 959 paravirt_ops.irq_disable = irq_disable;
@@ -593,20 +997,45 @@ __init void lguest_init(void *boot)
593 paravirt_ops.set_lazy_mode = lguest_lazy_mode; 997 paravirt_ops.set_lazy_mode = lguest_lazy_mode;
594 paravirt_ops.wbinvd = lguest_wbinvd; 998 paravirt_ops.wbinvd = lguest_wbinvd;
595 paravirt_ops.sched_clock = lguest_sched_clock; 999 paravirt_ops.sched_clock = lguest_sched_clock;
596 1000 /* Now is a good time to look at the implementations of these functions
1001 * before returning to the rest of lguest_init(). */
1002
1003 /*G:070 Now we've seen all the paravirt_ops, we return to
1004 * lguest_init() where the rest of the fairly chaotic boot setup
1005 * occurs.
1006 *
1007 * The Host expects our first hypercall to tell it where our "struct
1008 * lguest_data" is, so we do that first. */
597 hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0); 1009 hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0);
598 1010
599 /* We use top of mem for initial pagetables. */ 1011 /* The native boot code sets up initial page tables immediately after
1012 * the kernel itself, and sets init_pg_tables_end so they're not
1013 * clobbered. The Launcher places our initial pagetables somewhere at
1014 * the top of our physical memory, so we don't need extra space: set
1015 * init_pg_tables_end to the end of the kernel. */
600 init_pg_tables_end = __pa(pg0); 1016 init_pg_tables_end = __pa(pg0);
601 1017
1018 /* Load the %fs segment register (the per-cpu segment register) with
1019 * the normal data segment to get through booting. */
602 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); 1020 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
603 1021
1022 /* The Host uses the top of the Guest's virtual address space for the
1023 * Host<->Guest Switcher, and it tells us how much it needs in
1024 * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */
604 reserve_top_address(lguest_data.reserve_mem); 1025 reserve_top_address(lguest_data.reserve_mem);
605 1026
1027 /* If we don't initialize the lock dependency checker now, it crashes
1028 * paravirt_disable_iospace. */
606 lockdep_init(); 1029 lockdep_init();
607 1030
1031 /* The IDE code spends about 3 seconds probing for disks: if we reserve
1032 * all the I/O ports up front it can't get them and so doesn't probe.
1033 * Other device drivers are similar (but less severe). This cuts the
1034 * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. */
608 paravirt_disable_iospace(); 1035 paravirt_disable_iospace();
609 1036
1037 /* This is messy CPU setup stuff which the native boot code does before
1038 * start_kernel, so we have to do, too: */
610 cpu_detect(&new_cpu_data); 1039 cpu_detect(&new_cpu_data);
611 /* head.S usually sets up the first capability word, so do it here. */ 1040 /* head.S usually sets up the first capability word, so do it here. */
612 new_cpu_data.x86_capability[0] = cpuid_edx(1); 1041 new_cpu_data.x86_capability[0] = cpuid_edx(1);
@@ -617,14 +1046,27 @@ __init void lguest_init(void *boot)
617#ifdef CONFIG_X86_MCE 1046#ifdef CONFIG_X86_MCE
618 mce_disabled = 1; 1047 mce_disabled = 1;
619#endif 1048#endif
620
621#ifdef CONFIG_ACPI 1049#ifdef CONFIG_ACPI
622 acpi_disabled = 1; 1050 acpi_disabled = 1;
623 acpi_ht = 0; 1051 acpi_ht = 0;
624#endif 1052#endif
625 1053
1054 /* We set the preferred console to "hvc". This is the "hypervisor
1055 * virtual console" driver written by the PowerPC people, which we also
1056 * adapted for lguest's use. */
626 add_preferred_console("hvc", 0, NULL); 1057 add_preferred_console("hvc", 0, NULL);
627 1058
1059 /* Last of all, we set the power management poweroff hook to point to
1060 * the Guest routine to power off. */
628 pm_power_off = lguest_power_off; 1061 pm_power_off = lguest_power_off;
1062
1063 /* Now we're set up, call start_kernel() in init/main.c and we proceed
1064 * to boot as normal. It never returns. */
629 start_kernel(); 1065 start_kernel();
630} 1066}
1067/*
1068 * This marks the end of stage II of our journey, The Guest.
1069 *
1070 * It is now time for us to explore the nooks and crannies of the three Guest
1071 * devices and complete our understanding of the Guest in "make Drivers".
1072 */
diff --git a/drivers/lguest/lguest_asm.S b/drivers/lguest/lguest_asm.S
index a3dbf22ee365..f182c6a36209 100644
--- a/drivers/lguest/lguest_asm.S
+++ b/drivers/lguest/lguest_asm.S
@@ -4,15 +4,15 @@
4#include <asm/thread_info.h> 4#include <asm/thread_info.h>
5#include <asm/processor-flags.h> 5#include <asm/processor-flags.h>
6 6
7/* 7/*G:020 This is where we begin: we have a magic signature which the launcher
8 * This is where we begin: we have a magic signature which the launcher looks 8 * looks for. The plan is that the Linux boot protocol will be extended with a
9 * for. The plan is that the Linux boot protocol will be extended with a
10 * "platform type" field which will guide us here from the normal entry point, 9 * "platform type" field which will guide us here from the normal entry point,
11 * but for the moment this suffices. We pass the virtual address of the boot 10 * but for the moment this suffices. The normal boot code uses %esi for the
12 * info to lguest_init(). 11 * boot header, so we do too. We convert it to a virtual address by adding
12 * PAGE_OFFSET, and hand it to lguest_init() as its argument (ie. %eax).
13 * 13 *
14 * We put it in .init.text will be discarded after boot. 14 * The .section line puts this code in .init.text so it will be discarded after
15 */ 15 * boot. */
16.section .init.text, "ax", @progbits 16.section .init.text, "ax", @progbits
17.ascii "GenuineLguest" 17.ascii "GenuineLguest"
18 /* Set up initial stack. */ 18 /* Set up initial stack. */
@@ -21,7 +21,9 @@
21 addl $__PAGE_OFFSET, %eax 21 addl $__PAGE_OFFSET, %eax
22 jmp lguest_init 22 jmp lguest_init
23 23
24/* The templates for inline patching. */ 24/*G:055 We create a macro which puts the assembler code between lgstart_ and
25 * lgend_ markers. These templates end up in the .init.text section, so they
26 * are discarded after boot. */
25#define LGUEST_PATCH(name, insns...) \ 27#define LGUEST_PATCH(name, insns...) \
26 lgstart_##name: insns; lgend_##name:; \ 28 lgstart_##name: insns; lgend_##name:; \
27 .globl lgstart_##name; .globl lgend_##name 29 .globl lgstart_##name; .globl lgend_##name
@@ -30,24 +32,61 @@ LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
30LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) 32LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
31LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) 33LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
32LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) 34LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
35/*:*/
33 36
34.text 37.text
35/* These demark the EIP range where host should never deliver interrupts. */ 38/* These demark the EIP range where host should never deliver interrupts. */
36.global lguest_noirq_start 39.global lguest_noirq_start
37.global lguest_noirq_end 40.global lguest_noirq_end
38 41
39/* 42/*M:004 When the Host reflects a trap or injects an interrupt into the Guest,
40 * We move eflags word to lguest_data.irq_enabled to restore interrupt state. 43 * it sets the eflags interrupt bit on the stack based on
41 * For page faults, gpfs and virtual interrupts, the hypervisor has saved 44 * lguest_data.irq_enabled, so the Guest iret logic does the right thing when
42 * eflags manually, otherwise it was delivered directly and so eflags reflects 45 * restoring it. However, when the Host sets the Guest up for direct traps,
43 * the real machine IF state, ie. interrupts on. Since the kernel always dies 46 * such as system calls, the processor is the one to push eflags onto the
44 * if it takes such a trap with interrupts disabled anyway, turning interrupts 47 * stack, and the interrupt bit will be 1 (in reality, interrupts are always
45 * back on unconditionally here is OK. 48 * enabled in the Guest).
46 */ 49 *
50 * This turns out to be harmless: the only trap which should happen under Linux
51 * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc
52 * regions), which has to be reflected through the Host anyway. If another
53 * trap *does* go off when interrupts are disabled, the Guest will panic, and
54 * we'll never get to this iret! :*/
55
56/*G:045 There is one final paravirt_op that the Guest implements, and glancing
57 * at it you can see why I left it to last. It's *cool*! It's in *assembler*!
58 *
59 * The "iret" instruction is used to return from an interrupt or trap. The
60 * stack looks like this:
61 * old address
62 * old code segment & privilege level
63 * old processor flags ("eflags")
64 *
65 * The "iret" instruction pops those values off the stack and restores them all
66 * at once. The only problem is that eflags includes the Interrupt Flag which
67 * the Guest can't change: the CPU will simply ignore it when we do an "iret".
68 * So we have to copy eflags from the stack to lguest_data.irq_enabled before
69 * we do the "iret".
70 *
71 * There are two problems with this: firstly, we need to use a register to do
72 * the copy and secondly, the whole thing needs to be atomic. The first
73 * problem is easy to solve: push %eax on the stack so we can use it, and then
74 * restore it at the end just before the real "iret".
75 *
76 * The second is harder: copying eflags to lguest_data.irq_enabled will turn
77 * interrupts on before we're finished, so we could be interrupted before we
78 * return to userspace or wherever. Our solution to this is to surround the
79 * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the
80 * Host that it is *never* to interrupt us there, even if interrupts seem to be
81 * enabled. */
47ENTRY(lguest_iret) 82ENTRY(lguest_iret)
48 pushl %eax 83 pushl %eax
49 movl 12(%esp), %eax 84 movl 12(%esp), %eax
50lguest_noirq_start: 85lguest_noirq_start:
86 /* Note the %ss: segment prefix here. Normal data accesses use the
87 * "ds" segment, but that will have already been restored for whatever
88 * we're returning to (such as userspace): we can't trust it. The %ss:
89 * prefix makes sure we use the stack segment, which is still valid. */
51 movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled 90 movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
52 popl %eax 91 popl %eax
53 iret 92 iret
diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c
index 18d6ab21a43b..55a7940ca732 100644
--- a/drivers/lguest/lguest_bus.c
+++ b/drivers/lguest/lguest_bus.c
@@ -1,3 +1,6 @@
1/*P:050 Lguest guests use a very simple bus for devices. It's a simple array
2 * of device descriptors contained just above the top of normal memory. The
3 * lguest bus is 80% tedious boilerplate code. :*/
1#include <linux/init.h> 4#include <linux/init.h>
2#include <linux/bootmem.h> 5#include <linux/bootmem.h>
3#include <linux/lguest_bus.h> 6#include <linux/lguest_bus.h>
@@ -43,6 +46,10 @@ static struct device_attribute lguest_dev_attrs[] = {
43 __ATTR_NULL 46 __ATTR_NULL
44}; 47};
45 48
49/*D:130 The generic bus infrastructure requires a function which says whether a
50 * device matches a driver. For us, it is simple: "struct lguest_driver"
51 * contains a "device_type" field which indicates what type of device it can
52 * handle, so we just cast the args and compare: */
46static int lguest_dev_match(struct device *_dev, struct device_driver *_drv) 53static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
47{ 54{
48 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); 55 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
@@ -50,6 +57,7 @@ static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
50 57
51 return (drv->device_type == lguest_devices[dev->index].type); 58 return (drv->device_type == lguest_devices[dev->index].type);
52} 59}
60/*:*/
53 61
54struct lguest_bus { 62struct lguest_bus {
55 struct bus_type bus; 63 struct bus_type bus;
@@ -68,11 +76,24 @@ static struct lguest_bus lguest_bus = {
68 } 76 }
69}; 77};
70 78
79/*D:140 This is the callback which occurs once the bus infrastructure matches
80 * up a device and driver, ie. in response to add_lguest_device() calling
81 * device_register(), or register_lguest_driver() calling driver_register().
82 *
83 * At the moment it's always the latter: the devices are added first, since
84 * scan_devices() is called from a "core_initcall", and the drivers themselves
85 * called later as a normal "initcall". But it would work the other way too.
86 *
87 * So now we have the happy couple, we add the status bit to indicate that we
88 * found a driver. If the driver truly loves the device, it will return
89 * happiness from its probe function (ok, perhaps this wasn't my greatest
90 * analogy), and we set the final "driver ok" bit so the Host sees it's all
91 * green. */
71static int lguest_dev_probe(struct device *_dev) 92static int lguest_dev_probe(struct device *_dev)
72{ 93{
73 int ret; 94 int ret;
74 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); 95 struct lguest_device*dev = container_of(_dev,struct lguest_device,dev);
75 struct lguest_driver *drv = container_of(dev->dev.driver, 96 struct lguest_driver*drv = container_of(dev->dev.driver,
76 struct lguest_driver, drv); 97 struct lguest_driver, drv);
77 98
78 lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER; 99 lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER;
@@ -82,6 +103,10 @@ static int lguest_dev_probe(struct device *_dev)
82 return ret; 103 return ret;
83} 104}
84 105
106/* The last part of the bus infrastructure is the function lguest drivers use
107 * to register themselves. Firstly, we do nothing if there's no lguest bus
108 * (ie. this is not a Guest), otherwise we fill in the embedded generic "struct
109 * driver" fields and call the generic driver_register(). */
85int register_lguest_driver(struct lguest_driver *drv) 110int register_lguest_driver(struct lguest_driver *drv)
86{ 111{
87 if (!lguest_devices) 112 if (!lguest_devices)
@@ -94,12 +119,36 @@ int register_lguest_driver(struct lguest_driver *drv)
94 119
95 return driver_register(&drv->drv); 120 return driver_register(&drv->drv);
96} 121}
122
123/* At the moment we build all the drivers into the kernel because they're so
124 * simple: 8144 bytes for all three of them as I type this. And as the console
125 * really needs to be built in, it's actually only 3527 bytes for the network
126 * and block drivers.
127 *
128 * If they get complex it will make sense for them to be modularized, so we
129 * need to explicitly export the symbol.
130 *
131 * I don't think non-GPL modules make sense, so it's a GPL-only export.
132 */
97EXPORT_SYMBOL_GPL(register_lguest_driver); 133EXPORT_SYMBOL_GPL(register_lguest_driver);
98 134
135/*D:120 This is the core of the lguest bus: actually adding a new device.
136 * It's a separate function because it's neater that way, and because an
137 * earlier version of the code supported hotplug and unplug. They were removed
138 * early on because they were never used.
139 *
140 * As Andrew Tridgell says, "Untested code is buggy code".
141 *
142 * It's worth reading this carefully: we start with an index into the array of
143 * "struct lguest_device_desc"s indicating the device which is new: */
99static void add_lguest_device(unsigned int index) 144static void add_lguest_device(unsigned int index)
100{ 145{
101 struct lguest_device *new; 146 struct lguest_device *new;
102 147
148 /* Each "struct lguest_device_desc" has a "status" field, which the
149 * Guest updates as the device is probed. In the worst case, the Host
150 * can look at these bits to tell what part of device setup failed,
151 * even if the console isn't available. */
103 lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE; 152 lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE;
104 new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL); 153 new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL);
105 if (!new) { 154 if (!new) {
@@ -108,12 +157,17 @@ static void add_lguest_device(unsigned int index)
108 return; 157 return;
109 } 158 }
110 159
160 /* The "struct lguest_device" setup is pretty straightforward example
161 * code. */
111 new->index = index; 162 new->index = index;
112 new->private = NULL; 163 new->private = NULL;
113 memset(&new->dev, 0, sizeof(new->dev)); 164 memset(&new->dev, 0, sizeof(new->dev));
114 new->dev.parent = &lguest_bus.dev; 165 new->dev.parent = &lguest_bus.dev;
115 new->dev.bus = &lguest_bus.bus; 166 new->dev.bus = &lguest_bus.bus;
116 sprintf(new->dev.bus_id, "%u", index); 167 sprintf(new->dev.bus_id, "%u", index);
168
169 /* device_register() causes the bus infrastructure to look for a
170 * matching driver. */
117 if (device_register(&new->dev) != 0) { 171 if (device_register(&new->dev) != 0) {
118 printk(KERN_EMERG "Cannot register lguest device %u\n", index); 172 printk(KERN_EMERG "Cannot register lguest device %u\n", index);
119 lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED; 173 lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
@@ -121,6 +175,9 @@ static void add_lguest_device(unsigned int index)
121 } 175 }
122} 176}
123 177
178/*D:110 scan_devices() simply iterates through the device array. The type 0
179 * is reserved to mean "no device", and anything else means we have found a
180 * device: add it. */
124static void scan_devices(void) 181static void scan_devices(void)
125{ 182{
126 unsigned int i; 183 unsigned int i;
@@ -130,12 +187,23 @@ static void scan_devices(void)
130 add_lguest_device(i); 187 add_lguest_device(i);
131} 188}
132 189
190/*D:100 Fairly early in boot, lguest_bus_init() is called to set up the lguest
191 * bus. We check that we are a Guest by checking paravirt_ops.name: there are
192 * other ways of checking, but this seems most obvious to me.
193 *
194 * So we can access the array of "struct lguest_device_desc"s easily, we map
195 * that memory and store the pointer in the global "lguest_devices". Then we
196 * register the bus with the core. Doing two registrations seems clunky to me,
197 * but it seems to be the correct sysfs incantation.
198 *
199 * Finally we call scan_devices() which adds all the devices found in the
200 * "struct lguest_device_desc" array. */
133static int __init lguest_bus_init(void) 201static int __init lguest_bus_init(void)
134{ 202{
135 if (strcmp(paravirt_ops.name, "lguest") != 0) 203 if (strcmp(paravirt_ops.name, "lguest") != 0)
136 return 0; 204 return 0;
137 205
138 /* Devices are in page above top of "normal" mem. */ 206 /* Devices are in a single page above top of "normal" mem */
139 lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1); 207 lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1);
140 208
141 if (bus_register(&lguest_bus.bus) != 0 209 if (bus_register(&lguest_bus.bus) != 0
@@ -145,4 +213,5 @@ static int __init lguest_bus_init(void)
145 scan_devices(); 213 scan_devices();
146 return 0; 214 return 0;
147} 215}
216/* Do this after core stuff, before devices. */
148postcore_initcall(lguest_bus_init); 217postcore_initcall(lguest_bus_init);
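
To round the bus off from a driver's point of view, here is a hypothetical minimal driver (a sketch, not in the patch: the field names follow lguest_dev_match() and lguest_dev_probe() above, while the device type constant, header paths and probe signature are assumptions):

#include <linux/module.h>
#include <linux/lguest_bus.h>

static int example_probe(struct lguest_device *lgdev)
{
	/* Map the device, stash state in lgdev->private, etc. */
	return 0;   /* happiness: the Host sees "driver ok" */
}

static struct lguest_driver example_driver = {
	.name = "example",
	.owner = THIS_MODULE,
	.device_type = LGUEST_DEVICE_T_EXAMPLE,   /* assumed constant */
	.probe = example_probe,
};

static int __init example_init(void)
{
	/* Does nothing if we're not running under lguest. */
	return register_lguest_driver(&example_driver);
}
module_init(example_init);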
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index e90d7a783daf..80d1b58c7698 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -1,36 +1,70 @@
1/* Userspace control of the guest, via /dev/lguest. */ 1/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher
2 * controls and communicates with the Guest. For example, the first write will
3 * tell us the memory size, pagetable, entry point and kernel address offset.
4 * A read will run the Guest until a signal is pending (-EINTR), or the Guest
5 * does a DMA out to the Launcher. Writes are also used to get a DMA buffer
6 * registered by the Guest and to send the Guest an interrupt. :*/
2#include <linux/uaccess.h> 7#include <linux/uaccess.h>
3#include <linux/miscdevice.h> 8#include <linux/miscdevice.h>
4#include <linux/fs.h> 9#include <linux/fs.h>
5#include "lg.h" 10#include "lg.h"
6 11
12/*L:030 setup_regs() doesn't really belong in this file, but it gives us an
13 * early glimpse deeper into the Host so it's worth having here.
14 *
15 * Most of the Guest's registers are left alone: we used get_zeroed_page() to
16 * allocate the structure, so they will be 0. */
7static void setup_regs(struct lguest_regs *regs, unsigned long start) 17static void setup_regs(struct lguest_regs *regs, unsigned long start)
8{ 18{
9 /* Write out stack in format lguest expects, so we can switch to it. */ 19 /* There are four "segment" registers which the Guest needs to boot:
20 * The "code segment" register (cs) refers to the kernel code segment
21 * __KERNEL_CS, and the "data", "extra" and "stack" segment registers
22 * refer to the kernel data segment __KERNEL_DS.
23 *
24 * The privilege level is packed into the lower bits. The Guest runs
25 * at privilege level 1 (GUEST_PL).*/
10 regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL; 26 regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
11 regs->cs = __KERNEL_CS|GUEST_PL; 27 regs->cs = __KERNEL_CS|GUEST_PL;
12 regs->eflags = 0x202; /* Interrupts enabled. */ 28
29 /* The "eflags" register contains miscellaneous flags. Bit 1 (0x002)
30 * is supposed to always be "1". Bit 9 (0x200) controls whether
31 * interrupts are enabled. We always leave interrupts enabled while
32 * running the Guest. */
33 regs->eflags = 0x202;
34
35 /* The "Extended Instruction Pointer" register says where the Guest is
36 * running. */
13 regs->eip = start; 37 regs->eip = start;
14 /* esi points to our boot information (physical address 0) */ 38
39 /* %esi points to our boot information, at physical address 0, so don't
40 * touch it. */
15} 41}
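
The "privilege level is packed into the lower bits" remark deserves a one-liner: an x86 segment selector's bottom two bits are its Requested Privilege Level, so re-tagging a kernel selector for the Guest is a mask and an OR (a sketch; GUEST_PL is 1 per the comment above):

static unsigned short guest_selector(unsigned short sel)
{
	/* Clear the two RPL bits, then set them to GUEST_PL (1). */
	return (sel & ~3) | 1;
}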
16 42
17/* + addr */ 43/*L:310 To send DMA into the Guest, the Launcher needs to be able to ask for a
44 * DMA buffer. This is done by writing LHREQ_GETDMA and the key to
45 * /dev/lguest. */
18static long user_get_dma(struct lguest *lg, const u32 __user *input) 46static long user_get_dma(struct lguest *lg, const u32 __user *input)
19{ 47{
20 unsigned long key, udma, irq; 48 unsigned long key, udma, irq;
21 49
50 /* Fetch the key they wrote to us. */
22 if (get_user(key, input) != 0) 51 if (get_user(key, input) != 0)
23 return -EFAULT; 52 return -EFAULT;
53 /* Look for a free Guest DMA buffer bound to that key. */
24 udma = get_dma_buffer(lg, key, &irq); 54 udma = get_dma_buffer(lg, key, &irq);
25 if (!udma) 55 if (!udma)
26 return -ENOENT; 56 return -ENOENT;
27 57
28 /* We put irq number in udma->used_len. */ 58 /* We need to tell the Launcher what interrupt the Guest expects after
59 * the buffer is filled. We stash it in udma->used_len. */
29 lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq); 60 lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq);
61
62 /* The (guest-physical) address of the DMA buffer is returned from
63 * the write(). */
30 return udma; 64 return udma;
31} 65}
32 66
33/* To force the Guest to stop running and return to the Launcher, the 67/*L:315 To force the Guest to stop running and return to the Launcher, the
34 * Waker writes LHREQ_BREAK and the value "1" to /dev/lguest. The 68 * Waker writes LHREQ_BREAK and the value "1" to /dev/lguest. The
35 * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */ 69 * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */
36static int break_guest_out(struct lguest *lg, const u32 __user *input) 70static int break_guest_out(struct lguest *lg, const u32 __user *input)
@@ -54,7 +88,8 @@ static int break_guest_out(struct lguest *lg, const u32 __user *input)
54 } 88 }
55} 89}
56 90
57/* + irq */ 91/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
92 * number to /dev/lguest. */
58static int user_send_irq(struct lguest *lg, const u32 __user *input) 93static int user_send_irq(struct lguest *lg, const u32 __user *input)
59{ 94{
60 u32 irq; 95 u32 irq;
@@ -63,14 +98,19 @@ static int user_send_irq(struct lguest *lg, const u32 __user *input)
63 return -EFAULT; 98 return -EFAULT;
64 if (irq >= LGUEST_IRQS) 99 if (irq >= LGUEST_IRQS)
65 return -EINVAL; 100 return -EINVAL;
101 /* Next time the Guest runs, the core code will see if it can deliver
102 * this interrupt. */
66 set_bit(irq, lg->irqs_pending); 103 set_bit(irq, lg->irqs_pending);
67 return 0; 104 return 0;
68} 105}
69 106
107/*L:040 Once our Guest is initialized, the Launcher makes it run by reading
108 * from /dev/lguest. */
70static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) 109static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
71{ 110{
72 struct lguest *lg = file->private_data; 111 struct lguest *lg = file->private_data;
73 112
113 /* You must write LHREQ_INITIALIZE first! */
74 if (!lg) 114 if (!lg)
75 return -EINVAL; 115 return -EINVAL;
76 116
@@ -78,27 +118,52 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
78 if (current != lg->tsk) 118 if (current != lg->tsk)
79 return -EPERM; 119 return -EPERM;
80 120
121 /* If the Guest is already dead, we indicate why */
81 if (lg->dead) { 122 if (lg->dead) {
82 size_t len; 123 size_t len;
83 124
125 /* lg->dead either contains an error code, or a string. */
84 if (IS_ERR(lg->dead)) 126 if (IS_ERR(lg->dead))
85 return PTR_ERR(lg->dead); 127 return PTR_ERR(lg->dead);
86 128
129 /* We can only return as much as the buffer they read with. */
87 len = min(size, strlen(lg->dead)+1); 130 len = min(size, strlen(lg->dead)+1);
88 if (copy_to_user(user, lg->dead, len) != 0) 131 if (copy_to_user(user, lg->dead, len) != 0)
89 return -EFAULT; 132 return -EFAULT;
90 return len; 133 return len;
91 } 134 }
92 135
136 /* If we returned from read() last time because the Guest sent DMA,
137 * clear the flag. */
93 if (lg->dma_is_pending) 138 if (lg->dma_is_pending)
94 lg->dma_is_pending = 0; 139 lg->dma_is_pending = 0;
95 140
141 /* Run the Guest until something interesting happens. */
96 return run_guest(lg, (unsigned long __user *)user); 142 return run_guest(lg, (unsigned long __user *)user);
97} 143}
98 144
99/* Take: pfnlimit, pgdir, start, pageoffset. */ 145/*L:020 The initialization write supplies 4 32-bit values (in addition to the
146 * 32-bit LHREQ_INITIALIZE value). These are:
147 *
148 * pfnlimit: The highest (Guest-physical) page number the Guest should be
149 * allowed to access. The Launcher has to live in Guest memory, so it sets
150 * this to ensure the Guest can't reach it.
151 *
152 * pgdir: The (Guest-physical) address of the top of the initial Guest
153 * pagetables (which are set up by the Launcher).
154 *
155 * start: The first instruction to execute ("eip" in x86-speak).
156 *
157 * page_offset: The PAGE_OFFSET constant in the Guest kernel. We should
158 * probably wean the code off this, but it's a very useful constant! Any
159 * address above this is within the Guest kernel, and any kernel address can
160 * be quickly converted from physical to virtual by adding PAGE_OFFSET. It's
161 * 0xC0000000 (3G) by default, but it's configurable at kernel build time.
162 */
100static int initialize(struct file *file, const u32 __user *input) 163static int initialize(struct file *file, const u32 __user *input)
101{ 164{
165 /* "struct lguest" contains everything we (the Host) know about a
166 * Guest. */
102 struct lguest *lg; 167 struct lguest *lg;
103 int err, i; 168 int err, i;
104 u32 args[4]; 169 u32 args[4];
@@ -106,7 +171,7 @@ static int initialize(struct file *file, const u32 __user *input)
106 /* We grab the Big Lguest lock, which protects the global array 171 /* We grab the Big Lguest lock, which protects the global array
107 * "lguests" and multiple simultaneous initializations. */ 172 * "lguests" and multiple simultaneous initializations. */
108 mutex_lock(&lguest_lock); 173 mutex_lock(&lguest_lock);
109 174 /* You can't initialize twice! Close the device and start again... */
110 if (file->private_data) { 175 if (file->private_data) {
111 err = -EBUSY; 176 err = -EBUSY;
112 goto unlock; 177 goto unlock;
@@ -117,37 +182,70 @@ static int initialize(struct file *file, const u32 __user *input)
117 goto unlock; 182 goto unlock;
118 } 183 }
119 184
185 /* Find an unused guest. */
120 i = find_free_guest(); 186 i = find_free_guest();
121 if (i < 0) { 187 if (i < 0) {
122 err = -ENOSPC; 188 err = -ENOSPC;
123 goto unlock; 189 goto unlock;
124 } 190 }
191 /* OK, we have an index into the "lguest" array: "lg" is a convenient
192 * pointer. */
125 lg = &lguests[i]; 193 lg = &lguests[i];
194
195 /* Populate the easy fields of our "struct lguest" */
126 lg->guestid = i; 196 lg->guestid = i;
127 lg->pfn_limit = args[0]; 197 lg->pfn_limit = args[0];
128 lg->page_offset = args[3]; 198 lg->page_offset = args[3];
199
200 /* We need a complete page for the Guest registers: they are accessible
201 * to the Guest and we can only grant it access to whole pages. */
129 lg->regs_page = get_zeroed_page(GFP_KERNEL); 202 lg->regs_page = get_zeroed_page(GFP_KERNEL);
130 if (!lg->regs_page) { 203 if (!lg->regs_page) {
131 err = -ENOMEM; 204 err = -ENOMEM;
132 goto release_guest; 205 goto release_guest;
133 } 206 }
207 /* We actually put the registers at the bottom of the page. */
134 lg->regs = (void *)lg->regs_page + PAGE_SIZE - sizeof(*lg->regs); 208 lg->regs = (void *)lg->regs_page + PAGE_SIZE - sizeof(*lg->regs);
135 209
210 /* Initialize the Guest's shadow page tables, using the toplevel
211 * address the Launcher gave us. This allocates memory, so can
212 * fail. */
136 err = init_guest_pagetable(lg, args[1]); 213 err = init_guest_pagetable(lg, args[1]);
137 if (err) 214 if (err)
138 goto free_regs; 215 goto free_regs;
139 216
217 /* Now we initialize the Guest's registers, handing it the start
218 * address. */
140 setup_regs(lg->regs, args[2]); 219 setup_regs(lg->regs, args[2]);
220
221 /* There are a couple of GDT entries the Guest expects when first
222 * booting. */
141 setup_guest_gdt(lg); 223 setup_guest_gdt(lg);
224
225 /* The timer for lguest's clock needs initialization. */
142 init_clockdev(lg); 226 init_clockdev(lg);
227
228 /* We keep a pointer to the Launcher task (ie. current task) for when
229 * other Guests want to wake this one (inter-Guest I/O). */
143 lg->tsk = current; 230 lg->tsk = current;
231 /* We need to keep a pointer to the Launcher's memory map, because if
232 * the Launcher dies we need to clean it up. If we don't keep a
233 * reference, it is destroyed before close() is called. */
144 lg->mm = get_task_mm(lg->tsk); 234 lg->mm = get_task_mm(lg->tsk);
235
236 /* Initialize the queue for the waker to wait on */
145 init_waitqueue_head(&lg->break_wq); 237 init_waitqueue_head(&lg->break_wq);
238
239 /* We remember which CPU's pages this Guest used last, for optimization
240 * when the same Guest runs on the same CPU twice. */
146 lg->last_pages = NULL; 241 lg->last_pages = NULL;
242
243 /* We keep our "struct lguest" in the file's private_data. */
147 file->private_data = lg; 244 file->private_data = lg;
148 245
149 mutex_unlock(&lguest_lock); 246 mutex_unlock(&lguest_lock);
150 247
248 /* And because this is a write() call, we return the length used. */
151 return sizeof(args); 249 return sizeof(args);
152 250
153free_regs: 251free_regs:
@@ -159,9 +257,15 @@ unlock:
159 return err; 257 return err;
160} 258}
161 259
260/*L:010 The first operation the Launcher does must be a write. All writes
261 * start with a 32 bit number: for the first write this must be
262 * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use
263 * writes of other values to get DMA buffers and send interrupts. */
162static ssize_t write(struct file *file, const char __user *input, 264static ssize_t write(struct file *file, const char __user *input,
163 size_t size, loff_t *off) 265 size_t size, loff_t *off)
164{ 266{
267 /* Once the guest is initialized, we hold the "struct lguest" in the
268 * file private data. */
165 struct lguest *lg = file->private_data; 269 struct lguest *lg = file->private_data;
166 u32 req; 270 u32 req;
167 271
@@ -169,8 +273,11 @@ static ssize_t write(struct file *file, const char __user *input,
169 return -EFAULT; 273 return -EFAULT;
170 input += sizeof(req); 274 input += sizeof(req);
171 275
276 /* If you haven't initialized, you must do that first. */
172 if (req != LHREQ_INITIALIZE && !lg) 277 if (req != LHREQ_INITIALIZE && !lg)
173 return -EINVAL; 278 return -EINVAL;
279
280 /* Once the Guest is dead, all you can do is read() why it died. */
174 if (lg && lg->dead) 281 if (lg && lg->dead)
175 return -ENOENT; 282 return -ENOENT;
176 283
@@ -192,33 +299,72 @@ static ssize_t write(struct file *file, const char __user *input,
192 } 299 }
193} 300}
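
Putting the interface together, a hedged sketch of the Launcher's side in userspace (the request code's value is assumed to match the kernel's enum; error handling trimmed):

#include <fcntl.h>
#include <unistd.h>
#include <stdint.h>

enum { LHREQ_INITIALIZE = 0 };   /* assumed to match the kernel header */

int launch(uint32_t pfnlimit, uint32_t pgdir, uint32_t start,
	   uint32_t page_offset)
{
	uint32_t args[5] = { LHREQ_INITIALIZE,
			     pfnlimit, pgdir, start, page_offset };
	unsigned long retval;
	int fd = open("/dev/lguest", O_RDWR);

	if (fd < 0)
		return -1;
	/* The first write must be LHREQ_INITIALIZE plus the four values. */
	if (write(fd, args, sizeof(args)) < 0)
		return -1;
	/* Reading runs the Guest until a signal arrives or it sends DMA. */
	while (read(fd, &retval, sizeof(retval)) >= 0)
		;   /* service the DMA, then run the Guest again */
	return 0;
}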
194 301
302/*L:060 The final piece of interface code is the close() routine. It reverses
303 * everything done in initialize(). This is usually called because the
304 * Launcher exited.
305 *
306 * Note that the close routine returns 0 or a negative error number: it can't
307 * really fail, but it can whine. I blame Sun for this wart, and K&R C for
308 * letting them do it. :*/
195static int close(struct inode *inode, struct file *file) 309static int close(struct inode *inode, struct file *file)
196{ 310{
197 struct lguest *lg = file->private_data; 311 struct lguest *lg = file->private_data;
198 312
313 /* If we never successfully initialized, there's nothing to clean up */
199 if (!lg) 314 if (!lg)
200 return 0; 315 return 0;
201 316
317 /* We need the big lock, to protect from inter-guest I/O and other
318 * Launchers initializing guests. */
202 mutex_lock(&lguest_lock); 319 mutex_lock(&lguest_lock);
203 /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ 320 /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */
204 hrtimer_cancel(&lg->hrt); 321 hrtimer_cancel(&lg->hrt);
322 /* Free any DMA buffers the Guest had bound. */
205 release_all_dma(lg); 323 release_all_dma(lg);
324 /* Free up the shadow page tables for the Guest. */
206 free_guest_pagetable(lg); 325 free_guest_pagetable(lg);
326 /* Now all the memory cleanups are done, it's safe to release the
327 * Launcher's memory management structure. */
207 mmput(lg->mm); 328 mmput(lg->mm);
329 /* If lg->dead doesn't contain an error code it will be NULL or a
330 * kmalloc()ed string, either of which is ok to hand to kfree(). */
208 if (!IS_ERR(lg->dead)) 331 if (!IS_ERR(lg->dead))
209 kfree(lg->dead); 332 kfree(lg->dead);
333 /* We can free up the register page we allocated. */
210 free_page(lg->regs_page); 334 free_page(lg->regs_page);
335 /* We clear the entire structure, which also marks it as free for the
336 * next user. */
211 memset(lg, 0, sizeof(*lg)); 337 memset(lg, 0, sizeof(*lg));
338 /* Release lock and exit. */
212 mutex_unlock(&lguest_lock); 339 mutex_unlock(&lguest_lock);
340
213 return 0; 341 return 0;
214} 342}
215 343
344/*L:000
345 * Welcome to our journey through the Launcher!
346 *
347 * The Launcher is the Host userspace program which sets up, runs and services
348 * the Guest. In fact, many comments in the Drivers which refer to "the Host"
349 * doing things are inaccurate: the Launcher does all the device handling for
350 * the Guest. The Guest can't tell what's done by the Launcher and what by
351 * the Host.
352 *
353 * Just to confuse you: to the Host kernel, the Launcher *is* the Guest and we
354 * shall see more of that later.
355 *
356 * We begin our understanding with the Host kernel interface which the Launcher
357 * uses: reading and writing a character device called /dev/lguest. All the
358 * work happens in the read(), write() and close() routines: */
216static struct file_operations lguest_fops = { 359static struct file_operations lguest_fops = {
217 .owner = THIS_MODULE, 360 .owner = THIS_MODULE,
218 .release = close, 361 .release = close,
219 .write = write, 362 .write = write,
220 .read = read, 363 .read = read,
221}; 364};
365
366/* This is a textbook example of a "misc" character device. Populate a "struct
367 * miscdevice" and register it with misc_register(). */
222static struct miscdevice lguest_dev = { 368static struct miscdevice lguest_dev = {
223 .minor = MISC_DYNAMIC_MINOR, 369 .minor = MISC_DYNAMIC_MINOR,
224 .name = "lguest", 370 .name = "lguest",
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 1b0ba09b1269..b7a924ace684 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -1,5 +1,11 @@
1/* Shadow page table operations. 1/*P:700 The pagetable code, on the other hand, still shows the scars of
2 * Copyright (C) Rusty Russell IBM Corporation 2006. 2 * previous encounters. It's functional, and as neat as it can be in the
3 * circumstances, but be wary, for these things are subtle and break easily.
4 * The Guest provides a virtual to physical mapping, but we can neither trust
5 * it nor use it: we verify and convert it here to point the hardware to the
6 * actual Guest pages when running the Guest. :*/
7
8/* Copyright (C) Rusty Russell IBM Corporation 2006.
3 * GPL v2 and any later version */ 9 * GPL v2 and any later version */
4#include <linux/mm.h> 10#include <linux/mm.h>
5#include <linux/types.h> 11#include <linux/types.h>
@@ -9,38 +15,96 @@
9#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
10#include "lg.h" 16#include "lg.h"
11 17
18/*M:008 We hold a reference to pages, which prevents them from being swapped.
19 * It'd be nice to have a callback in the "struct mm_struct" when Linux wants
20 * to swap out. If we had this, and a shrinker callback to trim PTE pages, we
21 * could probably consider launching Guests as non-root. :*/
22
23/*H:300
24 * The Page Table Code
25 *
26 * We use two-level page tables for the Guest. If you're not entirely
27 * comfortable with virtual addresses, physical addresses and page tables then
28 * I recommend you review lguest.c's "Page Table Handling" (with diagrams!).
29 *
30 * The Guest keeps page tables, but we maintain the actual ones here: these are
31 * called "shadow" page tables. Which is a very Guest-centric name: these are
32 * the real page tables the CPU uses, although we keep them up to date to
33 * reflect the Guest's. (See what I mean about weird naming? Since when do
34 * shadows reflect anything?)
35 *
36 * Anyway, this is the most complicated part of the Host code. There are seven
37 * parts to this:
38 * (i) Setting up a page table entry for the Guest when it faults,
39 * (ii) Setting up the page table entry for the Guest stack,
40 * (iii) Setting up a page table entry when the Guest tells us it has changed,
41 * (iv) Switching page tables,
 42 * (v) Flushing (throwing away) page tables,
43 * (vi) Mapping the Switcher when the Guest is about to run,
44 * (vii) Setting up the page tables initially.
45 :*/
46
 47/* Pages are 4k long, and each page table entry is 4 bytes long, giving us 1024
48 * (or 2^10) entries per page. */
12#define PTES_PER_PAGE_SHIFT 10 49#define PTES_PER_PAGE_SHIFT 10
13#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT) 50#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT)
51
 52/* 1024 entries in a page table page map 1024 pages: 4MB. The Switcher is
53 * conveniently placed at the top 4MB, so it uses a separate, complete PTE
54 * page. */
14#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1) 55#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1)
15 56
57/* We actually need a separate PTE page for each CPU. Remember that after the
 58 * Switcher code itself come two pages for each CPU, and we don't want this
59 * CPU's guest to see the pages of any other CPU. */
16static DEFINE_PER_CPU(spte_t *, switcher_pte_pages); 60static DEFINE_PER_CPU(spte_t *, switcher_pte_pages);
17#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) 61#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)
18 62
63/*H:320 With our shadow and Guest types established, we need to deal with
64 * them: the page table code is curly enough to need helper functions to keep
65 * it clear and clean.
66 *
67 * The first helper takes a virtual address, and says which entry in the top
68 * level page table deals with that address. Since each top level entry deals
69 * with 4M, this effectively divides by 4M. */
19static unsigned vaddr_to_pgd_index(unsigned long vaddr) 70static unsigned vaddr_to_pgd_index(unsigned long vaddr)
20{ 71{
21 return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); 72 return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
22} 73}
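To make the arithmetic concrete, a small user-space toy (assumptions: 4k pages and the two-level i386 layout above; this is not kernel code) mirroring the same shift:

	#include <stdio.h>

	#define PAGE_SHIFT		12	/* 4k pages */
	#define PTES_PER_PAGE_SHIFT	10	/* 1024 entries per page */

	static unsigned toy_vaddr_to_pgd_index(unsigned long vaddr)
	{
		/* 12 + 10 = 22 bits: each top-level slot spans 4MB. */
		return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
	}

	int main(void)
	{
		/* The traditional 3GB kernel boundary: 0xC0000000 >> 22
		 * is 768, ie. the 768th of the 1024 top-level slots. */
		printf("%u\n", toy_vaddr_to_pgd_index(0xC0000000UL));
		return 0;
	}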
23 74
24/* These access the shadow versions (ie. the ones used by the CPU). */ 75/* There are two functions which return pointers to the shadow (aka "real")
76 * page tables.
77 *
78 * spgd_addr() takes the virtual address and returns a pointer to the top-level
79 * page directory entry for that address. Since we keep track of several page
80 * tables, the "i" argument tells us which one we're interested in (it's
81 * usually the current one). */
25static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) 82static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr)
26{ 83{
27 unsigned int index = vaddr_to_pgd_index(vaddr); 84 unsigned int index = vaddr_to_pgd_index(vaddr);
28 85
86 /* We kill any Guest trying to touch the Switcher addresses. */
29 if (index >= SWITCHER_PGD_INDEX) { 87 if (index >= SWITCHER_PGD_INDEX) {
30 kill_guest(lg, "attempt to access switcher pages"); 88 kill_guest(lg, "attempt to access switcher pages");
31 index = 0; 89 index = 0;
32 } 90 }
 91 /* Return a pointer to the index'th pgd entry for the i'th page table. */
33 return &lg->pgdirs[i].pgdir[index]; 92 return &lg->pgdirs[i].pgdir[index];
34} 93}
35 94
95/* This routine then takes the PGD entry given above, which contains the
 96 * address of the PTE page. It returns a pointer to the PTE entry for the
97 * given address. */
36static spte_t *spte_addr(struct lguest *lg, spgd_t spgd, unsigned long vaddr) 98static spte_t *spte_addr(struct lguest *lg, spgd_t spgd, unsigned long vaddr)
37{ 99{
38 spte_t *page = __va(spgd.pfn << PAGE_SHIFT); 100 spte_t *page = __va(spgd.pfn << PAGE_SHIFT);
101 /* You should never call this if the PGD entry wasn't valid */
39 BUG_ON(!(spgd.flags & _PAGE_PRESENT)); 102 BUG_ON(!(spgd.flags & _PAGE_PRESENT));
40 return &page[(vaddr >> PAGE_SHIFT) % PTES_PER_PAGE]; 103 return &page[(vaddr >> PAGE_SHIFT) % PTES_PER_PAGE];
41} 104}
42 105
 43/* These access the guest versions. */ 106/* These two functions are just like the above two, except they access the Guest
107 * page tables. Hence they return a Guest address. */
44static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) 108static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr)
45{ 109{
46 unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); 110 unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
@@ -55,12 +119,24 @@ static unsigned long gpte_addr(struct lguest *lg,
55 return gpage + ((vaddr>>PAGE_SHIFT) % PTES_PER_PAGE) * sizeof(gpte_t); 119 return gpage + ((vaddr>>PAGE_SHIFT) % PTES_PER_PAGE) * sizeof(gpte_t);
56} 120}
57 121
58/* Do a virtual -> physical mapping on a user page. */ 122/*H:350 This routine takes a page number given by the Guest and converts it to
123 * an actual, physical page number. It can fail for several reasons: the
124 * virtual address might not be mapped by the Launcher, the write flag is set
125 * and the page is read-only, or the write flag was set and the page was
126 * shared so had to be copied, but we ran out of memory.
127 *
128 * This holds a reference to the page, so release_pte() is careful to
129 * put that back. */
59static unsigned long get_pfn(unsigned long virtpfn, int write) 130static unsigned long get_pfn(unsigned long virtpfn, int write)
60{ 131{
61 struct page *page; 132 struct page *page;
133 /* This value indicates failure. */
62 unsigned long ret = -1UL; 134 unsigned long ret = -1UL;
63 135
136 /* get_user_pages() is a complex interface: it gets the "struct
 137 * vm_area_struct" and "struct page" associated with a range of pages.
138 * It also needs the task's mmap_sem held, and is not very quick.
139 * It returns the number of pages it got. */
64 down_read(&current->mm->mmap_sem); 140 down_read(&current->mm->mmap_sem);
65 if (get_user_pages(current, current->mm, virtpfn << PAGE_SHIFT, 141 if (get_user_pages(current, current->mm, virtpfn << PAGE_SHIFT,
66 1, write, 1, &page, NULL) == 1) 142 1, write, 1, &page, NULL) == 1)
@@ -69,28 +145,47 @@ static unsigned long get_pfn(unsigned long virtpfn, int write)
69 return ret; 145 return ret;
70} 146}
71 147
148/*H:340 Converting a Guest page table entry to a shadow (ie. real) page table
149 * entry can be a little tricky. The flags are (almost) the same, but the
150 * Guest PTE contains a virtual page number: the CPU needs the real page
151 * number. */
72static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write) 152static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write)
73{ 153{
74 spte_t spte; 154 spte_t spte;
75 unsigned long pfn; 155 unsigned long pfn;
76 156
77 /* We ignore the global flag. */ 157 /* The Guest sets the global flag, because it thinks that it is using
158 * PGE. We only told it to use PGE so it would tell us whether it was
159 * flushing a kernel mapping or a userspace mapping. We don't actually
160 * use the global bit, so throw it away. */
78 spte.flags = (gpte.flags & ~_PAGE_GLOBAL); 161 spte.flags = (gpte.flags & ~_PAGE_GLOBAL);
162
163 /* We need a temporary "unsigned long" variable to hold the answer from
164 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't
165 * fit in spte.pfn. get_pfn() finds the real physical number of the
166 * page, given the virtual number. */
79 pfn = get_pfn(gpte.pfn, write); 167 pfn = get_pfn(gpte.pfn, write);
80 if (pfn == -1UL) { 168 if (pfn == -1UL) {
81 kill_guest(lg, "failed to get page %u", gpte.pfn); 169 kill_guest(lg, "failed to get page %u", gpte.pfn);
82 /* Must not put_page() bogus page on cleanup. */ 170 /* When we destroy the Guest, we'll go through the shadow page
171 * tables and release_pte() them. Make sure we don't think
172 * this one is valid! */
83 spte.flags = 0; 173 spte.flags = 0;
84 } 174 }
175 /* Now we assign the page number, and our shadow PTE is complete. */
85 spte.pfn = pfn; 176 spte.pfn = pfn;
86 return spte; 177 return spte;
87} 178}
88 179
180/*H:460 And to complete the chain, release_pte() looks like this: */
89static void release_pte(spte_t pte) 181static void release_pte(spte_t pte)
90{ 182{
183 /* Remember that get_user_pages() took a reference to the page, in
184 * get_pfn()? We have to put it back now. */
91 if (pte.flags & _PAGE_PRESENT) 185 if (pte.flags & _PAGE_PRESENT)
92 put_page(pfn_to_page(pte.pfn)); 186 put_page(pfn_to_page(pte.pfn));
93} 187}
188/*:*/
94 189
95static void check_gpte(struct lguest *lg, gpte_t gpte) 190static void check_gpte(struct lguest *lg, gpte_t gpte)
96{ 191{
@@ -104,11 +199,16 @@ static void check_gpgd(struct lguest *lg, gpgd_t gpgd)
104 kill_guest(lg, "bad page directory entry"); 199 kill_guest(lg, "bad page directory entry");
105} 200}
106 201
107/* FIXME: We hold reference to pages, which prevents them from being 202/*H:330
108 swapped. It'd be nice to have a callback when Linux wants to swap out. */ 203 * (i) Setting up a page table entry for the Guest when it faults
109 204 *
110/* We fault pages in, which allows us to update accessed/dirty bits. 205 * We saw this call in run_guest(): when we see a page fault in the Guest, we
111 * Return true if we got page. */ 206 * come here. That's because we only set up the shadow page tables lazily as
207 * they're needed, so we get page faults all the time and quietly fix them up
208 * and return to the Guest without it knowing.
209 *
210 * If we fixed up the fault (ie. we mapped the address), this routine returns
211 * true. */
112int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) 212int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
113{ 213{
114 gpgd_t gpgd; 214 gpgd_t gpgd;
@@ -117,106 +217,161 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
117 gpte_t gpte; 217 gpte_t gpte;
118 spte_t *spte; 218 spte_t *spte;
119 219
220 /* First step: get the top-level Guest page table entry. */
120 gpgd = mkgpgd(lgread_u32(lg, gpgd_addr(lg, vaddr))); 221 gpgd = mkgpgd(lgread_u32(lg, gpgd_addr(lg, vaddr)));
222 /* Toplevel not present? We can't map it in. */
121 if (!(gpgd.flags & _PAGE_PRESENT)) 223 if (!(gpgd.flags & _PAGE_PRESENT))
122 return 0; 224 return 0;
123 225
226 /* Now look at the matching shadow entry. */
124 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 227 spgd = spgd_addr(lg, lg->pgdidx, vaddr);
125 if (!(spgd->flags & _PAGE_PRESENT)) { 228 if (!(spgd->flags & _PAGE_PRESENT)) {
126 /* Get a page of PTEs for them. */ 229 /* No shadow entry: allocate a new shadow PTE page. */
127 unsigned long ptepage = get_zeroed_page(GFP_KERNEL); 230 unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
128 /* FIXME: Steal from self in this case? */ 231 /* This is not really the Guest's fault, but killing it is
232 * simple for this corner case. */
129 if (!ptepage) { 233 if (!ptepage) {
130 kill_guest(lg, "out of memory allocating pte page"); 234 kill_guest(lg, "out of memory allocating pte page");
131 return 0; 235 return 0;
132 } 236 }
237 /* We check that the Guest pgd is OK. */
133 check_gpgd(lg, gpgd); 238 check_gpgd(lg, gpgd);
239 /* And we copy the flags to the shadow PGD entry. The page
240 * number in the shadow PGD is the page we just allocated. */
134 spgd->raw.val = (__pa(ptepage) | gpgd.flags); 241 spgd->raw.val = (__pa(ptepage) | gpgd.flags);
135 } 242 }
136 243
244 /* OK, now we look at the lower level in the Guest page table: keep its
245 * address, because we might update it later. */
137 gpte_ptr = gpte_addr(lg, gpgd, vaddr); 246 gpte_ptr = gpte_addr(lg, gpgd, vaddr);
138 gpte = mkgpte(lgread_u32(lg, gpte_ptr)); 247 gpte = mkgpte(lgread_u32(lg, gpte_ptr));
139 248
140 /* No page? */ 249 /* If this page isn't in the Guest page tables, we can't page it in. */
141 if (!(gpte.flags & _PAGE_PRESENT)) 250 if (!(gpte.flags & _PAGE_PRESENT))
142 return 0; 251 return 0;
143 252
144 /* Write to read-only page? */ 253 /* Check they're not trying to write to a page the Guest wants
 254 * read-only (errcode & 2 means the faulting access was a write). */
145 if ((errcode & 2) && !(gpte.flags & _PAGE_RW)) 255 if ((errcode & 2) && !(gpte.flags & _PAGE_RW))
146 return 0; 256 return 0;
147 257
 148 /* User access to a non-user page? */ 258 /* User access to a kernel page? (errcode & 4 means a user-mode access) */
149 if ((errcode & 4) && !(gpte.flags & _PAGE_USER)) 259 if ((errcode & 4) && !(gpte.flags & _PAGE_USER))
150 return 0; 260 return 0;
151 261
262 /* Check that the Guest PTE flags are OK, and the page number is below
263 * the pfn_limit (ie. not mapping the Launcher binary). */
152 check_gpte(lg, gpte); 264 check_gpte(lg, gpte);
265 /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
153 gpte.flags |= _PAGE_ACCESSED; 266 gpte.flags |= _PAGE_ACCESSED;
154 if (errcode & 2) 267 if (errcode & 2)
155 gpte.flags |= _PAGE_DIRTY; 268 gpte.flags |= _PAGE_DIRTY;
156 269
157 /* We're done with the old pte. */ 270 /* Get the pointer to the shadow PTE entry we're going to set. */
158 spte = spte_addr(lg, *spgd, vaddr); 271 spte = spte_addr(lg, *spgd, vaddr);
272 /* If there was a valid shadow PTE entry here before, we release it.
273 * This can happen with a write to a previously read-only entry. */
159 release_pte(*spte); 274 release_pte(*spte);
160 275
161 /* We don't make it writable if this isn't a write: later 276 /* If this is a write, we insist that the Guest page is writable (the
162 * write will fault so we can set dirty bit in guest. */ 277 * final arg to gpte_to_spte()). */
163 if (gpte.flags & _PAGE_DIRTY) 278 if (gpte.flags & _PAGE_DIRTY)
164 *spte = gpte_to_spte(lg, gpte, 1); 279 *spte = gpte_to_spte(lg, gpte, 1);
165 else { 280 else {
281 /* If this is a read, don't set the "writable" bit in the page
282 * table entry, even if the Guest says it's writable. That way
 283 * we come back here when a write does actually occur, so we can
284 * update the Guest's _PAGE_DIRTY flag. */
166 gpte_t ro_gpte = gpte; 285 gpte_t ro_gpte = gpte;
167 ro_gpte.flags &= ~_PAGE_RW; 286 ro_gpte.flags &= ~_PAGE_RW;
168 *spte = gpte_to_spte(lg, ro_gpte, 0); 287 *spte = gpte_to_spte(lg, ro_gpte, 0);
169 } 288 }
170 289
171 /* Now we update dirty/accessed on guest. */ 290 /* Finally, we write the Guest PTE entry back: we've set the
291 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
172 lgwrite_u32(lg, gpte_ptr, gpte.raw.val); 292 lgwrite_u32(lg, gpte_ptr, gpte.raw.val);
293
294 /* We succeeded in mapping the page! */
173 return 1; 295 return 1;
174} 296}
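Since the bit numbering in those comments is easy to misread, here is the standard x86 page-fault error code spelled out as a hedged sketch (the helper names are invented, not part of the patch):

	/* x86 page-fault error code, as tested by demand_page():
	 *   errcode & 1: set if the fault was a protection violation
	 *                (clear means the page simply wasn't present)
	 *   errcode & 2: set if the faulting access was a write
	 *   errcode & 4: set if the fault happened in user mode */
	static int fault_was_write(int errcode) { return errcode & 2; }
	static int fault_was_user(int errcode)  { return errcode & 4; }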
175 297
176/* This is much faster than the full demand_page logic. */ 298/*H:360 (ii) Setting up the page table entry for the Guest stack.
299 *
300 * Remember pin_stack_pages() which makes sure the stack is mapped? It could
301 * simply call demand_page(), but as we've seen that logic is quite long, and
302 * usually the stack pages are already mapped anyway, so it's not required.
303 *
304 * This is a quick version which answers the question: is this virtual address
305 * mapped by the shadow page tables, and is it writable? */
177static int page_writable(struct lguest *lg, unsigned long vaddr) 306static int page_writable(struct lguest *lg, unsigned long vaddr)
178{ 307{
179 spgd_t *spgd; 308 spgd_t *spgd;
180 unsigned long flags; 309 unsigned long flags;
181 310
311 /* Look at the top level entry: is it present? */
182 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 312 spgd = spgd_addr(lg, lg->pgdidx, vaddr);
183 if (!(spgd->flags & _PAGE_PRESENT)) 313 if (!(spgd->flags & _PAGE_PRESENT))
184 return 0; 314 return 0;
185 315
316 /* Check the flags on the pte entry itself: it must be present and
317 * writable. */
186 flags = spte_addr(lg, *spgd, vaddr)->flags; 318 flags = spte_addr(lg, *spgd, vaddr)->flags;
187 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); 319 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
188} 320}
189 321
322/* So, when pin_stack_pages() asks us to pin a page, we check if it's already
323 * in the page tables, and if not, we call demand_page() with error code 2
324 * (meaning "write"). */
190void pin_page(struct lguest *lg, unsigned long vaddr) 325void pin_page(struct lguest *lg, unsigned long vaddr)
191{ 326{
192 if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) 327 if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2))
193 kill_guest(lg, "bad stack page %#lx", vaddr); 328 kill_guest(lg, "bad stack page %#lx", vaddr);
194} 329}
195 330
331/*H:450 If we chase down the release_pgd() code, it looks like this: */
196static void release_pgd(struct lguest *lg, spgd_t *spgd) 332static void release_pgd(struct lguest *lg, spgd_t *spgd)
197{ 333{
334 /* If the entry's not present, there's nothing to release. */
198 if (spgd->flags & _PAGE_PRESENT) { 335 if (spgd->flags & _PAGE_PRESENT) {
199 unsigned int i; 336 unsigned int i;
337 /* Converting the pfn to find the actual PTE page is easy: turn
338 * the page number into a physical address, then convert to a
339 * virtual address (easy for kernel pages like this one). */
200 spte_t *ptepage = __va(spgd->pfn << PAGE_SHIFT); 340 spte_t *ptepage = __va(spgd->pfn << PAGE_SHIFT);
341 /* For each entry in the page, we might need to release it. */
201 for (i = 0; i < PTES_PER_PAGE; i++) 342 for (i = 0; i < PTES_PER_PAGE; i++)
202 release_pte(ptepage[i]); 343 release_pte(ptepage[i]);
344 /* Now we can free the page of PTEs */
203 free_page((long)ptepage); 345 free_page((long)ptepage);
 346 /* And zero out the PGD entry so we never release it twice. */
204 spgd->raw.val = 0; 347 spgd->raw.val = 0;
205 } 348 }
206} 349}
207 350
 351/*H:440 (v) Flushing (throwing away) page tables,
352 *
353 * We saw flush_user_mappings() called when we re-used a top-level pgdir page.
354 * It simply releases every PTE page from 0 up to the kernel address. */
208static void flush_user_mappings(struct lguest *lg, int idx) 355static void flush_user_mappings(struct lguest *lg, int idx)
209{ 356{
210 unsigned int i; 357 unsigned int i;
358 /* Release every pgd entry up to the kernel's address. */
211 for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++) 359 for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++)
212 release_pgd(lg, lg->pgdirs[idx].pgdir + i); 360 release_pgd(lg, lg->pgdirs[idx].pgdir + i);
213} 361}
214 362
363/* The Guest also has a hypercall to do this manually: it's used when a large
364 * number of mappings have been changed. */
215void guest_pagetable_flush_user(struct lguest *lg) 365void guest_pagetable_flush_user(struct lguest *lg)
216{ 366{
367 /* Drop the userspace part of the current page table. */
217 flush_user_mappings(lg, lg->pgdidx); 368 flush_user_mappings(lg, lg->pgdidx);
218} 369}
370/*:*/
219 371
372/* We keep several page tables. This is a simple routine to find the page
373 * table (if any) corresponding to this top-level address the Guest has given
374 * us. */
220static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) 375static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
221{ 376{
222 unsigned int i; 377 unsigned int i;
@@ -226,21 +381,30 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
226 return i; 381 return i;
227} 382}
228 383
384/*H:435 And this is us, creating the new page directory. If we really do
385 * allocate a new one (and so the kernel parts are not there), we set
386 * blank_pgdir. */
229static unsigned int new_pgdir(struct lguest *lg, 387static unsigned int new_pgdir(struct lguest *lg,
230 unsigned long cr3, 388 unsigned long cr3,
231 int *blank_pgdir) 389 int *blank_pgdir)
232{ 390{
233 unsigned int next; 391 unsigned int next;
234 392
393 /* We pick one entry at random to throw out. Choosing the Least
394 * Recently Used might be better, but this is easy. */
235 next = random32() % ARRAY_SIZE(lg->pgdirs); 395 next = random32() % ARRAY_SIZE(lg->pgdirs);
396 /* If it's never been allocated at all before, try now. */
236 if (!lg->pgdirs[next].pgdir) { 397 if (!lg->pgdirs[next].pgdir) {
237 lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL); 398 lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL);
399 /* If the allocation fails, just keep using the one we have */
238 if (!lg->pgdirs[next].pgdir) 400 if (!lg->pgdirs[next].pgdir)
239 next = lg->pgdidx; 401 next = lg->pgdidx;
240 else 402 else
241 /* There are no mappings: you'll need to re-pin */ 403 /* This is a blank page, so there are no kernel
404 * mappings: caller must map the stack! */
242 *blank_pgdir = 1; 405 *blank_pgdir = 1;
243 } 406 }
407 /* Record which Guest toplevel this shadows. */
244 lg->pgdirs[next].cr3 = cr3; 408 lg->pgdirs[next].cr3 = cr3;
245 /* Release all the non-kernel mappings. */ 409 /* Release all the non-kernel mappings. */
246 flush_user_mappings(lg, next); 410 flush_user_mappings(lg, next);
@@ -248,82 +412,161 @@ static unsigned int new_pgdir(struct lguest *lg,
248 return next; 412 return next;
249} 413}
250 414
415/*H:430 (iv) Switching page tables
416 *
417 * This is what happens when the Guest changes page tables (ie. changes the
418 * top-level pgdir). This happens on almost every context switch. */
251void guest_new_pagetable(struct lguest *lg, unsigned long pgtable) 419void guest_new_pagetable(struct lguest *lg, unsigned long pgtable)
252{ 420{
253 int newpgdir, repin = 0; 421 int newpgdir, repin = 0;
254 422
423 /* Look to see if we have this one already. */
255 newpgdir = find_pgdir(lg, pgtable); 424 newpgdir = find_pgdir(lg, pgtable);
425 /* If not, we allocate or mug an existing one: if it's a fresh one,
426 * repin gets set to 1. */
256 if (newpgdir == ARRAY_SIZE(lg->pgdirs)) 427 if (newpgdir == ARRAY_SIZE(lg->pgdirs))
257 newpgdir = new_pgdir(lg, pgtable, &repin); 428 newpgdir = new_pgdir(lg, pgtable, &repin);
429 /* Change the current pgd index to the new one. */
258 lg->pgdidx = newpgdir; 430 lg->pgdidx = newpgdir;
431 /* If it was completely blank, we map in the Guest kernel stack */
259 if (repin) 432 if (repin)
260 pin_stack_pages(lg); 433 pin_stack_pages(lg);
261} 434}
262 435
436/*H:470 Finally, a routine which throws away everything: all PGD entries in all
437 * the shadow page tables. This is used when we destroy the Guest. */
263static void release_all_pagetables(struct lguest *lg) 438static void release_all_pagetables(struct lguest *lg)
264{ 439{
265 unsigned int i, j; 440 unsigned int i, j;
266 441
442 /* Every shadow pagetable this Guest has */
267 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 443 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
268 if (lg->pgdirs[i].pgdir) 444 if (lg->pgdirs[i].pgdir)
445 /* Every PGD entry except the Switcher at the top */
269 for (j = 0; j < SWITCHER_PGD_INDEX; j++) 446 for (j = 0; j < SWITCHER_PGD_INDEX; j++)
270 release_pgd(lg, lg->pgdirs[i].pgdir + j); 447 release_pgd(lg, lg->pgdirs[i].pgdir + j);
271} 448}
272 449
450/* We also throw away everything when a Guest tells us it's changed a kernel
451 * mapping. Since kernel mappings are in every page table, it's easiest to
452 * throw them all away. This is amazingly slow, but thankfully rare. */
273void guest_pagetable_clear_all(struct lguest *lg) 453void guest_pagetable_clear_all(struct lguest *lg)
274{ 454{
275 release_all_pagetables(lg); 455 release_all_pagetables(lg);
456 /* We need the Guest kernel stack mapped again. */
276 pin_stack_pages(lg); 457 pin_stack_pages(lg);
277} 458}
278 459
 460/*H:420 This is the routine which actually sets the page table entry for the
461 * "idx"'th shadow page table.
462 *
463 * Normally, we can just throw out the old entry and replace it with 0: if they
464 * use it demand_page() will put the new entry in. We need to do this anyway:
465 * The Guest expects _PAGE_ACCESSED to be set on its PTE the first time a page
466 * is read from, and _PAGE_DIRTY when it's written to.
467 *
468 * But Avi Kivity pointed out that most Operating Systems (Linux included) set
469 * these bits on PTEs immediately anyway. This is done to save the CPU from
470 * having to update them, but it helps us the same way: if they set
471 * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if
472 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately.
473 */
279static void do_set_pte(struct lguest *lg, int idx, 474static void do_set_pte(struct lguest *lg, int idx,
280 unsigned long vaddr, gpte_t gpte) 475 unsigned long vaddr, gpte_t gpte)
281{ 476{
 477 /* Look up the matching shadow page directory entry. */
282 spgd_t *spgd = spgd_addr(lg, idx, vaddr); 478 spgd_t *spgd = spgd_addr(lg, idx, vaddr);
479
480 /* If the top level isn't present, there's no entry to update. */
283 if (spgd->flags & _PAGE_PRESENT) { 481 if (spgd->flags & _PAGE_PRESENT) {
482 /* Otherwise, we start by releasing the existing entry. */
284 spte_t *spte = spte_addr(lg, *spgd, vaddr); 483 spte_t *spte = spte_addr(lg, *spgd, vaddr);
285 release_pte(*spte); 484 release_pte(*spte);
485
486 /* If they're setting this entry as dirty or accessed, we might
487 * as well put that entry they've given us in now. This shaves
488 * 10% off a copy-on-write micro-benchmark. */
286 if (gpte.flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) { 489 if (gpte.flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
287 check_gpte(lg, gpte); 490 check_gpte(lg, gpte);
288 *spte = gpte_to_spte(lg, gpte, gpte.flags&_PAGE_DIRTY); 491 *spte = gpte_to_spte(lg, gpte, gpte.flags&_PAGE_DIRTY);
289 } else 492 } else
493 /* Otherwise we can demand_page() it in later. */
290 spte->raw.val = 0; 494 spte->raw.val = 0;
291 } 495 }
292} 496}
293 497
498/*H:410 Updating a PTE entry is a little trickier.
499 *
500 * We keep track of several different page tables (the Guest uses one for each
 501 * process, so it makes sense to cache at least a few). Each of these has
502 * identical kernel parts: ie. every mapping above PAGE_OFFSET is the same for
503 * all processes. So when the page table above that address changes, we update
504 * all the page tables, not just the current one. This is rare.
505 *
 506 * The benefit is that when we have to track a new page table, we can keep
507 * all the kernel mappings. This speeds up context switch immensely. */
294void guest_set_pte(struct lguest *lg, 508void guest_set_pte(struct lguest *lg,
295 unsigned long cr3, unsigned long vaddr, gpte_t gpte) 509 unsigned long cr3, unsigned long vaddr, gpte_t gpte)
296{ 510{
297 /* Kernel mappings must be changed on all top levels. */ 511 /* Kernel mappings must be changed on all top levels. Slow, but
512 * doesn't happen often. */
298 if (vaddr >= lg->page_offset) { 513 if (vaddr >= lg->page_offset) {
299 unsigned int i; 514 unsigned int i;
300 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 515 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
301 if (lg->pgdirs[i].pgdir) 516 if (lg->pgdirs[i].pgdir)
302 do_set_pte(lg, i, vaddr, gpte); 517 do_set_pte(lg, i, vaddr, gpte);
303 } else { 518 } else {
519 /* Is this page table one we have a shadow for? */
304 int pgdir = find_pgdir(lg, cr3); 520 int pgdir = find_pgdir(lg, cr3);
305 if (pgdir != ARRAY_SIZE(lg->pgdirs)) 521 if (pgdir != ARRAY_SIZE(lg->pgdirs))
522 /* If so, do the update. */
306 do_set_pte(lg, pgdir, vaddr, gpte); 523 do_set_pte(lg, pgdir, vaddr, gpte);
307 } 524 }
308} 525}
309 526
527/*H:400
528 * (iii) Setting up a page table entry when the Guest tells us it has changed.
529 *
530 * Just like we did in interrupts_and_traps.c, it makes sense for us to deal
531 * with the other side of page tables while we're here: what happens when the
532 * Guest asks for a page table to be updated?
533 *
534 * We already saw that demand_page() will fill in the shadow page tables when
535 * needed, so we can simply remove shadow page table entries whenever the Guest
536 * tells us they've changed. When the Guest tries to use the new entry it will
537 * fault and demand_page() will fix it up.
538 *
 539 * So with that in mind here's our code to update a (top-level) PGD entry:
540 */
310void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx) 541void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
311{ 542{
312 int pgdir; 543 int pgdir;
313 544
545 /* The kernel seems to try to initialize this early on: we ignore its
546 * attempts to map over the Switcher. */
314 if (idx >= SWITCHER_PGD_INDEX) 547 if (idx >= SWITCHER_PGD_INDEX)
315 return; 548 return;
316 549
550 /* If they're talking about a page table we have a shadow for... */
317 pgdir = find_pgdir(lg, cr3); 551 pgdir = find_pgdir(lg, cr3);
318 if (pgdir < ARRAY_SIZE(lg->pgdirs)) 552 if (pgdir < ARRAY_SIZE(lg->pgdirs))
553 /* ... throw it away. */
319 release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx); 554 release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
320} 555}
321 556
557/*H:500 (vii) Setting up the page tables initially.
558 *
559 * When a Guest is first created, the Launcher tells us where the toplevel of
560 * its first page table is. We set some things up here: */
322int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) 561int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
323{ 562{
324 /* We assume this in flush_user_mappings, so check now */ 563 /* In flush_user_mappings() we loop from 0 to
564 * "vaddr_to_pgd_index(lg->page_offset)". This assumes it won't hit
565 * the Switcher mappings, so check that now. */
325 if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX) 566 if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX)
326 return -EINVAL; 567 return -EINVAL;
568 /* We start on the first shadow page table, and give it a blank PGD
569 * page. */
327 lg->pgdidx = 0; 570 lg->pgdidx = 0;
328 lg->pgdirs[lg->pgdidx].cr3 = pgtable; 571 lg->pgdirs[lg->pgdidx].cr3 = pgtable;
329 lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL); 572 lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL);
@@ -332,33 +575,48 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
332 return 0; 575 return 0;
333} 576}
334 577
578/* When a Guest dies, our cleanup is fairly simple. */
335void free_guest_pagetable(struct lguest *lg) 579void free_guest_pagetable(struct lguest *lg)
336{ 580{
337 unsigned int i; 581 unsigned int i;
338 582
583 /* Throw away all page table pages. */
339 release_all_pagetables(lg); 584 release_all_pagetables(lg);
585 /* Now free the top levels: free_page() can handle 0 just fine. */
340 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 586 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
341 free_page((long)lg->pgdirs[i].pgdir); 587 free_page((long)lg->pgdirs[i].pgdir);
342} 588}
343 589
344/* Caller must be preempt-safe */ 590/*H:480 (vi) Mapping the Switcher when the Guest is about to run.
591 *
592 * The Switcher and the two pages for this CPU need to be available to the
593 * Guest (and not the pages for other CPUs). We have the appropriate PTE pages
 594 * for each CPU already set up; we just need to hook them in. */
345void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) 595void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages)
346{ 596{
347 spte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); 597 spte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
348 spgd_t switcher_pgd; 598 spgd_t switcher_pgd;
349 spte_t regs_pte; 599 spte_t regs_pte;
350 600
351 /* Since switcher less that 4MB, we simply mug top pte page. */ 601 /* Make the last PGD entry for this Guest point to the Switcher's PTE
602 * page for this CPU (with appropriate flags). */
352 switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT; 603 switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT;
353 switcher_pgd.flags = _PAGE_KERNEL; 604 switcher_pgd.flags = _PAGE_KERNEL;
354 lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; 605 lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
355 606
356 /* Map our regs page over stack page. */ 607 /* We also change the Switcher PTE page. When we're running the Guest,
608 * we want the Guest's "regs" page to appear where the first Switcher
609 * page for this CPU is. This is an optimization: when the Switcher
610 * saves the Guest registers, it saves them into the first page of this
611 * CPU's "struct lguest_pages": if we make sure the Guest's register
612 * page is already mapped there, we don't have to copy them out
613 * again. */
357 regs_pte.pfn = __pa(lg->regs_page) >> PAGE_SHIFT; 614 regs_pte.pfn = __pa(lg->regs_page) >> PAGE_SHIFT;
358 regs_pte.flags = _PAGE_KERNEL; 615 regs_pte.flags = _PAGE_KERNEL;
359 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTES_PER_PAGE] 616 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTES_PER_PAGE]
360 = regs_pte; 617 = regs_pte;
361} 618}
619/*:*/
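That final index expression is terse: "pages" is a virtual address in the top 4MB, so dividing by PAGE_SIZE gives its page number, and the modulo keeps the low 10 bits, ie. its slot within the Switcher's PTE page. An equivalent, purely illustrative restatement:

	/* Illustrative restatement of the assignment above, not new code: */
	unsigned long page_number = (unsigned long)pages / PAGE_SIZE;
	unsigned long slot = page_number % PTES_PER_PAGE;	/* low 10 bits */
	switcher_pte_page[slot] = regs_pte;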
362 620
363static void free_switcher_pte_pages(void) 621static void free_switcher_pte_pages(void)
364{ 622{
@@ -368,6 +626,10 @@ static void free_switcher_pte_pages(void)
368 free_page((long)switcher_pte_page(i)); 626 free_page((long)switcher_pte_page(i));
369} 627}
370 628
 629/*H:520 Setting up the Switcher PTE page for a given CPU is fairly easy, given
630 * the CPU number and the "struct page"s for the Switcher code itself.
631 *
632 * Currently the Switcher is less than a page long, so "pages" is always 1. */
371static __init void populate_switcher_pte_page(unsigned int cpu, 633static __init void populate_switcher_pte_page(unsigned int cpu,
372 struct page *switcher_page[], 634 struct page *switcher_page[],
373 unsigned int pages) 635 unsigned int pages)
@@ -375,21 +637,26 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
375 unsigned int i; 637 unsigned int i;
376 spte_t *pte = switcher_pte_page(cpu); 638 spte_t *pte = switcher_pte_page(cpu);
377 639
640 /* The first entries are easy: they map the Switcher code. */
378 for (i = 0; i < pages; i++) { 641 for (i = 0; i < pages; i++) {
379 pte[i].pfn = page_to_pfn(switcher_page[i]); 642 pte[i].pfn = page_to_pfn(switcher_page[i]);
380 pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED; 643 pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED;
381 } 644 }
382 645
383 /* We only map this CPU's pages, so guest can't see others. */ 646 /* The only other thing we map is this CPU's pair of pages. */
384 i = pages + cpu*2; 647 i = pages + cpu*2;
385 648
386 /* First page (regs) is rw, second (state) is ro. */ 649 /* First page (Guest registers) is writable from the Guest */
387 pte[i].pfn = page_to_pfn(switcher_page[i]); 650 pte[i].pfn = page_to_pfn(switcher_page[i]);
388 pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW; 651 pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW;
652 /* The second page contains the "struct lguest_ro_state", and is
653 * read-only. */
389 pte[i+1].pfn = page_to_pfn(switcher_page[i+1]); 654 pte[i+1].pfn = page_to_pfn(switcher_page[i+1]);
390 pte[i+1].flags = _PAGE_PRESENT|_PAGE_ACCESSED; 655 pte[i+1].flags = _PAGE_PRESENT|_PAGE_ACCESSED;
391} 656}
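Putting the indexing together, a sketch (assuming pages == 1, as the comment above notes) of the slot layout a given CPU ends up with:

	/* Switcher PTE page for CPU "cpu" (illustrative):
	 *   slot 0           -> Switcher code, read-only
	 *   slot 1 + cpu*2   -> this CPU's regs page, writable
	 *   slot 2 + cpu*2   -> this CPU's lguest_ro_state page, read-only
	 * Every other CPU's pair of pages stays unmapped. */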
392 657
658/*H:510 At boot or module load time, init_pagetables() allocates and populates
659 * the Switcher PTE page for each CPU. */
393__init int init_pagetables(struct page **switcher_page, unsigned int pages) 660__init int init_pagetables(struct page **switcher_page, unsigned int pages)
394{ 661{
395 unsigned int i; 662 unsigned int i;
@@ -404,7 +671,9 @@ __init int init_pagetables(struct page **switcher_page, unsigned int pages)
404 } 671 }
405 return 0; 672 return 0;
406} 673}
674/*:*/
407 675
676/* Cleaning up simply involves freeing the PTE page for each CPU. */
408void free_pagetables(void) 677void free_pagetables(void)
409{ 678{
410 free_switcher_pte_pages(); 679 free_switcher_pte_pages();
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index 1b2cfe89dcd5..f675a41a80da 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -1,16 +1,68 @@
1/*P:600 The x86 architecture has segments, which involve a table of descriptors
2 * which can be used to do funky things with virtual address interpretation.
 3 * We originally used segments so the Guest couldn't alter the
4 * Guest<->Host Switcher, and then we had to trim Guest segments, and restore
 5 * for userspace per-thread segments, but trim again on userspace->kernel
6 * transitions... This nightmarish creation was contained within this file,
7 * where we knew not to tread without heavy armament and a change of underwear.
8 *
9 * In these modern times, the segment handling code consists of simple sanity
10 * checks, and the worst you'll experience reading this code is butterfly-rash
11 * from frolicking through its parklike serenity. :*/
1#include "lg.h" 12#include "lg.h"
2 13
14/*H:600
15 * We've almost completed the Host; there's just one file to go!
16 *
17 * Segments & The Global Descriptor Table
18 *
19 * (That title sounds like a bad Nerdcore group. Not to suggest that there are
20 * any good Nerdcore groups, but in high school a friend of mine had a band
21 * called Joe Fish and the Chips, so there are definitely worse band names).
22 *
23 * To refresh: the GDT is a table of 8-byte values describing segments. Once
24 * set up, these segments can be loaded into one of the 6 "segment registers".
25 *
26 * GDT entries are passed around as "struct desc_struct"s, which like IDT
27 * entries are split into two 32-bit members, "a" and "b". One day, someone
28 * will clean that up, and be declared a Hero. (No pressure, I'm just saying).
29 *
30 * Anyway, the GDT entry contains a base (the start address of the segment), a
31 * limit (the size of the segment - 1), and some flags. Sounds simple, and it
32 * would be, except those zany Intel engineers decided that it was too boring
33 * to put the base at one end, the limit at the other, and the flags in
34 * between. They decided to shotgun the bits at random throughout the 8 bytes,
35 * like so:
36 *
 37 *  0              16                       40       48  52  56   63
 38 * [ limit part 1 ][      base part 1      ][ flags ][li][fl][base ]
 39 *                                                    mit ags part 2
 40 *                                                    part 2
41 *
42 * As a result, this file contains a certain amount of magic numeracy. Let's
43 * begin.
44 */
45
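Given that bit-scattering, a hedged pair of helpers (not in this file; the names are invented) showing how base and limit would be reassembled from the two 32-bit words "a" and "b":

	/* Illustrative only: reassemble the scattered GDT entry fields. */
	static unsigned long desc_base(const struct desc_struct *d)
	{
		return (d->a >> 16)			/* base bits 0-15  */
			| ((d->b & 0x000000FF) << 16)	/* base bits 16-23 */
			| (d->b & 0xFF000000);		/* base bits 24-31 */
	}

	static unsigned long desc_limit(const struct desc_struct *d)
	{
		return (d->a & 0x0000FFFF)		/* limit bits 0-15  */
			| (d->b & 0x000F0000);		/* limit bits 16-19 */
	}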
46/* Is the descriptor the Guest wants us to put in OK?
47 *
48 * The flag which Intel says must be zero: must be zero. The descriptor must
 49 * be present, (this is actually checked earlier but is here for thoroughness),
50 * and the descriptor type must be 1 (a memory segment). */
3static int desc_ok(const struct desc_struct *gdt) 51static int desc_ok(const struct desc_struct *gdt)
4{ 52{
5 /* MBZ=0, P=1, DT=1 */
6 return ((gdt->b & 0x00209000) == 0x00009000); 53 return ((gdt->b & 0x00209000) == 0x00009000);
7} 54}
8 55
56/* Is the segment present? (Otherwise it can't be used by the Guest). */
9static int segment_present(const struct desc_struct *gdt) 57static int segment_present(const struct desc_struct *gdt)
10{ 58{
11 return gdt->b & 0x8000; 59 return gdt->b & 0x8000;
12} 60}
13 61
62/* There are several entries we don't let the Guest set. The TSS entry is the
63 * "Task State Segment" which controls all kinds of delicate things. The
64 * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the
 65 * Guest can't be trusted to deal with double faults.
14static int ignored_gdt(unsigned int num) 66static int ignored_gdt(unsigned int num)
15{ 67{
16 return (num == GDT_ENTRY_TSS 68 return (num == GDT_ENTRY_TSS
@@ -19,9 +71,18 @@ static int ignored_gdt(unsigned int num)
19 || num == GDT_ENTRY_DOUBLEFAULT_TSS); 71 || num == GDT_ENTRY_DOUBLEFAULT_TSS);
20} 72}
21 73
22/* We don't allow removal of CS, DS or SS; it doesn't make sense. */ 74/* If the Guest asks us to remove an entry from the GDT, we have to be careful.
75 * If one of the segment registers is pointing at that entry the Switcher will
76 * crash when it tries to reload the segment registers for the Guest.
77 *
78 * It doesn't make much sense for the Guest to try to remove its own code, data
79 * or stack segments while they're in use: assume that's a Guest bug. If it's
80 * one of the lesser segment registers using the removed entry, we simply set
81 * that register to 0 (unusable). */
23static void check_segment_use(struct lguest *lg, unsigned int desc) 82static void check_segment_use(struct lguest *lg, unsigned int desc)
24{ 83{
84 /* GDT entries are 8 bytes long, so we divide to get the index and
85 * ignore the bottom bits. */
25 if (lg->regs->gs / 8 == desc) 86 if (lg->regs->gs / 8 == desc)
26 lg->regs->gs = 0; 87 lg->regs->gs = 0;
27 if (lg->regs->fs / 8 == desc) 88 if (lg->regs->fs / 8 == desc)
@@ -33,13 +94,21 @@ static void check_segment_use(struct lguest *lg, unsigned int desc)
33 || lg->regs->ss / 8 == desc) 94 || lg->regs->ss / 8 == desc)
34 kill_guest(lg, "Removed live GDT entry %u", desc); 95 kill_guest(lg, "Removed live GDT entry %u", desc);
35} 96}
36 97/*:*/
98/*M:009 We wouldn't need to check for removal of in-use segments if we handled
99 * faults in the Switcher. However, it's probably not a worthwhile
100 * optimization. :*/
101
102/*H:610 Once the GDT has been changed, we look through the changed entries and
103 * see if they're OK. If not, we'll call kill_guest() and the Guest will never
104 * get to use the invalid entries. */
37static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end) 105static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
38{ 106{
39 unsigned int i; 107 unsigned int i;
40 108
41 for (i = start; i < end; i++) { 109 for (i = start; i < end; i++) {
 42 /* We never copy these ones to real gdt */ 110 /* We never copy these ones to the real GDT, so we don't care what
111 * they say */
43 if (ignored_gdt(i)) 112 if (ignored_gdt(i))
44 continue; 113 continue;
45 114
@@ -53,41 +122,57 @@ static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
53 if (!desc_ok(&lg->gdt[i])) 122 if (!desc_ok(&lg->gdt[i]))
54 kill_guest(lg, "Bad GDT descriptor %i", i); 123 kill_guest(lg, "Bad GDT descriptor %i", i);
55 124
56 /* DPL 0 presumably means "for use by guest". */ 125 /* Segment descriptors contain a privilege level: the Guest is
126 * sometimes careless and leaves this as 0, even though it's
127 * running at privilege level 1. If so, we fix it here. */
57 if ((lg->gdt[i].b & 0x00006000) == 0) 128 if ((lg->gdt[i].b & 0x00006000) == 0)
58 lg->gdt[i].b |= (GUEST_PL << 13); 129 lg->gdt[i].b |= (GUEST_PL << 13);
59 130
60 /* Set accessed bit, since gdt isn't writable. */ 131 /* Each descriptor has an "accessed" bit. If we don't set it
132 * now, the CPU will try to set it when the Guest first loads
133 * that entry into a segment register. But the GDT isn't
134 * writable by the Guest, so bad things can happen. */
61 lg->gdt[i].b |= 0x00000100; 135 lg->gdt[i].b |= 0x00000100;
62 } 136 }
63} 137}
64 138
139/* This routine is called at boot or modprobe time for each CPU to set up the
140 * "constant" GDT entries for Guests running on that CPU. */
65void setup_default_gdt_entries(struct lguest_ro_state *state) 141void setup_default_gdt_entries(struct lguest_ro_state *state)
66{ 142{
67 struct desc_struct *gdt = state->guest_gdt; 143 struct desc_struct *gdt = state->guest_gdt;
68 unsigned long tss = (unsigned long)&state->guest_tss; 144 unsigned long tss = (unsigned long)&state->guest_tss;
69 145
70 /* Hypervisor segments. */ 146 /* The hypervisor segments are full 0-4G segments, privilege level 0 */
71 gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; 147 gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
72 gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; 148 gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
73 149
74 /* This is the one which we *cannot* copy from guest, since tss 150 /* The TSS segment refers to the TSS entry for this CPU, so we cannot
 75 is depended on this lguest_ro_state, ie. this cpu. */ 151 * copy it from the Guest. Forgive the magic flags. */
76 gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); 152 gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16);
77 gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) 153 gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000)
78 | ((tss >> 16) & 0x000000FF); 154 | ((tss >> 16) & 0x000000FF);
79} 155}
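Those "magic flags" decode as follows, per the standard i386 descriptor encoding (an illustrative annotation, not new code):

	/* .a = 0x00000067 | (tss << 16)
	 *	limit[15:0] = 0x67: the 104-byte hardware TSS, minus one
	 *	base[15:0]  = low 16 bits of &state->guest_tss
	 * .b = 0x00008900 | (tss & 0xFF000000) | ((tss >> 16) & 0xFF)
	 *	base[23:16] and base[31:24] = the remaining address bits
	 *	0x89 = present, DPL 0, type 9 (available 32-bit TSS) */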
80 156
157/* This routine is called before the Guest is run for the first time. */
81void setup_guest_gdt(struct lguest *lg) 158void setup_guest_gdt(struct lguest *lg)
82{ 159{
160 /* Start with full 0-4G segments... */
83 lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; 161 lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
84 lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; 162 lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
163 /* ...except the Guest is allowed to use them, so set the privilege
164 * level appropriately in the flags. */
85 lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); 165 lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
86 lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); 166 lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
87} 167}
88 168
89/* This is a fast version for the common case where only the three TLS entries 169/* Like the IDT, we never simply use the GDT the Guest gives us. We set up the
90 * have changed. */ 170 * GDTs for each CPU, then we copy across the entries each time we want to run
171 * a different Guest on that CPU. */
172
 173/* A partial GDT load, for the three "thread-local storage" entries. Otherwise
174 * it's just like load_guest_gdt(). So much, in fact, it would probably be
175 * neater to have a single hypercall to cover both. */
91void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt) 176void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt)
92{ 177{
93 unsigned int i; 178 unsigned int i;
@@ -96,22 +181,31 @@ void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt)
96 gdt[i] = lg->gdt[i]; 181 gdt[i] = lg->gdt[i];
97} 182}
98 183
184/* This is the full version */
99void copy_gdt(const struct lguest *lg, struct desc_struct *gdt) 185void copy_gdt(const struct lguest *lg, struct desc_struct *gdt)
100{ 186{
101 unsigned int i; 187 unsigned int i;
102 188
189 /* The default entries from setup_default_gdt_entries() are not
190 * replaced. See ignored_gdt() above. */
103 for (i = 0; i < GDT_ENTRIES; i++) 191 for (i = 0; i < GDT_ENTRIES; i++)
104 if (!ignored_gdt(i)) 192 if (!ignored_gdt(i))
105 gdt[i] = lg->gdt[i]; 193 gdt[i] = lg->gdt[i];
106} 194}
107 195
196/* This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT). */
108void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num) 197void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num)
109{ 198{
199 /* We assume the Guest has the same number of GDT entries as the
200 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
110 if (num > ARRAY_SIZE(lg->gdt)) 201 if (num > ARRAY_SIZE(lg->gdt))
111 kill_guest(lg, "too many gdt entries %i", num); 202 kill_guest(lg, "too many gdt entries %i", num);
112 203
204 /* We read the whole thing in, then fix it up. */
113 lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0])); 205 lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0]));
114 fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt)); 206 fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt));
207 /* Mark that the GDT changed so the core knows it has to copy it again,
208 * even if the Guest is run on the same CPU. */
115 lg->changed |= CHANGED_GDT; 209 lg->changed |= CHANGED_GDT;
116} 210}
117 211
@@ -123,3 +217,13 @@ void guest_load_tls(struct lguest *lg, unsigned long gtls)
123 fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1); 217 fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1);
124 lg->changed |= CHANGED_GDT_TLS; 218 lg->changed |= CHANGED_GDT_TLS;
125} 219}
220
221/*
222 * With this, we have finished the Host.
223 *
224 * Five of the seven parts of our task are complete. You have made it through
225 * the Bit of Despair (I think that's somewhere in the page table code,
226 * myself).
227 *
228 * Next, we examine "make Switcher". It's short, but intense.
229 */
diff --git a/drivers/lguest/switcher.S b/drivers/lguest/switcher.S
index eadd4cc299d2..d418179ea6b5 100644
--- a/drivers/lguest/switcher.S
+++ b/drivers/lguest/switcher.S
@@ -1,45 +1,136 @@
1/* This code sits at 0xFFC00000 to do the low-level guest<->host switch. 1/*P:900 This is the Switcher: code which sits at 0xFFC00000 to do the low-level
2 * Guest<->Host switch. It is as simple as it can be made, but it's naturally
3 * very specific to x86.
4 *
5 * You have now completed Preparation. If this has whet your appetite; if you
6 * are feeling invigorated and refreshed then the next, more challenging stage
7 * can be found in "make Guest". :*/
2 8
3 There is are two pages above us for this CPU (struct lguest_pages). 9/*S:100
4 The second page (struct lguest_ro_state) becomes read-only after the 10 * Welcome to the Switcher itself!
5 context switch. The first page (the stack for traps) remains writable, 11 *
6 but while we're in here, the guest cannot be running. 12 * This file contains the low-level code which changes the CPU to run the Guest
7*/ 13 * code, and returns to the Host when something happens. Understand this, and
14 * you understand the heart of our journey.
15 *
16 * Because this is in assembler rather than C, our tale switches from prose to
17 * verse. First I tried limericks:
18 *
19 * There once was an eax reg,
20 * To which our pointer was fed,
21 * It needed an add,
22 * Which asm-offsets.h had
23 * But this limerick is hurting my head.
24 *
25 * Next I tried haikus, but fitting the required reference to the seasons in
26 * every stanza was quickly becoming tiresome:
27 *
28 * The %eax reg
29 * Holds "struct lguest_pages" now:
30 * Cherry blossoms fall.
31 *
 32 * Then I started with Heroic Verse, but the rhyming requirement leached away
33 * the content density and led to some uniquely awful oblique rhymes:
34 *
35 * These constants are coming from struct offsets
36 * For use within the asm switcher text.
37 *
38 * Finally, I settled for something between heroic hexameter, and normal prose
 39 * with inappropriate linebreaks. Anyway, it ain't no Shakespeare.
40 */
41
42// Not all kernel headers work from assembler
43// But these ones are needed: the ENTRY() define
44// And constants extracted from struct offsets
45// To avoid magic numbers and breakage:
46// Should they change the compiler can't save us
47// Down here in the depths of assembler code.
8#include <linux/linkage.h> 48#include <linux/linkage.h>
9#include <asm/asm-offsets.h> 49#include <asm/asm-offsets.h>
10#include "lg.h" 50#include "lg.h"
11 51
52// We mark the start of the code to copy
53// It's placed in .text tho it's never run here
54// You'll see the trick macro at the end
55// Which interleaves data and text to effect.
12.text 56.text
13ENTRY(start_switcher_text) 57ENTRY(start_switcher_text)
14 58
15/* %eax points to lguest pages for this CPU. %ebx contains cr3 value. 59// When we reach switch_to_guest we have just left
16 All normal registers can be clobbered! */ 60// The safe and comforting shores of C code
61// %eax has the "struct lguest_pages" to use
62// Where we save state and still see it from the Guest
63// And %ebx holds the Guest shadow pagetable:
64// Once set we have truly left Host behind.
17ENTRY(switch_to_guest) 65ENTRY(switch_to_guest)
18 /* Save host segments on host stack. */ 66 // We told gcc all its regs could fade,
67 // Clobbered by our journey into the Guest
68 // We could have saved them, if we tried
69 // But time is our master and cycles count.
70
71 // Segment registers must be saved for the Host
72 // We push them on the Host stack for later
19 pushl %es 73 pushl %es
20 pushl %ds 74 pushl %ds
21 pushl %gs 75 pushl %gs
22 pushl %fs 76 pushl %fs
23 /* With CONFIG_FRAME_POINTER, gcc doesn't let us clobber this! */ 77 // But the compiler is fickle, and heeds
78 // No warning of %ebp clobbers
79 // When frame pointers are used. That register
80 // Must be saved and restored or chaos strikes.
24 pushl %ebp 81 pushl %ebp
25 /* Save host stack. */ 82 // The Host's stack is done, now save it away
83 // In our "struct lguest_pages" at offset
84 // Distilled into asm-offsets.h
26 movl %esp, LGUEST_PAGES_host_sp(%eax) 85 movl %esp, LGUEST_PAGES_host_sp(%eax)
27 /* Switch to guest stack: if we get NMI we expect to be there. */ 86
87 // All saved and there's now five steps before us:
88 // Stack, GDT, IDT, TSS
89 // And last of all the page tables are flipped.
90
91 // Yet beware that our stack pointer must be
92 // Always valid lest an NMI hits
93 // %edx does the duty here as we juggle
94 // %eax is lguest_pages: our stack lies within.
28 movl %eax, %edx 95 movl %eax, %edx
29 addl $LGUEST_PAGES_regs, %edx 96 addl $LGUEST_PAGES_regs, %edx
30 movl %edx, %esp 97 movl %edx, %esp
31 /* Switch to guest's GDT, IDT. */ 98
99 // The Guest's GDT we so carefully
100 // Placed in the "struct lguest_pages" before
32 lgdt LGUEST_PAGES_guest_gdt_desc(%eax) 101 lgdt LGUEST_PAGES_guest_gdt_desc(%eax)
102
103 // The Guest's IDT we did partially
104 // Move to the "struct lguest_pages" as well.
33 lidt LGUEST_PAGES_guest_idt_desc(%eax) 105 lidt LGUEST_PAGES_guest_idt_desc(%eax)
34 /* Switch to guest's TSS while GDT still writable. */ 106
107 // The TSS entry which controls traps
108 // Must be loaded up with "ltr" now:
109 // For after we switch over our page tables
110 // It (as the rest) will be writable no more.
111 // (The GDT entry TSS needs
112 // Changes type when we load it: damn Intel!)
35 movl $(GDT_ENTRY_TSS*8), %edx 113 movl $(GDT_ENTRY_TSS*8), %edx
36 ltr %dx 114 ltr %dx
37 /* Set host's TSS GDT entry to available (clear byte 5 bit 2). */ 115
116 // Look back now, before we take this last step!
117 // The Host's TSS entry was also marked used;
118 // Let's clear it again, ere we return.
119 // The GDT descriptor of the Host
120 // Points to the table after two "size" bytes
38 movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx 121 movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx
122 // Clear the type field of "used" (byte 5, bit 2)
39 andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx) 123 andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx)
40 /* Switch to guest page tables: lguest_pages->state now read-only. */ 124
125 // Once our page table's switched, the Guest is live!
126 // The Host fades as we run this final step.
127 // Our "struct lguest_pages" is now read-only.
41 movl %ebx, %cr3 128 movl %ebx, %cr3
42 /* Restore guest regs */ 129
130 // The page table change did one tricky thing:
131 // The Guest's register page has been mapped
132 // Writable onto our %esp (stack) --
133 // We can simply pop off all Guest regs.
43 popl %ebx 134 popl %ebx
44 popl %ecx 135 popl %ecx
45 popl %edx 136 popl %edx
@@ -51,12 +142,27 @@ ENTRY(switch_to_guest)
51 popl %fs 142 popl %fs
52 popl %ds 143 popl %ds
53 popl %es 144 popl %es
54 /* Skip error code and trap number */ 145
146 // Near the base of the stack lurk two strange fields
147 // Which we fill as we exit the Guest
148 // These are the trap number and its error
149 // We can simply step past them on our way.
55 addl $8, %esp 150 addl $8, %esp
151
152 // The last five stack slots hold return address
153 // And everything needed to change privilege
154 // Into the Guest privilege level of 1,
155 // And the stack where the Guest had last left it.
156 // Interrupts are turned back on: we are Guest.
56 iret 157 iret
57 158
159// There are two paths where we switch to the Host
160// So we put the routine in a macro.
161// We are on our way home, back to the Host
162// Interrupted out of the Guest, we come here.
58#define SWITCH_TO_HOST \ 163#define SWITCH_TO_HOST \
59 /* Save guest state */ \ 164 /* We save the Guest state: all registers first \
165 * Laid out just as "struct lguest_regs" defines */ \
60 pushl %es; \ 166 pushl %es; \
61 pushl %ds; \ 167 pushl %ds; \
62 pushl %fs; \ 168 pushl %fs; \
@@ -68,58 +174,119 @@ ENTRY(switch_to_guest)
68 pushl %edx; \ 174 pushl %edx; \
69 pushl %ecx; \ 175 pushl %ecx; \
70 pushl %ebx; \ 176 pushl %ebx; \
71 /* Load lguest ds segment for convenience. */ \ 177 /* Our stack and our code are using segments \
178 * Set in the TSS and IDT \
179 * Yet if we were to touch data we'd use \
180 * Whatever data segment the Guest had. \
181 * Load the lguest ds segment for now. */ \
72 movl $(LGUEST_DS), %eax; \ 182 movl $(LGUEST_DS), %eax; \
73 movl %eax, %ds; \ 183 movl %eax, %ds; \
74 /* Figure out where we are, based on stack (at top of regs). */ \ 184 /* So where are we? Which CPU, which struct? \
185 * The stack is our clue: our TSS sets \
186 * It at the end of "struct lguest_pages" \
187 * And we then pushed and pushed and pushed Guest regs: \
188 * Now stack points atop the "struct lguest_regs". \
189 * Subtract that offset, and we find our struct. */ \
75 movl %esp, %eax; \ 190 movl %esp, %eax; \
76 subl $LGUEST_PAGES_regs, %eax; \ 191 subl $LGUEST_PAGES_regs, %eax; \
77 /* Put trap number in %ebx before we switch cr3 and lose it. */ \ 192 /* Save our trap number: the switch will obscure it \
193 * (The Guest regs are not mapped here in the Host) \
194 * %ebx holds it safe for deliver_to_host */ \
78 movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \ 195 movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \
79 /* Switch to host page tables (host GDT, IDT and stack are in host \ 196 /* The Host GDT, IDT and stack! \
80 mem, so need this first) */ \ 197 * All these lie safely hidden from the Guest: \
198 * We must return to the Host page tables \
199 * (Hence that was saved in struct lguest_pages) */ \
81 movl LGUEST_PAGES_host_cr3(%eax), %edx; \ 200 movl LGUEST_PAGES_host_cr3(%eax), %edx; \
82 movl %edx, %cr3; \ 201 movl %edx, %cr3; \
83 /* Set guest's TSS to available (clear byte 5 bit 2). */ \ 202 /* As before, when we looked back at the Host \
203 * As we left and marked TSS unused \
204 * So must we now for the Guest left behind. */ \
84 andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \ 205 andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \
85 /* Switch to host's GDT & IDT. */ \ 206 /* Switch to Host's GDT, IDT. */ \
86 lgdt LGUEST_PAGES_host_gdt_desc(%eax); \ 207 lgdt LGUEST_PAGES_host_gdt_desc(%eax); \
87 lidt LGUEST_PAGES_host_idt_desc(%eax); \ 208 lidt LGUEST_PAGES_host_idt_desc(%eax); \
88 /* Switch to host's stack. */ \ 209 /* Restore the Host's stack where its saved regs lie */ \
89 movl LGUEST_PAGES_host_sp(%eax), %esp; \ 210 movl LGUEST_PAGES_host_sp(%eax), %esp; \
90 /* Switch to host's TSS */ \ 211 /* Last the TSS: our Host is complete */ \
91 movl $(GDT_ENTRY_TSS*8), %edx; \ 212 movl $(GDT_ENTRY_TSS*8), %edx; \
92 ltr %dx; \ 213 ltr %dx; \
214 /* Restore now the regs saved right at the first. */ \
93 popl %ebp; \ 215 popl %ebp; \
94 popl %fs; \ 216 popl %fs; \
95 popl %gs; \ 217 popl %gs; \
96 popl %ds; \ 218 popl %ds; \
97 popl %es 219 popl %es
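(The stack arithmetic in the middle of that macro deserves a second
look. Here is a hedged C sketch of the same trick, with a stand-in
layout: the real "struct lguest_pages" lives in this patch's lg.h, and
LGUEST_PAGES_regs is simply its offsetof.)

	#include <stddef.h>

	/* Stand-in layout, for illustration only. */
	struct lguest_regs { unsigned long ebx, ecx, edx; /* ... */ };
	struct lguest_pages {
		char state[1024];		/* stand-in for the state data */
		struct lguest_regs regs;	/* the TSS stack starts past here */
	};

	static struct lguest_pages *pages_from_stack(unsigned long esp)
	{
		/* After the pushes, %esp sits exactly at &pages->regs. */
		return (struct lguest_pages *)
			(esp - offsetof(struct lguest_pages, regs));
	}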
98 220
99/* Return to run_guest_once. */ 221// Here's where we come when the Guest has just trapped:
222// (Which trap we'll see has been pushed on the stack).
223// We need only switch back, and the Host will decode
224// Why we came home, and what needs to be done.
100return_to_host: 225return_to_host:
101 SWITCH_TO_HOST 226 SWITCH_TO_HOST
102 iret 227 iret
103 228
229// An interrupt, with some cause external
 230// Has a-jerked us rudely from the Guest's code
231// Again we must return home to the Host
104deliver_to_host: 232deliver_to_host:
105 SWITCH_TO_HOST 233 SWITCH_TO_HOST
106 /* Decode IDT and jump to hosts' irq handler. When that does iret, it 234 // But now we must go home via that place
107 * will return to run_guest_once. This is a feature. */ 235 // Where that interrupt was supposed to go
236 // Had we not been ensconced, running the Guest.
237 // Here we see the cleverness of our stack:
238 // The Host stack is formed like an interrupt
239 // With EIP, CS and EFLAGS layered.
240 // Interrupt handlers end with "iret"
241 // And that will take us home at long long last.
242
243 // But first we must find the handler to call!
244 // The IDT descriptor for the Host
245 // Has two bytes for size, and four for address:
246 // %edx will hold it for us for now.
108 movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx 247 movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx
248 // We now know the table address we need,
249 // And saved the trap's number inside %ebx.
250 // Yet the pointer to the handler is smeared
251 // Across the bits of the table entry.
252 // What oracle can tell us how to extract
253 // From such a convoluted encoding?
254 // I consulted gcc, and it gave
255 // These instructions, which I gladly credit:
109 leal (%edx,%ebx,8), %eax 256 leal (%edx,%ebx,8), %eax
110 movzwl (%eax),%edx 257 movzwl (%eax),%edx
111 movl 4(%eax), %eax 258 movl 4(%eax), %eax
112 xorw %ax, %ax 259 xorw %ax, %ax
113 orl %eax, %edx 260 orl %eax, %edx
261 // Now the address of the handler's in %edx
262 // We call it now: its "iret" takes us home.
114 jmp *%edx 263 jmp *%edx
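(If the oracle's output reads as runes, the same extraction in C may
help. A sketch only: the field names are ours, but the layout is the
architectural i386 interrupt gate, whose 32-bit handler offset is split
across the first and last words of each 8-byte entry.)

	struct idt_gate {
		unsigned short offset_low;	/* handler offset, bits 0..15 */
		unsigned short segment;		/* code segment selector */
		unsigned short flags;		/* reserved byte, type and DPL */
		unsigned short offset_high;	/* handler offset, bits 16..31 */
	};

	static unsigned long gate_handler(const struct idt_gate *idt,
					  unsigned int trapnum)
	{
		const struct idt_gate *gate = &idt[trapnum]; /* leal (%edx,%ebx,8) */

		return ((unsigned long)gate->offset_high << 16)	/* movl; xorw */
			| gate->offset_low;			/* movzwl; orl */
	}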
115 264
116/* Real hardware interrupts are delivered straight to the host. Others 265// Every interrupt can come to us here
117 cause us to return to run_guest_once so it can decide what to do. Note 266// But we must truly tell each apart.
118 that some of these are overridden by the guest to deliver directly, and 267// They number two hundred and fifty six
119 never enter here (see load_guest_idt_entry). */ 268// And each must land in a different spot,
269// Push its number on stack, and join the stream.
270
 271// And worse, a mere seven of the traps stand apart
272// And push on their stack an addition:
273// An error number, thirty two bits long
274// So we punish the other two fifty
275// And make them push a zero so they match.
276
277// Yet two fifty six entries is long
278// And all will look most the same as the last
279// So we create a macro which can make
280// As many entries as we need to fill.
281
282// Note the change to .data then .text:
283// We plant the address of each entry
284// Into a (data) table for the Host
285// To know where each Guest interrupt should go.
120.macro IRQ_STUB N TARGET 286.macro IRQ_STUB N TARGET
121 .data; .long 1f; .text; 1: 287 .data; .long 1f; .text; 1:
122 /* Make an error number for most traps, which don't have one. */ 288 // Trap eight, ten through fourteen and seventeen
289 // Supply an error number. Else zero.
123 .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17) 290 .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17)
124 pushl $0 291 pushl $0
125 .endif 292 .endif
@@ -128,6 +295,8 @@ deliver_to_host:
128 ALIGN 295 ALIGN
129.endm 296.endm
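(As a plain C predicate the macro's condition is easier on the eyes.
A sketch: the helper is ours, the trap numbers are the architecture's.)

	/* Traps 8 (#DF), 10-14 (#TS, #NP, #SS, #GP, #PF) and 17 (#AC) push
	 * a hardware error code; every other vector gets a zero pushed so
	 * the stack layout always matches. */
	static int has_error_code(unsigned int trap)
	{
		return trap == 8 || (trap >= 10 && trap <= 14) || trap == 17;
	}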
130 297
298// This macro creates numerous entries
299// Using GAS macros which out-power C's.
131.macro IRQ_STUBS FIRST LAST TARGET 300.macro IRQ_STUBS FIRST LAST TARGET
132 irq=\FIRST 301 irq=\FIRST
133 .rept \LAST-\FIRST+1 302 .rept \LAST-\FIRST+1
@@ -136,24 +305,43 @@ deliver_to_host:
136 .endr 305 .endr
137.endm 306.endm
138 307
139/* We intercept every interrupt, because we may need to switch back to 308// Here's the marker for our pointer table
140 * host. Unfortunately we can't tell them apart except by entry 309// Laid in the data section just before
141 * point, so we need 256 entry points. 310// Each macro places the address of code
142 */ 311// Forming an array: each one points to text
312// Which handles interrupt in its turn.
143.data 313.data
144.global default_idt_entries 314.global default_idt_entries
145default_idt_entries: 315default_idt_entries:
146.text 316.text
147 IRQ_STUBS 0 1 return_to_host /* First two traps */ 317 // The first two traps go straight back to the Host
148 IRQ_STUB 2 handle_nmi /* NMI */ 318 IRQ_STUBS 0 1 return_to_host
149 IRQ_STUBS 3 31 return_to_host /* Rest of traps */ 319 // We'll say nothing, yet, about NMI
150 IRQ_STUBS 32 127 deliver_to_host /* Real interrupts */ 320 IRQ_STUB 2 handle_nmi
151 IRQ_STUB 128 return_to_host /* System call (overridden) */ 321 // Other traps also return to the Host
152 IRQ_STUBS 129 255 deliver_to_host /* Other real interrupts */ 322 IRQ_STUBS 3 31 return_to_host
153 323 // All interrupts go via their handlers
154/* We ignore NMI and return. */ 324 IRQ_STUBS 32 127 deliver_to_host
325 // 'Cept system calls coming from userspace
326 // Are to go to the Guest, never the Host.
327 IRQ_STUB 128 return_to_host
328 IRQ_STUBS 129 255 deliver_to_host
329
330// The NMI, what a fabulous beast
331// Which swoops in and stops us no matter that
332// We're suspended between heaven and hell,
333// (Or more likely between the Host and Guest)
334// When in it comes! We are dazed and confused
335// So we do the simplest thing which one can.
336// Though we've pushed the trap number and zero
337// We discard them, return, and hope we live.
155handle_nmi: 338handle_nmi:
156 addl $8, %esp 339 addl $8, %esp
157 iret 340 iret
158 341
342// We are done; all that's left is Mastery
343// And "make Mastery" is a journey long
344// Designed to make your fingers itch to code.
345
346// Here ends the text, the file and poem.
159ENTRY(end_switcher_text) 347ENTRY(end_switcher_text)
diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 9dcbffd0aa15..e204e7b4028a 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -509,7 +509,7 @@ config VIDEO_VINO
509 509
510config VIDEO_STRADIS 510config VIDEO_STRADIS
511 tristate "Stradis 4:2:2 MPEG-2 video driver (EXPERIMENTAL)" 511 tristate "Stradis 4:2:2 MPEG-2 video driver (EXPERIMENTAL)"
512 depends on EXPERIMENTAL && PCI && VIDEO_V4L1 && !PPC64 512 depends on EXPERIMENTAL && PCI && VIDEO_V4L1 && VIRT_TO_BUS
513 help 513 help
514 Say Y here to enable support for the Stradis 4:2:2 MPEG-2 video 514 Say Y here to enable support for the Stradis 4:2:2 MPEG-2 video
515 driver for PCI. There is a product page at 515 driver for PCI. There is a product page at
@@ -520,7 +520,7 @@ config VIDEO_ZORAN_ZR36060
520 520
521config VIDEO_ZORAN 521config VIDEO_ZORAN
522 tristate "Zoran ZR36057/36067 Video For Linux" 522 tristate "Zoran ZR36057/36067 Video For Linux"
523 depends on PCI && I2C_ALGOBIT && VIDEO_V4L1 && !PPC64 523 depends on PCI && I2C_ALGOBIT && VIDEO_V4L1 && VIRT_TO_BUS
524 help 524 help
525 Say Y for support for MJPEG capture cards based on the Zoran 525 Say Y for support for MJPEG capture cards based on the Zoran
526 36057/36067 PCI controller chipset. This includes the Iomega 526 36057/36067 PCI controller chipset. This includes the Iomega
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index f88ebc5b685e..cc6c73442435 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -103,7 +103,7 @@ config MTD_PMC_MSP_RAMROOT
103 103
104config MTD_SUN_UFLASH 104config MTD_SUN_UFLASH
105 tristate "Sun Microsystems userflash support" 105 tristate "Sun Microsystems userflash support"
106 depends on SPARC && MTD_CFI 106 depends on SPARC && MTD_CFI && PCI
107 help 107 help
108 This provides a 'mapping' driver which supports the way in 108 This provides a 'mapping' driver which supports the way in
109 which user-programmable flash chips are connected on various 109 which user-programmable flash chips are connected on various
diff --git a/drivers/net/ax88796.c b/drivers/net/ax88796.c
index 1d882360b34d..e43e8047b90e 100644
--- a/drivers/net/ax88796.c
+++ b/drivers/net/ax88796.c
@@ -819,7 +819,7 @@ static int ax_probe(struct platform_device *pdev)
819 } 819 }
820 820
821 ei_status.mem = ioremap(res->start, size); 821 ei_status.mem = ioremap(res->start, size);
822 dev->base_addr = (long)ei_status.mem; 822 dev->base_addr = (unsigned long)ei_status.mem;
823 823
824 if (ei_status.mem == NULL) { 824 if (ei_status.mem == NULL) {
825 dev_err(&pdev->dev, "Cannot ioremap area (%08zx,%08zx)\n", 825 dev_err(&pdev->dev, "Cannot ioremap area (%08zx,%08zx)\n",
diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
index ebcf35e4cf5b..e620ed4c3ff0 100644
--- a/drivers/net/cxgb3/cxgb3_offload.c
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -699,7 +699,7 @@ static int do_cr(struct t3cdev *dev, struct sk_buff *skb)
699 * the buffer. 699 * the buffer.
700 */ 700 */
701static struct sk_buff *cxgb3_get_cpl_reply_skb(struct sk_buff *skb, size_t len, 701static struct sk_buff *cxgb3_get_cpl_reply_skb(struct sk_buff *skb, size_t len,
702 int gfp) 702 gfp_t gfp)
703{ 703{
704 if (likely(!skb_cloned(skb))) { 704 if (likely(!skb_cloned(skb))) {
705 BUG_ON(skb->len < len); 705 BUG_ON(skb->len < len);
diff --git a/drivers/net/lguest_net.c b/drivers/net/lguest_net.c
index 112778652f7d..cab57911a80e 100644
--- a/drivers/net/lguest_net.c
+++ b/drivers/net/lguest_net.c
@@ -1,6 +1,13 @@
1/* A simple network driver for lguest. 1/*D:500
2 * The Guest network driver.
2 * 3 *
3 * Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 4 * This is a very simple virtual network driver, and our last Guest driver.
5 * The only trick is that it can talk directly to multiple other recipients
6 * (ie. other Guests on the same network). It can also be used with only the
7 * Host on the network.
8 :*/
9
10/* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4 * 11 *
5 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 13 * it under the terms of the GNU General Public License as published by
@@ -28,23 +35,47 @@
28#define MAX_LANS 4 35#define MAX_LANS 4
29#define NUM_SKBS 8 36#define NUM_SKBS 8
30 37
38/*M:011 Network code master Jeff Garzik points out numerous shortcomings in
39 * this driver if it aspires to greatness.
40 *
41 * Firstly, it doesn't use "NAPI": the networking's New API, and is poorer for
42 * it. As he says "NAPI means system-wide load leveling, across multiple
43 * network interfaces. Lack of NAPI can mean competition at higher loads."
44 *
45 * He also points out that we don't implement set_mac_address, so users cannot
46 * change the device's hardware address. When I asked why one would want to:
47 * "Bonding, and situations where you /do/ want the MAC address to "leak" out
48 * of the host onto the wider net."
49 *
50 * Finally, he would like module unloading: "It is not unrealistic to think of
51 * [un|re|]loading the net support module in an lguest guest. And, adding
52 * module support makes the programmer more responsible, because they now have
53 * to learn to clean up after themselves. Any driver that cannot clean up
54 * after itself is an incomplete driver in my book."
55 :*/
56
57/*D:530 The "struct lguestnet_info" contains all the information we need to
58 * know about the network device. */
31struct lguestnet_info 59struct lguestnet_info
32{ 60{
33 /* The shared page(s). */ 61 /* The mapped device page(s) (an array of "struct lguest_net"). */
34 struct lguest_net *peer; 62 struct lguest_net *peer;
63 /* The physical address of the device page(s) */
35 unsigned long peer_phys; 64 unsigned long peer_phys;
65 /* The size of the device page(s). */
36 unsigned long mapsize; 66 unsigned long mapsize;
37 67
38 /* The lguest_device I come from */ 68 /* The lguest_device I come from */
39 struct lguest_device *lgdev; 69 struct lguest_device *lgdev;
40 70
41 /* My peerid. */ 71 /* My peerid (ie. my slot in the array). */
42 unsigned int me; 72 unsigned int me;
43 73
44 /* Receive queue. */ 74 /* Receive queue: the network packets waiting to be filled. */
45 struct sk_buff *skb[NUM_SKBS]; 75 struct sk_buff *skb[NUM_SKBS];
46 struct lguest_dma dma[NUM_SKBS]; 76 struct lguest_dma dma[NUM_SKBS];
47}; 77};
78/*:*/
48 79
49/* How many bytes left in this page. */ 80/* How many bytes left in this page. */
50static unsigned int rest_of_page(void *data) 81static unsigned int rest_of_page(void *data)
@@ -52,39 +83,82 @@ static unsigned int rest_of_page(void *data)
52 return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); 83 return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE);
53} 84}
54 85
55/* Simple convention: offset 4 * peernum. */ 86/*D:570 Each peer (ie. Guest or Host) on the network binds their receive
87 * buffers to a different key: we simply use the physical address of the
88 * device's memory page plus the peer number. The Host insists that all keys
89 * be a multiple of 4, so we multiply the peer number by 4. */
56static unsigned long peer_key(struct lguestnet_info *info, unsigned peernum) 90static unsigned long peer_key(struct lguestnet_info *info, unsigned peernum)
57{ 91{
58 return info->peer_phys + 4 * peernum; 92 return info->peer_phys + 4 * peernum;
59} 93}
60 94
95/* This is the routine which sets up a "struct lguest_dma" to point to a
96 * network packet, similar to req_to_dma() in lguest_blk.c. The structure of a
97 * "struct sk_buff" has grown complex over the years: it consists of a "head"
98 * linear section pointed to by "skb->data", and possibly an array of
99 * "fragments" in the case of a non-linear packet.
100 *
101 * Our receive buffers don't use fragments at all but outgoing skbs might, so
102 * we handle it. */
61static void skb_to_dma(const struct sk_buff *skb, unsigned int headlen, 103static void skb_to_dma(const struct sk_buff *skb, unsigned int headlen,
62 struct lguest_dma *dma) 104 struct lguest_dma *dma)
63{ 105{
64 unsigned int i, seg; 106 unsigned int i, seg;
65 107
108 /* First, we put the linear region into the "struct lguest_dma". Each
109 * entry can't go over a page boundary, so even though all our packets
110 * are 1514 bytes or less, we might need to use two entries here: */
66 for (i = seg = 0; i < headlen; seg++, i += rest_of_page(skb->data+i)) { 111 for (i = seg = 0; i < headlen; seg++, i += rest_of_page(skb->data+i)) {
67 dma->addr[seg] = virt_to_phys(skb->data + i); 112 dma->addr[seg] = virt_to_phys(skb->data + i);
68 dma->len[seg] = min((unsigned)(headlen - i), 113 dma->len[seg] = min((unsigned)(headlen - i),
69 rest_of_page(skb->data + i)); 114 rest_of_page(skb->data + i));
70 } 115 }
116
117 /* Now we handle the fragments: at least they're guaranteed not to go
118 * over a page. skb_shinfo(skb) returns a pointer to the structure
119 * which tells us about the number of fragments and the fragment
120 * array. */
71 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, seg++) { 121 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, seg++) {
72 const skb_frag_t *f = &skb_shinfo(skb)->frags[i]; 122 const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
73 /* Should not happen with MTU less than 64k - 2 * PAGE_SIZE. */ 123 /* Should not happen with MTU less than 64k - 2 * PAGE_SIZE. */
74 if (seg == LGUEST_MAX_DMA_SECTIONS) { 124 if (seg == LGUEST_MAX_DMA_SECTIONS) {
125 /* We will end up sending a truncated packet should
126 * this ever happen. Plus, a cool log message! */
75 printk("Woah dude! Megapacket!\n"); 127 printk("Woah dude! Megapacket!\n");
76 break; 128 break;
77 } 129 }
78 dma->addr[seg] = page_to_phys(f->page) + f->page_offset; 130 dma->addr[seg] = page_to_phys(f->page) + f->page_offset;
79 dma->len[seg] = f->size; 131 dma->len[seg] = f->size;
80 } 132 }
133
134 /* If after all that we didn't use the entire "struct lguest_dma"
135 * array, we terminate it with a 0 length. */
81 if (seg < LGUEST_MAX_DMA_SECTIONS) 136 if (seg < LGUEST_MAX_DMA_SECTIONS)
82 dma->len[seg] = 0; 137 dma->len[seg] = 0;
83} 138}
84 139
85/* We overload multicast bit to show promiscuous mode. */ 140/*
141 * Packet transmission.
142 *
143 * Our packet transmission is a little unusual. A real network card would just
144 * send out the packet and leave the receivers to decide if they're interested.
145 * Instead, we look through the network device memory page and see if any of
146 * the ethernet addresses match the packet destination, and if so we send it to
147 * that Guest.
148 *
149 * This is made a little more complicated in two cases. The first case is
150 * broadcast packets: for that we send the packet to all Guests on the network,
151 * one at a time. The second case is "promiscuous" mode, where a Guest wants
152 * to see all the packets on the network. We need a way for the Guest to tell
153 * us it wants to see all packets, so it sets the "multicast" bit on its
154 * published MAC address, which is never valid in a real ethernet address.
155 */
86#define PROMISC_BIT 0x01 156#define PROMISC_BIT 0x01
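(Why 0x01 works: the multicast/group bit of an ethernet address is the
least significant bit of the first octet, so no valid unicast MAC ever
has it set. A sketch of the test; the helper name is ours, the kernel's
own being is_multicast_ether_addr().)

	static int addr_is_group(const unsigned char mac[6])
	{
		/* A peer advertising PROMISC_BIT publishes an address no
		 * real unicast card could have. */
		return mac[0] & 0x01;
	}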
87 157
158/* This is the callback which is summoned whenever the network device's
159 * multicast or promiscuous state changes. If the card is in promiscuous mode,
160 * we advertise that in our ethernet address in the device's memory. We do the
161 * same if Linux wants any or all multicast traffic. */
88static void lguestnet_set_multicast(struct net_device *dev) 162static void lguestnet_set_multicast(struct net_device *dev)
89{ 163{
90 struct lguestnet_info *info = netdev_priv(dev); 164 struct lguestnet_info *info = netdev_priv(dev);
@@ -95,11 +169,14 @@ static void lguestnet_set_multicast(struct net_device *dev)
95 info->peer[info->me].mac[0] &= ~PROMISC_BIT; 169 info->peer[info->me].mac[0] &= ~PROMISC_BIT;
96} 170}
97 171
172/* A simple test function to see if a peer wants to see all packets.*/
98static int promisc(struct lguestnet_info *info, unsigned int peer) 173static int promisc(struct lguestnet_info *info, unsigned int peer)
99{ 174{
100 return info->peer[peer].mac[0] & PROMISC_BIT; 175 return info->peer[peer].mac[0] & PROMISC_BIT;
101} 176}
102 177
178/* Another simple function to see if a peer's advertised ethernet address
179 * matches a packet's destination ethernet address. */
103static int mac_eq(const unsigned char mac[ETH_ALEN], 180static int mac_eq(const unsigned char mac[ETH_ALEN],
104 struct lguestnet_info *info, unsigned int peer) 181 struct lguestnet_info *info, unsigned int peer)
105{ 182{
@@ -109,6 +186,8 @@ static int mac_eq(const unsigned char mac[ETH_ALEN],
109 return memcmp(mac+1, info->peer[peer].mac+1, ETH_ALEN-1) == 0; 186 return memcmp(mac+1, info->peer[peer].mac+1, ETH_ALEN-1) == 0;
110} 187}
111 188
189/* This is the function which actually sends a packet once we've decided a
190 * peer wants it: */
112static void transfer_packet(struct net_device *dev, 191static void transfer_packet(struct net_device *dev,
113 struct sk_buff *skb, 192 struct sk_buff *skb,
114 unsigned int peernum) 193 unsigned int peernum)
@@ -116,76 +195,134 @@ static void transfer_packet(struct net_device *dev,
116 struct lguestnet_info *info = netdev_priv(dev); 195 struct lguestnet_info *info = netdev_priv(dev);
117 struct lguest_dma dma; 196 struct lguest_dma dma;
118 197
198 /* We use our handy "struct lguest_dma" packing function to prepare
199 * the skb for sending. */
119 skb_to_dma(skb, skb_headlen(skb), &dma); 200 skb_to_dma(skb, skb_headlen(skb), &dma);
120 pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len); 201 pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len);
121 202
203 /* This is the actual send call which copies the packet. */
122 lguest_send_dma(peer_key(info, peernum), &dma); 204 lguest_send_dma(peer_key(info, peernum), &dma);
205
206 /* Check that the entire packet was transmitted. If not, it could mean
207 * that the other Guest registered a short receive buffer, but this
208 * driver should never do that. More likely, the peer is dead. */
123 if (dma.used_len != skb->len) { 209 if (dma.used_len != skb->len) {
124 dev->stats.tx_carrier_errors++; 210 dev->stats.tx_carrier_errors++;
125 pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n", 211 pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n",
126 peernum, dma.used_len, skb->len, 212 peernum, dma.used_len, skb->len,
127 (void *)dma.addr[0], dma.len[0]); 213 (void *)dma.addr[0], dma.len[0]);
128 } else { 214 } else {
215 /* On success we update the stats. */
129 dev->stats.tx_bytes += skb->len; 216 dev->stats.tx_bytes += skb->len;
130 dev->stats.tx_packets++; 217 dev->stats.tx_packets++;
131 } 218 }
132} 219}
133 220
221/* Another helper function to tell us if a slot in the device memory is unused.
222 * Since we always set the Local Assignment bit in the ethernet address, the
223 * first byte can never be 0. */
134static int unused_peer(const struct lguest_net peer[], unsigned int num) 224static int unused_peer(const struct lguest_net peer[], unsigned int num)
135{ 225{
136 return peer[num].mac[0] == 0; 226 return peer[num].mac[0] == 0;
137} 227}
138 228
229/* Finally, here is the routine which handles an outgoing packet. It's called
230 * "start_xmit" for traditional reasons. */
139static int lguestnet_start_xmit(struct sk_buff *skb, struct net_device *dev) 231static int lguestnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
140{ 232{
141 unsigned int i; 233 unsigned int i;
142 int broadcast; 234 int broadcast;
143 struct lguestnet_info *info = netdev_priv(dev); 235 struct lguestnet_info *info = netdev_priv(dev);
236 /* Extract the destination ethernet address from the packet. */
144 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 237 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
145 238
146 pr_debug("%s: xmit %02x:%02x:%02x:%02x:%02x:%02x\n", 239 pr_debug("%s: xmit %02x:%02x:%02x:%02x:%02x:%02x\n",
147 dev->name, dest[0],dest[1],dest[2],dest[3],dest[4],dest[5]); 240 dev->name, dest[0],dest[1],dest[2],dest[3],dest[4],dest[5]);
148 241
242 /* If it's a multicast packet, we broadcast to everyone. That's not
243 * very efficient, but there are very few applications which actually
244 * use multicast, which is a shame really.
245 *
246 * As etherdevice.h points out: "By definition the broadcast address is
247 * also a multicast address." So we don't have to test for broadcast
248 * packets separately. */
149 broadcast = is_multicast_ether_addr(dest); 249 broadcast = is_multicast_ether_addr(dest);
250
251 /* Look through all the published ethernet addresses to see if we
252 * should send this packet. */
150 for (i = 0; i < info->mapsize/sizeof(struct lguest_net); i++) { 253 for (i = 0; i < info->mapsize/sizeof(struct lguest_net); i++) {
254 /* We don't send to ourselves (we actually can't SEND_DMA to
255 * ourselves anyway), and don't send to unused slots. */
151 if (i == info->me || unused_peer(info->peer, i)) 256 if (i == info->me || unused_peer(info->peer, i))
152 continue; 257 continue;
153 258
259 /* If it's broadcast we send it. If they want every packet we
260 * send it. If the destination matches their address we send
261 * it. Otherwise we go to the next peer. */
154 if (!broadcast && !promisc(info, i) && !mac_eq(dest, info, i)) 262 if (!broadcast && !promisc(info, i) && !mac_eq(dest, info, i))
155 continue; 263 continue;
156 264
157 pr_debug("lguestnet %s: sending from %i to %i\n", 265 pr_debug("lguestnet %s: sending from %i to %i\n",
158 dev->name, info->me, i); 266 dev->name, info->me, i);
267 /* Our routine which actually does the transfer. */
159 transfer_packet(dev, skb, i); 268 transfer_packet(dev, skb, i);
160 } 269 }
270
271 /* An xmit routine is expected to dispose of the packet, so we do. */
161 dev_kfree_skb(skb); 272 dev_kfree_skb(skb);
273
274 /* As per kernel convention, 0 means success. This is why I love
275 * networking: even if we never sent to anyone, that's still
276 * success! */
162 return 0; 277 return 0;
163} 278}
164 279
165/* Find a new skb to put in this slot in shared mem. */ 280/*D:560
281 * Packet receiving.
282 *
283 * First, here's a helper routine which fills one of our array of receive
284 * buffers: */
166static int fill_slot(struct net_device *dev, unsigned int slot) 285static int fill_slot(struct net_device *dev, unsigned int slot)
167{ 286{
168 struct lguestnet_info *info = netdev_priv(dev); 287 struct lguestnet_info *info = netdev_priv(dev);
169 /* Try to create and register a new one. */ 288
289 /* We can receive ETH_DATA_LEN (1500) byte packets, plus a standard
290 * ethernet header of ETH_HLEN (14) bytes. */
170 info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN); 291 info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN);
171 if (!info->skb[slot]) { 292 if (!info->skb[slot]) {
172 printk("%s: could not fill slot %i\n", dev->name, slot); 293 printk("%s: could not fill slot %i\n", dev->name, slot);
173 return -ENOMEM; 294 return -ENOMEM;
174 } 295 }
175 296
297 /* skb_to_dma() is a helper which sets up the "struct lguest_dma" to
298 * point to the data in the skb: we also use it for sending out a
299 * packet. */
176 skb_to_dma(info->skb[slot], ETH_HLEN + ETH_DATA_LEN, &info->dma[slot]); 300 skb_to_dma(info->skb[slot], ETH_HLEN + ETH_DATA_LEN, &info->dma[slot]);
301
302 /* This is a Write Memory Barrier: it ensures that the entry in the
303 * receive buffer array is written *before* we set the "used_len" entry
304 * to 0. If the Host were looking at the receive buffer array from a
305 * different CPU, it could potentially see "used_len = 0" and not see
306 * the updated receive buffer information. This would be a horribly
307 * nasty bug, so make sure the compiler and CPU know this has to happen
308 * first. */
177 wmb(); 309 wmb();
178 /* Now we tell hypervisor it can use the slot. */ 310 /* Writing 0 to "used_len" tells the Host it can use this receive
311 * buffer now. */
179 info->dma[slot].used_len = 0; 312 info->dma[slot].used_len = 0;
180 return 0; 313 return 0;
181} 314}
182 315
316/* This is the actual receive routine. When we receive an interrupt from the
317 * Host to tell us a packet has been delivered, we arrive here: */
183static irqreturn_t lguestnet_rcv(int irq, void *dev_id) 318static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
184{ 319{
185 struct net_device *dev = dev_id; 320 struct net_device *dev = dev_id;
186 struct lguestnet_info *info = netdev_priv(dev); 321 struct lguestnet_info *info = netdev_priv(dev);
187 unsigned int i, done = 0; 322 unsigned int i, done = 0;
188 323
324 /* Look through our entire receive array for an entry which has data
325 * in it. */
189 for (i = 0; i < ARRAY_SIZE(info->dma); i++) { 326 for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
190 unsigned int length; 327 unsigned int length;
191 struct sk_buff *skb; 328 struct sk_buff *skb;
@@ -194,10 +331,16 @@ static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
194 if (length == 0) 331 if (length == 0)
195 continue; 332 continue;
196 333
334 /* We've found one! Remember the skb (we grabbed the length
335 * above), and immediately refill the slot we've taken it
336 * from. */
197 done++; 337 done++;
198 skb = info->skb[i]; 338 skb = info->skb[i];
199 fill_slot(dev, i); 339 fill_slot(dev, i);
200 340
341 /* This shouldn't happen: micropackets could be sent by a
342 * badly-behaved Guest on the network, but the Host will never
343 * stuff more data in the buffer than the buffer length. */
201 if (length < ETH_HLEN || length > ETH_HLEN + ETH_DATA_LEN) { 344 if (length < ETH_HLEN || length > ETH_HLEN + ETH_DATA_LEN) {
202 pr_debug(KERN_WARNING "%s: unbelievable skb len: %i\n", 345 pr_debug(KERN_WARNING "%s: unbelievable skb len: %i\n",
203 dev->name, length); 346 dev->name, length);
@@ -205,36 +348,72 @@ static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
205 continue; 348 continue;
206 } 349 }
207 350
351 /* skb_put(), what a great function! I've ranted about this
352 * function before (http://lkml.org/lkml/1999/9/26/24). You
353 * call it after you've added data to the end of an skb (in
354 * this case, it was the Host which wrote the data). */
208 skb_put(skb, length); 355 skb_put(skb, length);
356
357 /* The ethernet header contains a protocol field: we use the
358 * standard helper to extract it, and place the result in
359 * skb->protocol. The helper also sets up skb->pkt_type and
360 * eats up the ethernet header from the front of the packet. */
209 skb->protocol = eth_type_trans(skb, dev); 361 skb->protocol = eth_type_trans(skb, dev);
210 /* This is a reliable transport. */ 362
363 /* If this device doesn't need checksums for sending, we also
364 * don't need to check the packets when they come in. */
211 if (dev->features & NETIF_F_NO_CSUM) 365 if (dev->features & NETIF_F_NO_CSUM)
212 skb->ip_summed = CHECKSUM_UNNECESSARY; 366 skb->ip_summed = CHECKSUM_UNNECESSARY;
367
368 /* As a last resort for debugging the driver or the lguest I/O
369 * subsystem, you can uncomment the "#define DEBUG" at the top
370 * of this file, which turns all the pr_debug() into printk()
371 * and floods the logs. */
213 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 372 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
214 ntohs(skb->protocol), skb->len, skb->pkt_type); 373 ntohs(skb->protocol), skb->len, skb->pkt_type);
215 374
375 /* Update the packet and byte counts (visible from ifconfig,
376 * and good for debugging). */
216 dev->stats.rx_bytes += skb->len; 377 dev->stats.rx_bytes += skb->len;
217 dev->stats.rx_packets++; 378 dev->stats.rx_packets++;
379
380 /* Hand our fresh network packet into the stack's "network
381 * interface receive" routine. That will free the packet
382 * itself when it's finished. */
218 netif_rx(skb); 383 netif_rx(skb);
219 } 384 }
385
386 /* If we found any packets, we assume the interrupt was for us. */
220 return done ? IRQ_HANDLED : IRQ_NONE; 387 return done ? IRQ_HANDLED : IRQ_NONE;
221} 388}
222 389
390/*D:550 This is where we start: when the device is brought up by dhcpd or
391 * ifconfig. At this point we advertise our MAC address to the rest of the
392 * network, and register receive buffers ready for incoming packets. */
223static int lguestnet_open(struct net_device *dev) 393static int lguestnet_open(struct net_device *dev)
224{ 394{
225 int i; 395 int i;
226 struct lguestnet_info *info = netdev_priv(dev); 396 struct lguestnet_info *info = netdev_priv(dev);
227 397
228 /* Set up our MAC address */ 398 /* Copy our MAC address into the device page, so others on the network
399 * can find us. */
229 memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN); 400 memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN);
230 401
231 /* Turn on promisc mode if needed */ 402 /* We might already be in promisc mode (dev->flags & IFF_PROMISC). Our
403 * set_multicast callback handles this already, so we call it now. */
232 lguestnet_set_multicast(dev); 404 lguestnet_set_multicast(dev);
233 405
406 /* Allocate packets and put them into our "struct lguest_dma" array.
407 * If we fail to allocate all the packets we could still limp along,
408 * but it's a sign of real stress so we should probably give up now. */
234 for (i = 0; i < ARRAY_SIZE(info->dma); i++) { 409 for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
235 if (fill_slot(dev, i) != 0) 410 if (fill_slot(dev, i) != 0)
236 goto cleanup; 411 goto cleanup;
237 } 412 }
413
414 /* Finally we tell the Host where our array of "struct lguest_dma"
415 * receive buffers is, binding it to the key corresponding to the
416 * device's physical memory plus our peerid. */
238 if (lguest_bind_dma(peer_key(info,info->me), info->dma, 417 if (lguest_bind_dma(peer_key(info,info->me), info->dma,
239 NUM_SKBS, lgdev_irq(info->lgdev)) != 0) 418 NUM_SKBS, lgdev_irq(info->lgdev)) != 0)
240 goto cleanup; 419 goto cleanup;
@@ -245,22 +424,29 @@ cleanup:
245 dev_kfree_skb(info->skb[i]); 424 dev_kfree_skb(info->skb[i]);
246 return -ENOMEM; 425 return -ENOMEM;
247} 426}
427/*:*/
248 428
429/* The close routine is called when the device is no longer in use: we clean up
430 * elegantly. */
249static int lguestnet_close(struct net_device *dev) 431static int lguestnet_close(struct net_device *dev)
250{ 432{
251 unsigned int i; 433 unsigned int i;
252 struct lguestnet_info *info = netdev_priv(dev); 434 struct lguestnet_info *info = netdev_priv(dev);
253 435
254 /* Clear all trace: others might deliver packets, we'll ignore it. */ 436 /* Clear all trace of our existence out of the device memory by setting
437 * the slot which held our MAC address to 0 (unused). */
255 memset(&info->peer[info->me], 0, sizeof(info->peer[info->me])); 438 memset(&info->peer[info->me], 0, sizeof(info->peer[info->me]));
256 439
257 /* Deregister sg lists. */ 440 /* Unregister our array of receive buffers */
258 lguest_unbind_dma(peer_key(info, info->me), info->dma); 441 lguest_unbind_dma(peer_key(info, info->me), info->dma);
259 for (i = 0; i < ARRAY_SIZE(info->dma); i++) 442 for (i = 0; i < ARRAY_SIZE(info->dma); i++)
260 dev_kfree_skb(info->skb[i]); 443 dev_kfree_skb(info->skb[i]);
261 return 0; 444 return 0;
262} 445}
263 446
447/*D:510 The network device probe function is basically a standard ethernet
448 * device setup. It reads the "struct lguest_device_desc" and sets the "struct
449 * net_device". Oh, the line-by-line excitement! Let's skip over it. :*/
264static int lguestnet_probe(struct lguest_device *lgdev) 450static int lguestnet_probe(struct lguest_device *lgdev)
265{ 451{
266 int err, irqf = IRQF_SHARED; 452 int err, irqf = IRQF_SHARED;
@@ -290,10 +476,16 @@ static int lguestnet_probe(struct lguest_device *lgdev)
290 dev->stop = lguestnet_close; 476 dev->stop = lguestnet_close;
291 dev->hard_start_xmit = lguestnet_start_xmit; 477 dev->hard_start_xmit = lguestnet_start_xmit;
292 478
293 /* Turning on/off promisc will call dev->set_multicast_list. 479 /* We don't actually support multicast yet, but turning on/off
294 * We don't actually support multicast yet */ 480 * promisc also calls dev->set_multicast_list. */
295 dev->set_multicast_list = lguestnet_set_multicast; 481 dev->set_multicast_list = lguestnet_set_multicast;
296 SET_NETDEV_DEV(dev, &lgdev->dev); 482 SET_NETDEV_DEV(dev, &lgdev->dev);
483
484 /* The network code complains if you have "scatter-gather" capability
485 * but don't also handle checksums (it seems that would be
486 * "illogical"). So we use a lie of omission and don't tell it that we
487 * can handle scattered packets unless we also don't want checksums,
488 * even though to us they're completely independent. */
297 if (desc->features & LGUEST_NET_F_NOCSUM) 489 if (desc->features & LGUEST_NET_F_NOCSUM)
298 dev->features = NETIF_F_SG|NETIF_F_NO_CSUM; 490 dev->features = NETIF_F_SG|NETIF_F_NO_CSUM;
299 491
@@ -325,6 +517,9 @@ static int lguestnet_probe(struct lguest_device *lgdev)
325 } 517 }
326 518
327 pr_debug("lguestnet: registered device %s\n", dev->name); 519 pr_debug("lguestnet: registered device %s\n", dev->name);
520 /* Finally, we put the "struct net_device" in the generic "struct
521 * lguest_device"s private pointer. Again, it's not necessary, but
522 * makes sure the cool kernel kids don't tease us. */
328 lgdev->private = dev; 523 lgdev->private = dev;
329 return 0; 524 return 0;
330 525
@@ -352,3 +547,11 @@ module_init(lguestnet_init);
352 547
353MODULE_DESCRIPTION("Lguest network driver"); 548MODULE_DESCRIPTION("Lguest network driver");
354MODULE_LICENSE("GPL"); 549MODULE_LICENSE("GPL");
550
551/*D:580
552 * This is the last of the Drivers, and with this we have covered the many and
553 * wonderous and fine (and boring) details of the Guest.
554 *
555 * "make Launcher" beckons, where we answer questions like "Where do Guests
556 * come from?", and "What do you do when someone asks for optimization?"
557 */
diff --git a/drivers/net/lib8390.c b/drivers/net/lib8390.c
index 5c86e737f954..721ee38d2241 100644
--- a/drivers/net/lib8390.c
+++ b/drivers/net/lib8390.c
@@ -219,15 +219,6 @@ static void ei_tx_timeout(struct net_device *dev)
219 int txsr, isr, tickssofar = jiffies - dev->trans_start; 219 int txsr, isr, tickssofar = jiffies - dev->trans_start;
220 unsigned long flags; 220 unsigned long flags;
221 221
222#if defined(CONFIG_M32R) && defined(CONFIG_SMP)
223 unsigned long icucr;
224
225 local_irq_save(flags);
226 icucr = inl(M32R_ICU_CR1_PORTL);
227 icucr |= M32R_ICUCR_ISMOD11;
228 outl(icucr, M32R_ICU_CR1_PORTL);
229 local_irq_restore(flags);
230#endif
231 ei_local->stat.tx_errors++; 222 ei_local->stat.tx_errors++;
232 223
233 spin_lock_irqsave(&ei_local->page_lock, flags); 224 spin_lock_irqsave(&ei_local->page_lock, flags);
diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index f87176055d0e..266e8b38fe10 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -2054,7 +2054,7 @@ end:
2054 */ 2054 */
2055static int pppol2tp_tunnel_getsockopt(struct sock *sk, 2055static int pppol2tp_tunnel_getsockopt(struct sock *sk,
2056 struct pppol2tp_tunnel *tunnel, 2056 struct pppol2tp_tunnel *tunnel,
2057 int optname, int __user *val) 2057 int optname, int *val)
2058{ 2058{
2059 int err = 0; 2059 int err = 0;
2060 2060
@@ -2077,7 +2077,7 @@ static int pppol2tp_tunnel_getsockopt(struct sock *sk,
2077 */ 2077 */
2078static int pppol2tp_session_getsockopt(struct sock *sk, 2078static int pppol2tp_session_getsockopt(struct sock *sk,
2079 struct pppol2tp_session *session, 2079 struct pppol2tp_session *session,
2080 int optname, int __user *val) 2080 int optname, int *val)
2081{ 2081{
2082 int err = 0; 2082 int err = 0;
2083 2083
diff --git a/drivers/pnp/card.c b/drivers/pnp/card.c
index dd6384b1efce..b6a4f02b01d1 100644
--- a/drivers/pnp/card.c
+++ b/drivers/pnp/card.c
@@ -2,7 +2,6 @@
2 * card.c - contains functions for managing groups of PnP devices 2 * card.c - contains functions for managing groups of PnP devices
3 * 3 *
4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com> 4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
5 *
6 */ 5 */
7 6
8#include <linux/module.h> 7#include <linux/module.h>
@@ -13,26 +12,31 @@
13LIST_HEAD(pnp_cards); 12LIST_HEAD(pnp_cards);
14static LIST_HEAD(pnp_card_drivers); 13static LIST_HEAD(pnp_card_drivers);
15 14
16 15static const struct pnp_card_device_id *match_card(struct pnp_card_driver *drv,
17static const struct pnp_card_device_id * match_card(struct pnp_card_driver * drv, struct pnp_card * card) 16 struct pnp_card *card)
18{ 17{
19 const struct pnp_card_device_id * drv_id = drv->id_table; 18 const struct pnp_card_device_id *drv_id = drv->id_table;
20 while (*drv_id->id){ 19
21 if (compare_pnp_id(card->id,drv_id->id)) { 20 while (*drv_id->id) {
21 if (compare_pnp_id(card->id, drv_id->id)) {
22 int i = 0; 22 int i = 0;
23
23 for (;;) { 24 for (;;) {
24 int found; 25 int found;
25 struct pnp_dev *dev; 26 struct pnp_dev *dev;
26 if (i == PNP_MAX_DEVICES || ! *drv_id->devs[i].id) 27
28 if (i == PNP_MAX_DEVICES
29 || !*drv_id->devs[i].id)
27 return drv_id; 30 return drv_id;
28 found = 0; 31 found = 0;
29 card_for_each_dev(card, dev) { 32 card_for_each_dev(card, dev) {
30 if (compare_pnp_id(dev->id, drv_id->devs[i].id)) { 33 if (compare_pnp_id
34 (dev->id, drv_id->devs[i].id)) {
31 found = 1; 35 found = 1;
32 break; 36 break;
33 } 37 }
34 } 38 }
35 if (! found) 39 if (!found)
36 break; 40 break;
37 i++; 41 i++;
38 } 42 }
@@ -42,14 +46,15 @@ static const struct pnp_card_device_id * match_card(struct pnp_card_driver * drv
42 return NULL; 46 return NULL;
43} 47}
44 48
45static void card_remove(struct pnp_dev * dev) 49static void card_remove(struct pnp_dev *dev)
46{ 50{
47 dev->card_link = NULL; 51 dev->card_link = NULL;
48} 52}
49 53
50static void card_remove_first(struct pnp_dev * dev) 54static void card_remove_first(struct pnp_dev *dev)
51{ 55{
52 struct pnp_card_driver * drv = to_pnp_card_driver(dev->driver); 56 struct pnp_card_driver *drv = to_pnp_card_driver(dev->driver);
57
53 if (!dev->card || !drv) 58 if (!dev->card || !drv)
54 return; 59 return;
55 if (drv->remove) 60 if (drv->remove)
@@ -67,7 +72,7 @@ static int card_probe(struct pnp_card *card, struct pnp_card_driver *drv)
67 72
68 if (!drv->probe) 73 if (!drv->probe)
69 return 0; 74 return 0;
70 id = match_card(drv,card); 75 id = match_card(drv, card);
71 if (!id) 76 if (!id)
72 return 0; 77 return 0;
73 78
@@ -94,12 +99,11 @@ static int card_probe(struct pnp_card *card, struct pnp_card_driver *drv)
94 * pnp_add_card_id - adds an EISA id to the specified card 99 * pnp_add_card_id - adds an EISA id to the specified card
95 * @id: pointer to a pnp_id structure 100 * @id: pointer to a pnp_id structure
96 * @card: pointer to the desired card 101 * @card: pointer to the desired card
97 *
98 */ 102 */
99 103int pnp_add_card_id(struct pnp_id *id, struct pnp_card *card)
100int pnp_add_card_id(struct pnp_id *id, struct pnp_card * card)
101{ 104{
102 struct pnp_id * ptr; 105 struct pnp_id *ptr;
106
103 if (!id) 107 if (!id)
104 return -EINVAL; 108 return -EINVAL;
105 if (!card) 109 if (!card)
@@ -115,10 +119,11 @@ int pnp_add_card_id(struct pnp_id *id, struct pnp_card * card)
115 return 0; 119 return 0;
116} 120}
117 121
118static void pnp_free_card_ids(struct pnp_card * card) 122static void pnp_free_card_ids(struct pnp_card *card)
119{ 123{
120 struct pnp_id * id; 124 struct pnp_id *id;
121 struct pnp_id *next; 125 struct pnp_id *next;
126
122 if (!card) 127 if (!card)
123 return; 128 return;
124 id = card->id; 129 id = card->id;
@@ -131,49 +136,55 @@ static void pnp_free_card_ids(struct pnp_card * card)
131 136
132static void pnp_release_card(struct device *dmdev) 137static void pnp_release_card(struct device *dmdev)
133{ 138{
134 struct pnp_card * card = to_pnp_card(dmdev); 139 struct pnp_card *card = to_pnp_card(dmdev);
140
135 pnp_free_card_ids(card); 141 pnp_free_card_ids(card);
136 kfree(card); 142 kfree(card);
137} 143}
138 144
139 145static ssize_t pnp_show_card_name(struct device *dmdev,
140static ssize_t pnp_show_card_name(struct device *dmdev, struct device_attribute *attr, char *buf) 146 struct device_attribute *attr, char *buf)
141{ 147{
142 char *str = buf; 148 char *str = buf;
143 struct pnp_card *card = to_pnp_card(dmdev); 149 struct pnp_card *card = to_pnp_card(dmdev);
144 str += sprintf(str,"%s\n", card->name); 150
151 str += sprintf(str, "%s\n", card->name);
145 return (str - buf); 152 return (str - buf);
146} 153}
147 154
148static DEVICE_ATTR(name,S_IRUGO,pnp_show_card_name,NULL); 155static DEVICE_ATTR(name, S_IRUGO, pnp_show_card_name, NULL);
149 156
150static ssize_t pnp_show_card_ids(struct device *dmdev, struct device_attribute *attr, char *buf) 157static ssize_t pnp_show_card_ids(struct device *dmdev,
158 struct device_attribute *attr, char *buf)
151{ 159{
152 char *str = buf; 160 char *str = buf;
153 struct pnp_card *card = to_pnp_card(dmdev); 161 struct pnp_card *card = to_pnp_card(dmdev);
154 struct pnp_id * pos = card->id; 162 struct pnp_id *pos = card->id;
155 163
156 while (pos) { 164 while (pos) {
157 str += sprintf(str,"%s\n", pos->id); 165 str += sprintf(str, "%s\n", pos->id);
158 pos = pos->next; 166 pos = pos->next;
159 } 167 }
160 return (str - buf); 168 return (str - buf);
161} 169}
162 170
163static DEVICE_ATTR(card_id,S_IRUGO,pnp_show_card_ids,NULL); 171static DEVICE_ATTR(card_id, S_IRUGO, pnp_show_card_ids, NULL);
164 172
165static int pnp_interface_attach_card(struct pnp_card *card) 173static int pnp_interface_attach_card(struct pnp_card *card)
166{ 174{
167 int rc = device_create_file(&card->dev,&dev_attr_name); 175 int rc = device_create_file(&card->dev, &dev_attr_name);
168 if (rc) return rc;
169 176
170 rc = device_create_file(&card->dev,&dev_attr_card_id); 177 if (rc)
171 if (rc) goto err_name; 178 return rc;
179
180 rc = device_create_file(&card->dev, &dev_attr_card_id);
181 if (rc)
182 goto err_name;
172 183
173 return 0; 184 return 0;
174 185
175err_name: 186 err_name:
176 device_remove_file(&card->dev,&dev_attr_name); 187 device_remove_file(&card->dev, &dev_attr_name);
177 return rc; 188 return rc;
178} 189}
179 190
@@ -181,15 +192,16 @@ err_name:
181 * pnp_add_card - adds a PnP card to the PnP Layer 192 * pnp_add_card - adds a PnP card to the PnP Layer
182 * @card: pointer to the card to add 193 * @card: pointer to the card to add
183 */ 194 */
184 195int pnp_add_card(struct pnp_card *card)
185int pnp_add_card(struct pnp_card * card)
186{ 196{
187 int error; 197 int error;
188 struct list_head * pos, * temp; 198 struct list_head *pos, *temp;
199
189 if (!card || !card->protocol) 200 if (!card || !card->protocol)
190 return -EINVAL; 201 return -EINVAL;
191 202
192 sprintf(card->dev.bus_id, "%02x:%02x", card->protocol->number, card->number); 203 sprintf(card->dev.bus_id, "%02x:%02x", card->protocol->number,
204 card->number);
193 card->dev.parent = &card->protocol->dev; 205 card->dev.parent = &card->protocol->dev;
194 card->dev.bus = NULL; 206 card->dev.bus = NULL;
195 card->dev.release = &pnp_release_card; 207 card->dev.release = &pnp_release_card;
@@ -205,18 +217,21 @@ int pnp_add_card(struct pnp_card * card)
205 /* we wait until now to add devices in order to ensure the drivers 217 /* we wait until now to add devices in order to ensure the drivers
206 * will be able to use all of the related devices on the card 218 * will be able to use all of the related devices on the card
207 * without waiting any unreasonable length of time */ 219 * without waiting any unreasonable length of time */
208 list_for_each(pos,&card->devices){ 220 list_for_each(pos, &card->devices) {
209 struct pnp_dev *dev = card_to_pnp_dev(pos); 221 struct pnp_dev *dev = card_to_pnp_dev(pos);
210 __pnp_add_device(dev); 222 __pnp_add_device(dev);
211 } 223 }
212 224
213 /* match with card drivers */ 225 /* match with card drivers */
214 list_for_each_safe(pos,temp,&pnp_card_drivers){ 226 list_for_each_safe(pos, temp, &pnp_card_drivers) {
215 struct pnp_card_driver * drv = list_entry(pos, struct pnp_card_driver, global_list); 227 struct pnp_card_driver *drv =
216 card_probe(card,drv); 228 list_entry(pos, struct pnp_card_driver,
229 global_list);
230 card_probe(card, drv);
217 } 231 }
218 } else 232 } else
219 pnp_err("sysfs failure, card '%s' will be unavailable", card->dev.bus_id); 233 pnp_err("sysfs failure, card '%s' will be unavailable",
234 card->dev.bus_id);
220 return error; 235 return error;
221} 236}
222 237
@@ -224,10 +239,10 @@ int pnp_add_card(struct pnp_card * card)
224 * pnp_remove_card - removes a PnP card from the PnP Layer 239 * pnp_remove_card - removes a PnP card from the PnP Layer
225 * @card: pointer to the card to remove 240 * @card: pointer to the card to remove
226 */ 241 */
227 242void pnp_remove_card(struct pnp_card *card)
228void pnp_remove_card(struct pnp_card * card)
229{ 243{
230 struct list_head *pos, *temp; 244 struct list_head *pos, *temp;
245
231 if (!card) 246 if (!card)
232 return; 247 return;
233 device_unregister(&card->dev); 248 device_unregister(&card->dev);
@@ -235,7 +250,7 @@ void pnp_remove_card(struct pnp_card * card)
235 list_del(&card->global_list); 250 list_del(&card->global_list);
236 list_del(&card->protocol_list); 251 list_del(&card->protocol_list);
237 spin_unlock(&pnp_lock); 252 spin_unlock(&pnp_lock);
238 list_for_each_safe(pos,temp,&card->devices){ 253 list_for_each_safe(pos, temp, &card->devices) {
239 struct pnp_dev *dev = card_to_pnp_dev(pos); 254 struct pnp_dev *dev = card_to_pnp_dev(pos);
240 pnp_remove_card_device(dev); 255 pnp_remove_card_device(dev);
241 } 256 }
@@ -246,15 +261,14 @@ void pnp_remove_card(struct pnp_card * card)
246 * @card: pointer to the card to add to 261 * @card: pointer to the card to add to
247 * @dev: pointer to the device to add 262 * @dev: pointer to the device to add
248 */ 263 */
249 264int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev)
250int pnp_add_card_device(struct pnp_card * card, struct pnp_dev * dev)
251{ 265{
252 if (!card || !dev || !dev->protocol) 266 if (!card || !dev || !dev->protocol)
253 return -EINVAL; 267 return -EINVAL;
254 dev->dev.parent = &card->dev; 268 dev->dev.parent = &card->dev;
255 dev->card_link = NULL; 269 dev->card_link = NULL;
256 snprintf(dev->dev.bus_id, BUS_ID_SIZE, "%02x:%02x.%02x", dev->protocol->number, 270 snprintf(dev->dev.bus_id, BUS_ID_SIZE, "%02x:%02x.%02x",
257 card->number,dev->number); 271 dev->protocol->number, card->number, dev->number);
258 spin_lock(&pnp_lock); 272 spin_lock(&pnp_lock);
259 dev->card = card; 273 dev->card = card;
260 list_add_tail(&dev->card_list, &card->devices); 274 list_add_tail(&dev->card_list, &card->devices);
@@ -266,8 +280,7 @@ int pnp_add_card_device(struct pnp_card * card, struct pnp_dev * dev)
266 * pnp_remove_card_device- removes a device from the specified card 280 * pnp_remove_card_device- removes a device from the specified card
267 * @dev: pointer to the device to remove 281 * @dev: pointer to the device to remove
268 */ 282 */
269 283void pnp_remove_card_device(struct pnp_dev *dev)
270void pnp_remove_card_device(struct pnp_dev * dev)
271{ 284{
272 spin_lock(&pnp_lock); 285 spin_lock(&pnp_lock);
273 dev->card = NULL; 286 dev->card = NULL;
@@ -282,13 +295,14 @@ void pnp_remove_card_device(struct pnp_dev * dev)
282 * @id: pointer to a PnP ID structure that explains the rules for finding the device 295 * @id: pointer to a PnP ID structure that explains the rules for finding the device
283 * @from: Starting place to search from. If NULL it will start from the beginning. 296
284 */ 297 */
285 298struct pnp_dev *pnp_request_card_device(struct pnp_card_link *clink,
286struct pnp_dev * pnp_request_card_device(struct pnp_card_link *clink, const char * id, struct pnp_dev * from) 299 const char *id, struct pnp_dev *from)
287{ 300{
288 struct list_head * pos; 301 struct list_head *pos;
289 struct pnp_dev * dev; 302 struct pnp_dev *dev;
290 struct pnp_card_driver * drv; 303 struct pnp_card_driver *drv;
291 struct pnp_card * card; 304 struct pnp_card *card;
305
292 if (!clink || !id) 306 if (!clink || !id)
293 goto done; 307 goto done;
294 card = clink->card; 308 card = clink->card;
@@ -302,15 +316,15 @@ struct pnp_dev * pnp_request_card_device(struct pnp_card_link *clink, const char
302 } 316 }
303 while (pos != &card->devices) { 317 while (pos != &card->devices) {
304 dev = card_to_pnp_dev(pos); 318 dev = card_to_pnp_dev(pos);
305 if ((!dev->card_link) && compare_pnp_id(dev->id,id)) 319 if ((!dev->card_link) && compare_pnp_id(dev->id, id))
306 goto found; 320 goto found;
307 pos = pos->next; 321 pos = pos->next;
308 } 322 }
309 323
310done: 324 done:
311 return NULL; 325 return NULL;
312 326
313found: 327 found:
314 dev->card_link = clink; 328 dev->card_link = clink;
315 dev->dev.driver = &drv->link.driver; 329 dev->dev.driver = &drv->link.driver;
316 if (pnp_bus_type.probe(&dev->dev)) 330 if (pnp_bus_type.probe(&dev->dev))
@@ -320,7 +334,7 @@ found:
320 334
321 return dev; 335 return dev;
322 336
323err_out: 337 err_out:
324 dev->dev.driver = NULL; 338 dev->dev.driver = NULL;
325 dev->card_link = NULL; 339 dev->card_link = NULL;
326 return NULL; 340 return NULL;
@@ -330,10 +344,10 @@ err_out:
330 * pnp_release_card_device - call this when the driver no longer needs the device 344 * pnp_release_card_device - call this when the driver no longer needs the device
331 * @dev: pointer to the PnP device structure 345
332 */ 346 */
333 347void pnp_release_card_device(struct pnp_dev *dev)
334void pnp_release_card_device(struct pnp_dev * dev)
335{ 348{
336 struct pnp_card_driver * drv = dev->card_link->driver; 349 struct pnp_card_driver *drv = dev->card_link->driver;
350
337 if (!drv) 351 if (!drv)
338 return; 352 return;
339 drv->link.remove = &card_remove; 353 drv->link.remove = &card_remove;
@@ -347,6 +361,7 @@ void pnp_release_card_device(struct pnp_dev * dev)
347static int card_suspend(struct pnp_dev *dev, pm_message_t state) 361static int card_suspend(struct pnp_dev *dev, pm_message_t state)
348{ 362{
349 struct pnp_card_link *link = dev->card_link; 363 struct pnp_card_link *link = dev->card_link;
364
350 if (link->pm_state.event == state.event) 365 if (link->pm_state.event == state.event)
351 return 0; 366 return 0;
352 link->pm_state = state; 367 link->pm_state = state;
@@ -356,6 +371,7 @@ static int card_suspend(struct pnp_dev *dev, pm_message_t state)
356static int card_resume(struct pnp_dev *dev) 371static int card_resume(struct pnp_dev *dev)
357{ 372{
358 struct pnp_card_link *link = dev->card_link; 373 struct pnp_card_link *link = dev->card_link;
374
359 if (link->pm_state.event == PM_EVENT_ON) 375 if (link->pm_state.event == PM_EVENT_ON)
360 return 0; 376 return 0;
361 link->pm_state = PMSG_ON; 377 link->pm_state = PMSG_ON;
@@ -367,8 +383,7 @@ static int card_resume(struct pnp_dev *dev)
367 * pnp_register_card_driver - registers a PnP card driver with the PnP Layer 383 * pnp_register_card_driver - registers a PnP card driver with the PnP Layer
368 * @drv: pointer to the driver to register 384 * @drv: pointer to the driver to register
369 */ 385 */
370 386int pnp_register_card_driver(struct pnp_card_driver *drv)
371int pnp_register_card_driver(struct pnp_card_driver * drv)
372{ 387{
373 int error; 388 int error;
374 struct list_head *pos, *temp; 389 struct list_head *pos, *temp;
@@ -389,9 +404,10 @@ int pnp_register_card_driver(struct pnp_card_driver * drv)
389 list_add_tail(&drv->global_list, &pnp_card_drivers); 404 list_add_tail(&drv->global_list, &pnp_card_drivers);
390 spin_unlock(&pnp_lock); 405 spin_unlock(&pnp_lock);
391 406
392 list_for_each_safe(pos,temp,&pnp_cards){ 407 list_for_each_safe(pos, temp, &pnp_cards) {
393 struct pnp_card *card = list_entry(pos, struct pnp_card, global_list); 408 struct pnp_card *card =
394 card_probe(card,drv); 409 list_entry(pos, struct pnp_card, global_list);
410 card_probe(card, drv);
395 } 411 }
396 return 0; 412 return 0;
397} 413}
@@ -400,8 +416,7 @@ int pnp_register_card_driver(struct pnp_card_driver * drv)
400 * pnp_unregister_card_driver - unregisters a PnP card driver from the PnP Layer 416 * pnp_unregister_card_driver - unregisters a PnP card driver from the PnP Layer
401 * @drv: pointer to the driver to unregister 417 * @drv: pointer to the driver to unregister
402 */ 418 */
403 419void pnp_unregister_card_driver(struct pnp_card_driver *drv)
404void pnp_unregister_card_driver(struct pnp_card_driver * drv)
405{ 420{
406 spin_lock(&pnp_lock); 421 spin_lock(&pnp_lock);
407 list_del(&drv->global_list); 422 list_del(&drv->global_list);
@@ -409,13 +424,6 @@ void pnp_unregister_card_driver(struct pnp_card_driver * drv)
409 pnp_unregister_driver(&drv->link); 424 pnp_unregister_driver(&drv->link);
410} 425}
411 426
412#if 0
413EXPORT_SYMBOL(pnp_add_card);
414EXPORT_SYMBOL(pnp_remove_card);
415EXPORT_SYMBOL(pnp_add_card_device);
416EXPORT_SYMBOL(pnp_remove_card_device);
417EXPORT_SYMBOL(pnp_add_card_id);
418#endif /* 0 */
419EXPORT_SYMBOL(pnp_request_card_device); 427EXPORT_SYMBOL(pnp_request_card_device);
420EXPORT_SYMBOL(pnp_release_card_device); 428EXPORT_SYMBOL(pnp_release_card_device);
421EXPORT_SYMBOL(pnp_register_card_driver); 429EXPORT_SYMBOL(pnp_register_card_driver);
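
A minimal userspace sketch of the registration pattern in the pnp_register_card_driver() hunk above: the driver is appended to a global list under pnp_lock, and every card discovered before the driver loaded is then offered to it via card_probe(). The list layout and match rule here are illustrative stand-ins, not the kernel structures.

#include <stdio.h>
#include <string.h>

struct card { const char *id; struct card *next; };
struct card_driver { const char *vendor; };

static struct card c2 = { "PNP0c02", NULL };
static struct card c1 = { "CSC0100", &c2 };
static struct card *cards = &c1;        /* stand-in for the pnp_cards list */

static void card_probe(struct card *card, struct card_driver *drv)
{
        /* toy rule: claim cards whose three vendor letters match */
        if (strncmp(card->id, drv->vendor, 3) == 0)
                printf("%s claims card %s\n", drv->vendor, card->id);
}

static int register_card_driver(struct card_driver *drv)
{
        struct card *card;

        /* the kernel takes pnp_lock and walks with list_for_each_safe() */
        for (card = cards; card; card = card->next)
                card_probe(card, drv);
        return 0;
}

int main(void)
{
        struct card_driver drv = { "CSC" };

        register_card_driver(&drv);     /* prints: CSC claims card CSC0100 */
        return 0;
}
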
diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c
index 8e7b2dd38810..61066fdb9e6d 100644
--- a/drivers/pnp/core.c
+++ b/drivers/pnp/core.c
@@ -2,7 +2,6 @@
2 * core.c - contains all core device and protocol registration functions 2 * core.c - contains all core device and protocol registration functions
3 * 3 *
4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com> 4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
5 *
6 */ 5 */
7 6
8#include <linux/pnp.h> 7#include <linux/pnp.h>
@@ -18,7 +17,6 @@
18 17
19#include "base.h" 18#include "base.h"
20 19
21
22static LIST_HEAD(pnp_protocols); 20static LIST_HEAD(pnp_protocols);
23LIST_HEAD(pnp_global); 21LIST_HEAD(pnp_global);
24DEFINE_SPINLOCK(pnp_lock); 22DEFINE_SPINLOCK(pnp_lock);
@@ -36,7 +34,7 @@ void *pnp_alloc(long size)
36 void *result; 34 void *result;
37 35
38 result = kzalloc(size, GFP_KERNEL); 36 result = kzalloc(size, GFP_KERNEL);
39 if (!result){ 37 if (!result) {
40 printk(KERN_ERR "pnp: Out of Memory\n"); 38 printk(KERN_ERR "pnp: Out of Memory\n");
41 return NULL; 39 return NULL;
42 } 40 }
@@ -49,11 +47,10 @@ void *pnp_alloc(long size)
49 * 47 *
50 * Example protocols: ISAPNP, PNPBIOS, etc 48
51 */ 49 */
52
53int pnp_register_protocol(struct pnp_protocol *protocol) 50int pnp_register_protocol(struct pnp_protocol *protocol)
54{ 51{
55 int nodenum; 52 int nodenum;
56 struct list_head * pos; 53 struct list_head *pos;
57 54
58 if (!protocol) 55 if (!protocol)
59 return -EINVAL; 56 return -EINVAL;
@@ -64,9 +61,9 @@ int pnp_register_protocol(struct pnp_protocol *protocol)
64 spin_lock(&pnp_lock); 61 spin_lock(&pnp_lock);
65 62
66 /* assign the lowest unused number */ 63 /* assign the lowest unused number */
67 list_for_each(pos,&pnp_protocols) { 64 list_for_each(pos, &pnp_protocols) {
68 struct pnp_protocol * cur = to_pnp_protocol(pos); 65 struct pnp_protocol *cur = to_pnp_protocol(pos);
69 if (cur->number == nodenum){ 66 if (cur->number == nodenum) {
70 pos = &pnp_protocols; 67 pos = &pnp_protocols;
71 nodenum++; 68 nodenum++;
72 } 69 }
@@ -83,7 +80,6 @@ int pnp_register_protocol(struct pnp_protocol *protocol)
83/** 80/**
84 * pnp_protocol_unregister - removes a pnp protocol from the pnp layer 81 * pnp_protocol_unregister - removes a pnp protocol from the pnp layer
85 * @protocol: pointer to the corresponding pnp_protocol structure 82 * @protocol: pointer to the corresponding pnp_protocol structure
86 *
87 */ 83 */
88void pnp_unregister_protocol(struct pnp_protocol *protocol) 84void pnp_unregister_protocol(struct pnp_protocol *protocol)
89{ 85{
@@ -93,11 +89,11 @@ void pnp_unregister_protocol(struct pnp_protocol *protocol)
93 device_unregister(&protocol->dev); 89 device_unregister(&protocol->dev);
94} 90}
95 91
96
97static void pnp_free_ids(struct pnp_dev *dev) 92static void pnp_free_ids(struct pnp_dev *dev)
98{ 93{
99 struct pnp_id * id; 94 struct pnp_id *id;
100 struct pnp_id * next; 95 struct pnp_id *next;
96
101 if (!dev) 97 if (!dev)
102 return; 98 return;
103 id = dev->id; 99 id = dev->id;
@@ -110,7 +106,8 @@ static void pnp_free_ids(struct pnp_dev *dev)
110 106
111static void pnp_release_device(struct device *dmdev) 107static void pnp_release_device(struct device *dmdev)
112{ 108{
113 struct pnp_dev * dev = to_pnp_dev(dmdev); 109 struct pnp_dev *dev = to_pnp_dev(dmdev);
110
114 pnp_free_option(dev->independent); 111 pnp_free_option(dev->independent);
115 pnp_free_option(dev->dependent); 112 pnp_free_option(dev->dependent);
116 pnp_free_ids(dev); 113 pnp_free_ids(dev);
@@ -120,6 +117,7 @@ static void pnp_release_device(struct device *dmdev)
120int __pnp_add_device(struct pnp_dev *dev) 117int __pnp_add_device(struct pnp_dev *dev)
121{ 118{
122 int ret; 119 int ret;
120
123 pnp_fixup_device(dev); 121 pnp_fixup_device(dev);
124 dev->dev.bus = &pnp_bus_type; 122 dev->dev.bus = &pnp_bus_type;
125 dev->dev.dma_mask = &dev->dma_mask; 123 dev->dev.dma_mask = &dev->dma_mask;
@@ -143,13 +141,13 @@ int __pnp_add_device(struct pnp_dev *dev)
143 * 141 *
144 * adds to driver model, name database, fixups, interface, etc. 142 * adds to driver model, name database, fixups, interface, etc.
145 */ 143 */
146
147int pnp_add_device(struct pnp_dev *dev) 144int pnp_add_device(struct pnp_dev *dev)
148{ 145{
149 if (!dev || !dev->protocol || dev->card) 146 if (!dev || !dev->protocol || dev->card)
150 return -EINVAL; 147 return -EINVAL;
151 dev->dev.parent = &dev->protocol->dev; 148 dev->dev.parent = &dev->protocol->dev;
152 sprintf(dev->dev.bus_id, "%02x:%02x", dev->protocol->number, dev->number); 149 sprintf(dev->dev.bus_id, "%02x:%02x", dev->protocol->number,
150 dev->number);
153 return __pnp_add_device(dev); 151 return __pnp_add_device(dev);
154} 152}
155 153
@@ -162,21 +160,6 @@ void __pnp_remove_device(struct pnp_dev *dev)
162 device_unregister(&dev->dev); 160 device_unregister(&dev->dev);
163} 161}
164 162
165/**
166 * pnp_remove_device - removes a pnp device from the pnp layer
167 * @dev: pointer to dev to add
168 *
169 * this function will free all mem used by dev
170 */
171#if 0
172void pnp_remove_device(struct pnp_dev *dev)
173{
174 if (!dev || dev->card)
175 return;
176 __pnp_remove_device(dev);
177}
178#endif /* 0 */
179
180static int __init pnp_init(void) 163static int __init pnp_init(void)
181{ 164{
182 printk(KERN_INFO "Linux Plug and Play Support v0.97 (c) Adam Belay\n"); 165 printk(KERN_INFO "Linux Plug and Play Support v0.97 (c) Adam Belay\n");
@@ -184,10 +167,3 @@ static int __init pnp_init(void)
184} 167}
185 168
186subsys_initcall(pnp_init); 169subsys_initcall(pnp_init);
187
188#if 0
189EXPORT_SYMBOL(pnp_register_protocol);
190EXPORT_SYMBOL(pnp_unregister_protocol);
191EXPORT_SYMBOL(pnp_add_device);
192EXPORT_SYMBOL(pnp_remove_device);
193#endif /* 0 */
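
The pnp_register_protocol() hunk above picks the lowest unused protocol number by restarting the list walk from the head whenever the candidate collides with an existing entry. A standalone sketch of that restart-scan, with an illustrative array standing in for the kernel's protocol list:

#include <stdio.h>

static const int used[] = { 0, 1, 3 }; /* numbers already taken */
static const int nused = sizeof(used) / sizeof(used[0]);

static int lowest_unused(void)
{
        int nodenum = 0, i;

restart:
        for (i = 0; i < nused; i++) {
                if (used[i] == nodenum) {
                        nodenum++;      /* collision: bump the candidate */
                        goto restart;   /* and rescan from the head */
                }
        }
        return nodenum;
}

int main(void)
{
        printf("assigned protocol number %d\n", lowest_unused()); /* 2 */
        return 0;
}
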
diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index 1432806451cd..30b8f6f3258a 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -2,7 +2,6 @@
2 * driver.c - device id matching, driver model, etc. 2 * driver.c - device id matching, driver model, etc.
3 * 3 *
4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com> 4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
5 *
6 */ 5 */
7 6
8#include <linux/string.h> 7#include <linux/string.h>
@@ -16,12 +15,11 @@
16static int compare_func(const char *ida, const char *idb) 15static int compare_func(const char *ida, const char *idb)
17{ 16{
18 int i; 17 int i;
18
19 /* we only need to compare the last 4 chars */ 19 /* we only need to compare the last 4 chars */
20 for (i=3; i<7; i++) 20 for (i = 3; i < 7; i++) {
21 {
22 if (ida[i] != 'X' && 21 if (ida[i] != 'X' &&
23 idb[i] != 'X' && 22 idb[i] != 'X' && toupper(ida[i]) != toupper(idb[i]))
24 toupper(ida[i]) != toupper(idb[i]))
25 return 0; 23 return 0;
26 } 24 }
27 return 1; 25 return 1;
@@ -31,20 +29,22 @@ int compare_pnp_id(struct pnp_id *pos, const char *id)
31{ 29{
32 if (!pos || !id || (strlen(id) != 7)) 30 if (!pos || !id || (strlen(id) != 7))
33 return 0; 31 return 0;
34 if (memcmp(id,"ANYDEVS",7)==0) 32 if (memcmp(id, "ANYDEVS", 7) == 0)
35 return 1; 33 return 1;
36 while (pos){ 34 while (pos) {
37 if (memcmp(pos->id,id,3)==0) 35 if (memcmp(pos->id, id, 3) == 0)
38 if (compare_func(pos->id,id)==1) 36 if (compare_func(pos->id, id) == 1)
39 return 1; 37 return 1;
40 pos = pos->next; 38 pos = pos->next;
41 } 39 }
42 return 0; 40 return 0;
43} 41}
44 42
45static const struct pnp_device_id * match_device(struct pnp_driver *drv, struct pnp_dev *dev) 43static const struct pnp_device_id *match_device(struct pnp_driver *drv,
44 struct pnp_dev *dev)
46{ 45{
47 const struct pnp_device_id *drv_id = drv->id_table; 46 const struct pnp_device_id *drv_id = drv->id_table;
47
48 if (!drv_id) 48 if (!drv_id)
49 return NULL; 49 return NULL;
50 50
@@ -59,7 +59,7 @@ static const struct pnp_device_id * match_device(struct pnp_driver *drv, struct
59int pnp_device_attach(struct pnp_dev *pnp_dev) 59int pnp_device_attach(struct pnp_dev *pnp_dev)
60{ 60{
61 spin_lock(&pnp_lock); 61 spin_lock(&pnp_lock);
62 if(pnp_dev->status != PNP_READY){ 62 if (pnp_dev->status != PNP_READY) {
63 spin_unlock(&pnp_lock); 63 spin_unlock(&pnp_lock);
64 return -EBUSY; 64 return -EBUSY;
65 } 65 }
@@ -86,7 +86,8 @@ static int pnp_device_probe(struct device *dev)
86 pnp_dev = to_pnp_dev(dev); 86 pnp_dev = to_pnp_dev(dev);
87 pnp_drv = to_pnp_driver(dev->driver); 87 pnp_drv = to_pnp_driver(dev->driver);
88 88
89 pnp_dbg("match found with the PnP device '%s' and the driver '%s'", dev->bus_id,pnp_drv->name); 89 pnp_dbg("match found with the PnP device '%s' and the driver '%s'",
90 dev->bus_id, pnp_drv->name);
90 91
91 error = pnp_device_attach(pnp_dev); 92 error = pnp_device_attach(pnp_dev);
92 if (error < 0) 93 if (error < 0)
@@ -99,7 +100,7 @@ static int pnp_device_probe(struct device *dev)
99 return error; 100 return error;
100 } 101 }
101 } else if ((pnp_drv->flags & PNP_DRIVER_RES_DISABLE) 102 } else if ((pnp_drv->flags & PNP_DRIVER_RES_DISABLE)
102 == PNP_DRIVER_RES_DISABLE) { 103 == PNP_DRIVER_RES_DISABLE) {
103 error = pnp_disable_dev(pnp_dev); 104 error = pnp_disable_dev(pnp_dev);
104 if (error < 0) 105 if (error < 0)
105 return error; 106 return error;
@@ -110,22 +111,22 @@ static int pnp_device_probe(struct device *dev)
110 if (dev_id != NULL) 111 if (dev_id != NULL)
111 error = pnp_drv->probe(pnp_dev, dev_id); 112 error = pnp_drv->probe(pnp_dev, dev_id);
112 } 113 }
113 if (error >= 0){ 114 if (error >= 0) {
114 pnp_dev->driver = pnp_drv; 115 pnp_dev->driver = pnp_drv;
115 error = 0; 116 error = 0;
116 } else 117 } else
117 goto fail; 118 goto fail;
118 return error; 119 return error;
119 120
120fail: 121 fail:
121 pnp_device_detach(pnp_dev); 122 pnp_device_detach(pnp_dev);
122 return error; 123 return error;
123} 124}
124 125
125static int pnp_device_remove(struct device *dev) 126static int pnp_device_remove(struct device *dev)
126{ 127{
127 struct pnp_dev * pnp_dev = to_pnp_dev(dev); 128 struct pnp_dev *pnp_dev = to_pnp_dev(dev);
128 struct pnp_driver * drv = pnp_dev->driver; 129 struct pnp_driver *drv = pnp_dev->driver;
129 130
130 if (drv) { 131 if (drv) {
131 if (drv->remove) 132 if (drv->remove)
@@ -138,8 +139,9 @@ static int pnp_device_remove(struct device *dev)
138 139
139static int pnp_bus_match(struct device *dev, struct device_driver *drv) 140static int pnp_bus_match(struct device *dev, struct device_driver *drv)
140{ 141{
141 struct pnp_dev * pnp_dev = to_pnp_dev(dev); 142 struct pnp_dev *pnp_dev = to_pnp_dev(dev);
142 struct pnp_driver * pnp_drv = to_pnp_driver(drv); 143 struct pnp_driver *pnp_drv = to_pnp_driver(drv);
144
143 if (match_device(pnp_drv, pnp_dev) == NULL) 145 if (match_device(pnp_drv, pnp_dev) == NULL)
144 return 0; 146 return 0;
145 return 1; 147 return 1;
@@ -147,8 +149,8 @@ static int pnp_bus_match(struct device *dev, struct device_driver *drv)
147 149
148static int pnp_bus_suspend(struct device *dev, pm_message_t state) 150static int pnp_bus_suspend(struct device *dev, pm_message_t state)
149{ 151{
150 struct pnp_dev * pnp_dev = to_pnp_dev(dev); 152 struct pnp_dev *pnp_dev = to_pnp_dev(dev);
151 struct pnp_driver * pnp_drv = pnp_dev->driver; 153 struct pnp_driver *pnp_drv = pnp_dev->driver;
152 int error; 154 int error;
153 155
154 if (!pnp_drv) 156 if (!pnp_drv)
@@ -162,9 +164,9 @@ static int pnp_bus_suspend(struct device *dev, pm_message_t state)
162 164
163 if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE) && 165 if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE) &&
164 pnp_can_disable(pnp_dev)) { 166 pnp_can_disable(pnp_dev)) {
165 error = pnp_stop_dev(pnp_dev); 167 error = pnp_stop_dev(pnp_dev);
166 if (error) 168 if (error)
167 return error; 169 return error;
168 } 170 }
169 171
170 if (pnp_dev->protocol && pnp_dev->protocol->suspend) 172 if (pnp_dev->protocol && pnp_dev->protocol->suspend)
@@ -174,8 +176,8 @@ static int pnp_bus_suspend(struct device *dev, pm_message_t state)
174 176
175static int pnp_bus_resume(struct device *dev) 177static int pnp_bus_resume(struct device *dev)
176{ 178{
177 struct pnp_dev * pnp_dev = to_pnp_dev(dev); 179 struct pnp_dev *pnp_dev = to_pnp_dev(dev);
178 struct pnp_driver * pnp_drv = pnp_dev->driver; 180 struct pnp_driver *pnp_drv = pnp_dev->driver;
179 int error; 181 int error;
180 182
181 if (!pnp_drv) 183 if (!pnp_drv)
@@ -197,12 +199,12 @@ static int pnp_bus_resume(struct device *dev)
197} 199}
198 200
199struct bus_type pnp_bus_type = { 201struct bus_type pnp_bus_type = {
200 .name = "pnp", 202 .name = "pnp",
201 .match = pnp_bus_match, 203 .match = pnp_bus_match,
202 .probe = pnp_device_probe, 204 .probe = pnp_device_probe,
203 .remove = pnp_device_remove, 205 .remove = pnp_device_remove,
204 .suspend = pnp_bus_suspend, 206 .suspend = pnp_bus_suspend,
205 .resume = pnp_bus_resume, 207 .resume = pnp_bus_resume,
206}; 208};
207 209
208int pnp_register_driver(struct pnp_driver *drv) 210int pnp_register_driver(struct pnp_driver *drv)
@@ -225,12 +227,11 @@ void pnp_unregister_driver(struct pnp_driver *drv)
225 * pnp_add_id - adds an EISA id to the specified device 227 * pnp_add_id - adds an EISA id to the specified device
226 * @id: pointer to a pnp_id structure 228 * @id: pointer to a pnp_id structure
227 * @dev: pointer to the desired device 229 * @dev: pointer to the desired device
228 *
229 */ 230 */
230
231int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev) 231int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev)
232{ 232{
233 struct pnp_id *ptr; 233 struct pnp_id *ptr;
234
234 if (!id) 235 if (!id)
235 return -EINVAL; 236 return -EINVAL;
236 if (!dev) 237 if (!dev)
@@ -248,8 +249,5 @@ int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev)
248 249
249EXPORT_SYMBOL(pnp_register_driver); 250EXPORT_SYMBOL(pnp_register_driver);
250EXPORT_SYMBOL(pnp_unregister_driver); 251EXPORT_SYMBOL(pnp_unregister_driver);
251#if 0
252EXPORT_SYMBOL(pnp_add_id);
253#endif
254EXPORT_SYMBOL(pnp_device_attach); 252EXPORT_SYMBOL(pnp_device_attach);
255EXPORT_SYMBOL(pnp_device_detach); 253EXPORT_SYMBOL(pnp_device_detach);
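
compare_func() and compare_pnp_id() above implement the seven-character EISA ID match: the three vendor letters must match exactly, the four trailing hex digits are compared case-insensitively, and an 'X' on either side acts as a wildcard. A self-contained sketch of the same rules:

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static int compare_func(const char *ida, const char *idb)
{
        int i;

        /* we only need to compare the last 4 chars */
        for (i = 3; i < 7; i++) {
                if (ida[i] != 'X' &&
                    idb[i] != 'X' && toupper(ida[i]) != toupper(idb[i]))
                        return 0;
        }
        return 1;
}

static int match(const char *dev_id, const char *drv_id)
{
        if (strlen(dev_id) != 7 || strlen(drv_id) != 7)
                return 0;
        if (memcmp(dev_id, drv_id, 3) != 0)     /* vendor part is exact */
                return 0;
        return compare_func(dev_id, drv_id);
}

int main(void)
{
        printf("%d\n", match("CSC0100", "CSCXXXX"));    /* 1: wildcard */
        printf("%d\n", match("csc0100", "CSC0100"));    /* 0: vendor exact */
        printf("%d\n", match("CSC01ab", "CSC01AB"));    /* 1: digits fold */
        return 0;
}
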
diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c
index ac9fcd499f3f..fe6684e13e82 100644
--- a/drivers/pnp/interface.c
+++ b/drivers/pnp/interface.c
@@ -3,7 +3,6 @@
3 * 3 *
4 * Some code, especially possible resource dumping, is based on isapnp_proc.c (c) Jaroslav Kysela <perex@suse.cz> 4 * Some code, especially possible resource dumping, is based on isapnp_proc.c (c) Jaroslav Kysela <perex@suse.cz>
5 * Copyright 2002 Adam Belay <ambx1@neo.rr.com> 5 * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
6 *
7 */ 6 */
8 7
9#include <linux/pnp.h> 8#include <linux/pnp.h>
@@ -29,7 +28,7 @@ struct pnp_info_buffer {
29 28
30typedef struct pnp_info_buffer pnp_info_buffer_t; 29typedef struct pnp_info_buffer pnp_info_buffer_t;
31 30
32static int pnp_printf(pnp_info_buffer_t * buffer, char *fmt,...) 31static int pnp_printf(pnp_info_buffer_t * buffer, char *fmt, ...)
33{ 32{
34 va_list args; 33 va_list args;
35 int res; 34 int res;
@@ -48,14 +47,18 @@ static int pnp_printf(pnp_info_buffer_t * buffer, char *fmt,...)
48 return res; 47 return res;
49} 48}
50 49
51static void pnp_print_port(pnp_info_buffer_t *buffer, char *space, struct pnp_port *port) 50static void pnp_print_port(pnp_info_buffer_t * buffer, char *space,
51 struct pnp_port *port)
52{ 52{
53 pnp_printf(buffer, "%sport 0x%x-0x%x, align 0x%x, size 0x%x, %i-bit address decoding\n", 53 pnp_printf(buffer,
54 space, port->min, port->max, port->align ? (port->align-1) : 0, port->size, 54 "%sport 0x%x-0x%x, align 0x%x, size 0x%x, %i-bit address decoding\n",
55 port->flags & PNP_PORT_FLAG_16BITADDR ? 16 : 10); 55 space, port->min, port->max,
56 port->align ? (port->align - 1) : 0, port->size,
57 port->flags & PNP_PORT_FLAG_16BITADDR ? 16 : 10);
56} 58}
57 59
58static void pnp_print_irq(pnp_info_buffer_t *buffer, char *space, struct pnp_irq *irq) 60static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space,
61 struct pnp_irq *irq)
59{ 62{
60 int first = 1, i; 63 int first = 1, i;
61 64
@@ -85,14 +88,15 @@ static void pnp_print_irq(pnp_info_buffer_t *buffer, char *space, struct pnp_irq
85 pnp_printf(buffer, "\n"); 88 pnp_printf(buffer, "\n");
86} 89}
87 90
88static void pnp_print_dma(pnp_info_buffer_t *buffer, char *space, struct pnp_dma *dma) 91static void pnp_print_dma(pnp_info_buffer_t * buffer, char *space,
92 struct pnp_dma *dma)
89{ 93{
90 int first = 1, i; 94 int first = 1, i;
91 char *s; 95 char *s;
92 96
93 pnp_printf(buffer, "%sdma ", space); 97 pnp_printf(buffer, "%sdma ", space);
94 for (i = 0; i < 8; i++) 98 for (i = 0; i < 8; i++)
95 if (dma->map & (1<<i)) { 99 if (dma->map & (1 << i)) {
96 if (!first) { 100 if (!first) {
97 pnp_printf(buffer, ","); 101 pnp_printf(buffer, ",");
98 } else { 102 } else {
@@ -136,12 +140,13 @@ static void pnp_print_dma(pnp_info_buffer_t *buffer, char *space, struct pnp_dma
136 pnp_printf(buffer, " %s\n", s); 140 pnp_printf(buffer, " %s\n", s);
137} 141}
138 142
139static void pnp_print_mem(pnp_info_buffer_t *buffer, char *space, struct pnp_mem *mem) 143static void pnp_print_mem(pnp_info_buffer_t * buffer, char *space,
144 struct pnp_mem *mem)
140{ 145{
141 char *s; 146 char *s;
142 147
143 pnp_printf(buffer, "%sMemory 0x%x-0x%x, align 0x%x, size 0x%x", 148 pnp_printf(buffer, "%sMemory 0x%x-0x%x, align 0x%x, size 0x%x",
144 space, mem->min, mem->max, mem->align, mem->size); 149 space, mem->min, mem->max, mem->align, mem->size);
145 if (mem->flags & IORESOURCE_MEM_WRITEABLE) 150 if (mem->flags & IORESOURCE_MEM_WRITEABLE)
146 pnp_printf(buffer, ", writeable"); 151 pnp_printf(buffer, ", writeable");
147 if (mem->flags & IORESOURCE_MEM_CACHEABLE) 152 if (mem->flags & IORESOURCE_MEM_CACHEABLE)
@@ -168,7 +173,7 @@ static void pnp_print_mem(pnp_info_buffer_t *buffer, char *space, struct pnp_mem
168 pnp_printf(buffer, ", %s\n", s); 173 pnp_printf(buffer, ", %s\n", s);
169} 174}
170 175
171static void pnp_print_option(pnp_info_buffer_t *buffer, char *space, 176static void pnp_print_option(pnp_info_buffer_t * buffer, char *space,
172 struct pnp_option *option, int dep) 177 struct pnp_option *option, int dep)
173{ 178{
174 char *s; 179 char *s;
@@ -179,19 +184,19 @@ static void pnp_print_option(pnp_info_buffer_t *buffer, char *space,
179 184
180 if (dep) { 185 if (dep) {
181 switch (option->priority) { 186 switch (option->priority) {
182 case PNP_RES_PRIORITY_PREFERRED: 187 case PNP_RES_PRIORITY_PREFERRED:
183 s = "preferred"; 188 s = "preferred";
184 break; 189 break;
185 case PNP_RES_PRIORITY_ACCEPTABLE: 190 case PNP_RES_PRIORITY_ACCEPTABLE:
186 s = "acceptable"; 191 s = "acceptable";
187 break; 192 break;
188 case PNP_RES_PRIORITY_FUNCTIONAL: 193 case PNP_RES_PRIORITY_FUNCTIONAL:
189 s = "functional"; 194 s = "functional";
190 break; 195 break;
191 default: 196 default:
192 s = "invalid"; 197 s = "invalid";
193 } 198 }
194 pnp_printf(buffer, "Dependent: %02i - Priority %s\n",dep, s); 199 pnp_printf(buffer, "Dependent: %02i - Priority %s\n", dep, s);
195 } 200 }
196 201
197 for (port = option->port; port; port = port->next) 202 for (port = option->port; port; port = port->next)
@@ -204,16 +209,16 @@ static void pnp_print_option(pnp_info_buffer_t *buffer, char *space,
204 pnp_print_mem(buffer, space, mem); 209 pnp_print_mem(buffer, space, mem);
205} 210}
206 211
207 212static ssize_t pnp_show_options(struct device *dmdev,
208static ssize_t pnp_show_options(struct device *dmdev, struct device_attribute *attr, char *buf) 213 struct device_attribute *attr, char *buf)
209{ 214{
210 struct pnp_dev *dev = to_pnp_dev(dmdev); 215 struct pnp_dev *dev = to_pnp_dev(dmdev);
211 struct pnp_option * independent = dev->independent; 216 struct pnp_option *independent = dev->independent;
212 struct pnp_option * dependent = dev->dependent; 217 struct pnp_option *dependent = dev->dependent;
213 int ret, dep = 1; 218 int ret, dep = 1;
214 219
215 pnp_info_buffer_t *buffer = (pnp_info_buffer_t *) 220 pnp_info_buffer_t *buffer = (pnp_info_buffer_t *)
216 pnp_alloc(sizeof(pnp_info_buffer_t)); 221 pnp_alloc(sizeof(pnp_info_buffer_t));
217 if (!buffer) 222 if (!buffer)
218 return -ENOMEM; 223 return -ENOMEM;
219 224
@@ -223,7 +228,7 @@ static ssize_t pnp_show_options(struct device *dmdev, struct device_attribute *a
223 if (independent) 228 if (independent)
224 pnp_print_option(buffer, "", independent, 0); 229 pnp_print_option(buffer, "", independent, 0);
225 230
226 while (dependent){ 231 while (dependent) {
227 pnp_print_option(buffer, " ", dependent, dep); 232 pnp_print_option(buffer, " ", dependent, dep);
228 dependent = dependent->next; 233 dependent = dependent->next;
229 dep++; 234 dep++;
@@ -233,10 +238,11 @@ static ssize_t pnp_show_options(struct device *dmdev, struct device_attribute *a
233 return ret; 238 return ret;
234} 239}
235 240
236static DEVICE_ATTR(options,S_IRUGO,pnp_show_options,NULL); 241static DEVICE_ATTR(options, S_IRUGO, pnp_show_options, NULL);
237 242
238 243static ssize_t pnp_show_current_resources(struct device *dmdev,
239static ssize_t pnp_show_current_resources(struct device *dmdev, struct device_attribute *attr, char *buf) 244 struct device_attribute *attr,
245 char *buf)
240{ 246{
241 struct pnp_dev *dev = to_pnp_dev(dmdev); 247 struct pnp_dev *dev = to_pnp_dev(dmdev);
242 int i, ret; 248 int i, ret;
@@ -252,52 +258,56 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, struct device_at
252 buffer->buffer = buf; 258 buffer->buffer = buf;
253 buffer->curr = buffer->buffer; 259 buffer->curr = buffer->buffer;
254 260
255 pnp_printf(buffer,"state = "); 261 pnp_printf(buffer, "state = ");
256 if (dev->active) 262 if (dev->active)
257 pnp_printf(buffer,"active\n"); 263 pnp_printf(buffer, "active\n");
258 else 264 else
259 pnp_printf(buffer,"disabled\n"); 265 pnp_printf(buffer, "disabled\n");
260 266
261 for (i = 0; i < PNP_MAX_PORT; i++) { 267 for (i = 0; i < PNP_MAX_PORT; i++) {
262 if (pnp_port_valid(dev, i)) { 268 if (pnp_port_valid(dev, i)) {
263 pnp_printf(buffer,"io"); 269 pnp_printf(buffer, "io");
264 if (pnp_port_flags(dev, i) & IORESOURCE_DISABLED) 270 if (pnp_port_flags(dev, i) & IORESOURCE_DISABLED)
265 pnp_printf(buffer," disabled\n"); 271 pnp_printf(buffer, " disabled\n");
266 else 272 else
267 pnp_printf(buffer," 0x%llx-0x%llx\n", 273 pnp_printf(buffer, " 0x%llx-0x%llx\n",
268 (unsigned long long)pnp_port_start(dev, i), 274 (unsigned long long)
269 (unsigned long long)pnp_port_end(dev, i)); 275 pnp_port_start(dev, i),
276 (unsigned long long)pnp_port_end(dev,
277 i));
270 } 278 }
271 } 279 }
272 for (i = 0; i < PNP_MAX_MEM; i++) { 280 for (i = 0; i < PNP_MAX_MEM; i++) {
273 if (pnp_mem_valid(dev, i)) { 281 if (pnp_mem_valid(dev, i)) {
274 pnp_printf(buffer,"mem"); 282 pnp_printf(buffer, "mem");
275 if (pnp_mem_flags(dev, i) & IORESOURCE_DISABLED) 283 if (pnp_mem_flags(dev, i) & IORESOURCE_DISABLED)
276 pnp_printf(buffer," disabled\n"); 284 pnp_printf(buffer, " disabled\n");
277 else 285 else
278 pnp_printf(buffer," 0x%llx-0x%llx\n", 286 pnp_printf(buffer, " 0x%llx-0x%llx\n",
279 (unsigned long long)pnp_mem_start(dev, i), 287 (unsigned long long)
280 (unsigned long long)pnp_mem_end(dev, i)); 288 pnp_mem_start(dev, i),
289 (unsigned long long)pnp_mem_end(dev,
290 i));
281 } 291 }
282 } 292 }
283 for (i = 0; i < PNP_MAX_IRQ; i++) { 293 for (i = 0; i < PNP_MAX_IRQ; i++) {
284 if (pnp_irq_valid(dev, i)) { 294 if (pnp_irq_valid(dev, i)) {
285 pnp_printf(buffer,"irq"); 295 pnp_printf(buffer, "irq");
286 if (pnp_irq_flags(dev, i) & IORESOURCE_DISABLED) 296 if (pnp_irq_flags(dev, i) & IORESOURCE_DISABLED)
287 pnp_printf(buffer," disabled\n"); 297 pnp_printf(buffer, " disabled\n");
288 else 298 else
289 pnp_printf(buffer," %lld\n", 299 pnp_printf(buffer, " %lld\n",
290 (unsigned long long)pnp_irq(dev, i)); 300 (unsigned long long)pnp_irq(dev, i));
291 } 301 }
292 } 302 }
293 for (i = 0; i < PNP_MAX_DMA; i++) { 303 for (i = 0; i < PNP_MAX_DMA; i++) {
294 if (pnp_dma_valid(dev, i)) { 304 if (pnp_dma_valid(dev, i)) {
295 pnp_printf(buffer,"dma"); 305 pnp_printf(buffer, "dma");
296 if (pnp_dma_flags(dev, i) & IORESOURCE_DISABLED) 306 if (pnp_dma_flags(dev, i) & IORESOURCE_DISABLED)
297 pnp_printf(buffer," disabled\n"); 307 pnp_printf(buffer, " disabled\n");
298 else 308 else
299 pnp_printf(buffer," %lld\n", 309 pnp_printf(buffer, " %lld\n",
300 (unsigned long long)pnp_dma(dev, i)); 310 (unsigned long long)pnp_dma(dev, i));
301 } 311 }
302 } 312 }
303 ret = (buffer->curr - buf); 313 ret = (buffer->curr - buf);
@@ -308,55 +318,57 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, struct device_at
308extern struct semaphore pnp_res_mutex; 318extern struct semaphore pnp_res_mutex;
309 319
310static ssize_t 320static ssize_t
311pnp_set_current_resources(struct device * dmdev, struct device_attribute *attr, const char * ubuf, size_t count) 321pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
322 const char *ubuf, size_t count)
312{ 323{
313 struct pnp_dev *dev = to_pnp_dev(dmdev); 324 struct pnp_dev *dev = to_pnp_dev(dmdev);
314 char *buf = (void *)ubuf; 325 char *buf = (void *)ubuf;
315 int retval = 0; 326 int retval = 0;
316 327
317 if (dev->status & PNP_ATTACHED) { 328 if (dev->status & PNP_ATTACHED) {
318 retval = -EBUSY; 329 retval = -EBUSY;
319 pnp_info("Device %s cannot be configured because it is in use.", dev->dev.bus_id); 330 pnp_info("Device %s cannot be configured because it is in use.",
331 dev->dev.bus_id);
320 goto done; 332 goto done;
321 } 333 }
322 334
323 while (isspace(*buf)) 335 while (isspace(*buf))
324 ++buf; 336 ++buf;
325 if (!strnicmp(buf,"disable",7)) { 337 if (!strnicmp(buf, "disable", 7)) {
326 retval = pnp_disable_dev(dev); 338 retval = pnp_disable_dev(dev);
327 goto done; 339 goto done;
328 } 340 }
329 if (!strnicmp(buf,"activate",8)) { 341 if (!strnicmp(buf, "activate", 8)) {
330 retval = pnp_activate_dev(dev); 342 retval = pnp_activate_dev(dev);
331 goto done; 343 goto done;
332 } 344 }
333 if (!strnicmp(buf,"fill",4)) { 345 if (!strnicmp(buf, "fill", 4)) {
334 if (dev->active) 346 if (dev->active)
335 goto done; 347 goto done;
336 retval = pnp_auto_config_dev(dev); 348 retval = pnp_auto_config_dev(dev);
337 goto done; 349 goto done;
338 } 350 }
339 if (!strnicmp(buf,"auto",4)) { 351 if (!strnicmp(buf, "auto", 4)) {
340 if (dev->active) 352 if (dev->active)
341 goto done; 353 goto done;
342 pnp_init_resource_table(&dev->res); 354 pnp_init_resource_table(&dev->res);
343 retval = pnp_auto_config_dev(dev); 355 retval = pnp_auto_config_dev(dev);
344 goto done; 356 goto done;
345 } 357 }
346 if (!strnicmp(buf,"clear",5)) { 358 if (!strnicmp(buf, "clear", 5)) {
347 if (dev->active) 359 if (dev->active)
348 goto done; 360 goto done;
349 pnp_init_resource_table(&dev->res); 361 pnp_init_resource_table(&dev->res);
350 goto done; 362 goto done;
351 } 363 }
352 if (!strnicmp(buf,"get",3)) { 364 if (!strnicmp(buf, "get", 3)) {
353 down(&pnp_res_mutex); 365 down(&pnp_res_mutex);
354 if (pnp_can_read(dev)) 366 if (pnp_can_read(dev))
355 dev->protocol->get(dev, &dev->res); 367 dev->protocol->get(dev, &dev->res);
356 up(&pnp_res_mutex); 368 up(&pnp_res_mutex);
357 goto done; 369 goto done;
358 } 370 }
359 if (!strnicmp(buf,"set",3)) { 371 if (!strnicmp(buf, "set", 3)) {
360 int nport = 0, nmem = 0, nirq = 0, ndma = 0; 372 int nport = 0, nmem = 0, nirq = 0, ndma = 0;
361 if (dev->active) 373 if (dev->active)
362 goto done; 374 goto done;
@@ -366,65 +378,77 @@ pnp_set_current_resources(struct device * dmdev, struct device_attribute *attr,
366 while (1) { 378 while (1) {
367 while (isspace(*buf)) 379 while (isspace(*buf))
368 ++buf; 380 ++buf;
369 if (!strnicmp(buf,"io",2)) { 381 if (!strnicmp(buf, "io", 2)) {
370 buf += 2; 382 buf += 2;
371 while (isspace(*buf)) 383 while (isspace(*buf))
372 ++buf; 384 ++buf;
373 dev->res.port_resource[nport].start = simple_strtoul(buf,&buf,0); 385 dev->res.port_resource[nport].start =
386 simple_strtoul(buf, &buf, 0);
374 while (isspace(*buf)) 387 while (isspace(*buf))
375 ++buf; 388 ++buf;
376 if(*buf == '-') { 389 if (*buf == '-') {
377 buf += 1; 390 buf += 1;
378 while (isspace(*buf)) 391 while (isspace(*buf))
379 ++buf; 392 ++buf;
380 dev->res.port_resource[nport].end = simple_strtoul(buf,&buf,0); 393 dev->res.port_resource[nport].end =
394 simple_strtoul(buf, &buf, 0);
381 } else 395 } else
382 dev->res.port_resource[nport].end = dev->res.port_resource[nport].start; 396 dev->res.port_resource[nport].end =
383 dev->res.port_resource[nport].flags = IORESOURCE_IO; 397 dev->res.port_resource[nport].start;
398 dev->res.port_resource[nport].flags =
399 IORESOURCE_IO;
384 nport++; 400 nport++;
385 if (nport >= PNP_MAX_PORT) 401 if (nport >= PNP_MAX_PORT)
386 break; 402 break;
387 continue; 403 continue;
388 } 404 }
389 if (!strnicmp(buf,"mem",3)) { 405 if (!strnicmp(buf, "mem", 3)) {
390 buf += 3; 406 buf += 3;
391 while (isspace(*buf)) 407 while (isspace(*buf))
392 ++buf; 408 ++buf;
393 dev->res.mem_resource[nmem].start = simple_strtoul(buf,&buf,0); 409 dev->res.mem_resource[nmem].start =
410 simple_strtoul(buf, &buf, 0);
394 while (isspace(*buf)) 411 while (isspace(*buf))
395 ++buf; 412 ++buf;
396 if(*buf == '-') { 413 if (*buf == '-') {
397 buf += 1; 414 buf += 1;
398 while (isspace(*buf)) 415 while (isspace(*buf))
399 ++buf; 416 ++buf;
400 dev->res.mem_resource[nmem].end = simple_strtoul(buf,&buf,0); 417 dev->res.mem_resource[nmem].end =
418 simple_strtoul(buf, &buf, 0);
401 } else 419 } else
402 dev->res.mem_resource[nmem].end = dev->res.mem_resource[nmem].start; 420 dev->res.mem_resource[nmem].end =
403 dev->res.mem_resource[nmem].flags = IORESOURCE_MEM; 421 dev->res.mem_resource[nmem].start;
422 dev->res.mem_resource[nmem].flags =
423 IORESOURCE_MEM;
404 nmem++; 424 nmem++;
405 if (nmem >= PNP_MAX_MEM) 425 if (nmem >= PNP_MAX_MEM)
406 break; 426 break;
407 continue; 427 continue;
408 } 428 }
409 if (!strnicmp(buf,"irq",3)) { 429 if (!strnicmp(buf, "irq", 3)) {
410 buf += 3; 430 buf += 3;
411 while (isspace(*buf)) 431 while (isspace(*buf))
412 ++buf; 432 ++buf;
413 dev->res.irq_resource[nirq].start = 433 dev->res.irq_resource[nirq].start =
414 dev->res.irq_resource[nirq].end = simple_strtoul(buf,&buf,0); 434 dev->res.irq_resource[nirq].end =
415 dev->res.irq_resource[nirq].flags = IORESOURCE_IRQ; 435 simple_strtoul(buf, &buf, 0);
436 dev->res.irq_resource[nirq].flags =
437 IORESOURCE_IRQ;
416 nirq++; 438 nirq++;
417 if (nirq >= PNP_MAX_IRQ) 439 if (nirq >= PNP_MAX_IRQ)
418 break; 440 break;
419 continue; 441 continue;
420 } 442 }
421 if (!strnicmp(buf,"dma",3)) { 443 if (!strnicmp(buf, "dma", 3)) {
422 buf += 3; 444 buf += 3;
423 while (isspace(*buf)) 445 while (isspace(*buf))
424 ++buf; 446 ++buf;
425 dev->res.dma_resource[ndma].start = 447 dev->res.dma_resource[ndma].start =
426 dev->res.dma_resource[ndma].end = simple_strtoul(buf,&buf,0); 448 dev->res.dma_resource[ndma].end =
427 dev->res.dma_resource[ndma].flags = IORESOURCE_DMA; 449 simple_strtoul(buf, &buf, 0);
450 dev->res.dma_resource[ndma].flags =
451 IORESOURCE_DMA;
428 ndma++; 452 ndma++;
429 if (ndma >= PNP_MAX_DMA) 453 if (ndma >= PNP_MAX_DMA)
430 break; 454 break;
@@ -435,45 +459,50 @@ pnp_set_current_resources(struct device * dmdev, struct device_attribute *attr,
435 up(&pnp_res_mutex); 459 up(&pnp_res_mutex);
436 goto done; 460 goto done;
437 } 461 }
438 done: 462 done:
439 if (retval < 0) 463 if (retval < 0)
440 return retval; 464 return retval;
441 return count; 465 return count;
442} 466}
443 467
444static DEVICE_ATTR(resources,S_IRUGO | S_IWUSR, 468static DEVICE_ATTR(resources, S_IRUGO | S_IWUSR,
445 pnp_show_current_resources,pnp_set_current_resources); 469 pnp_show_current_resources, pnp_set_current_resources);
446 470
447static ssize_t pnp_show_current_ids(struct device *dmdev, struct device_attribute *attr, char *buf) 471static ssize_t pnp_show_current_ids(struct device *dmdev,
472 struct device_attribute *attr, char *buf)
448{ 473{
449 char *str = buf; 474 char *str = buf;
450 struct pnp_dev *dev = to_pnp_dev(dmdev); 475 struct pnp_dev *dev = to_pnp_dev(dmdev);
451 struct pnp_id * pos = dev->id; 476 struct pnp_id *pos = dev->id;
452 477
453 while (pos) { 478 while (pos) {
454 str += sprintf(str,"%s\n", pos->id); 479 str += sprintf(str, "%s\n", pos->id);
455 pos = pos->next; 480 pos = pos->next;
456 } 481 }
457 return (str - buf); 482 return (str - buf);
458} 483}
459 484
460static DEVICE_ATTR(id,S_IRUGO,pnp_show_current_ids,NULL); 485static DEVICE_ATTR(id, S_IRUGO, pnp_show_current_ids, NULL);
461 486
462int pnp_interface_attach_device(struct pnp_dev *dev) 487int pnp_interface_attach_device(struct pnp_dev *dev)
463{ 488{
464 int rc = device_create_file(&dev->dev,&dev_attr_options); 489 int rc = device_create_file(&dev->dev, &dev_attr_options);
465 if (rc) goto err; 490
466 rc = device_create_file(&dev->dev,&dev_attr_resources); 491 if (rc)
467 if (rc) goto err_opt; 492 goto err;
468 rc = device_create_file(&dev->dev,&dev_attr_id); 493 rc = device_create_file(&dev->dev, &dev_attr_resources);
469 if (rc) goto err_res; 494 if (rc)
495 goto err_opt;
496 rc = device_create_file(&dev->dev, &dev_attr_id);
497 if (rc)
498 goto err_res;
470 499
471 return 0; 500 return 0;
472 501
473err_res: 502 err_res:
474 device_remove_file(&dev->dev,&dev_attr_resources); 503 device_remove_file(&dev->dev, &dev_attr_resources);
475err_opt: 504 err_opt:
476 device_remove_file(&dev->dev,&dev_attr_options); 505 device_remove_file(&dev->dev, &dev_attr_options);
477err: 506 err:
478 return rc; 507 return rc;
479} 508}
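
For reference, the resources attribute reworked above accepts "disable", "activate", "fill", "auto", "clear", "get", or "set", the last followed by io/mem/irq/dma tokens in the grammar parsed by pnp_set_current_resources(). A hedged usage sketch; the device path is a made-up example, since real bus IDs depend on the protocol and device numbers assigned at boot:

#include <stdio.h>

int main(void)
{
        const char *path = "/sys/bus/pnp/devices/00:01/resources";
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                return 1;
        }
        /* one line in the "set" grammar shown in the hunk above */
        fprintf(f, "set io 0x220-0x22f irq 5 dma 1\n");
        fclose(f);
        return 0;
}
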
diff --git a/drivers/pnp/isapnp/compat.c b/drivers/pnp/isapnp/compat.c
index 0697ab88a9ac..10bdcc4d4f7b 100644
--- a/drivers/pnp/isapnp/compat.c
+++ b/drivers/pnp/isapnp/compat.c
@@ -3,34 +3,30 @@
3 * the old isapnp APIs. If possible use the new APIs instead. 3 * the old isapnp APIs. If possible use the new APIs instead.
4 * 4 *
5 * Copyright 2002 Adam Belay <ambx1@neo.rr.com> 5 * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
6 *
7 */ 6 */
8
9/* TODO: see if more isapnp functions are needed here */
10 7
11#include <linux/module.h> 8#include <linux/module.h>
12#include <linux/isapnp.h> 9#include <linux/isapnp.h>
13#include <linux/string.h> 10#include <linux/string.h>
14 11
15static void pnp_convert_id(char *buf, unsigned short vendor, unsigned short device) 12static void pnp_convert_id(char *buf, unsigned short vendor,
13 unsigned short device)
16{ 14{
17 sprintf(buf, "%c%c%c%x%x%x%x", 15 sprintf(buf, "%c%c%c%x%x%x%x",
18 'A' + ((vendor >> 2) & 0x3f) - 1, 16 'A' + ((vendor >> 2) & 0x3f) - 1,
19 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1, 17 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1,
20 'A' + ((vendor >> 8) & 0x1f) - 1, 18 'A' + ((vendor >> 8) & 0x1f) - 1,
21 (device >> 4) & 0x0f, 19 (device >> 4) & 0x0f, device & 0x0f,
22 device & 0x0f, 20 (device >> 12) & 0x0f, (device >> 8) & 0x0f);
23 (device >> 12) & 0x0f,
24 (device >> 8) & 0x0f);
25} 21}
26 22
27struct pnp_card *pnp_find_card(unsigned short vendor, 23struct pnp_card *pnp_find_card(unsigned short vendor, unsigned short device,
28 unsigned short device,
29 struct pnp_card *from) 24 struct pnp_card *from)
30{ 25{
31 char id[8]; 26 char id[8];
32 char any[8]; 27 char any[8];
33 struct list_head *list; 28 struct list_head *list;
29
34 pnp_convert_id(id, vendor, device); 30 pnp_convert_id(id, vendor, device);
35 pnp_convert_id(any, ISAPNP_ANY_ID, ISAPNP_ANY_ID); 31 pnp_convert_id(any, ISAPNP_ANY_ID, ISAPNP_ANY_ID);
36 32
@@ -38,20 +34,20 @@ struct pnp_card *pnp_find_card(unsigned short vendor,
38 34
39 while (list != &pnp_cards) { 35 while (list != &pnp_cards) {
40 struct pnp_card *card = global_to_pnp_card(list); 36 struct pnp_card *card = global_to_pnp_card(list);
41 if (compare_pnp_id(card->id,id) || (memcmp(id,any,7)==0)) 37
38 if (compare_pnp_id(card->id, id) || (memcmp(id, any, 7) == 0))
42 return card; 39 return card;
43 list = list->next; 40 list = list->next;
44 } 41 }
45 return NULL; 42 return NULL;
46} 43}
47 44
48struct pnp_dev *pnp_find_dev(struct pnp_card *card, 45struct pnp_dev *pnp_find_dev(struct pnp_card *card, unsigned short vendor,
49 unsigned short vendor, 46 unsigned short function, struct pnp_dev *from)
50 unsigned short function,
51 struct pnp_dev *from)
52{ 47{
53 char id[8]; 48 char id[8];
54 char any[8]; 49 char any[8];
50
55 pnp_convert_id(id, vendor, function); 51 pnp_convert_id(id, vendor, function);
56 pnp_convert_id(any, ISAPNP_ANY_ID, ISAPNP_ANY_ID); 52 pnp_convert_id(any, ISAPNP_ANY_ID, ISAPNP_ANY_ID);
57 if (card == NULL) { /* look for a logical device from all cards */ 53 if (card == NULL) { /* look for a logical device from all cards */
@@ -63,7 +59,9 @@ struct pnp_dev *pnp_find_dev(struct pnp_card *card,
63 59
64 while (list != &pnp_global) { 60 while (list != &pnp_global) {
65 struct pnp_dev *dev = global_to_pnp_dev(list); 61 struct pnp_dev *dev = global_to_pnp_dev(list);
66 if (compare_pnp_id(dev->id,id) || (memcmp(id,any,7)==0)) 62
63 if (compare_pnp_id(dev->id, id) ||
64 (memcmp(id, any, 7) == 0))
67 return dev; 65 return dev;
68 list = list->next; 66 list = list->next;
69 } 67 }
@@ -78,7 +76,8 @@ struct pnp_dev *pnp_find_dev(struct pnp_card *card,
78 } 76 }
79 while (list != &card->devices) { 77 while (list != &card->devices) {
80 struct pnp_dev *dev = card_to_pnp_dev(list); 78 struct pnp_dev *dev = card_to_pnp_dev(list);
81 if (compare_pnp_id(dev->id,id)) 79
80 if (compare_pnp_id(dev->id, id))
82 return dev; 81 return dev;
83 list = list->next; 82 list = list->next;
84 } 83 }
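
pnp_convert_id() above (like isapnp_parse_id() and isapnp_parse_card_id() in isapnp/core.c below) expands a compressed EISA ID: the 16-bit vendor word packs three 5-bit letters, and the device word carries four hex digits in nibble-swapped order. A standalone sketch using the same expression; the sample words 0xd041/0x020c decode to "PNP0c02":

#include <stdio.h>

static void convert_id(char *buf, unsigned short vendor,
                       unsigned short device)
{
        sprintf(buf, "%c%c%c%x%x%x%x",
                'A' + ((vendor >> 2) & 0x3f) - 1,
                'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1,
                'A' + ((vendor >> 8) & 0x1f) - 1,
                (device >> 4) & 0x0f, device & 0x0f,
                (device >> 12) & 0x0f, (device >> 8) & 0x0f);
}

int main(void)
{
        char id[8];             /* 7 characters plus the terminator */

        convert_id(id, 0xd041, 0x020c);
        printf("%s\n", id);     /* PNP0c02 */
        return 0;
}
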
diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c
index 914d00c423ad..b4e2aa995b53 100644
--- a/drivers/pnp/isapnp/core.c
+++ b/drivers/pnp/isapnp/core.c
@@ -51,10 +51,10 @@
51#define ISAPNP_DEBUG 51#define ISAPNP_DEBUG
52#endif 52#endif
53 53
54int isapnp_disable; /* Disable ISA PnP */ 54int isapnp_disable; /* Disable ISA PnP */
55static int isapnp_rdp; /* Read Data Port */ 55static int isapnp_rdp; /* Read Data Port */
56static int isapnp_reset = 1; /* reset all PnP cards (deactivate) */ 56static int isapnp_reset = 1; /* reset all PnP cards (deactivate) */
57static int isapnp_verbose = 1; /* verbose mode */ 57static int isapnp_verbose = 1; /* verbose mode */
58 58
59MODULE_AUTHOR("Jaroslav Kysela <perex@suse.cz>"); 59MODULE_AUTHOR("Jaroslav Kysela <perex@suse.cz>");
60MODULE_DESCRIPTION("Generic ISA Plug & Play support"); 60MODULE_DESCRIPTION("Generic ISA Plug & Play support");
@@ -126,7 +126,7 @@ static unsigned short isapnp_read_word(unsigned char idx)
126 unsigned short val; 126 unsigned short val;
127 127
128 val = isapnp_read_byte(idx); 128 val = isapnp_read_byte(idx);
129 val = (val << 8) + isapnp_read_byte(idx+1); 129 val = (val << 8) + isapnp_read_byte(idx + 1);
130 return val; 130 return val;
131} 131}
132 132
@@ -139,7 +139,7 @@ void isapnp_write_byte(unsigned char idx, unsigned char val)
139static void isapnp_write_word(unsigned char idx, unsigned short val) 139static void isapnp_write_word(unsigned char idx, unsigned short val)
140{ 140{
141 isapnp_write_byte(idx, val >> 8); 141 isapnp_write_byte(idx, val >> 8);
142 isapnp_write_byte(idx+1, val); 142 isapnp_write_byte(idx + 1, val);
143} 143}
144 144
145static void isapnp_key(void) 145static void isapnp_key(void)
@@ -193,7 +193,7 @@ static void isapnp_deactivate(unsigned char logdev)
193static void __init isapnp_peek(unsigned char *data, int bytes) 193static void __init isapnp_peek(unsigned char *data, int bytes)
194{ 194{
195 int i, j; 195 int i, j;
196 unsigned char d=0; 196 unsigned char d = 0;
197 197
198 for (i = 1; i <= bytes; i++) { 198 for (i = 1; i <= bytes; i++) {
199 for (j = 0; j < 20; j++) { 199 for (j = 0; j < 20; j++) {
@@ -220,19 +220,18 @@ static int isapnp_next_rdp(void)
220{ 220{
221 int rdp = isapnp_rdp; 221 int rdp = isapnp_rdp;
222 static int old_rdp = 0; 222 static int old_rdp = 0;
223 223
224 if(old_rdp) 224 if (old_rdp) {
225 {
226 release_region(old_rdp, 1); 225 release_region(old_rdp, 1);
227 old_rdp = 0; 226 old_rdp = 0;
228 } 227 }
229 while (rdp <= 0x3ff) { 228 while (rdp <= 0x3ff) {
230 /* 229 /*
231 * We cannot use NE2000 probe spaces for ISAPnP or we 230 * We cannot use NE2000 probe spaces for ISAPnP or we
232 * will lock up machines. 231 * will lock up machines.
233 */ 232 */
234 if ((rdp < 0x280 || rdp > 0x380) && request_region(rdp, 1, "ISAPnP")) 233 if ((rdp < 0x280 || rdp > 0x380)
235 { 234 && request_region(rdp, 1, "ISAPnP")) {
236 isapnp_rdp = rdp; 235 isapnp_rdp = rdp;
237 old_rdp = rdp; 236 old_rdp = rdp;
238 return 0; 237 return 0;
@@ -253,7 +252,6 @@ static inline void isapnp_set_rdp(void)
253 * Perform an isolation. The port selection code now tries to avoid 252 * Perform an isolation. The port selection code now tries to avoid
254 * "dangerous to read" ports. 253 * "dangerous to read" ports.
255 */ 254 */
256
257static int __init isapnp_isolate_rdp_select(void) 255static int __init isapnp_isolate_rdp_select(void)
258{ 256{
259 isapnp_wait(); 257 isapnp_wait();
@@ -282,7 +280,6 @@ static int __init isapnp_isolate_rdp_select(void)
282/* 280/*
283 * Isolate (assign a unique CSN) to all ISA PnP devices. 281 * Isolate (assign a unique CSN) to all ISA PnP devices.
284 */ 282 */
285
286static int __init isapnp_isolate(void) 283static int __init isapnp_isolate(void)
287{ 284{
288 unsigned char checksum = 0x6a; 285 unsigned char checksum = 0x6a;
@@ -305,7 +302,9 @@ static int __init isapnp_isolate(void)
305 udelay(250); 302 udelay(250);
306 if (data == 0x55aa) 303 if (data == 0x55aa)
307 bit = 0x01; 304 bit = 0x01;
308 checksum = ((((checksum ^ (checksum >> 1)) & 0x01) ^ bit) << 7) | (checksum >> 1); 305 checksum =
306 ((((checksum ^ (checksum >> 1)) & 0x01) ^ bit) << 7)
307 | (checksum >> 1);
309 bit = 0x00; 308 bit = 0x00;
310 } 309 }
311 for (i = 65; i <= 72; i++) { 310 for (i = 65; i <= 72; i++) {
@@ -351,13 +350,12 @@ static int __init isapnp_isolate(void)
351/* 350/*
352 * Read one tag from stream. 351 * Read one tag from stream.
353 */ 352 */
354
355static int __init isapnp_read_tag(unsigned char *type, unsigned short *size) 353static int __init isapnp_read_tag(unsigned char *type, unsigned short *size)
356{ 354{
357 unsigned char tag, tmp[2]; 355 unsigned char tag, tmp[2];
358 356
359 isapnp_peek(&tag, 1); 357 isapnp_peek(&tag, 1);
360 if (tag == 0) /* invalid tag */ 358 if (tag == 0) /* invalid tag */
361 return -1; 359 return -1;
362 if (tag & 0x80) { /* large item */ 360 if (tag & 0x80) { /* large item */
363 *type = tag; 361 *type = tag;
@@ -368,7 +366,8 @@ static int __init isapnp_read_tag(unsigned char *type, unsigned short *size)
368 *size = tag & 0x07; 366 *size = tag & 0x07;
369 } 367 }
370#if 0 368#if 0
371 printk(KERN_DEBUG "tag = 0x%x, type = 0x%x, size = %i\n", tag, *type, *size); 369 printk(KERN_DEBUG "tag = 0x%x, type = 0x%x, size = %i\n", tag, *type,
370 *size);
372#endif 371#endif
373 if (*type == 0xff && *size == 0xffff) /* probably invalid data */ 372 if (*type == 0xff && *size == 0xffff) /* probably invalid data */
374 return -1; 373 return -1;
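
isapnp_read_tag() above follows the Plug and Play ISA resource-data format: bit 7 of the tag byte selects a large item, whose type is the tag itself and whose 16-bit length follows (little-endian, per the spec; the diff elides those two peeks), while small items keep the type in bits 6..3 and the length in bits 2..0. A standalone decoder sketch over an illustrative byte stream:

#include <stdio.h>

static int read_tag(const unsigned char *p, unsigned char *type,
                    unsigned short *size)
{
        unsigned char tag = p[0];

        if (tag == 0)                   /* invalid tag */
                return -1;
        if (tag & 0x80) {               /* large item */
                *type = tag;
                *size = p[1] | (p[2] << 8);
                return 3;               /* header bytes consumed */
        }
        *type = (tag >> 3) & 0x0f;      /* small item */
        *size = tag & 0x07;
        return 1;
}

int main(void)
{
        /* 0x82 = large ANSI identifier-string tag, length 0x0005 */
        const unsigned char stream[] = { 0x82, 0x05, 0x00 };
        unsigned char type;
        unsigned short size;

        if (read_tag(stream, &type, &size) > 0)
                printf("type 0x%x, size %u\n", type, size);
        return 0;
}
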
@@ -378,7 +377,6 @@ static int __init isapnp_read_tag(unsigned char *type, unsigned short *size)
378/* 377/*
379 * Skip specified number of bytes from stream. 378 * Skip specified number of bytes from stream.
380 */ 379 */
381
382static void __init isapnp_skip_bytes(int count) 380static void __init isapnp_skip_bytes(int count)
383{ 381{
384 isapnp_peek(NULL, count); 382 isapnp_peek(NULL, count);
@@ -387,31 +385,30 @@ static void __init isapnp_skip_bytes(int count)
387/* 385/*
388 * Parse EISA id. 386 * Parse EISA id.
389 */ 387 */
390 388static void isapnp_parse_id(struct pnp_dev *dev, unsigned short vendor,
391static void isapnp_parse_id(struct pnp_dev * dev, unsigned short vendor, unsigned short device) 389 unsigned short device)
392{ 390{
393 struct pnp_id * id; 391 struct pnp_id *id;
392
394 if (!dev) 393 if (!dev)
395 return; 394 return;
396 id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL); 395 id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
397 if (!id) 396 if (!id)
398 return; 397 return;
399 sprintf(id->id, "%c%c%c%x%x%x%x", 398 sprintf(id->id, "%c%c%c%x%x%x%x",
400 'A' + ((vendor >> 2) & 0x3f) - 1, 399 'A' + ((vendor >> 2) & 0x3f) - 1,
401 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1, 400 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1,
402 'A' + ((vendor >> 8) & 0x1f) - 1, 401 'A' + ((vendor >> 8) & 0x1f) - 1,
403 (device >> 4) & 0x0f, 402 (device >> 4) & 0x0f,
404 device & 0x0f, 403 device & 0x0f, (device >> 12) & 0x0f, (device >> 8) & 0x0f);
405 (device >> 12) & 0x0f,
406 (device >> 8) & 0x0f);
407 pnp_add_id(id, dev); 404 pnp_add_id(id, dev);
408} 405}
409 406
410/* 407/*
411 * Parse logical device tag. 408 * Parse logical device tag.
412 */ 409 */
413 410static struct pnp_dev *__init isapnp_parse_device(struct pnp_card *card,
414static struct pnp_dev * __init isapnp_parse_device(struct pnp_card *card, int size, int number) 411 int size, int number)
415{ 412{
416 unsigned char tmp[6]; 413 unsigned char tmp[6];
417 struct pnp_dev *dev; 414 struct pnp_dev *dev;
@@ -435,13 +432,11 @@ static struct pnp_dev * __init isapnp_parse_device(struct pnp_card *card, int si
435 return dev; 432 return dev;
436} 433}
437 434
438
439/* 435/*
440 * Add IRQ resource to resources list. 436 * Add IRQ resource to resources list.
441 */ 437 */
442
443static void __init isapnp_parse_irq_resource(struct pnp_option *option, 438static void __init isapnp_parse_irq_resource(struct pnp_option *option,
444 int size) 439 int size)
445{ 440{
446 unsigned char tmp[3]; 441 unsigned char tmp[3];
447 struct pnp_irq *irq; 442 struct pnp_irq *irq;
@@ -458,15 +453,13 @@ static void __init isapnp_parse_irq_resource(struct pnp_option *option,
458 else 453 else
459 irq->flags = IORESOURCE_IRQ_HIGHEDGE; 454 irq->flags = IORESOURCE_IRQ_HIGHEDGE;
460 pnp_register_irq_resource(option, irq); 455 pnp_register_irq_resource(option, irq);
461 return;
462} 456}
463 457
464/* 458/*
465 * Add DMA resource to resources list. 459 * Add DMA resource to resources list.
466 */ 460 */
467
468static void __init isapnp_parse_dma_resource(struct pnp_option *option, 461static void __init isapnp_parse_dma_resource(struct pnp_option *option,
469 int size) 462 int size)
470{ 463{
471 unsigned char tmp[2]; 464 unsigned char tmp[2];
472 struct pnp_dma *dma; 465 struct pnp_dma *dma;
@@ -478,15 +471,13 @@ static void __init isapnp_parse_dma_resource(struct pnp_option *option,
478 dma->map = tmp[0]; 471 dma->map = tmp[0];
479 dma->flags = tmp[1]; 472 dma->flags = tmp[1];
480 pnp_register_dma_resource(option, dma); 473 pnp_register_dma_resource(option, dma);
481 return;
482} 474}
483 475
484/* 476/*
485 * Add port resource to resources list. 477 * Add port resource to resources list.
486 */ 478 */
487
488static void __init isapnp_parse_port_resource(struct pnp_option *option, 479static void __init isapnp_parse_port_resource(struct pnp_option *option,
489 int size) 480 int size)
490{ 481{
491 unsigned char tmp[7]; 482 unsigned char tmp[7];
492 struct pnp_port *port; 483 struct pnp_port *port;
@@ -500,16 +491,14 @@ static void __init isapnp_parse_port_resource(struct pnp_option *option,
500 port->align = tmp[5]; 491 port->align = tmp[5];
501 port->size = tmp[6]; 492 port->size = tmp[6];
502 port->flags = tmp[0] ? PNP_PORT_FLAG_16BITADDR : 0; 493 port->flags = tmp[0] ? PNP_PORT_FLAG_16BITADDR : 0;
503 pnp_register_port_resource(option,port); 494 pnp_register_port_resource(option, port);
504 return;
505} 495}
506 496
507/* 497/*
508 * Add fixed port resource to resources list. 498 * Add fixed port resource to resources list.
509 */ 499 */
510
511static void __init isapnp_parse_fixed_port_resource(struct pnp_option *option, 500static void __init isapnp_parse_fixed_port_resource(struct pnp_option *option,
512 int size) 501 int size)
513{ 502{
514 unsigned char tmp[3]; 503 unsigned char tmp[3];
515 struct pnp_port *port; 504 struct pnp_port *port;
@@ -522,16 +511,14 @@ static void __init isapnp_parse_fixed_port_resource(struct pnp_option *option,
522 port->size = tmp[2]; 511 port->size = tmp[2];
523 port->align = 0; 512 port->align = 0;
524 port->flags = PNP_PORT_FLAG_FIXED; 513 port->flags = PNP_PORT_FLAG_FIXED;
525 pnp_register_port_resource(option,port); 514 pnp_register_port_resource(option, port);
526 return;
527} 515}
528 516
529/* 517/*
530 * Add memory resource to resources list. 518 * Add memory resource to resources list.
531 */ 519 */
532
533static void __init isapnp_parse_mem_resource(struct pnp_option *option, 520static void __init isapnp_parse_mem_resource(struct pnp_option *option,
534 int size) 521 int size)
535{ 522{
536 unsigned char tmp[9]; 523 unsigned char tmp[9];
537 struct pnp_mem *mem; 524 struct pnp_mem *mem;
@@ -545,16 +532,14 @@ static void __init isapnp_parse_mem_resource(struct pnp_option *option,
545 mem->align = (tmp[6] << 8) | tmp[5]; 532 mem->align = (tmp[6] << 8) | tmp[5];
546 mem->size = ((tmp[8] << 8) | tmp[7]) << 8; 533 mem->size = ((tmp[8] << 8) | tmp[7]) << 8;
547 mem->flags = tmp[0]; 534 mem->flags = tmp[0];
548 pnp_register_mem_resource(option,mem); 535 pnp_register_mem_resource(option, mem);
549 return;
550} 536}
551 537
552/* 538/*
553 * Add 32-bit memory resource to resources list. 539 * Add 32-bit memory resource to resources list.
554 */ 540 */
555
556static void __init isapnp_parse_mem32_resource(struct pnp_option *option, 541static void __init isapnp_parse_mem32_resource(struct pnp_option *option,
557 int size) 542 int size)
558{ 543{
559 unsigned char tmp[17]; 544 unsigned char tmp[17];
560 struct pnp_mem *mem; 545 struct pnp_mem *mem;
@@ -565,18 +550,19 @@ static void __init isapnp_parse_mem32_resource(struct pnp_option *option,
565 return; 550 return;
566 mem->min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; 551 mem->min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1];
567 mem->max = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; 552 mem->max = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5];
568 mem->align = (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9]; 553 mem->align =
569 mem->size = (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13]; 554 (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9];
555 mem->size =
556 (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13];
570 mem->flags = tmp[0]; 557 mem->flags = tmp[0];
571 pnp_register_mem_resource(option,mem); 558 pnp_register_mem_resource(option, mem);
572} 559}
573 560
574/* 561/*
575 * Add 32-bit fixed memory resource to resources list. 562 * Add 32-bit fixed memory resource to resources list.
576 */ 563 */
577
578static void __init isapnp_parse_fixed_mem32_resource(struct pnp_option *option, 564static void __init isapnp_parse_fixed_mem32_resource(struct pnp_option *option,
579 int size) 565 int size)
580{ 566{
581 unsigned char tmp[9]; 567 unsigned char tmp[9];
582 struct pnp_mem *mem; 568 struct pnp_mem *mem;
@@ -585,28 +571,29 @@ static void __init isapnp_parse_fixed_mem32_resource(struct pnp_option *option,
585 mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); 571 mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
586 if (!mem) 572 if (!mem)
587 return; 573 return;
588 mem->min = mem->max = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; 574 mem->min = mem->max =
575 (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1];
589 mem->size = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; 576 mem->size = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5];
590 mem->align = 0; 577 mem->align = 0;
591 mem->flags = tmp[0]; 578 mem->flags = tmp[0];
592 pnp_register_mem_resource(option,mem); 579 pnp_register_mem_resource(option, mem);
593} 580}
594 581
595/* 582/*
596 * Parse card name for ISA PnP device. 583 * Parse card name for ISA PnP device.
597 */ 584 */
598
599static void __init 585static void __init
600isapnp_parse_name(char *name, unsigned int name_max, unsigned short *size) 586isapnp_parse_name(char *name, unsigned int name_max, unsigned short *size)
601{ 587{
602 if (name[0] == '\0') { 588 if (name[0] == '\0') {
603 unsigned short size1 = *size >= name_max ? (name_max - 1) : *size; 589 unsigned short size1 =
590 *size >= name_max ? (name_max - 1) : *size;
604 isapnp_peek(name, size1); 591 isapnp_peek(name, size1);
605 name[size1] = '\0'; 592 name[size1] = '\0';
606 *size -= size1; 593 *size -= size1;
607 594
608 /* clean whitespace from end of string */ 595 /* clean whitespace from end of string */
609 while (size1 > 0 && name[--size1] == ' ') 596 while (size1 > 0 && name[--size1] == ' ')
610 name[size1] = '\0'; 597 name[size1] = '\0';
611 } 598 }
612} 599}
@@ -614,7 +601,6 @@ isapnp_parse_name(char *name, unsigned int name_max, unsigned short *size)
614/* 601/*
615 * Parse resource map for logical device. 602 * Parse resource map for logical device.
616 */ 603 */
617
618static int __init isapnp_create_device(struct pnp_card *card, 604static int __init isapnp_create_device(struct pnp_card *card,
619 unsigned short size) 605 unsigned short size)
620{ 606{
@@ -622,6 +608,7 @@ static int __init isapnp_create_device(struct pnp_card *card,
622 unsigned char type, tmp[17]; 608 unsigned char type, tmp[17];
623 struct pnp_option *option; 609 struct pnp_option *option;
624 struct pnp_dev *dev; 610 struct pnp_dev *dev;
611
625 if ((dev = isapnp_parse_device(card, size, number++)) == NULL) 612 if ((dev = isapnp_parse_device(card, size, number++)) == NULL)
626 return 1; 613 return 1;
627 option = pnp_register_independent_option(dev); 614 option = pnp_register_independent_option(dev);
@@ -629,17 +616,19 @@ static int __init isapnp_create_device(struct pnp_card *card,
629 kfree(dev); 616 kfree(dev);
630 return 1; 617 return 1;
631 } 618 }
632 pnp_add_card_device(card,dev); 619 pnp_add_card_device(card, dev);
633 620
634 while (1) { 621 while (1) {
635 if (isapnp_read_tag(&type, &size)<0) 622 if (isapnp_read_tag(&type, &size) < 0)
636 return 1; 623 return 1;
637 if (skip && type != _STAG_LOGDEVID && type != _STAG_END) 624 if (skip && type != _STAG_LOGDEVID && type != _STAG_END)
638 goto __skip; 625 goto __skip;
639 switch (type) { 626 switch (type) {
640 case _STAG_LOGDEVID: 627 case _STAG_LOGDEVID:
641 if (size >= 5 && size <= 6) { 628 if (size >= 5 && size <= 6) {
642 if ((dev = isapnp_parse_device(card, size, number++)) == NULL) 629 if ((dev =
630 isapnp_parse_device(card, size,
631 number++)) == NULL)
643 return 1; 632 return 1;
644 size = 0; 633 size = 0;
645 skip = 0; 634 skip = 0;
@@ -648,7 +637,7 @@ static int __init isapnp_create_device(struct pnp_card *card,
648 kfree(dev); 637 kfree(dev);
649 return 1; 638 return 1;
650 } 639 }
651 pnp_add_card_device(card,dev); 640 pnp_add_card_device(card, dev);
652 } else { 641 } else {
653 skip = 1; 642 skip = 1;
654 } 643 }
@@ -658,7 +647,8 @@ static int __init isapnp_create_device(struct pnp_card *card,
658 case _STAG_COMPATDEVID: 647 case _STAG_COMPATDEVID:
659 if (size == 4 && compat < DEVICE_COUNT_COMPATIBLE) { 648 if (size == 4 && compat < DEVICE_COUNT_COMPATIBLE) {
660 isapnp_peek(tmp, 4); 649 isapnp_peek(tmp, 4);
661 isapnp_parse_id(dev,(tmp[1] << 8) | tmp[0], (tmp[3] << 8) | tmp[2]); 650 isapnp_parse_id(dev, (tmp[1] << 8) | tmp[0],
651 (tmp[3] << 8) | tmp[2]);
662 compat++; 652 compat++;
663 size = 0; 653 size = 0;
664 } 654 }
@@ -684,7 +674,7 @@ static int __init isapnp_create_device(struct pnp_card *card,
684 priority = 0x100 | tmp[0]; 674 priority = 0x100 | tmp[0];
685 size = 0; 675 size = 0;
686 } 676 }
687 option = pnp_register_dependent_option(dev,priority); 677 option = pnp_register_dependent_option(dev, priority);
688 if (!option) 678 if (!option)
689 return 1; 679 return 1;
690 break; 680 break;
@@ -739,11 +729,13 @@ static int __init isapnp_create_device(struct pnp_card *card,
739 isapnp_skip_bytes(size); 729 isapnp_skip_bytes(size);
740 return 1; 730 return 1;
741 default: 731 default:
742 printk(KERN_ERR "isapnp: unexpected or unknown tag type 0x%x for logical device %i (device %i), ignored\n", type, dev->number, card->number); 732 printk(KERN_ERR
733 "isapnp: unexpected or unknown tag type 0x%x for logical device %i (device %i), ignored\n",
734 type, dev->number, card->number);
743 } 735 }
744 __skip: 736 __skip:
745 if (size > 0) 737 if (size > 0)
746 isapnp_skip_bytes(size); 738 isapnp_skip_bytes(size);
747 } 739 }
748 return 0; 740 return 0;
749} 741}
@@ -751,14 +743,13 @@ static int __init isapnp_create_device(struct pnp_card *card,
751/* 743/*
752 * Parse resource map for ISA PnP card. 744 * Parse resource map for ISA PnP card.
753 */ 745 */
754
755static void __init isapnp_parse_resource_map(struct pnp_card *card) 746static void __init isapnp_parse_resource_map(struct pnp_card *card)
756{ 747{
757 unsigned char type, tmp[17]; 748 unsigned char type, tmp[17];
758 unsigned short size; 749 unsigned short size;
759 750
760 while (1) { 751 while (1) {
761 if (isapnp_read_tag(&type, &size)<0) 752 if (isapnp_read_tag(&type, &size) < 0)
762 return; 753 return;
763 switch (type) { 754 switch (type) {
764 case _STAG_PNPVERNO: 755 case _STAG_PNPVERNO:
@@ -771,7 +762,7 @@ static void __init isapnp_parse_resource_map(struct pnp_card *card)
771 break; 762 break;
772 case _STAG_LOGDEVID: 763 case _STAG_LOGDEVID:
773 if (size >= 5 && size <= 6) { 764 if (size >= 5 && size <= 6) {
774 if (isapnp_create_device(card, size)==1) 765 if (isapnp_create_device(card, size) == 1)
775 return; 766 return;
776 size = 0; 767 size = 0;
777 } 768 }
@@ -779,7 +770,8 @@ static void __init isapnp_parse_resource_map(struct pnp_card *card)
779 case _STAG_VENDOR: 770 case _STAG_VENDOR:
780 break; 771 break;
781 case _LTAG_ANSISTR: 772 case _LTAG_ANSISTR:
782 isapnp_parse_name(card->name, sizeof(card->name), &size); 773 isapnp_parse_name(card->name, sizeof(card->name),
774 &size);
783 break; 775 break;
784 case _LTAG_UNICODESTR: 776 case _LTAG_UNICODESTR:
785 /* silently ignore */ 777 /* silently ignore */
@@ -792,18 +784,19 @@ static void __init isapnp_parse_resource_map(struct pnp_card *card)
792 isapnp_skip_bytes(size); 784 isapnp_skip_bytes(size);
793 return; 785 return;
794 default: 786 default:
795 printk(KERN_ERR "isapnp: unexpected or unknown tag type 0x%x for device %i, ignored\n", type, card->number); 787 printk(KERN_ERR
788 "isapnp: unexpected or unknown tag type 0x%x for device %i, ignored\n",
789 type, card->number);
796 } 790 }
797 __skip: 791 __skip:
798 if (size > 0) 792 if (size > 0)
799 isapnp_skip_bytes(size); 793 isapnp_skip_bytes(size);
800 } 794 }
801} 795}
802 796
803/* 797/*
804 * Compute ISA PnP checksum for first eight bytes. 798 * Compute ISA PnP checksum for first eight bytes.
805 */ 799 */
806
807static unsigned char __init isapnp_checksum(unsigned char *data) 800static unsigned char __init isapnp_checksum(unsigned char *data)
808{ 801{
809 int i, j; 802 int i, j;
@@ -815,7 +808,9 @@ static unsigned char __init isapnp_checksum(unsigned char *data)
815 bit = 0; 808 bit = 0;
816 if (b & (1 << j)) 809 if (b & (1 << j))
817 bit = 1; 810 bit = 1;
818 checksum = ((((checksum ^ (checksum >> 1)) & 0x01) ^ bit) << 7) | (checksum >> 1); 811 checksum =
812 ((((checksum ^ (checksum >> 1)) & 0x01) ^ bit) << 7)
813 | (checksum >> 1);
819 } 814 }
820 } 815 }
821 return checksum; 816 return checksum;
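The expression above is one step of the Plug and Play ISA serial-identifier LFSR. A self-contained sketch of the whole checksum, assuming the 0x6a seed the spec prescribes; a valid card stores the result in header[8]:

	static unsigned char lfsr_checksum(const unsigned char *header)
	{
		unsigned char lfsr = 0x6a;	/* seed per the PnP ISA spec */
		int i, j;

		/* shift each data bit into the LFSR: feedback taps at
		 * bits 0 and 1, new bit entering at bit 7 */
		for (i = 0; i < 8; i++)
			for (j = 0; j < 8; j++) {
				unsigned char bit = (header[i] >> j) & 1;
				lfsr = ((((lfsr ^ (lfsr >> 1)) & 1) ^ bit) << 7)
				       | (lfsr >> 1);
			}
		return lfsr;
	}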
@@ -824,27 +819,25 @@ static unsigned char __init isapnp_checksum(unsigned char *data)
824/* 819/*
825 * Parse EISA id for ISA PnP card. 820 * Parse EISA id for ISA PnP card.
826 */ 821 */
827 822static void isapnp_parse_card_id(struct pnp_card *card, unsigned short vendor,
828static void isapnp_parse_card_id(struct pnp_card * card, unsigned short vendor, unsigned short device) 823 unsigned short device)
829{ 824{
830 struct pnp_id * id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL); 825 struct pnp_id *id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
826
831 if (!id) 827 if (!id)
832 return; 828 return;
833 sprintf(id->id, "%c%c%c%x%x%x%x", 829 sprintf(id->id, "%c%c%c%x%x%x%x",
834 'A' + ((vendor >> 2) & 0x3f) - 1, 830 'A' + ((vendor >> 2) & 0x3f) - 1,
835 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1, 831 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1,
836 'A' + ((vendor >> 8) & 0x1f) - 1, 832 'A' + ((vendor >> 8) & 0x1f) - 1,
837 (device >> 4) & 0x0f, 833 (device >> 4) & 0x0f,
838 device & 0x0f, 834 device & 0x0f, (device >> 12) & 0x0f, (device >> 8) & 0x0f);
839 (device >> 12) & 0x0f, 835 pnp_add_card_id(id, card);
840 (device >> 8) & 0x0f);
841 pnp_add_card_id(id,card);
842} 836}
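The scrambled shifts above exist because the compressed EISA ID is stored byte-swapped: each letter occupies 5 bits with 'A' encoded as 1, packed big-endian as (c1 << 10) | (c2 << 5) | c3. On the wire-order value the decode is straightforward; "PNP", for instance, packs to 0x41d0:

	/* Sketch on the big-endian 16-bit vendor ID; the sprintf above
	 * performs the same extraction on the byte-swapped value. */
	static void eisa_vendor_str(unsigned short be_id, char s[4])
	{
		s[0] = 'A' + ((be_id >> 10) & 0x1f) - 1;
		s[1] = 'A' + ((be_id >> 5) & 0x1f) - 1;
		s[2] = 'A' + (be_id & 0x1f) - 1;
		s[3] = '\0';			/* 0x41d0 yields "PNP" */
	}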
843 837
844/* 838/*
845 * Build device list for all present ISA PnP devices. 839 * Build device list for all present ISA PnP devices.
846 */ 840 */
847
848static int __init isapnp_build_device_list(void) 841static int __init isapnp_build_device_list(void)
849{ 842{
850 int csn; 843 int csn;
@@ -858,22 +851,29 @@ static int __init isapnp_build_device_list(void)
858 isapnp_peek(header, 9); 851 isapnp_peek(header, 9);
859 checksum = isapnp_checksum(header); 852 checksum = isapnp_checksum(header);
860#if 0 853#if 0
861 printk(KERN_DEBUG "vendor: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", 854 printk(KERN_DEBUG
862 header[0], header[1], header[2], header[3], 855 "vendor: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
863 header[4], header[5], header[6], header[7], header[8]); 856 header[0], header[1], header[2], header[3], header[4],
857 header[5], header[6], header[7], header[8]);
864 printk(KERN_DEBUG "checksum = 0x%x\n", checksum); 858 printk(KERN_DEBUG "checksum = 0x%x\n", checksum);
865#endif 859#endif
866 if ((card = kzalloc(sizeof(struct pnp_card), GFP_KERNEL)) == NULL) 860 if ((card =
861 kzalloc(sizeof(struct pnp_card), GFP_KERNEL)) == NULL)
867 continue; 862 continue;
868 863
869 card->number = csn; 864 card->number = csn;
870 INIT_LIST_HEAD(&card->devices); 865 INIT_LIST_HEAD(&card->devices);
871 isapnp_parse_card_id(card, (header[1] << 8) | header[0], (header[3] << 8) | header[2]); 866 isapnp_parse_card_id(card, (header[1] << 8) | header[0],
872 card->serial = (header[7] << 24) | (header[6] << 16) | (header[5] << 8) | header[4]; 867 (header[3] << 8) | header[2]);
868 card->serial =
869 (header[7] << 24) | (header[6] << 16) | (header[5] << 8) |
870 header[4];
873 isapnp_checksum_value = 0x00; 871 isapnp_checksum_value = 0x00;
874 isapnp_parse_resource_map(card); 872 isapnp_parse_resource_map(card);
875 if (isapnp_checksum_value != 0x00) 873 if (isapnp_checksum_value != 0x00)
876 printk(KERN_ERR "isapnp: checksum for device %i is not valid (0x%x)\n", csn, isapnp_checksum_value); 874 printk(KERN_ERR
875 "isapnp: checksum for device %i is not valid (0x%x)\n",
876 csn, isapnp_checksum_value);
877 card->checksum = isapnp_checksum_value; 877 card->checksum = isapnp_checksum_value;
878 card->protocol = &isapnp_protocol; 878 card->protocol = &isapnp_protocol;
879 879
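For reference, the 9-byte serial identifier peeked above breaks down as follows (layout per the PnP ISA spec; the field descriptions are editorial):

	/*	header[0..1]	compressed EISA vendor ID, wire order
	 *	header[2..3]	product number, assembled little-endian
	 *	header[4..7]	32-bit serial number, little-endian
	 *	header[8]	LFSR checksum of bytes 0..7
	 */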
@@ -890,6 +890,7 @@ static int __init isapnp_build_device_list(void)
890int isapnp_present(void) 890int isapnp_present(void)
891{ 891{
892 struct pnp_card *card; 892 struct pnp_card *card;
893
893 pnp_for_each_card(card) { 894 pnp_for_each_card(card) {
894 if (card->protocol == &isapnp_protocol) 895 if (card->protocol == &isapnp_protocol)
895 return 1; 896 return 1;
@@ -911,13 +912,13 @@ int isapnp_cfg_begin(int csn, int logdev)
911 /* it is possible to set RDP only in the isolation phase */ 912 /* it is possible to set RDP only in the isolation phase */
912 /* Jens Thoms Toerring <Jens.Toerring@physik.fu-berlin.de> */ 913 /* Jens Thoms Toerring <Jens.Toerring@physik.fu-berlin.de> */
913 isapnp_write_byte(0x02, 0x04); /* clear CSN of card */ 914 isapnp_write_byte(0x02, 0x04); /* clear CSN of card */
914 mdelay(2); /* is this necessary? */ 915 mdelay(2); /* is this necessary? */
915 isapnp_wake(csn); /* bring card into sleep state */ 916 isapnp_wake(csn); /* bring card into sleep state */
916 isapnp_wake(0); /* bring card into isolation state */ 917 isapnp_wake(0); /* bring card into isolation state */
917 isapnp_set_rdp(); /* reset the RDP port */ 918 isapnp_set_rdp(); /* reset the RDP port */
918 udelay(1000); /* delay 1000us */ 919 udelay(1000); /* delay 1000us */
919 isapnp_write_byte(0x06, csn); /* reset CSN to previous value */ 920 isapnp_write_byte(0x06, csn); /* reset CSN to previous value */
920 udelay(250); /* is this necessary? */ 921 udelay(250); /* is this necessary? */
921#endif 922#endif
922 if (logdev >= 0) 923 if (logdev >= 0)
923 isapnp_device(logdev); 924 isapnp_device(logdev);
@@ -931,12 +932,10 @@ int isapnp_cfg_end(void)
931 return 0; 932 return 0;
932} 933}
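isapnp_cfg_begin() and isapnp_cfg_end() bracket every access to a card's configuration space; the proc read path later in this patch follows exactly this shape. A hedged usage sketch:

	/* Read one configuration register of a logical device; the
	 * begin/end pair serializes access to the config window. */
	static unsigned char peek_cfg(struct pnp_dev *dev, unsigned char pos)
	{
		unsigned char val;

		isapnp_cfg_begin(dev->card->number, dev->number);
		val = isapnp_read_byte(pos);
		isapnp_cfg_end();
		return val;
	}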
933 934
934
935/* 935/*
936 * Inititialization. 936 * Initialization.
937 */ 937 */
938 938
939
940EXPORT_SYMBOL(isapnp_protocol); 939EXPORT_SYMBOL(isapnp_protocol);
941EXPORT_SYMBOL(isapnp_present); 940EXPORT_SYMBOL(isapnp_present);
942EXPORT_SYMBOL(isapnp_cfg_begin); 941EXPORT_SYMBOL(isapnp_cfg_begin);
@@ -946,7 +945,8 @@ EXPORT_SYMBOL(isapnp_read_byte);
946#endif 945#endif
947EXPORT_SYMBOL(isapnp_write_byte); 946EXPORT_SYMBOL(isapnp_write_byte);
948 947
949static int isapnp_read_resources(struct pnp_dev *dev, struct pnp_resource_table *res) 948static int isapnp_read_resources(struct pnp_dev *dev,
949 struct pnp_resource_table *res)
950{ 950{
951 int tmp, ret; 951 int tmp, ret;
952 952
@@ -960,31 +960,37 @@ static int isapnp_read_resources(struct pnp_dev *dev, struct pnp_resource_table
960 res->port_resource[tmp].flags = IORESOURCE_IO; 960 res->port_resource[tmp].flags = IORESOURCE_IO;
961 } 961 }
962 for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) { 962 for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) {
963 ret = isapnp_read_word(ISAPNP_CFG_MEM + (tmp << 3)) << 8; 963 ret =
964 isapnp_read_word(ISAPNP_CFG_MEM + (tmp << 3)) << 8;
964 if (!ret) 965 if (!ret)
965 continue; 966 continue;
966 res->mem_resource[tmp].start = ret; 967 res->mem_resource[tmp].start = ret;
967 res->mem_resource[tmp].flags = IORESOURCE_MEM; 968 res->mem_resource[tmp].flags = IORESOURCE_MEM;
968 } 969 }
969 for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) { 970 for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) {
970 ret = (isapnp_read_word(ISAPNP_CFG_IRQ + (tmp << 1)) >> 8); 971 ret =
972 (isapnp_read_word(ISAPNP_CFG_IRQ + (tmp << 1)) >>
973 8);
971 if (!ret) 974 if (!ret)
972 continue; 975 continue;
973 res->irq_resource[tmp].start = res->irq_resource[tmp].end = ret; 976 res->irq_resource[tmp].start =
977 res->irq_resource[tmp].end = ret;
974 res->irq_resource[tmp].flags = IORESOURCE_IRQ; 978 res->irq_resource[tmp].flags = IORESOURCE_IRQ;
975 } 979 }
976 for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) { 980 for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) {
977 ret = isapnp_read_byte(ISAPNP_CFG_DMA + tmp); 981 ret = isapnp_read_byte(ISAPNP_CFG_DMA + tmp);
978 if (ret == 4) 982 if (ret == 4)
979 continue; 983 continue;
980 res->dma_resource[tmp].start = res->dma_resource[tmp].end = ret; 984 res->dma_resource[tmp].start =
985 res->dma_resource[tmp].end = ret;
981 res->dma_resource[tmp].flags = IORESOURCE_DMA; 986 res->dma_resource[tmp].flags = IORESOURCE_DMA;
982 } 987 }
983 } 988 }
984 return 0; 989 return 0;
985} 990}
986 991
987static int isapnp_get_resources(struct pnp_dev *dev, struct pnp_resource_table * res) 992static int isapnp_get_resources(struct pnp_dev *dev,
993 struct pnp_resource_table *res)
988{ 994{
989 int ret; 995 int ret;
990 pnp_init_resource_table(res); 996 pnp_init_resource_table(res);
@@ -994,24 +1000,44 @@ static int isapnp_get_resources(struct pnp_dev *dev, struct pnp_resource_table *
994 return ret; 1000 return ret;
995} 1001}
996 1002
997static int isapnp_set_resources(struct pnp_dev *dev, struct pnp_resource_table * res) 1003static int isapnp_set_resources(struct pnp_dev *dev,
1004 struct pnp_resource_table *res)
998{ 1005{
999 int tmp; 1006 int tmp;
1000 1007
1001 isapnp_cfg_begin(dev->card->number, dev->number); 1008 isapnp_cfg_begin(dev->card->number, dev->number);
1002 dev->active = 1; 1009 dev->active = 1;
1003 for (tmp = 0; tmp < PNP_MAX_PORT && (res->port_resource[tmp].flags & (IORESOURCE_IO | IORESOURCE_UNSET)) == IORESOURCE_IO; tmp++) 1010 for (tmp = 0;
1004 isapnp_write_word(ISAPNP_CFG_PORT+(tmp<<1), res->port_resource[tmp].start); 1011 tmp < PNP_MAX_PORT
1005 for (tmp = 0; tmp < PNP_MAX_IRQ && (res->irq_resource[tmp].flags & (IORESOURCE_IRQ | IORESOURCE_UNSET)) == IORESOURCE_IRQ; tmp++) { 1012 && (res->port_resource[tmp].
1013 flags & (IORESOURCE_IO | IORESOURCE_UNSET)) == IORESOURCE_IO;
1014 tmp++)
1015 isapnp_write_word(ISAPNP_CFG_PORT + (tmp << 1),
1016 res->port_resource[tmp].start);
1017 for (tmp = 0;
1018 tmp < PNP_MAX_IRQ
1019 && (res->irq_resource[tmp].
1020 flags & (IORESOURCE_IRQ | IORESOURCE_UNSET)) == IORESOURCE_IRQ;
1021 tmp++) {
1006 int irq = res->irq_resource[tmp].start; 1022 int irq = res->irq_resource[tmp].start;
1007 if (irq == 2) 1023 if (irq == 2)
1008 irq = 9; 1024 irq = 9;
1009 isapnp_write_byte(ISAPNP_CFG_IRQ+(tmp<<1), irq); 1025 isapnp_write_byte(ISAPNP_CFG_IRQ + (tmp << 1), irq);
1010 } 1026 }
1011 for (tmp = 0; tmp < PNP_MAX_DMA && (res->dma_resource[tmp].flags & (IORESOURCE_DMA | IORESOURCE_UNSET)) == IORESOURCE_DMA; tmp++) 1027 for (tmp = 0;
1012 isapnp_write_byte(ISAPNP_CFG_DMA+tmp, res->dma_resource[tmp].start); 1028 tmp < PNP_MAX_DMA
1013 for (tmp = 0; tmp < PNP_MAX_MEM && (res->mem_resource[tmp].flags & (IORESOURCE_MEM | IORESOURCE_UNSET)) == IORESOURCE_MEM; tmp++) 1029 && (res->dma_resource[tmp].
1014 isapnp_write_word(ISAPNP_CFG_MEM+(tmp<<3), (res->mem_resource[tmp].start >> 8) & 0xffff); 1030 flags & (IORESOURCE_DMA | IORESOURCE_UNSET)) == IORESOURCE_DMA;
1031 tmp++)
1032 isapnp_write_byte(ISAPNP_CFG_DMA + tmp,
1033 res->dma_resource[tmp].start);
1034 for (tmp = 0;
1035 tmp < PNP_MAX_MEM
1036 && (res->mem_resource[tmp].
1037 flags & (IORESOURCE_MEM | IORESOURCE_UNSET)) == IORESOURCE_MEM;
1038 tmp++)
1039 isapnp_write_word(ISAPNP_CFG_MEM + (tmp << 3),
1040 (res->mem_resource[tmp].start >> 8) & 0xffff);
1015 /* FIXME: We aren't handling 32bit mems properly here */ 1041 /* FIXME: We aren't handling 32bit mems properly here */
1016 isapnp_activate(dev->number); 1042 isapnp_activate(dev->number);
1017 isapnp_cfg_end(); 1043 isapnp_cfg_end();
@@ -1030,9 +1056,9 @@ static int isapnp_disable_resources(struct pnp_dev *dev)
1030} 1056}
1031 1057
1032struct pnp_protocol isapnp_protocol = { 1058struct pnp_protocol isapnp_protocol = {
1033 .name = "ISA Plug and Play", 1059 .name = "ISA Plug and Play",
1034 .get = isapnp_get_resources, 1060 .get = isapnp_get_resources,
1035 .set = isapnp_set_resources, 1061 .set = isapnp_set_resources,
1036 .disable = isapnp_disable_resources, 1062 .disable = isapnp_disable_resources,
1037}; 1063};
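This ops table is the entire contract between a backend and the generic PnP core, which invokes .get/.set/.disable with a struct pnp_resource_table. A sketch of how another backend would wire itself up (the callback names are hypothetical, with the same signatures as the isapnp ones above):

	static struct pnp_protocol example_protocol = {
		.name    = "Example",
		.get     = example_get_resources,	/* hypothetical */
		.set     = example_set_resources,	/* hypothetical */
		.disable = example_disable_resources,	/* hypothetical */
	};

	/* then, as isapnp_init() does below: */
	if (pnp_register_protocol(&example_protocol) < 0)
		return -EBUSY;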
1038 1064
@@ -1053,31 +1079,36 @@ static int __init isapnp_init(void)
1053#endif 1079#endif
1054#ifdef ISAPNP_REGION_OK 1080#ifdef ISAPNP_REGION_OK
1055 if (!request_region(_PIDXR, 1, "isapnp index")) { 1081 if (!request_region(_PIDXR, 1, "isapnp index")) {
1056 printk(KERN_ERR "isapnp: Index Register 0x%x already used\n", _PIDXR); 1082 printk(KERN_ERR "isapnp: Index Register 0x%x already used\n",
1083 _PIDXR);
1057 return -EBUSY; 1084 return -EBUSY;
1058 } 1085 }
1059#endif 1086#endif
1060 if (!request_region(_PNPWRP, 1, "isapnp write")) { 1087 if (!request_region(_PNPWRP, 1, "isapnp write")) {
1061 printk(KERN_ERR "isapnp: Write Data Register 0x%x already used\n", _PNPWRP); 1088 printk(KERN_ERR
1089 "isapnp: Write Data Register 0x%x already used\n",
1090 _PNPWRP);
1062#ifdef ISAPNP_REGION_OK 1091#ifdef ISAPNP_REGION_OK
1063 release_region(_PIDXR, 1); 1092 release_region(_PIDXR, 1);
1064#endif 1093#endif
1065 return -EBUSY; 1094 return -EBUSY;
1066 } 1095 }
1067 1096
1068 if(pnp_register_protocol(&isapnp_protocol)<0) 1097 if (pnp_register_protocol(&isapnp_protocol) < 0)
1069 return -EBUSY; 1098 return -EBUSY;
1070 1099
1071 /* 1100 /*
1072 * Print a message. The existing ISAPnP code is hanging machines 1101 * Print a message. The existing ISAPnP code is hanging machines
1073 * so let the user know where. 1102 * so let the user know where.
1074 */ 1103 */
1075 1104
1076 printk(KERN_INFO "isapnp: Scanning for PnP cards...\n"); 1105 printk(KERN_INFO "isapnp: Scanning for PnP cards...\n");
1077 if (isapnp_rdp >= 0x203 && isapnp_rdp <= 0x3ff) { 1106 if (isapnp_rdp >= 0x203 && isapnp_rdp <= 0x3ff) {
1078 isapnp_rdp |= 3; 1107 isapnp_rdp |= 3;
1079 if (!request_region(isapnp_rdp, 1, "isapnp read")) { 1108 if (!request_region(isapnp_rdp, 1, "isapnp read")) {
1080 printk(KERN_ERR "isapnp: Read Data Register 0x%x already used\n", isapnp_rdp); 1109 printk(KERN_ERR
1110 "isapnp: Read Data Register 0x%x already used\n",
1111 isapnp_rdp);
1081#ifdef ISAPNP_REGION_OK 1112#ifdef ISAPNP_REGION_OK
1082 release_region(_PIDXR, 1); 1113 release_region(_PIDXR, 1);
1083#endif 1114#endif
@@ -1089,14 +1120,14 @@ static int __init isapnp_init(void)
1089 isapnp_detected = 1; 1120 isapnp_detected = 1;
1090 if (isapnp_rdp < 0x203 || isapnp_rdp > 0x3ff) { 1121 if (isapnp_rdp < 0x203 || isapnp_rdp > 0x3ff) {
1091 cards = isapnp_isolate(); 1122 cards = isapnp_isolate();
1092 if (cards < 0 || 1123 if (cards < 0 || (isapnp_rdp < 0x203 || isapnp_rdp > 0x3ff)) {
1093 (isapnp_rdp < 0x203 || isapnp_rdp > 0x3ff)) {
1094#ifdef ISAPNP_REGION_OK 1124#ifdef ISAPNP_REGION_OK
1095 release_region(_PIDXR, 1); 1125 release_region(_PIDXR, 1);
1096#endif 1126#endif
1097 release_region(_PNPWRP, 1); 1127 release_region(_PNPWRP, 1);
1098 isapnp_detected = 0; 1128 isapnp_detected = 0;
1099 printk(KERN_INFO "isapnp: No Plug & Play device found\n"); 1129 printk(KERN_INFO
1130 "isapnp: No Plug & Play device found\n");
1100 return 0; 1131 return 0;
1101 } 1132 }
1102 request_region(isapnp_rdp, 1, "isapnp read"); 1133 request_region(isapnp_rdp, 1, "isapnp read");
@@ -1104,19 +1135,23 @@ static int __init isapnp_init(void)
1104 isapnp_build_device_list(); 1135 isapnp_build_device_list();
1105 cards = 0; 1136 cards = 0;
1106 1137
1107 protocol_for_each_card(&isapnp_protocol,card) { 1138 protocol_for_each_card(&isapnp_protocol, card) {
1108 cards++; 1139 cards++;
1109 if (isapnp_verbose) { 1140 if (isapnp_verbose) {
1110 printk(KERN_INFO "isapnp: Card '%s'\n", card->name[0]?card->name:"Unknown"); 1141 printk(KERN_INFO "isapnp: Card '%s'\n",
1142 card->name[0] ? card->name : "Unknown");
1111 if (isapnp_verbose < 2) 1143 if (isapnp_verbose < 2)
1112 continue; 1144 continue;
1113 card_for_each_dev(card,dev) { 1145 card_for_each_dev(card, dev) {
1114 printk(KERN_INFO "isapnp: Device '%s'\n", dev->name[0]?dev->name:"Unknown"); 1146 printk(KERN_INFO "isapnp: Device '%s'\n",
1147 dev->name[0] ? dev->name : "Unknown");
1115 } 1148 }
1116 } 1149 }
1117 } 1150 }
1118 if (cards) { 1151 if (cards) {
1119 printk(KERN_INFO "isapnp: %i Plug & Play card%s detected total\n", cards, cards>1?"s":""); 1152 printk(KERN_INFO
1153 "isapnp: %i Plug & Play card%s detected total\n", cards,
1154 cards > 1 ? "s" : "");
1120 } else { 1155 } else {
1121 printk(KERN_INFO "isapnp: No Plug & Play card found\n"); 1156 printk(KERN_INFO "isapnp: No Plug & Play card found\n");
1122 } 1157 }
@@ -1141,11 +1176,10 @@ __setup("noisapnp", isapnp_setup_disable);
1141 1176
1142static int __init isapnp_setup_isapnp(char *str) 1177static int __init isapnp_setup_isapnp(char *str)
1143{ 1178{
1144 (void)((get_option(&str,&isapnp_rdp) == 2) && 1179 (void)((get_option(&str, &isapnp_rdp) == 2) &&
1145 (get_option(&str,&isapnp_reset) == 2) && 1180 (get_option(&str, &isapnp_reset) == 2) &&
1146 (get_option(&str,&isapnp_verbose) == 2)); 1181 (get_option(&str, &isapnp_verbose) == 2));
1147 return 1; 1182 return 1;
1148} 1183}
1149 1184
1150__setup("isapnp=", isapnp_setup_isapnp); 1185__setup("isapnp=", isapnp_setup_isapnp);
1151
diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c
index 40b724ebe23b..3fbc0f9ffc26 100644
--- a/drivers/pnp/isapnp/proc.c
+++ b/drivers/pnp/isapnp/proc.c
@@ -2,7 +2,6 @@
2 * ISA Plug & Play support 2 * ISA Plug & Play support
3 * Copyright (c) by Jaroslav Kysela <perex@suse.cz> 3 * Copyright (c) by Jaroslav Kysela <perex@suse.cz>
4 * 4 *
5 *
6 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
@@ -16,7 +15,6 @@
16 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 */ 18 */
21 19
22#include <linux/module.h> 20#include <linux/module.h>
@@ -54,7 +52,8 @@ static loff_t isapnp_proc_bus_lseek(struct file *file, loff_t off, int whence)
54 return (file->f_pos = new); 52 return (file->f_pos = new);
55} 53}
56 54
57static ssize_t isapnp_proc_bus_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) 55static ssize_t isapnp_proc_bus_read(struct file *file, char __user * buf,
56 size_t nbytes, loff_t * ppos)
58{ 57{
59 struct inode *ino = file->f_path.dentry->d_inode; 58 struct inode *ino = file->f_path.dentry->d_inode;
60 struct proc_dir_entry *dp = PDE(ino); 59 struct proc_dir_entry *dp = PDE(ino);
@@ -74,7 +73,7 @@ static ssize_t isapnp_proc_bus_read(struct file *file, char __user *buf, size_t
74 return -EINVAL; 73 return -EINVAL;
75 74
76 isapnp_cfg_begin(dev->card->number, dev->number); 75 isapnp_cfg_begin(dev->card->number, dev->number);
77 for ( ; pos < 256 && cnt > 0; pos++, buf++, cnt--) { 76 for (; pos < 256 && cnt > 0; pos++, buf++, cnt--) {
78 unsigned char val; 77 unsigned char val;
79 val = isapnp_read_byte(pos); 78 val = isapnp_read_byte(pos);
80 __put_user(val, buf); 79 __put_user(val, buf);
@@ -85,10 +84,9 @@ static ssize_t isapnp_proc_bus_read(struct file *file, char __user *buf, size_t
85 return nbytes; 84 return nbytes;
86} 85}
87 86
88static const struct file_operations isapnp_proc_bus_file_operations = 87static const struct file_operations isapnp_proc_bus_file_operations = {
89{ 88 .llseek = isapnp_proc_bus_lseek,
90 .llseek = isapnp_proc_bus_lseek, 89 .read = isapnp_proc_bus_read,
91 .read = isapnp_proc_bus_read,
92}; 90};
93 91
94static int isapnp_proc_attach_device(struct pnp_dev *dev) 92static int isapnp_proc_attach_device(struct pnp_dev *dev)
@@ -139,13 +137,14 @@ static int __exit isapnp_proc_detach_bus(struct pnp_card *bus)
139 remove_proc_entry(name, isapnp_proc_bus_dir); 137 remove_proc_entry(name, isapnp_proc_bus_dir);
140 return 0; 138 return 0;
141} 139}
142#endif /* MODULE */ 140#endif /* MODULE */
143 141
144int __init isapnp_proc_init(void) 142int __init isapnp_proc_init(void)
145{ 143{
146 struct pnp_dev *dev; 144 struct pnp_dev *dev;
145
147 isapnp_proc_bus_dir = proc_mkdir("isapnp", proc_bus); 146 isapnp_proc_bus_dir = proc_mkdir("isapnp", proc_bus);
148 protocol_for_each_dev(&isapnp_protocol,dev) { 147 protocol_for_each_dev(&isapnp_protocol, dev) {
149 isapnp_proc_attach_device(dev); 148 isapnp_proc_attach_device(dev);
150 } 149 }
151 return 0; 150 return 0;
@@ -167,4 +166,4 @@ int __exit isapnp_proc_done(void)
167 remove_proc_entry("isapnp", proc_bus); 166 remove_proc_entry("isapnp", proc_bus);
168 return 0; 167 return 0;
169} 168}
170#endif /* MODULE */ 169#endif /* MODULE */
diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c
index 57e6ab1004d0..3bda513a6bd3 100644
--- a/drivers/pnp/manager.c
+++ b/drivers/pnp/manager.c
@@ -3,7 +3,6 @@
3 * 3 *
4 * based on isapnp.c resource management (c) Jaroslav Kysela <perex@suse.cz> 4 * based on isapnp.c resource management (c) Jaroslav Kysela <perex@suse.cz>
5 * Copyright 2003 Adam Belay <ambx1@neo.rr.com> 5 * Copyright 2003 Adam Belay <ambx1@neo.rr.com>
6 *
7 */ 6 */
8 7
9#include <linux/errno.h> 8#include <linux/errno.h>
@@ -26,7 +25,8 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx)
26 return -EINVAL; 25 return -EINVAL;
27 26
28 if (idx >= PNP_MAX_PORT) { 27 if (idx >= PNP_MAX_PORT) {
29 pnp_err("More than 4 ports is incompatible with pnp specifications."); 28 pnp_err
29 ("More than 4 ports is incompatible with pnp specifications.");
30 /* pretend we were successful so at least the manager won't try again */ 30 /* pretend we were successful so at least the manager won't try again */
31 return 1; 31 return 1;
32 } 32 }
@@ -41,11 +41,11 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx)
41 41
42 /* set the initial values */ 42 /* set the initial values */
43 *flags |= rule->flags | IORESOURCE_IO; 43 *flags |= rule->flags | IORESOURCE_IO;
44 *flags &= ~IORESOURCE_UNSET; 44 *flags &= ~IORESOURCE_UNSET;
45 45
46 if (!rule->size) { 46 if (!rule->size) {
47 *flags |= IORESOURCE_DISABLED; 47 *flags |= IORESOURCE_DISABLED;
48 return 1; /* skip disabled resource requests */ 48 return 1; /* skip disabled resource requests */
49 } 49 }
50 50
51 *start = rule->min; 51 *start = rule->min;
@@ -70,7 +70,8 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
70 return -EINVAL; 70 return -EINVAL;
71 71
72 if (idx >= PNP_MAX_MEM) { 72 if (idx >= PNP_MAX_MEM) {
73 pnp_err("More than 8 mems is incompatible with pnp specifications."); 73 pnp_err
74 ("More than 8 mems is incompatible with pnp specifications.");
74 /* pretend we were successful so at least the manager won't try again */ 75 /* pretend we were successful so at least the manager won't try again */
75 return 1; 76 return 1;
76 } 77 }
@@ -85,7 +86,7 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
85 86
86 /* set the initial values */ 87 /* set the initial values */
87 *flags |= rule->flags | IORESOURCE_MEM; 88 *flags |= rule->flags | IORESOURCE_MEM;
88 *flags &= ~IORESOURCE_UNSET; 89 *flags &= ~IORESOURCE_UNSET;
89 90
90 /* convert pnp flags to standard Linux flags */ 91 /* convert pnp flags to standard Linux flags */
91 if (!(rule->flags & IORESOURCE_MEM_WRITEABLE)) 92 if (!(rule->flags & IORESOURCE_MEM_WRITEABLE))
@@ -99,11 +100,11 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
99 100
100 if (!rule->size) { 101 if (!rule->size) {
101 *flags |= IORESOURCE_DISABLED; 102 *flags |= IORESOURCE_DISABLED;
102 return 1; /* skip disabled resource requests */ 103 return 1; /* skip disabled resource requests */
103 } 104 }
104 105
105 *start = rule->min; 106 *start = rule->min;
106 *end = *start + rule->size -1; 107 *end = *start + rule->size - 1;
107 108
108 /* run through until pnp_check_mem is happy */ 109 /* run through until pnp_check_mem is happy */
109 while (!pnp_check_mem(dev, idx)) { 110 while (!pnp_check_mem(dev, idx)) {
@@ -115,7 +116,7 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
115 return 1; 116 return 1;
116} 117}
117 118
118static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx) 119static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx)
119{ 120{
120 resource_size_t *start, *end; 121 resource_size_t *start, *end;
121 unsigned long *flags; 122 unsigned long *flags;
@@ -130,7 +131,8 @@ static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx)
130 return -EINVAL; 131 return -EINVAL;
131 132
132 if (idx >= PNP_MAX_IRQ) { 133 if (idx >= PNP_MAX_IRQ) {
133 pnp_err("More than 2 irqs is incompatible with pnp specifications."); 134 pnp_err
135 ("More than 2 irqs is incompatible with pnp specifications.");
134 /* pretend we were successful so at least the manager won't try again */ 136 /* pretend we were successful so at least the manager won't try again */
135 return 1; 137 return 1;
136 } 138 }
@@ -145,11 +147,11 @@ static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx)
145 147
146 /* set the initial values */ 148 /* set the initial values */
147 *flags |= rule->flags | IORESOURCE_IRQ; 149 *flags |= rule->flags | IORESOURCE_IRQ;
148 *flags &= ~IORESOURCE_UNSET; 150 *flags &= ~IORESOURCE_UNSET;
149 151
150 if (bitmap_empty(rule->map, PNP_IRQ_NR)) { 152 if (bitmap_empty(rule->map, PNP_IRQ_NR)) {
151 *flags |= IORESOURCE_DISABLED; 153 *flags |= IORESOURCE_DISABLED;
152 return 1; /* skip disabled resource requests */ 154 return 1; /* skip disabled resource requests */
153 } 155 }
154 156
155 /* TBD: need check for >16 IRQ */ 157 /* TBD: need check for >16 IRQ */
@@ -159,9 +161,9 @@ static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx)
159 return 1; 161 return 1;
160 } 162 }
161 for (i = 0; i < 16; i++) { 163 for (i = 0; i < 16; i++) {
162 if(test_bit(xtab[i], rule->map)) { 164 if (test_bit(xtab[i], rule->map)) {
163 *start = *end = xtab[i]; 165 *start = *end = xtab[i];
164 if(pnp_check_irq(dev, idx)) 166 if (pnp_check_irq(dev, idx))
165 return 1; 167 return 1;
166 } 168 }
167 } 169 }
@@ -183,7 +185,8 @@ static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx)
183 return -EINVAL; 185 return -EINVAL;
184 186
185 if (idx >= PNP_MAX_DMA) { 187 if (idx >= PNP_MAX_DMA) {
186 pnp_err("More than 2 dmas is incompatible with pnp specifications."); 188 pnp_err
189 ("More than 2 dmas is incompatible with pnp specifications.");
187 /* pretend we were successful so at least the manager won't try again */ 190 /* pretend we were successful so at least the manager won't try again */
188 return 1; 191 return 1;
189 } 192 }
@@ -198,17 +201,17 @@ static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx)
198 201
199 /* set the initial values */ 202 /* set the initial values */
200 *flags |= rule->flags | IORESOURCE_DMA; 203 *flags |= rule->flags | IORESOURCE_DMA;
201 *flags &= ~IORESOURCE_UNSET; 204 *flags &= ~IORESOURCE_UNSET;
202 205
203 if (!rule->map) { 206 if (!rule->map) {
204 *flags |= IORESOURCE_DISABLED; 207 *flags |= IORESOURCE_DISABLED;
205 return 1; /* skip disabled resource requests */ 208 return 1; /* skip disabled resource requests */
206 } 209 }
207 210
208 for (i = 0; i < 8; i++) { 211 for (i = 0; i < 8; i++) {
209 if(rule->map & (1<<xtab[i])) { 212 if (rule->map & (1 << xtab[i])) {
210 *start = *end = xtab[i]; 213 *start = *end = xtab[i];
211 if(pnp_check_dma(dev, idx)) 214 if (pnp_check_dma(dev, idx))
212 return 1; 215 return 1;
213 } 216 }
214 } 217 }
@@ -218,72 +221,80 @@ static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx)
218/** 221/**
219 * pnp_init_resources - Resets a resource table to default values. 222 * pnp_init_resources - Resets a resource table to default values.
220 * @table: pointer to the desired resource table 223 * @table: pointer to the desired resource table
221 *
222 */ 224 */
223void pnp_init_resource_table(struct pnp_resource_table *table) 225void pnp_init_resource_table(struct pnp_resource_table *table)
224{ 226{
225 int idx; 227 int idx;
228
226 for (idx = 0; idx < PNP_MAX_IRQ; idx++) { 229 for (idx = 0; idx < PNP_MAX_IRQ; idx++) {
227 table->irq_resource[idx].name = NULL; 230 table->irq_resource[idx].name = NULL;
228 table->irq_resource[idx].start = -1; 231 table->irq_resource[idx].start = -1;
229 table->irq_resource[idx].end = -1; 232 table->irq_resource[idx].end = -1;
230 table->irq_resource[idx].flags = IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET; 233 table->irq_resource[idx].flags =
234 IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET;
231 } 235 }
232 for (idx = 0; idx < PNP_MAX_DMA; idx++) { 236 for (idx = 0; idx < PNP_MAX_DMA; idx++) {
233 table->dma_resource[idx].name = NULL; 237 table->dma_resource[idx].name = NULL;
234 table->dma_resource[idx].start = -1; 238 table->dma_resource[idx].start = -1;
235 table->dma_resource[idx].end = -1; 239 table->dma_resource[idx].end = -1;
236 table->dma_resource[idx].flags = IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET; 240 table->dma_resource[idx].flags =
241 IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET;
237 } 242 }
238 for (idx = 0; idx < PNP_MAX_PORT; idx++) { 243 for (idx = 0; idx < PNP_MAX_PORT; idx++) {
239 table->port_resource[idx].name = NULL; 244 table->port_resource[idx].name = NULL;
240 table->port_resource[idx].start = 0; 245 table->port_resource[idx].start = 0;
241 table->port_resource[idx].end = 0; 246 table->port_resource[idx].end = 0;
242 table->port_resource[idx].flags = IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET; 247 table->port_resource[idx].flags =
248 IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET;
243 } 249 }
244 for (idx = 0; idx < PNP_MAX_MEM; idx++) { 250 for (idx = 0; idx < PNP_MAX_MEM; idx++) {
245 table->mem_resource[idx].name = NULL; 251 table->mem_resource[idx].name = NULL;
246 table->mem_resource[idx].start = 0; 252 table->mem_resource[idx].start = 0;
247 table->mem_resource[idx].end = 0; 253 table->mem_resource[idx].end = 0;
248 table->mem_resource[idx].flags = IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET; 254 table->mem_resource[idx].flags =
255 IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET;
249 } 256 }
250} 257}
251 258
252/** 259/**
253 * pnp_clean_resources - clears resources that were not manually set 260 * pnp_clean_resources - clears resources that were not manually set
254 * @res: the resources to clean 261 * @res: the resources to clean
255 *
256 */ 262 */
257static void pnp_clean_resource_table(struct pnp_resource_table * res) 263static void pnp_clean_resource_table(struct pnp_resource_table *res)
258{ 264{
259 int idx; 265 int idx;
266
260 for (idx = 0; idx < PNP_MAX_IRQ; idx++) { 267 for (idx = 0; idx < PNP_MAX_IRQ; idx++) {
261 if (!(res->irq_resource[idx].flags & IORESOURCE_AUTO)) 268 if (!(res->irq_resource[idx].flags & IORESOURCE_AUTO))
262 continue; 269 continue;
263 res->irq_resource[idx].start = -1; 270 res->irq_resource[idx].start = -1;
264 res->irq_resource[idx].end = -1; 271 res->irq_resource[idx].end = -1;
265 res->irq_resource[idx].flags = IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET; 272 res->irq_resource[idx].flags =
273 IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET;
266 } 274 }
267 for (idx = 0; idx < PNP_MAX_DMA; idx++) { 275 for (idx = 0; idx < PNP_MAX_DMA; idx++) {
268 if (!(res->dma_resource[idx].flags & IORESOURCE_AUTO)) 276 if (!(res->dma_resource[idx].flags & IORESOURCE_AUTO))
269 continue; 277 continue;
270 res->dma_resource[idx].start = -1; 278 res->dma_resource[idx].start = -1;
271 res->dma_resource[idx].end = -1; 279 res->dma_resource[idx].end = -1;
272 res->dma_resource[idx].flags = IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET; 280 res->dma_resource[idx].flags =
281 IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET;
273 } 282 }
274 for (idx = 0; idx < PNP_MAX_PORT; idx++) { 283 for (idx = 0; idx < PNP_MAX_PORT; idx++) {
275 if (!(res->port_resource[idx].flags & IORESOURCE_AUTO)) 284 if (!(res->port_resource[idx].flags & IORESOURCE_AUTO))
276 continue; 285 continue;
277 res->port_resource[idx].start = 0; 286 res->port_resource[idx].start = 0;
278 res->port_resource[idx].end = 0; 287 res->port_resource[idx].end = 0;
279 res->port_resource[idx].flags = IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET; 288 res->port_resource[idx].flags =
289 IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET;
280 } 290 }
281 for (idx = 0; idx < PNP_MAX_MEM; idx++) { 291 for (idx = 0; idx < PNP_MAX_MEM; idx++) {
282 if (!(res->mem_resource[idx].flags & IORESOURCE_AUTO)) 292 if (!(res->mem_resource[idx].flags & IORESOURCE_AUTO))
283 continue; 293 continue;
284 res->mem_resource[idx].start = 0; 294 res->mem_resource[idx].start = 0;
285 res->mem_resource[idx].end = 0; 295 res->mem_resource[idx].end = 0;
286 res->mem_resource[idx].flags = IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET; 296 res->mem_resource[idx].flags =
297 IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET;
287 } 298 }
288} 299}
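Both helpers above lean on the same flag convention, summarized here (semantics inferred from the surrounding code):

	/*	IORESOURCE_UNSET	slot carries no value yet
	 *	IORESOURCE_AUTO		value was, or may be, assigned by the
	 *				allocator; pnp_clean_resource_table()
	 *				wipes only slots still carrying this
	 *				flag, so manually-set resources
	 *				survive the cleaning pass
	 */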
289 300
@@ -306,7 +317,7 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum)
306 return -ENODEV; 317 return -ENODEV;
307 318
308 down(&pnp_res_mutex); 319 down(&pnp_res_mutex);
309 pnp_clean_resource_table(&dev->res); /* start with a fresh slate */ 320 pnp_clean_resource_table(&dev->res); /* start with a fresh slate */
310 if (dev->independent) { 321 if (dev->independent) {
311 port = dev->independent->port; 322 port = dev->independent->port;
312 mem = dev->independent->mem; 323 mem = dev->independent->mem;
@@ -341,10 +352,11 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum)
341 if (depnum) { 352 if (depnum) {
342 struct pnp_option *dep; 353 struct pnp_option *dep;
343 int i; 354 int i;
344 for (i=1,dep=dev->dependent; i<depnum; i++, dep=dep->next) 355 for (i = 1, dep = dev->dependent; i < depnum;
345 if(!dep) 356 i++, dep = dep->next)
357 if (!dep)
346 goto fail; 358 goto fail;
347 port =dep->port; 359 port = dep->port;
348 mem = dep->mem; 360 mem = dep->mem;
349 irq = dep->irq; 361 irq = dep->irq;
350 dma = dep->dma; 362 dma = dep->dma;
@@ -378,7 +390,7 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum)
378 up(&pnp_res_mutex); 390 up(&pnp_res_mutex);
379 return 1; 391 return 1;
380 392
381fail: 393 fail:
382 pnp_clean_resource_table(&dev->res); 394 pnp_clean_resource_table(&dev->res);
383 up(&pnp_res_mutex); 395 up(&pnp_res_mutex);
384 return 0; 396 return 0;
@@ -392,10 +404,12 @@ fail:
392 * 404 *
393 * This function can be used by drivers that want to manually set their resources. 405 * This function can be used by drivers that want to manually set their resources.
394 */ 406 */
395int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table * res, int mode) 407int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res,
408 int mode)
396{ 409{
397 int i; 410 int i;
398 struct pnp_resource_table * bak; 411 struct pnp_resource_table *bak;
412
399 if (!dev || !res) 413 if (!dev || !res)
400 return -EINVAL; 414 return -EINVAL;
401 if (!pnp_can_configure(dev)) 415 if (!pnp_can_configure(dev))
@@ -409,19 +423,19 @@ int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table * res,
409 dev->res = *res; 423 dev->res = *res;
410 if (!(mode & PNP_CONFIG_FORCE)) { 424 if (!(mode & PNP_CONFIG_FORCE)) {
411 for (i = 0; i < PNP_MAX_PORT; i++) { 425 for (i = 0; i < PNP_MAX_PORT; i++) {
412 if(!pnp_check_port(dev,i)) 426 if (!pnp_check_port(dev, i))
413 goto fail; 427 goto fail;
414 } 428 }
415 for (i = 0; i < PNP_MAX_MEM; i++) { 429 for (i = 0; i < PNP_MAX_MEM; i++) {
416 if(!pnp_check_mem(dev,i)) 430 if (!pnp_check_mem(dev, i))
417 goto fail; 431 goto fail;
418 } 432 }
419 for (i = 0; i < PNP_MAX_IRQ; i++) { 433 for (i = 0; i < PNP_MAX_IRQ; i++) {
420 if(!pnp_check_irq(dev,i)) 434 if (!pnp_check_irq(dev, i))
421 goto fail; 435 goto fail;
422 } 436 }
423 for (i = 0; i < PNP_MAX_DMA; i++) { 437 for (i = 0; i < PNP_MAX_DMA; i++) {
424 if(!pnp_check_dma(dev,i)) 438 if (!pnp_check_dma(dev, i))
425 goto fail; 439 goto fail;
426 } 440 }
427 } 441 }
@@ -430,7 +444,7 @@ int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table * res,
430 kfree(bak); 444 kfree(bak);
431 return 0; 445 return 0;
432 446
433fail: 447 fail:
434 dev->res = *bak; 448 dev->res = *bak;
435 up(&pnp_res_mutex); 449 up(&pnp_res_mutex);
436 kfree(bak); 450 kfree(bak);
@@ -440,18 +454,18 @@ fail:
440/** 454/**
441 * pnp_auto_config_dev - automatically assigns resources to a device 455 * pnp_auto_config_dev - automatically assigns resources to a device
442 * @dev: pointer to the desired device 456 * @dev: pointer to the desired device
443 *
444 */ 457 */
445int pnp_auto_config_dev(struct pnp_dev *dev) 458int pnp_auto_config_dev(struct pnp_dev *dev)
446{ 459{
447 struct pnp_option *dep; 460 struct pnp_option *dep;
448 int i = 1; 461 int i = 1;
449 462
450 if(!dev) 463 if (!dev)
451 return -EINVAL; 464 return -EINVAL;
452 465
453 if(!pnp_can_configure(dev)) { 466 if (!pnp_can_configure(dev)) {
454 pnp_dbg("Device %s does not support resource configuration.", dev->dev.bus_id); 467 pnp_dbg("Device %s does not support resource configuration.",
468 dev->dev.bus_id);
455 return -ENODEV; 469 return -ENODEV;
456 } 470 }
457 471
@@ -476,23 +490,22 @@ int pnp_auto_config_dev(struct pnp_dev *dev)
476 * pnp_start_dev - low-level start of the PnP device 490 * pnp_start_dev - low-level start of the PnP device
477 * @dev: pointer to the desired device 491 * @dev: pointer to the desired device
478 * 492 *
479 * assumes that resources have alread been allocated 493 * assumes that resources have already been allocated
480 */ 494 */
481
482int pnp_start_dev(struct pnp_dev *dev) 495int pnp_start_dev(struct pnp_dev *dev)
483{ 496{
484 if (!pnp_can_write(dev)) { 497 if (!pnp_can_write(dev)) {
485 pnp_dbg("Device %s does not support activation.", dev->dev.bus_id); 498 pnp_dbg("Device %s does not support activation.",
499 dev->dev.bus_id);
486 return -EINVAL; 500 return -EINVAL;
487 } 501 }
488 502
489 if (dev->protocol->set(dev, &dev->res)<0) { 503 if (dev->protocol->set(dev, &dev->res) < 0) {
490 pnp_err("Failed to activate device %s.", dev->dev.bus_id); 504 pnp_err("Failed to activate device %s.", dev->dev.bus_id);
491 return -EIO; 505 return -EIO;
492 } 506 }
493 507
494 pnp_info("Device %s activated.", dev->dev.bus_id); 508 pnp_info("Device %s activated.", dev->dev.bus_id);
495
496 return 0; 509 return 0;
497} 510}
498 511
@@ -502,20 +515,19 @@ int pnp_start_dev(struct pnp_dev *dev)
502 * 515 *
503 * does not free resources 516 * does not free resources
504 */ 517 */
505
506int pnp_stop_dev(struct pnp_dev *dev) 518int pnp_stop_dev(struct pnp_dev *dev)
507{ 519{
508 if (!pnp_can_disable(dev)) { 520 if (!pnp_can_disable(dev)) {
509 pnp_dbg("Device %s does not support disabling.", dev->dev.bus_id); 521 pnp_dbg("Device %s does not support disabling.",
522 dev->dev.bus_id);
510 return -EINVAL; 523 return -EINVAL;
511 } 524 }
512 if (dev->protocol->disable(dev)<0) { 525 if (dev->protocol->disable(dev) < 0) {
513 pnp_err("Failed to disable device %s.", dev->dev.bus_id); 526 pnp_err("Failed to disable device %s.", dev->dev.bus_id);
514 return -EIO; 527 return -EIO;
515 } 528 }
516 529
517 pnp_info("Device %s disabled.", dev->dev.bus_id); 530 pnp_info("Device %s disabled.", dev->dev.bus_id);
518
519 return 0; 531 return 0;
520} 532}
521 533
@@ -531,9 +543,8 @@ int pnp_activate_dev(struct pnp_dev *dev)
531 543
532 if (!dev) 544 if (!dev)
533 return -EINVAL; 545 return -EINVAL;
534 if (dev->active) { 546 if (dev->active)
535 return 0; /* the device is already active */ 547 return 0; /* the device is already active */
536 }
537 548
538 /* ensure resources are allocated */ 549 /* ensure resources are allocated */
539 if (pnp_auto_config_dev(dev)) 550 if (pnp_auto_config_dev(dev))
@@ -544,7 +555,6 @@ int pnp_activate_dev(struct pnp_dev *dev)
544 return error; 555 return error;
545 556
546 dev->active = 1; 557 dev->active = 1;
547
548 return 1; 558 return 1;
549} 559}
550 560
@@ -558,11 +568,10 @@ int pnp_disable_dev(struct pnp_dev *dev)
558{ 568{
559 int error; 569 int error;
560 570
561 if (!dev) 571 if (!dev)
562 return -EINVAL; 572 return -EINVAL;
563 if (!dev->active) { 573 if (!dev->active)
564 return 0; /* the device is already disabled */ 574 return 0; /* the device is already disabled */
565 }
566 575
567 error = pnp_stop_dev(dev); 576 error = pnp_stop_dev(dev);
568 if (error) 577 if (error)
@@ -583,10 +592,9 @@ int pnp_disable_dev(struct pnp_dev *dev)
583 * @resource: pointer to resource to be changed 592 * @resource: pointer to resource to be changed
584 * @start: start of region 593 * @start: start of region
585 * @size: size of region 594 * @size: size of region
586 *
587 */ 595 */
588void pnp_resource_change(struct resource *resource, resource_size_t start, 596void pnp_resource_change(struct resource *resource, resource_size_t start,
589 resource_size_t size) 597 resource_size_t size)
590{ 598{
591 if (resource == NULL) 599 if (resource == NULL)
592 return; 600 return;
@@ -595,11 +603,7 @@ void pnp_resource_change(struct resource *resource, resource_size_t start,
595 resource->end = start + size - 1; 603 resource->end = start + size - 1;
596} 604}
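Together with the exports below, pnp_resource_change() forms the manual-configuration path. A hedged usage sketch in the style of the ISA drivers of this era (the port value is hypothetical):

	struct pnp_resource_table res;

	/* pin the first port region to a legacy address, then let the
	 * core validate and activate it */
	pnp_init_resource_table(&res);
	pnp_resource_change(&res.port_resource[0], 0x220, 16);
	if (pnp_manual_config_dev(dev, &res, 0) == 0)
		pnp_activate_dev(dev);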
597 605
598
599EXPORT_SYMBOL(pnp_manual_config_dev); 606EXPORT_SYMBOL(pnp_manual_config_dev);
600#if 0
601EXPORT_SYMBOL(pnp_auto_config_dev);
602#endif
603EXPORT_SYMBOL(pnp_start_dev); 607EXPORT_SYMBOL(pnp_start_dev);
604EXPORT_SYMBOL(pnp_stop_dev); 608EXPORT_SYMBOL(pnp_stop_dev);
605EXPORT_SYMBOL(pnp_activate_dev); 609EXPORT_SYMBOL(pnp_activate_dev);
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index fcd32ac575c3..6a2a3c2f4d5e 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -34,13 +34,13 @@ static int num = 0;
34 * used by the kernel (PCI root, ...), as it is harmless and they were 34 * used by the kernel (PCI root, ...), as it is harmless and they were
35 * already present in pnpbios. But there is an exception for devices that 35 * already present in pnpbios. But there is an exception for devices that
36 * have irqs (PIC, Timer) because we call acpi_register_gsi. 36 * have irqs (PIC, Timer) because we call acpi_register_gsi.
37 * Finaly only devices that have a CRS method need to be in this list. 37 * Finally, only devices that have a CRS method need to be in this list.
38 */ 38 */
39static __initdata struct acpi_device_id excluded_id_list[] ={ 39static struct __initdata acpi_device_id excluded_id_list[] = {
40 {"PNP0C09", 0}, /* EC */ 40 {"PNP0C09", 0}, /* EC */
41 {"PNP0C0F", 0}, /* Link device */ 41 {"PNP0C0F", 0}, /* Link device */
42 {"PNP0000", 0}, /* PIC */ 42 {"PNP0000", 0}, /* PIC */
43 {"PNP0100", 0}, /* Timer */ 43 {"PNP0100", 0}, /* Timer */
44 {"", 0}, 44 {"", 0},
45}; 45};
46 46
@@ -84,15 +84,18 @@ static void __init pnpidacpi_to_pnpid(char *id, char *str)
84 str[7] = '\0'; 84 str[7] = '\0';
85} 85}
86 86
87static int pnpacpi_get_resources(struct pnp_dev * dev, struct pnp_resource_table * res) 87static int pnpacpi_get_resources(struct pnp_dev *dev,
88 struct pnp_resource_table *res)
88{ 89{
89 acpi_status status; 90 acpi_status status;
90 status = pnpacpi_parse_allocated_resource((acpi_handle)dev->data, 91
91 &dev->res); 92 status = pnpacpi_parse_allocated_resource((acpi_handle) dev->data,
93 &dev->res);
92 return ACPI_FAILURE(status) ? -ENODEV : 0; 94 return ACPI_FAILURE(status) ? -ENODEV : 0;
93} 95}
94 96
95static int pnpacpi_set_resources(struct pnp_dev * dev, struct pnp_resource_table * res) 97static int pnpacpi_set_resources(struct pnp_dev *dev,
98 struct pnp_resource_table *res)
96{ 99{
97 acpi_handle handle = dev->data; 100 acpi_handle handle = dev->data;
98 struct acpi_buffer buffer; 101 struct acpi_buffer buffer;
@@ -119,27 +122,29 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev)
119 acpi_status status; 122 acpi_status status;
120 123
121 /* acpi_unregister_gsi(pnp_irq(dev, 0)); */ 124 /* acpi_unregister_gsi(pnp_irq(dev, 0)); */
122 status = acpi_evaluate_object((acpi_handle)dev->data, 125 status = acpi_evaluate_object((acpi_handle) dev->data,
123 "_DIS", NULL, NULL); 126 "_DIS", NULL, NULL);
124 return ACPI_FAILURE(status) ? -ENODEV : 0; 127 return ACPI_FAILURE(status) ? -ENODEV : 0;
125} 128}
126 129
127static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state) 130static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
128{ 131{
129 return acpi_bus_set_power((acpi_handle)dev->data, 132 return acpi_bus_set_power((acpi_handle) dev->data,
130 acpi_pm_device_sleep_state(&dev->dev, 133 acpi_pm_device_sleep_state(&dev->dev,
131 device_may_wakeup(&dev->dev), NULL)); 134 device_may_wakeup
135 (&dev->dev),
136 NULL));
132} 137}
133 138
134static int pnpacpi_resume(struct pnp_dev *dev) 139static int pnpacpi_resume(struct pnp_dev *dev)
135{ 140{
136 return acpi_bus_set_power((acpi_handle)dev->data, ACPI_STATE_D0); 141 return acpi_bus_set_power((acpi_handle) dev->data, ACPI_STATE_D0);
137} 142}
138 143
139static struct pnp_protocol pnpacpi_protocol = { 144static struct pnp_protocol pnpacpi_protocol = {
140 .name = "Plug and Play ACPI", 145 .name = "Plug and Play ACPI",
141 .get = pnpacpi_get_resources, 146 .get = pnpacpi_get_resources,
142 .set = pnpacpi_set_resources, 147 .set = pnpacpi_set_resources,
143 .disable = pnpacpi_disable_resources, 148 .disable = pnpacpi_disable_resources,
144 .suspend = pnpacpi_suspend, 149 .suspend = pnpacpi_suspend,
145 .resume = pnpacpi_resume, 150 .resume = pnpacpi_resume,
@@ -154,17 +159,17 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
154 159
155 status = acpi_get_handle(device->handle, "_CRS", &temp); 160 status = acpi_get_handle(device->handle, "_CRS", &temp);
156 if (ACPI_FAILURE(status) || !ispnpidacpi(acpi_device_hid(device)) || 161 if (ACPI_FAILURE(status) || !ispnpidacpi(acpi_device_hid(device)) ||
157 is_exclusive_device(device)) 162 is_exclusive_device(device))
158 return 0; 163 return 0;
159 164
160 pnp_dbg("ACPI device : hid %s", acpi_device_hid(device)); 165 pnp_dbg("ACPI device : hid %s", acpi_device_hid(device));
161 dev = kzalloc(sizeof(struct pnp_dev), GFP_KERNEL); 166 dev = kzalloc(sizeof(struct pnp_dev), GFP_KERNEL);
162 if (!dev) { 167 if (!dev) {
163 pnp_err("Out of memory"); 168 pnp_err("Out of memory");
164 return -ENOMEM; 169 return -ENOMEM;
165 } 170 }
166 dev->data = device->handle; 171 dev->data = device->handle;
167 /* .enabled means if the device can decode the resources */ 172 /* .enabled means the device can decode the resources */
168 dev->active = device->status.enabled; 173 dev->active = device->status.enabled;
169 status = acpi_get_handle(device->handle, "_SRS", &temp); 174 status = acpi_get_handle(device->handle, "_SRS", &temp);
170 if (ACPI_SUCCESS(status)) 175 if (ACPI_SUCCESS(status))
@@ -194,20 +199,23 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
194 pnpidacpi_to_pnpid(acpi_device_hid(device), dev_id->id); 199 pnpidacpi_to_pnpid(acpi_device_hid(device), dev_id->id);
195 pnp_add_id(dev_id, dev); 200 pnp_add_id(dev_id, dev);
196 201
197 if(dev->active) { 202 if (dev->active) {
198 /* parse allocated resource */ 203 /* parse allocated resource */
199 status = pnpacpi_parse_allocated_resource(device->handle, &dev->res); 204 status = pnpacpi_parse_allocated_resource(device->handle,
205 &dev->res);
200 if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { 206 if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) {
201 pnp_err("PnPACPI: METHOD_NAME__CRS failure for %s", dev_id->id); 207 pnp_err("PnPACPI: METHOD_NAME__CRS failure for %s",
208 dev_id->id);
202 goto err1; 209 goto err1;
203 } 210 }
204 } 211 }
205 212
206 if(dev->capabilities & PNP_CONFIGURABLE) { 213 if (dev->capabilities & PNP_CONFIGURABLE) {
207 status = pnpacpi_parse_resource_option_data(device->handle, 214 status = pnpacpi_parse_resource_option_data(device->handle,
208 dev); 215 dev);
209 if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { 216 if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) {
210 pnp_err("PnPACPI: METHOD_NAME__PRS failure for %s", dev_id->id); 217 pnp_err("PnPACPI: METHOD_NAME__PRS failure for %s",
218 dev_id->id);
211 goto err1; 219 goto err1;
212 } 220 }
213 } 221 }
@@ -233,18 +241,19 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
233 if (!dev->active) 241 if (!dev->active)
234 pnp_init_resource_table(&dev->res); 242 pnp_init_resource_table(&dev->res);
235 pnp_add_device(dev); 243 pnp_add_device(dev);
236 num ++; 244 num++;
237 245
238 return AE_OK; 246 return AE_OK;
239err1: 247 err1:
240 kfree(dev_id); 248 kfree(dev_id);
241err: 249 err:
242 kfree(dev); 250 kfree(dev);
243 return -EINVAL; 251 return -EINVAL;
244} 252}
245 253
246static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle, 254static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle,
247 u32 lvl, void *context, void **rv) 255 u32 lvl, void *context,
256 void **rv)
248{ 257{
249 struct acpi_device *device; 258 struct acpi_device *device;
250 259
@@ -257,23 +266,22 @@ static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle,
257 266
258static int __init acpi_pnp_match(struct device *dev, void *_pnp) 267static int __init acpi_pnp_match(struct device *dev, void *_pnp)
259{ 268{
260 struct acpi_device *acpi = to_acpi_device(dev); 269 struct acpi_device *acpi = to_acpi_device(dev);
261 struct pnp_dev *pnp = _pnp; 270 struct pnp_dev *pnp = _pnp;
262 271
263 /* true means it matched */ 272 /* true means it matched */
264 return acpi->flags.hardware_id 273 return acpi->flags.hardware_id
265 && !acpi_get_physical_device(acpi->handle) 274 && !acpi_get_physical_device(acpi->handle)
266 && compare_pnp_id(pnp->id, acpi->pnp.hardware_id); 275 && compare_pnp_id(pnp->id, acpi->pnp.hardware_id);
267} 276}
268 277
269static int __init acpi_pnp_find_device(struct device *dev, acpi_handle *handle) 278static int __init acpi_pnp_find_device(struct device *dev, acpi_handle * handle)
270{ 279{
271 struct device *adev; 280 struct device *adev;
272 struct acpi_device *acpi; 281 struct acpi_device *acpi;
273 282
274 adev = bus_find_device(&acpi_bus_type, NULL, 283 adev = bus_find_device(&acpi_bus_type, NULL,
275 to_pnp_dev(dev), 284 to_pnp_dev(dev), acpi_pnp_match);
276 acpi_pnp_match);
277 if (!adev) 285 if (!adev)
278 return -ENODEV; 286 return -ENODEV;
279 287
@@ -287,7 +295,7 @@ static int __init acpi_pnp_find_device(struct device *dev, acpi_handle *handle)
287 * pnpdev->dev.archdata.acpi_handle point to its ACPI sibling. 295 * pnpdev->dev.archdata.acpi_handle point to its ACPI sibling.
288 */ 296 */
289static struct acpi_bus_type __initdata acpi_pnp_bus = { 297static struct acpi_bus_type __initdata acpi_pnp_bus = {
290 .bus = &pnp_bus_type, 298 .bus = &pnp_bus_type,
291 .find_device = acpi_pnp_find_device, 299 .find_device = acpi_pnp_find_device,
292}; 300};
293 301
@@ -307,6 +315,7 @@ static int __init pnpacpi_init(void)
307 pnp_platform_devices = 1; 315 pnp_platform_devices = 1;
308 return 0; 316 return 0;
309} 317}
318
310subsys_initcall(pnpacpi_init); 319subsys_initcall(pnpacpi_init);
311 320
312static int __init pnpacpi_setup(char *str) 321static int __init pnpacpi_setup(char *str)
@@ -317,8 +326,5 @@ static int __init pnpacpi_setup(char *str)
317 pnpacpi_disabled = 1; 326 pnpacpi_disabled = 1;
318 return 1; 327 return 1;
319} 328}
320__setup("pnpacpi=", pnpacpi_setup);
321 329
322#if 0 330__setup("pnpacpi=", pnpacpi_setup);
323EXPORT_SYMBOL(pnpacpi_protocol);
324#endif
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 118ac9779b3c..ce5027feb3da 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -40,8 +40,7 @@ static int irq_flags(int triggering, int polarity)
40 flag = IORESOURCE_IRQ_LOWLEVEL; 40 flag = IORESOURCE_IRQ_LOWLEVEL;
41 else 41 else
42 flag = IORESOURCE_IRQ_HIGHLEVEL; 42 flag = IORESOURCE_IRQ_HIGHLEVEL;
43 } 43 } else {
44 else {
45 if (polarity == ACPI_ACTIVE_LOW) 44 if (polarity == ACPI_ACTIVE_LOW)
46 flag = IORESOURCE_IRQ_LOWEDGE; 45 flag = IORESOURCE_IRQ_LOWEDGE;
47 else 46 else
@@ -72,9 +71,9 @@ static void decode_irq_flags(int flag, int *triggering, int *polarity)
72 } 71 }
73} 72}
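irq_flags() above and decode_irq_flags() are inverses over this mapping (the active-high edge case falls outside the hunk shown and is presumed by symmetry):

	/*	level + active-low	IORESOURCE_IRQ_LOWLEVEL
	 *	level + active-high	IORESOURCE_IRQ_HIGHLEVEL
	 *	edge  + active-low	IORESOURCE_IRQ_LOWEDGE
	 *	edge  + active-high	IORESOURCE_IRQ_HIGHEDGE
	 */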
74 73
75static void 74static void pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res,
76pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi, 75 u32 gsi, int triggering,
77 int triggering, int polarity, int shareable) 76 int polarity, int shareable)
78{ 77{
79 int i = 0; 78 int i = 0;
80 int irq; 79 int irq;
@@ -83,12 +82,12 @@ pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi,
83 return; 82 return;
84 83
85 while (!(res->irq_resource[i].flags & IORESOURCE_UNSET) && 84 while (!(res->irq_resource[i].flags & IORESOURCE_UNSET) &&
86 i < PNP_MAX_IRQ) 85 i < PNP_MAX_IRQ)
87 i++; 86 i++;
88 if (i >= PNP_MAX_IRQ) 87 if (i >= PNP_MAX_IRQ)
89 return; 88 return;
90 89
91 res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag 90 res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag
92 res->irq_resource[i].flags |= irq_flags(triggering, polarity); 91 res->irq_resource[i].flags |= irq_flags(triggering, polarity);
93 irq = acpi_register_gsi(gsi, triggering, polarity); 92 irq = acpi_register_gsi(gsi, triggering, polarity);
94 if (irq < 0) { 93 if (irq < 0) {
@@ -147,17 +146,19 @@ static int dma_flags(int type, int bus_master, int transfer)
147 return flags; 146 return flags;
148} 147}
149 148
150static void 149static void pnpacpi_parse_allocated_dmaresource(struct pnp_resource_table *res,
151pnpacpi_parse_allocated_dmaresource(struct pnp_resource_table *res, u32 dma, 150 u32 dma, int type,
152 int type, int bus_master, int transfer) 151 int bus_master, int transfer)
153{ 152{
154 int i = 0; 153 int i = 0;
154
155 while (i < PNP_MAX_DMA && 155 while (i < PNP_MAX_DMA &&
156 !(res->dma_resource[i].flags & IORESOURCE_UNSET)) 156 !(res->dma_resource[i].flags & IORESOURCE_UNSET))
157 i++; 157 i++;
158 if (i < PNP_MAX_DMA) { 158 if (i < PNP_MAX_DMA) {
159 res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag 159 res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag
160 res->dma_resource[i].flags |= dma_flags(type, bus_master, transfer); 160 res->dma_resource[i].flags |=
161 dma_flags(type, bus_master, transfer);
161 if (dma == -1) { 162 if (dma == -1) {
162 res->dma_resource[i].flags |= IORESOURCE_DISABLED; 163 res->dma_resource[i].flags |= IORESOURCE_DISABLED;
163 return; 164 return;
@@ -167,19 +168,19 @@ pnpacpi_parse_allocated_dmaresource(struct pnp_resource_table *res, u32 dma,
167 } 168 }
168} 169}
169 170
170static void 171static void pnpacpi_parse_allocated_ioresource(struct pnp_resource_table *res,
171pnpacpi_parse_allocated_ioresource(struct pnp_resource_table *res, 172 u64 io, u64 len, int io_decode)
172 u64 io, u64 len, int io_decode)
173{ 173{
174 int i = 0; 174 int i = 0;
175
175 while (!(res->port_resource[i].flags & IORESOURCE_UNSET) && 176 while (!(res->port_resource[i].flags & IORESOURCE_UNSET) &&
176 i < PNP_MAX_PORT) 177 i < PNP_MAX_PORT)
177 i++; 178 i++;
178 if (i < PNP_MAX_PORT) { 179 if (i < PNP_MAX_PORT) {
179 res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag 180 res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag
180 if (io_decode == ACPI_DECODE_16) 181 if (io_decode == ACPI_DECODE_16)
181 res->port_resource[i].flags |= PNP_PORT_FLAG_16BITADDR; 182 res->port_resource[i].flags |= PNP_PORT_FLAG_16BITADDR;
182 if (len <= 0 || (io + len -1) >= 0x10003) { 183 if (len <= 0 || (io + len - 1) >= 0x10003) {
183 res->port_resource[i].flags |= IORESOURCE_DISABLED; 184 res->port_resource[i].flags |= IORESOURCE_DISABLED;
184 return; 185 return;
185 } 186 }
@@ -188,21 +189,22 @@ pnpacpi_parse_allocated_ioresource(struct pnp_resource_table *res,
188 } 189 }
189} 190}
190 191
191static void 192static void pnpacpi_parse_allocated_memresource(struct pnp_resource_table *res,
192pnpacpi_parse_allocated_memresource(struct pnp_resource_table *res, 193 u64 mem, u64 len,
193 u64 mem, u64 len, int write_protect) 194 int write_protect)
194{ 195{
195 int i = 0; 196 int i = 0;
197
196 while (!(res->mem_resource[i].flags & IORESOURCE_UNSET) && 198 while (!(res->mem_resource[i].flags & IORESOURCE_UNSET) &&
197 (i < PNP_MAX_MEM)) 199 (i < PNP_MAX_MEM))
198 i++; 200 i++;
199 if (i < PNP_MAX_MEM) { 201 if (i < PNP_MAX_MEM) {
200 res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag 202 res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag
201 if (len <= 0) { 203 if (len <= 0) {
202 res->mem_resource[i].flags |= IORESOURCE_DISABLED; 204 res->mem_resource[i].flags |= IORESOURCE_DISABLED;
203 return; 205 return;
204 } 206 }
205 if(write_protect == ACPI_READ_WRITE_MEMORY) 207 if (write_protect == ACPI_READ_WRITE_MEMORY)
206 res->mem_resource[i].flags |= IORESOURCE_MEM_WRITEABLE; 208 res->mem_resource[i].flags |= IORESOURCE_MEM_WRITEABLE;
207 209
208 res->mem_resource[i].start = mem; 210 res->mem_resource[i].start = mem;
@@ -210,9 +212,8 @@ pnpacpi_parse_allocated_memresource(struct pnp_resource_table *res,
210 } 212 }
211} 213}
212 214
213static void 215static void pnpacpi_parse_allocated_address_space(struct pnp_resource_table *res_table,
214pnpacpi_parse_allocated_address_space(struct pnp_resource_table *res_table, 216 struct acpi_resource *res)
215 struct acpi_resource *res)
216{ 217{
217 struct acpi_resource_address64 addr, *p = &addr; 218 struct acpi_resource_address64 addr, *p = &addr;
218 acpi_status status; 219 acpi_status status;
@@ -220,7 +221,7 @@ pnpacpi_parse_allocated_address_space(struct pnp_resource_table *res_table,
220 status = acpi_resource_to_address64(res, p); 221 status = acpi_resource_to_address64(res, p);
221 if (!ACPI_SUCCESS(status)) { 222 if (!ACPI_SUCCESS(status)) {
222 pnp_warn("PnPACPI: failed to convert resource type %d", 223 pnp_warn("PnPACPI: failed to convert resource type %d",
223 res->type); 224 res->type);
224 return; 225 return;
225 } 226 }
226 227
@@ -229,17 +230,20 @@ pnpacpi_parse_allocated_address_space(struct pnp_resource_table *res_table,
229 230
230 if (p->resource_type == ACPI_MEMORY_RANGE) 231 if (p->resource_type == ACPI_MEMORY_RANGE)
231 pnpacpi_parse_allocated_memresource(res_table, 232 pnpacpi_parse_allocated_memresource(res_table,
232 p->minimum, p->address_length, p->info.mem.write_protect); 233 p->minimum, p->address_length,
234 p->info.mem.write_protect);
233 else if (p->resource_type == ACPI_IO_RANGE) 235 else if (p->resource_type == ACPI_IO_RANGE)
234 pnpacpi_parse_allocated_ioresource(res_table, 236 pnpacpi_parse_allocated_ioresource(res_table,
235 p->minimum, p->address_length, 237 p->minimum, p->address_length,
236 p->granularity == 0xfff ? ACPI_DECODE_10 : ACPI_DECODE_16); 238 p->granularity == 0xfff ? ACPI_DECODE_10 :
239 ACPI_DECODE_16);
237} 240}
238 241
239static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, 242static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
240 void *data) 243 void *data)
241{ 244{
242 struct pnp_resource_table *res_table = (struct pnp_resource_table *)data; 245 struct pnp_resource_table *res_table =
246 (struct pnp_resource_table *)data;
243 int i; 247 int i;
244 248
245 switch (res->type) { 249 switch (res->type) {
@@ -260,17 +264,17 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
260 case ACPI_RESOURCE_TYPE_DMA: 264 case ACPI_RESOURCE_TYPE_DMA:
261 if (res->data.dma.channel_count > 0) 265 if (res->data.dma.channel_count > 0)
262 pnpacpi_parse_allocated_dmaresource(res_table, 266 pnpacpi_parse_allocated_dmaresource(res_table,
263 res->data.dma.channels[0], 267 res->data.dma.channels[0],
264 res->data.dma.type, 268 res->data.dma.type,
265 res->data.dma.bus_master, 269 res->data.dma.bus_master,
266 res->data.dma.transfer); 270 res->data.dma.transfer);
267 break; 271 break;
268 272
269 case ACPI_RESOURCE_TYPE_IO: 273 case ACPI_RESOURCE_TYPE_IO:
270 pnpacpi_parse_allocated_ioresource(res_table, 274 pnpacpi_parse_allocated_ioresource(res_table,
271 res->data.io.minimum, 275 res->data.io.minimum,
272 res->data.io.address_length, 276 res->data.io.address_length,
273 res->data.io.io_decode); 277 res->data.io.io_decode);
274 break; 278 break;
275 279
276 case ACPI_RESOURCE_TYPE_START_DEPENDENT: 280 case ACPI_RESOURCE_TYPE_START_DEPENDENT:
@@ -279,9 +283,9 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
279 283
280 case ACPI_RESOURCE_TYPE_FIXED_IO: 284 case ACPI_RESOURCE_TYPE_FIXED_IO:
281 pnpacpi_parse_allocated_ioresource(res_table, 285 pnpacpi_parse_allocated_ioresource(res_table,
282 res->data.fixed_io.address, 286 res->data.fixed_io.address,
283 res->data.fixed_io.address_length, 287 res->data.fixed_io.address_length,
284 ACPI_DECODE_10); 288 ACPI_DECODE_10);
285 break; 289 break;
286 290
287 case ACPI_RESOURCE_TYPE_VENDOR: 291 case ACPI_RESOURCE_TYPE_VENDOR:
@@ -292,21 +296,21 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
292 296
293 case ACPI_RESOURCE_TYPE_MEMORY24: 297 case ACPI_RESOURCE_TYPE_MEMORY24:
294 pnpacpi_parse_allocated_memresource(res_table, 298 pnpacpi_parse_allocated_memresource(res_table,
295 res->data.memory24.minimum, 299 res->data.memory24.minimum,
296 res->data.memory24.address_length, 300 res->data.memory24.address_length,
297 res->data.memory24.write_protect); 301 res->data.memory24.write_protect);
298 break; 302 break;
299 case ACPI_RESOURCE_TYPE_MEMORY32: 303 case ACPI_RESOURCE_TYPE_MEMORY32:
300 pnpacpi_parse_allocated_memresource(res_table, 304 pnpacpi_parse_allocated_memresource(res_table,
301 res->data.memory32.minimum, 305 res->data.memory32.minimum,
302 res->data.memory32.address_length, 306 res->data.memory32.address_length,
303 res->data.memory32.write_protect); 307 res->data.memory32.write_protect);
304 break; 308 break;
305 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: 309 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
306 pnpacpi_parse_allocated_memresource(res_table, 310 pnpacpi_parse_allocated_memresource(res_table,
307 res->data.fixed_memory32.address, 311 res->data.fixed_memory32.address,
308 res->data.fixed_memory32.address_length, 312 res->data.fixed_memory32.address_length,
309 res->data.fixed_memory32.write_protect); 313 res->data.fixed_memory32.write_protect);
310 break; 314 break;
311 case ACPI_RESOURCE_TYPE_ADDRESS16: 315 case ACPI_RESOURCE_TYPE_ADDRESS16:
312 case ACPI_RESOURCE_TYPE_ADDRESS32: 316 case ACPI_RESOURCE_TYPE_ADDRESS32:
@@ -343,18 +347,21 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
343 return AE_OK; 347 return AE_OK;
344} 348}
345 349
346acpi_status pnpacpi_parse_allocated_resource(acpi_handle handle, struct pnp_resource_table *res) 350acpi_status pnpacpi_parse_allocated_resource(acpi_handle handle,
351 struct pnp_resource_table * res)
347{ 352{
348 /* Blank the resource table values */ 353 /* Blank the resource table values */
349 pnp_init_resource_table(res); 354 pnp_init_resource_table(res);
350 355
351 return acpi_walk_resources(handle, METHOD_NAME__CRS, pnpacpi_allocated_resource, res); 356 return acpi_walk_resources(handle, METHOD_NAME__CRS,
357 pnpacpi_allocated_resource, res);
352} 358}
353 359
354static void pnpacpi_parse_dma_option(struct pnp_option *option, struct acpi_resource_dma *p) 360static void pnpacpi_parse_dma_option(struct pnp_option *option,
361 struct acpi_resource_dma *p)
355{ 362{
356 int i; 363 int i;
357 struct pnp_dma * dma; 364 struct pnp_dma *dma;
358 365
359 if (p->channel_count == 0) 366 if (p->channel_count == 0)
360 return; 367 return;
@@ -362,18 +369,16 @@ static void pnpacpi_parse_dma_option(struct pnp_option *option, struct acpi_reso
362 if (!dma) 369 if (!dma)
363 return; 370 return;
364 371
365 for(i = 0; i < p->channel_count; i++) 372 for (i = 0; i < p->channel_count; i++)
366 dma->map |= 1 << p->channels[i]; 373 dma->map |= 1 << p->channels[i];
367 374
368 dma->flags = dma_flags(p->type, p->bus_master, p->transfer); 375 dma->flags = dma_flags(p->type, p->bus_master, p->transfer);
369 376
370 pnp_register_dma_resource(option, dma); 377 pnp_register_dma_resource(option, dma);
371 return;
372} 378}
373 379
374
375static void pnpacpi_parse_irq_option(struct pnp_option *option, 380static void pnpacpi_parse_irq_option(struct pnp_option *option,
376 struct acpi_resource_irq *p) 381 struct acpi_resource_irq *p)
377{ 382{
378 int i; 383 int i;
379 struct pnp_irq *irq; 384 struct pnp_irq *irq;
@@ -384,17 +389,16 @@ static void pnpacpi_parse_irq_option(struct pnp_option *option,
384 if (!irq) 389 if (!irq)
385 return; 390 return;
386 391
387 for(i = 0; i < p->interrupt_count; i++) 392 for (i = 0; i < p->interrupt_count; i++)
388 if (p->interrupts[i]) 393 if (p->interrupts[i])
389 __set_bit(p->interrupts[i], irq->map); 394 __set_bit(p->interrupts[i], irq->map);
390 irq->flags = irq_flags(p->triggering, p->polarity); 395 irq->flags = irq_flags(p->triggering, p->polarity);
391 396
392 pnp_register_irq_resource(option, irq); 397 pnp_register_irq_resource(option, irq);
393 return;
394} 398}
395 399
396static void pnpacpi_parse_ext_irq_option(struct pnp_option *option, 400static void pnpacpi_parse_ext_irq_option(struct pnp_option *option,
397 struct acpi_resource_extended_irq *p) 401 struct acpi_resource_extended_irq *p)
398{ 402{
399 int i; 403 int i;
400 struct pnp_irq *irq; 404 struct pnp_irq *irq;
@@ -405,18 +409,16 @@ static void pnpacpi_parse_ext_irq_option(struct pnp_option *option,
405 if (!irq) 409 if (!irq)
406 return; 410 return;
407 411
408 for(i = 0; i < p->interrupt_count; i++) 412 for (i = 0; i < p->interrupt_count; i++)
409 if (p->interrupts[i]) 413 if (p->interrupts[i])
410 __set_bit(p->interrupts[i], irq->map); 414 __set_bit(p->interrupts[i], irq->map);
411 irq->flags = irq_flags(p->triggering, p->polarity); 415 irq->flags = irq_flags(p->triggering, p->polarity);
412 416
413 pnp_register_irq_resource(option, irq); 417 pnp_register_irq_resource(option, irq);
414 return;
415} 418}
416 419
417static void 420static void pnpacpi_parse_port_option(struct pnp_option *option,
418pnpacpi_parse_port_option(struct pnp_option *option, 421 struct acpi_resource_io *io)
419 struct acpi_resource_io *io)
420{ 422{
421 struct pnp_port *port; 423 struct pnp_port *port;
422 424
@@ -430,14 +432,12 @@ pnpacpi_parse_port_option(struct pnp_option *option,
430 port->align = io->alignment; 432 port->align = io->alignment;
431 port->size = io->address_length; 433 port->size = io->address_length;
432 port->flags = ACPI_DECODE_16 == io->io_decode ? 434 port->flags = ACPI_DECODE_16 == io->io_decode ?
433 PNP_PORT_FLAG_16BITADDR : 0; 435 PNP_PORT_FLAG_16BITADDR : 0;
434 pnp_register_port_resource(option, port); 436 pnp_register_port_resource(option, port);
435 return;
436} 437}
437 438
438static void 439static void pnpacpi_parse_fixed_port_option(struct pnp_option *option,
439pnpacpi_parse_fixed_port_option(struct pnp_option *option, 440 struct acpi_resource_fixed_io *io)
440 struct acpi_resource_fixed_io *io)
441{ 441{
442 struct pnp_port *port; 442 struct pnp_port *port;
443 443
@@ -451,12 +451,10 @@ pnpacpi_parse_fixed_port_option(struct pnp_option *option,
451 port->align = 0; 451 port->align = 0;
452 port->flags = PNP_PORT_FLAG_FIXED; 452 port->flags = PNP_PORT_FLAG_FIXED;
453 pnp_register_port_resource(option, port); 453 pnp_register_port_resource(option, port);
454 return;
455} 454}
456 455
457static void 456static void pnpacpi_parse_mem24_option(struct pnp_option *option,
458pnpacpi_parse_mem24_option(struct pnp_option *option, 457 struct acpi_resource_memory24 *p)
459 struct acpi_resource_memory24 *p)
460{ 458{
461 struct pnp_mem *mem; 459 struct pnp_mem *mem;
462 460
@@ -471,15 +469,13 @@ pnpacpi_parse_mem24_option(struct pnp_option *option,
471 mem->size = p->address_length; 469 mem->size = p->address_length;
472 470
473 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? 471 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ?
474 IORESOURCE_MEM_WRITEABLE : 0; 472 IORESOURCE_MEM_WRITEABLE : 0;
475 473
476 pnp_register_mem_resource(option, mem); 474 pnp_register_mem_resource(option, mem);
477 return;
478} 475}
479 476
480static void 477static void pnpacpi_parse_mem32_option(struct pnp_option *option,
481pnpacpi_parse_mem32_option(struct pnp_option *option, 478 struct acpi_resource_memory32 *p)
482 struct acpi_resource_memory32 *p)
483{ 479{
484 struct pnp_mem *mem; 480 struct pnp_mem *mem;
485 481
@@ -494,15 +490,13 @@ pnpacpi_parse_mem32_option(struct pnp_option *option,
494 mem->size = p->address_length; 490 mem->size = p->address_length;
495 491
496 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? 492 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ?
497 IORESOURCE_MEM_WRITEABLE : 0; 493 IORESOURCE_MEM_WRITEABLE : 0;
498 494
499 pnp_register_mem_resource(option, mem); 495 pnp_register_mem_resource(option, mem);
500 return;
501} 496}
502 497
503static void 498static void pnpacpi_parse_fixed_mem32_option(struct pnp_option *option,
504pnpacpi_parse_fixed_mem32_option(struct pnp_option *option, 499 struct acpi_resource_fixed_memory32 *p)
505 struct acpi_resource_fixed_memory32 *p)
506{ 500{
507 struct pnp_mem *mem; 501 struct pnp_mem *mem;
508 502
@@ -516,14 +510,13 @@ pnpacpi_parse_fixed_mem32_option(struct pnp_option *option,
516 mem->align = 0; 510 mem->align = 0;
517 511
518 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? 512 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ?
519 IORESOURCE_MEM_WRITEABLE : 0; 513 IORESOURCE_MEM_WRITEABLE : 0;
520 514
521 pnp_register_mem_resource(option, mem); 515 pnp_register_mem_resource(option, mem);
522 return;
523} 516}
524 517
525static void 518static void pnpacpi_parse_address_option(struct pnp_option *option,
526pnpacpi_parse_address_option(struct pnp_option *option, struct acpi_resource *r) 519 struct acpi_resource *r)
527{ 520{
528 struct acpi_resource_address64 addr, *p = &addr; 521 struct acpi_resource_address64 addr, *p = &addr;
529 acpi_status status; 522 acpi_status status;
@@ -532,7 +525,8 @@ pnpacpi_parse_address_option(struct pnp_option *option, struct acpi_resource *r)
532 525
533 status = acpi_resource_to_address64(r, p); 526 status = acpi_resource_to_address64(r, p);
534 if (!ACPI_SUCCESS(status)) { 527 if (!ACPI_SUCCESS(status)) {
535 pnp_warn("PnPACPI: failed to convert resource type %d", r->type); 528 pnp_warn("PnPACPI: failed to convert resource type %d",
529 r->type);
536 return; 530 return;
537 } 531 }
538 532
@@ -547,7 +541,8 @@ pnpacpi_parse_address_option(struct pnp_option *option, struct acpi_resource *r)
547 mem->size = p->address_length; 541 mem->size = p->address_length;
548 mem->align = 0; 542 mem->align = 0;
549 mem->flags = (p->info.mem.write_protect == 543 mem->flags = (p->info.mem.write_protect ==
550 ACPI_READ_WRITE_MEMORY) ? IORESOURCE_MEM_WRITEABLE : 0; 544 ACPI_READ_WRITE_MEMORY) ? IORESOURCE_MEM_WRITEABLE
545 : 0;
551 pnp_register_mem_resource(option, mem); 546 pnp_register_mem_resource(option, mem);
552 } else if (p->resource_type == ACPI_IO_RANGE) { 547 } else if (p->resource_type == ACPI_IO_RANGE) {
553 port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); 548 port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
@@ -568,109 +563,108 @@ struct acpipnp_parse_option_s {
568}; 563};
569 564
570static acpi_status pnpacpi_option_resource(struct acpi_resource *res, 565static acpi_status pnpacpi_option_resource(struct acpi_resource *res,
571 void *data) 566 void *data)
572{ 567{
573 int priority = 0; 568 int priority = 0;
574 struct acpipnp_parse_option_s *parse_data = (struct acpipnp_parse_option_s *)data; 569 struct acpipnp_parse_option_s *parse_data =
570 (struct acpipnp_parse_option_s *)data;
575 struct pnp_dev *dev = parse_data->dev; 571 struct pnp_dev *dev = parse_data->dev;
576 struct pnp_option *option = parse_data->option; 572 struct pnp_option *option = parse_data->option;
577 573
578 switch (res->type) { 574 switch (res->type) {
579 case ACPI_RESOURCE_TYPE_IRQ: 575 case ACPI_RESOURCE_TYPE_IRQ:
580 pnpacpi_parse_irq_option(option, &res->data.irq); 576 pnpacpi_parse_irq_option(option, &res->data.irq);
581 break; 577 break;
582 578
583 case ACPI_RESOURCE_TYPE_DMA: 579 case ACPI_RESOURCE_TYPE_DMA:
584 pnpacpi_parse_dma_option(option, &res->data.dma); 580 pnpacpi_parse_dma_option(option, &res->data.dma);
585 break; 581 break;
586 582
587 case ACPI_RESOURCE_TYPE_START_DEPENDENT: 583 case ACPI_RESOURCE_TYPE_START_DEPENDENT:
588 switch (res->data.start_dpf.compatibility_priority) { 584 switch (res->data.start_dpf.compatibility_priority) {
589 case ACPI_GOOD_CONFIGURATION: 585 case ACPI_GOOD_CONFIGURATION:
590 priority = PNP_RES_PRIORITY_PREFERRED; 586 priority = PNP_RES_PRIORITY_PREFERRED;
591 break;
592
593 case ACPI_ACCEPTABLE_CONFIGURATION:
594 priority = PNP_RES_PRIORITY_ACCEPTABLE;
595 break;
596
597 case ACPI_SUB_OPTIMAL_CONFIGURATION:
598 priority = PNP_RES_PRIORITY_FUNCTIONAL;
599 break;
600 default:
601 priority = PNP_RES_PRIORITY_INVALID;
602 break;
603 }
604 /* TBD: Considering performace/robustness bits */
605 option = pnp_register_dependent_option(dev, priority);
606 if (!option)
607 return AE_ERROR;
608 parse_data->option = option;
609 break; 587 break;
610 588
611 case ACPI_RESOURCE_TYPE_END_DEPENDENT: 589 case ACPI_ACCEPTABLE_CONFIGURATION:
612 /*only one EndDependentFn is allowed*/ 590 priority = PNP_RES_PRIORITY_ACCEPTABLE;
613 if (!parse_data->option_independent) {
614 pnp_warn("PnPACPI: more than one EndDependentFn");
615 return AE_ERROR;
616 }
617 parse_data->option = parse_data->option_independent;
618 parse_data->option_independent = NULL;
619 break; 591 break;
620 592
621 case ACPI_RESOURCE_TYPE_IO: 593 case ACPI_SUB_OPTIMAL_CONFIGURATION:
622 pnpacpi_parse_port_option(option, &res->data.io); 594 priority = PNP_RES_PRIORITY_FUNCTIONAL;
623 break; 595 break;
624 596 default:
625 case ACPI_RESOURCE_TYPE_FIXED_IO: 597 priority = PNP_RES_PRIORITY_INVALID;
626 pnpacpi_parse_fixed_port_option(option,
627 &res->data.fixed_io);
628 break; 598 break;
599 }
600 /* TBD: Consider performance/robustness bits */
601 option = pnp_register_dependent_option(dev, priority);
602 if (!option)
603 return AE_ERROR;
604 parse_data->option = option;
605 break;
629 606
630 case ACPI_RESOURCE_TYPE_VENDOR: 607 case ACPI_RESOURCE_TYPE_END_DEPENDENT:
631 case ACPI_RESOURCE_TYPE_END_TAG: 608 /*only one EndDependentFn is allowed */
632 break; 609 if (!parse_data->option_independent) {
610 pnp_warn("PnPACPI: more than one EndDependentFn");
611 return AE_ERROR;
612 }
613 parse_data->option = parse_data->option_independent;
614 parse_data->option_independent = NULL;
615 break;
633 616
634 case ACPI_RESOURCE_TYPE_MEMORY24: 617 case ACPI_RESOURCE_TYPE_IO:
635 pnpacpi_parse_mem24_option(option, &res->data.memory24); 618 pnpacpi_parse_port_option(option, &res->data.io);
636 break; 619 break;
637 620
638 case ACPI_RESOURCE_TYPE_MEMORY32: 621 case ACPI_RESOURCE_TYPE_FIXED_IO:
639 pnpacpi_parse_mem32_option(option, &res->data.memory32); 622 pnpacpi_parse_fixed_port_option(option, &res->data.fixed_io);
640 break; 623 break;
641 624
642 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: 625 case ACPI_RESOURCE_TYPE_VENDOR:
643 pnpacpi_parse_fixed_mem32_option(option, 626 case ACPI_RESOURCE_TYPE_END_TAG:
644 &res->data.fixed_memory32); 627 break;
645 break;
646 628
647 case ACPI_RESOURCE_TYPE_ADDRESS16: 629 case ACPI_RESOURCE_TYPE_MEMORY24:
648 case ACPI_RESOURCE_TYPE_ADDRESS32: 630 pnpacpi_parse_mem24_option(option, &res->data.memory24);
649 case ACPI_RESOURCE_TYPE_ADDRESS64: 631 break;
650 pnpacpi_parse_address_option(option, res);
651 break;
652 632
653 case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64: 633 case ACPI_RESOURCE_TYPE_MEMORY32:
654 break; 634 pnpacpi_parse_mem32_option(option, &res->data.memory32);
635 break;
655 636
656 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: 637 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
657 pnpacpi_parse_ext_irq_option(option, 638 pnpacpi_parse_fixed_mem32_option(option,
658 &res->data.extended_irq); 639 &res->data.fixed_memory32);
659 break; 640 break;
660 641
661 case ACPI_RESOURCE_TYPE_GENERIC_REGISTER: 642 case ACPI_RESOURCE_TYPE_ADDRESS16:
662 break; 643 case ACPI_RESOURCE_TYPE_ADDRESS32:
644 case ACPI_RESOURCE_TYPE_ADDRESS64:
645 pnpacpi_parse_address_option(option, res);
646 break;
663 647
664 default: 648 case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
665 pnp_warn("PnPACPI: unknown resource type %d", res->type); 649 break;
666 return AE_ERROR; 650
651 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
652 pnpacpi_parse_ext_irq_option(option, &res->data.extended_irq);
653 break;
654
655 case ACPI_RESOURCE_TYPE_GENERIC_REGISTER:
656 break;
657
658 default:
659 pnp_warn("PnPACPI: unknown resource type %d", res->type);
660 return AE_ERROR;
667 } 661 }
668 662
669 return AE_OK; 663 return AE_OK;
670} 664}
671 665
672acpi_status pnpacpi_parse_resource_option_data(acpi_handle handle, 666acpi_status pnpacpi_parse_resource_option_data(acpi_handle handle,
673 struct pnp_dev *dev) 667 struct pnp_dev * dev)
674{ 668{
675 acpi_status status; 669 acpi_status status;
676 struct acpipnp_parse_option_s parse_data; 670 struct acpipnp_parse_option_s parse_data;
@@ -681,7 +675,7 @@ acpi_status pnpacpi_parse_resource_option_data(acpi_handle handle,
681 parse_data.option_independent = parse_data.option; 675 parse_data.option_independent = parse_data.option;
682 parse_data.dev = dev; 676 parse_data.dev = dev;
683 status = acpi_walk_resources(handle, METHOD_NAME__PRS, 677 status = acpi_walk_resources(handle, METHOD_NAME__PRS,
684 pnpacpi_option_resource, &parse_data); 678 pnpacpi_option_resource, &parse_data);
685 679
686 return status; 680 return status;
687} 681}
@@ -709,7 +703,7 @@ static int pnpacpi_supported_resource(struct acpi_resource *res)
709 * Set resource 703 * Set resource
710 */ 704 */
711static acpi_status pnpacpi_count_resources(struct acpi_resource *res, 705static acpi_status pnpacpi_count_resources(struct acpi_resource *res,
712 void *data) 706 void *data)
713{ 707{
714 int *res_cnt = (int *)data; 708 int *res_cnt = (int *)data;
715 709
@@ -732,14 +726,14 @@ static acpi_status pnpacpi_type_resources(struct acpi_resource *res, void *data)
732} 726}
733 727
734int pnpacpi_build_resource_template(acpi_handle handle, 728int pnpacpi_build_resource_template(acpi_handle handle,
735 struct acpi_buffer *buffer) 729 struct acpi_buffer *buffer)
736{ 730{
737 struct acpi_resource *resource; 731 struct acpi_resource *resource;
738 int res_cnt = 0; 732 int res_cnt = 0;
739 acpi_status status; 733 acpi_status status;
740 734
741 status = acpi_walk_resources(handle, METHOD_NAME__CRS, 735 status = acpi_walk_resources(handle, METHOD_NAME__CRS,
742 pnpacpi_count_resources, &res_cnt); 736 pnpacpi_count_resources, &res_cnt);
743 if (ACPI_FAILURE(status)) { 737 if (ACPI_FAILURE(status)) {
744 pnp_err("Evaluate _CRS failed"); 738 pnp_err("Evaluate _CRS failed");
745 return -EINVAL; 739 return -EINVAL;
@@ -753,7 +747,7 @@ int pnpacpi_build_resource_template(acpi_handle handle,
753 pnp_dbg("Res cnt %d", res_cnt); 747 pnp_dbg("Res cnt %d", res_cnt);
754 resource = (struct acpi_resource *)buffer->pointer; 748 resource = (struct acpi_resource *)buffer->pointer;
755 status = acpi_walk_resources(handle, METHOD_NAME__CRS, 749 status = acpi_walk_resources(handle, METHOD_NAME__CRS,
756 pnpacpi_type_resources, &resource); 750 pnpacpi_type_resources, &resource);
757 if (ACPI_FAILURE(status)) { 751 if (ACPI_FAILURE(status)) {
758 kfree(buffer->pointer); 752 kfree(buffer->pointer);
759 pnp_err("Evaluate _CRS failed"); 753 pnp_err("Evaluate _CRS failed");
@@ -766,7 +760,7 @@ int pnpacpi_build_resource_template(acpi_handle handle,
766} 760}
767 761
768static void pnpacpi_encode_irq(struct acpi_resource *resource, 762static void pnpacpi_encode_irq(struct acpi_resource *resource,
769 struct resource *p) 763 struct resource *p)
770{ 764{
771 int triggering, polarity; 765 int triggering, polarity;
772 766
@@ -782,7 +776,7 @@ static void pnpacpi_encode_irq(struct acpi_resource *resource,
782} 776}
783 777
784static void pnpacpi_encode_ext_irq(struct acpi_resource *resource, 778static void pnpacpi_encode_ext_irq(struct acpi_resource *resource,
785 struct resource *p) 779 struct resource *p)
786{ 780{
787 int triggering, polarity; 781 int triggering, polarity;
788 782
@@ -799,32 +793,32 @@ static void pnpacpi_encode_ext_irq(struct acpi_resource *resource,
799} 793}
800 794
801static void pnpacpi_encode_dma(struct acpi_resource *resource, 795static void pnpacpi_encode_dma(struct acpi_resource *resource,
802 struct resource *p) 796 struct resource *p)
803{ 797{
804 /* Note: pnp_assign_dma will copy pnp_dma->flags into p->flags */ 798 /* Note: pnp_assign_dma will copy pnp_dma->flags into p->flags */
805 switch (p->flags & IORESOURCE_DMA_SPEED_MASK) { 799 switch (p->flags & IORESOURCE_DMA_SPEED_MASK) {
806 case IORESOURCE_DMA_TYPEA: 800 case IORESOURCE_DMA_TYPEA:
807 resource->data.dma.type = ACPI_TYPE_A; 801 resource->data.dma.type = ACPI_TYPE_A;
808 break; 802 break;
809 case IORESOURCE_DMA_TYPEB: 803 case IORESOURCE_DMA_TYPEB:
810 resource->data.dma.type = ACPI_TYPE_B; 804 resource->data.dma.type = ACPI_TYPE_B;
811 break; 805 break;
812 case IORESOURCE_DMA_TYPEF: 806 case IORESOURCE_DMA_TYPEF:
813 resource->data.dma.type = ACPI_TYPE_F; 807 resource->data.dma.type = ACPI_TYPE_F;
814 break; 808 break;
815 default: 809 default:
816 resource->data.dma.type = ACPI_COMPATIBILITY; 810 resource->data.dma.type = ACPI_COMPATIBILITY;
817 } 811 }
818 812
819 switch (p->flags & IORESOURCE_DMA_TYPE_MASK) { 813 switch (p->flags & IORESOURCE_DMA_TYPE_MASK) {
820 case IORESOURCE_DMA_8BIT: 814 case IORESOURCE_DMA_8BIT:
821 resource->data.dma.transfer = ACPI_TRANSFER_8; 815 resource->data.dma.transfer = ACPI_TRANSFER_8;
822 break; 816 break;
823 case IORESOURCE_DMA_8AND16BIT: 817 case IORESOURCE_DMA_8AND16BIT:
824 resource->data.dma.transfer = ACPI_TRANSFER_8_16; 818 resource->data.dma.transfer = ACPI_TRANSFER_8_16;
825 break; 819 break;
826 default: 820 default:
827 resource->data.dma.transfer = ACPI_TRANSFER_16; 821 resource->data.dma.transfer = ACPI_TRANSFER_16;
828 } 822 }
829 823
830 resource->data.dma.bus_master = !!(p->flags & IORESOURCE_DMA_MASTER); 824 resource->data.dma.bus_master = !!(p->flags & IORESOURCE_DMA_MASTER);
@@ -833,31 +827,31 @@ static void pnpacpi_encode_dma(struct acpi_resource *resource,
833} 827}
834 828
835static void pnpacpi_encode_io(struct acpi_resource *resource, 829static void pnpacpi_encode_io(struct acpi_resource *resource,
836 struct resource *p) 830 struct resource *p)
837{ 831{
838 /* Note: pnp_assign_port will copy pnp_port->flags into p->flags */ 832 /* Note: pnp_assign_port will copy pnp_port->flags into p->flags */
839 resource->data.io.io_decode = (p->flags & PNP_PORT_FLAG_16BITADDR)? 833 resource->data.io.io_decode = (p->flags & PNP_PORT_FLAG_16BITADDR) ?
840 ACPI_DECODE_16 : ACPI_DECODE_10; 834 ACPI_DECODE_16 : ACPI_DECODE_10;
841 resource->data.io.minimum = p->start; 835 resource->data.io.minimum = p->start;
842 resource->data.io.maximum = p->end; 836 resource->data.io.maximum = p->end;
843 resource->data.io.alignment = 0; /* Correct? */ 837 resource->data.io.alignment = 0; /* Correct? */
844 resource->data.io.address_length = p->end - p->start + 1; 838 resource->data.io.address_length = p->end - p->start + 1;
845} 839}
846 840
847static void pnpacpi_encode_fixed_io(struct acpi_resource *resource, 841static void pnpacpi_encode_fixed_io(struct acpi_resource *resource,
848 struct resource *p) 842 struct resource *p)
849{ 843{
850 resource->data.fixed_io.address = p->start; 844 resource->data.fixed_io.address = p->start;
851 resource->data.fixed_io.address_length = p->end - p->start + 1; 845 resource->data.fixed_io.address_length = p->end - p->start + 1;
852} 846}
853 847
854static void pnpacpi_encode_mem24(struct acpi_resource *resource, 848static void pnpacpi_encode_mem24(struct acpi_resource *resource,
855 struct resource *p) 849 struct resource *p)
856{ 850{
857 /* Note: pnp_assign_mem will copy pnp_mem->flags into p->flags */ 851 /* Note: pnp_assign_mem will copy pnp_mem->flags into p->flags */
858 resource->data.memory24.write_protect = 852 resource->data.memory24.write_protect =
859 (p->flags & IORESOURCE_MEM_WRITEABLE) ? 853 (p->flags & IORESOURCE_MEM_WRITEABLE) ?
860 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; 854 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
861 resource->data.memory24.minimum = p->start; 855 resource->data.memory24.minimum = p->start;
862 resource->data.memory24.maximum = p->end; 856 resource->data.memory24.maximum = p->end;
863 resource->data.memory24.alignment = 0; 857 resource->data.memory24.alignment = 0;
@@ -865,11 +859,11 @@ static void pnpacpi_encode_mem24(struct acpi_resource *resource,
865} 859}
866 860
867static void pnpacpi_encode_mem32(struct acpi_resource *resource, 861static void pnpacpi_encode_mem32(struct acpi_resource *resource,
868 struct resource *p) 862 struct resource *p)
869{ 863{
870 resource->data.memory32.write_protect = 864 resource->data.memory32.write_protect =
871 (p->flags & IORESOURCE_MEM_WRITEABLE) ? 865 (p->flags & IORESOURCE_MEM_WRITEABLE) ?
872 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; 866 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
873 resource->data.memory32.minimum = p->start; 867 resource->data.memory32.minimum = p->start;
874 resource->data.memory32.maximum = p->end; 868 resource->data.memory32.maximum = p->end;
875 resource->data.memory32.alignment = 0; 869 resource->data.memory32.alignment = 0;
@@ -877,74 +871,77 @@ static void pnpacpi_encode_mem32(struct acpi_resource *resource,
877} 871}
878 872
879static void pnpacpi_encode_fixed_mem32(struct acpi_resource *resource, 873static void pnpacpi_encode_fixed_mem32(struct acpi_resource *resource,
880 struct resource *p) 874 struct resource *p)
881{ 875{
882 resource->data.fixed_memory32.write_protect = 876 resource->data.fixed_memory32.write_protect =
883 (p->flags & IORESOURCE_MEM_WRITEABLE) ? 877 (p->flags & IORESOURCE_MEM_WRITEABLE) ?
884 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; 878 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
885 resource->data.fixed_memory32.address = p->start; 879 resource->data.fixed_memory32.address = p->start;
886 resource->data.fixed_memory32.address_length = p->end - p->start + 1; 880 resource->data.fixed_memory32.address_length = p->end - p->start + 1;
887} 881}
888 882
889int pnpacpi_encode_resources(struct pnp_resource_table *res_table, 883int pnpacpi_encode_resources(struct pnp_resource_table *res_table,
890 struct acpi_buffer *buffer) 884 struct acpi_buffer *buffer)
891{ 885{
892 int i = 0; 886 int i = 0;
893 /* pnpacpi_build_resource_template allocates extra mem */ 887 /* pnpacpi_build_resource_template allocates extra mem */
894 int res_cnt = (buffer->length - 1)/sizeof(struct acpi_resource) - 1; 888 int res_cnt = (buffer->length - 1) / sizeof(struct acpi_resource) - 1;
895 struct acpi_resource *resource = (struct acpi_resource*)buffer->pointer; 889 struct acpi_resource *resource =
890 (struct acpi_resource *)buffer->pointer;
896 int port = 0, irq = 0, dma = 0, mem = 0; 891 int port = 0, irq = 0, dma = 0, mem = 0;
897 892
898 pnp_dbg("res cnt %d", res_cnt); 893 pnp_dbg("res cnt %d", res_cnt);
899 while (i < res_cnt) { 894 while (i < res_cnt) {
900 switch(resource->type) { 895 switch (resource->type) {
901 case ACPI_RESOURCE_TYPE_IRQ: 896 case ACPI_RESOURCE_TYPE_IRQ:
902 pnp_dbg("Encode irq"); 897 pnp_dbg("Encode irq");
903 pnpacpi_encode_irq(resource, 898 pnpacpi_encode_irq(resource,
904 &res_table->irq_resource[irq]); 899 &res_table->irq_resource[irq]);
905 irq++; 900 irq++;
906 break; 901 break;
907 902
908 case ACPI_RESOURCE_TYPE_DMA: 903 case ACPI_RESOURCE_TYPE_DMA:
909 pnp_dbg("Encode dma"); 904 pnp_dbg("Encode dma");
910 pnpacpi_encode_dma(resource, 905 pnpacpi_encode_dma(resource,
911 &res_table->dma_resource[dma]); 906 &res_table->dma_resource[dma]);
912 dma++; 907 dma++;
913 break; 908 break;
914 case ACPI_RESOURCE_TYPE_IO: 909 case ACPI_RESOURCE_TYPE_IO:
915 pnp_dbg("Encode io"); 910 pnp_dbg("Encode io");
916 pnpacpi_encode_io(resource, 911 pnpacpi_encode_io(resource,
917 &res_table->port_resource[port]); 912 &res_table->port_resource[port]);
918 port++; 913 port++;
919 break; 914 break;
920 case ACPI_RESOURCE_TYPE_FIXED_IO: 915 case ACPI_RESOURCE_TYPE_FIXED_IO:
921 pnp_dbg("Encode fixed io"); 916 pnp_dbg("Encode fixed io");
922 pnpacpi_encode_fixed_io(resource, 917 pnpacpi_encode_fixed_io(resource,
923 &res_table->port_resource[port]); 918 &res_table->
919 port_resource[port]);
924 port++; 920 port++;
925 break; 921 break;
926 case ACPI_RESOURCE_TYPE_MEMORY24: 922 case ACPI_RESOURCE_TYPE_MEMORY24:
927 pnp_dbg("Encode mem24"); 923 pnp_dbg("Encode mem24");
928 pnpacpi_encode_mem24(resource, 924 pnpacpi_encode_mem24(resource,
929 &res_table->mem_resource[mem]); 925 &res_table->mem_resource[mem]);
930 mem++; 926 mem++;
931 break; 927 break;
932 case ACPI_RESOURCE_TYPE_MEMORY32: 928 case ACPI_RESOURCE_TYPE_MEMORY32:
933 pnp_dbg("Encode mem32"); 929 pnp_dbg("Encode mem32");
934 pnpacpi_encode_mem32(resource, 930 pnpacpi_encode_mem32(resource,
935 &res_table->mem_resource[mem]); 931 &res_table->mem_resource[mem]);
936 mem++; 932 mem++;
937 break; 933 break;
938 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: 934 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
939 pnp_dbg("Encode fixed mem32"); 935 pnp_dbg("Encode fixed mem32");
940 pnpacpi_encode_fixed_mem32(resource, 936 pnpacpi_encode_fixed_mem32(resource,
941 &res_table->mem_resource[mem]); 937 &res_table->
938 mem_resource[mem]);
942 mem++; 939 mem++;
943 break; 940 break;
944 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: 941 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
945 pnp_dbg("Encode ext irq"); 942 pnp_dbg("Encode ext irq");
946 pnpacpi_encode_ext_irq(resource, 943 pnpacpi_encode_ext_irq(resource,
947 &res_table->irq_resource[irq]); 944 &res_table->irq_resource[irq]);
948 irq++; 945 irq++;
949 break; 946 break;
950 case ACPI_RESOURCE_TYPE_START_DEPENDENT: 947 case ACPI_RESOURCE_TYPE_START_DEPENDENT:
@@ -956,7 +953,7 @@ int pnpacpi_encode_resources(struct pnp_resource_table *res_table,
956 case ACPI_RESOURCE_TYPE_ADDRESS64: 953 case ACPI_RESOURCE_TYPE_ADDRESS64:
957 case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64: 954 case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
958 case ACPI_RESOURCE_TYPE_GENERIC_REGISTER: 955 case ACPI_RESOURCE_TYPE_GENERIC_REGISTER:
959 default: /* other type */ 956 default: /* other type */
960 pnp_warn("unknown resource type %d", resource->type); 957 pnp_warn("unknown resource type %d", resource->type);
961 return -EINVAL; 958 return -EINVAL;
962 } 959 }
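[Editor's note] The rsparser.c diff above is pure reformatting: function signatures rewrapped, "if("/"for(" spacing fixed, and continuation lines re-indented, with no behavioural change. For readers skimming the hunks, the one piece of logic worth seeing in isolation is the triggering/polarity mapping reshaped by the first hunk. Below is a minimal, self-contained C sketch of that mapping; the enum values are illustrative stand-ins, not the kernel's ACPI_* and IORESOURCE_* definitions.

#include <stdio.h>

/* Hypothetical stand-ins for the ACPI trigger/polarity constants. */
enum { ACPI_LEVEL_SENSITIVE = 0, ACPI_EDGE_SENSITIVE = 1 };
enum { ACPI_ACTIVE_HIGH = 0, ACPI_ACTIVE_LOW = 1 };

/* Hypothetical stand-ins for the IORESOURCE_IRQ_* flag bits. */
enum {
	IRQ_HIGHEDGE  = 1 << 0,
	IRQ_LOWEDGE   = 1 << 1,
	IRQ_HIGHLEVEL = 1 << 2,
	IRQ_LOWLEVEL  = 1 << 3,
};

/* Same decision structure as the kernel's irq_flags(): pick the
 * level/edge flag pair from the trigger mode, then the polarity. */
static int irq_flags(int triggering, int polarity)
{
	if (triggering == ACPI_LEVEL_SENSITIVE)
		return polarity == ACPI_ACTIVE_LOW ? IRQ_LOWLEVEL
						   : IRQ_HIGHLEVEL;
	/* edge triggered: same polarity test, different flag pair */
	return polarity == ACPI_ACTIVE_LOW ? IRQ_LOWEDGE : IRQ_HIGHEDGE;
}

int main(void)
{
	printf("level/low -> %d\n", irq_flags(ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW));
	printf("edge/high -> %d\n", irq_flags(ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH));
	return 0;
}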
diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index a1f0b0ba2bfe..5dba68fe33f5 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * bioscalls.c - the lowlevel layer of the PnPBIOS driver 2 * bioscalls.c - the lowlevel layer of the PnPBIOS driver
3 *
4 */ 3 */
5 4
6#include <linux/types.h> 5#include <linux/types.h>
@@ -26,11 +25,10 @@
26#include "pnpbios.h" 25#include "pnpbios.h"
27 26
28static struct { 27static struct {
29 u16 offset; 28 u16 offset;
30 u16 segment; 29 u16 segment;
31} pnp_bios_callpoint; 30} pnp_bios_callpoint;
32 31
33
34/* 32/*
35 * These are some opcodes for a "static asmlinkage" 33 * These are some opcodes for a "static asmlinkage"
36 * As this code is *not* executed inside the linux kernel segment, but in a 34 * As this code is *not* executed inside the linux kernel segment, but in a
@@ -44,8 +42,7 @@ static struct {
44 42
45asmlinkage void pnp_bios_callfunc(void); 43asmlinkage void pnp_bios_callfunc(void);
46 44
47__asm__( 45__asm__(".text \n"
48 ".text \n"
49 __ALIGN_STR "\n" 46 __ALIGN_STR "\n"
50 "pnp_bios_callfunc:\n" 47 "pnp_bios_callfunc:\n"
51 " pushl %edx \n" 48 " pushl %edx \n"
@@ -55,8 +52,7 @@ __asm__(
55 " lcallw *pnp_bios_callpoint\n" 52 " lcallw *pnp_bios_callpoint\n"
56 " addl $16, %esp \n" 53 " addl $16, %esp \n"
57 " lret \n" 54 " lret \n"
58 ".previous \n" 55 ".previous \n");
59);
60 56
61#define Q2_SET_SEL(cpu, selname, address, size) \ 57#define Q2_SET_SEL(cpu, selname, address, size) \
62do { \ 58do { \
@@ -78,7 +74,6 @@ u32 pnp_bios_is_utter_crap = 0;
78 74
79static spinlock_t pnp_bios_lock; 75static spinlock_t pnp_bios_lock;
80 76
81
82/* 77/*
83 * Support Functions 78 * Support Functions
84 */ 79 */
@@ -97,7 +92,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
97 * PnP BIOSes are generally not terribly re-entrant. 92 * PnP BIOSes are generally not terribly re-entrant.
98 * Also, don't rely on them to save everything correctly. 93 * Also, don't rely on them to save everything correctly.
99 */ 94 */
100 if(pnp_bios_is_utter_crap) 95 if (pnp_bios_is_utter_crap)
101 return PNP_FUNCTION_NOT_SUPPORTED; 96 return PNP_FUNCTION_NOT_SUPPORTED;
102 97
103 cpu = get_cpu(); 98 cpu = get_cpu();
@@ -113,112 +108,128 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
113 if (ts2_size) 108 if (ts2_size)
114 Q2_SET_SEL(smp_processor_id(), PNP_TS2, ts2_base, ts2_size); 109 Q2_SET_SEL(smp_processor_id(), PNP_TS2, ts2_base, ts2_size);
115 110
116 __asm__ __volatile__( 111 __asm__ __volatile__("pushl %%ebp\n\t"
117 "pushl %%ebp\n\t" 112 "pushl %%edi\n\t"
118 "pushl %%edi\n\t" 113 "pushl %%esi\n\t"
119 "pushl %%esi\n\t" 114 "pushl %%ds\n\t"
120 "pushl %%ds\n\t" 115 "pushl %%es\n\t"
121 "pushl %%es\n\t" 116 "pushl %%fs\n\t"
122 "pushl %%fs\n\t" 117 "pushl %%gs\n\t"
123 "pushl %%gs\n\t" 118 "pushfl\n\t"
124 "pushfl\n\t" 119 "movl %%esp, pnp_bios_fault_esp\n\t"
125 "movl %%esp, pnp_bios_fault_esp\n\t" 120 "movl $1f, pnp_bios_fault_eip\n\t"
126 "movl $1f, pnp_bios_fault_eip\n\t" 121 "lcall %5,%6\n\t"
127 "lcall %5,%6\n\t" 122 "1:popfl\n\t"
128 "1:popfl\n\t" 123 "popl %%gs\n\t"
129 "popl %%gs\n\t" 124 "popl %%fs\n\t"
130 "popl %%fs\n\t" 125 "popl %%es\n\t"
131 "popl %%es\n\t" 126 "popl %%ds\n\t"
132 "popl %%ds\n\t" 127 "popl %%esi\n\t"
133 "popl %%esi\n\t" 128 "popl %%edi\n\t"
134 "popl %%edi\n\t" 129 "popl %%ebp\n\t":"=a"(status)
135 "popl %%ebp\n\t" 130 :"0"((func) | (((u32) arg1) << 16)),
136 : "=a" (status) 131 "b"((arg2) | (((u32) arg3) << 16)),
137 : "0" ((func) | (((u32)arg1) << 16)), 132 "c"((arg4) | (((u32) arg5) << 16)),
138 "b" ((arg2) | (((u32)arg3) << 16)), 133 "d"((arg6) | (((u32) arg7) << 16)),
139 "c" ((arg4) | (((u32)arg5) << 16)), 134 "i"(PNP_CS32), "i"(0)
140 "d" ((arg6) | (((u32)arg7) << 16)), 135 :"memory");
141 "i" (PNP_CS32),
142 "i" (0)
143 : "memory"
144 );
145 spin_unlock_irqrestore(&pnp_bios_lock, flags); 136 spin_unlock_irqrestore(&pnp_bios_lock, flags);
146 137
147 get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40; 138 get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
148 put_cpu(); 139 put_cpu();
149 140
150 /* If we get here and this is set then the PnP BIOS faulted on us. */ 141 /* If we get here and this is set then the PnP BIOS faulted on us. */
151 if(pnp_bios_is_utter_crap) 142 if (pnp_bios_is_utter_crap) {
152 { 143 printk(KERN_ERR
153 printk(KERN_ERR "PnPBIOS: Warning! Your PnP BIOS caused a fatal error. Attempting to continue\n"); 144 "PnPBIOS: Warning! Your PnP BIOS caused a fatal error. Attempting to continue\n");
154 printk(KERN_ERR "PnPBIOS: You may need to reboot with the \"pnpbios=off\" option to operate stably\n"); 145 printk(KERN_ERR
155 printk(KERN_ERR "PnPBIOS: Check with your vendor for an updated BIOS\n"); 146 "PnPBIOS: You may need to reboot with the \"pnpbios=off\" option to operate stably\n");
147 printk(KERN_ERR
148 "PnPBIOS: Check with your vendor for an updated BIOS\n");
156 } 149 }
157 150
158 return status; 151 return status;
159} 152}
160 153
161void pnpbios_print_status(const char * module, u16 status) 154void pnpbios_print_status(const char *module, u16 status)
162{ 155{
163 switch(status) { 156 switch (status) {
164 case PNP_SUCCESS: 157 case PNP_SUCCESS:
165 printk(KERN_ERR "PnPBIOS: %s: function successful\n", module); 158 printk(KERN_ERR "PnPBIOS: %s: function successful\n", module);
166 break; 159 break;
167 case PNP_NOT_SET_STATICALLY: 160 case PNP_NOT_SET_STATICALLY:
168 printk(KERN_ERR "PnPBIOS: %s: unable to set static resources\n", module); 161 printk(KERN_ERR "PnPBIOS: %s: unable to set static resources\n",
162 module);
169 break; 163 break;
170 case PNP_UNKNOWN_FUNCTION: 164 case PNP_UNKNOWN_FUNCTION:
171 printk(KERN_ERR "PnPBIOS: %s: invalid function number passed\n", module); 165 printk(KERN_ERR "PnPBIOS: %s: invalid function number passed\n",
166 module);
172 break; 167 break;
173 case PNP_FUNCTION_NOT_SUPPORTED: 168 case PNP_FUNCTION_NOT_SUPPORTED:
174 printk(KERN_ERR "PnPBIOS: %s: function not supported on this system\n", module); 169 printk(KERN_ERR
170 "PnPBIOS: %s: function not supported on this system\n",
171 module);
175 break; 172 break;
176 case PNP_INVALID_HANDLE: 173 case PNP_INVALID_HANDLE:
177 printk(KERN_ERR "PnPBIOS: %s: invalid handle\n", module); 174 printk(KERN_ERR "PnPBIOS: %s: invalid handle\n", module);
178 break; 175 break;
179 case PNP_BAD_PARAMETER: 176 case PNP_BAD_PARAMETER:
180 printk(KERN_ERR "PnPBIOS: %s: invalid parameters were passed\n", module); 177 printk(KERN_ERR "PnPBIOS: %s: invalid parameters were passed\n",
178 module);
181 break; 179 break;
182 case PNP_SET_FAILED: 180 case PNP_SET_FAILED:
183 printk(KERN_ERR "PnPBIOS: %s: unable to set resources\n", module); 181 printk(KERN_ERR "PnPBIOS: %s: unable to set resources\n",
182 module);
184 break; 183 break;
185 case PNP_EVENTS_NOT_PENDING: 184 case PNP_EVENTS_NOT_PENDING:
186 printk(KERN_ERR "PnPBIOS: %s: no events are pending\n", module); 185 printk(KERN_ERR "PnPBIOS: %s: no events are pending\n", module);
187 break; 186 break;
188 case PNP_SYSTEM_NOT_DOCKED: 187 case PNP_SYSTEM_NOT_DOCKED:
189 printk(KERN_ERR "PnPBIOS: %s: the system is not docked\n", module); 188 printk(KERN_ERR "PnPBIOS: %s: the system is not docked\n",
189 module);
190 break; 190 break;
191 case PNP_NO_ISA_PNP_CARDS: 191 case PNP_NO_ISA_PNP_CARDS:
192 printk(KERN_ERR "PnPBIOS: %s: no isapnp cards are installed on this system\n", module); 192 printk(KERN_ERR
193 "PnPBIOS: %s: no isapnp cards are installed on this system\n",
194 module);
193 break; 195 break;
194 case PNP_UNABLE_TO_DETERMINE_DOCK_CAPABILITIES: 196 case PNP_UNABLE_TO_DETERMINE_DOCK_CAPABILITIES:
195 printk(KERN_ERR "PnPBIOS: %s: cannot determine the capabilities of the docking station\n", module); 197 printk(KERN_ERR
198 "PnPBIOS: %s: cannot determine the capabilities of the docking station\n",
199 module);
196 break; 200 break;
197 case PNP_CONFIG_CHANGE_FAILED_NO_BATTERY: 201 case PNP_CONFIG_CHANGE_FAILED_NO_BATTERY:
198 printk(KERN_ERR "PnPBIOS: %s: unable to undock, the system does not have a battery\n", module); 202 printk(KERN_ERR
203 "PnPBIOS: %s: unable to undock, the system does not have a battery\n",
204 module);
199 break; 205 break;
200 case PNP_CONFIG_CHANGE_FAILED_RESOURCE_CONFLICT: 206 case PNP_CONFIG_CHANGE_FAILED_RESOURCE_CONFLICT:
201 printk(KERN_ERR "PnPBIOS: %s: could not dock due to resource conflicts\n", module); 207 printk(KERN_ERR
208 "PnPBIOS: %s: could not dock due to resource conflicts\n",
209 module);
202 break; 210 break;
203 case PNP_BUFFER_TOO_SMALL: 211 case PNP_BUFFER_TOO_SMALL:
204 printk(KERN_ERR "PnPBIOS: %s: the buffer passed is too small\n", module); 212 printk(KERN_ERR "PnPBIOS: %s: the buffer passed is too small\n",
213 module);
205 break; 214 break;
206 case PNP_USE_ESCD_SUPPORT: 215 case PNP_USE_ESCD_SUPPORT:
207 printk(KERN_ERR "PnPBIOS: %s: use ESCD instead\n", module); 216 printk(KERN_ERR "PnPBIOS: %s: use ESCD instead\n", module);
208 break; 217 break;
209 case PNP_MESSAGE_NOT_SUPPORTED: 218 case PNP_MESSAGE_NOT_SUPPORTED:
210 printk(KERN_ERR "PnPBIOS: %s: the message is unsupported\n", module); 219 printk(KERN_ERR "PnPBIOS: %s: the message is unsupported\n",
220 module);
211 break; 221 break;
212 case PNP_HARDWARE_ERROR: 222 case PNP_HARDWARE_ERROR:
213 printk(KERN_ERR "PnPBIOS: %s: a hardware failure has occured\n", module); 223 printk(KERN_ERR "PnPBIOS: %s: a hardware failure has occured\n",
224 module);
214 break; 225 break;
215 default: 226 default:
216 printk(KERN_ERR "PnPBIOS: %s: unexpected status 0x%x\n", module, status); 227 printk(KERN_ERR "PnPBIOS: %s: unexpected status 0x%x\n", module,
228 status);
217 break; 229 break;
218 } 230 }
219} 231}
220 232
221
222/* 233/*
223 * PnP BIOS Low Level Calls 234 * PnP BIOS Low Level Calls
224 */ 235 */
@@ -243,19 +254,22 @@ void pnpbios_print_status(const char * module, u16 status)
243static int __pnp_bios_dev_node_info(struct pnp_dev_node_info *data) 254static int __pnp_bios_dev_node_info(struct pnp_dev_node_info *data)
244{ 255{
245 u16 status; 256 u16 status;
257
246 if (!pnp_bios_present()) 258 if (!pnp_bios_present())
247 return PNP_FUNCTION_NOT_SUPPORTED; 259 return PNP_FUNCTION_NOT_SUPPORTED;
248 status = call_pnp_bios(PNP_GET_NUM_SYS_DEV_NODES, 0, PNP_TS1, 2, PNP_TS1, PNP_DS, 0, 0, 260 status = call_pnp_bios(PNP_GET_NUM_SYS_DEV_NODES, 0, PNP_TS1, 2,
249 data, sizeof(struct pnp_dev_node_info), NULL, 0); 261 PNP_TS1, PNP_DS, 0, 0, data,
262 sizeof(struct pnp_dev_node_info), NULL, 0);
250 data->no_nodes &= 0xff; 263 data->no_nodes &= 0xff;
251 return status; 264 return status;
252} 265}
253 266
254int pnp_bios_dev_node_info(struct pnp_dev_node_info *data) 267int pnp_bios_dev_node_info(struct pnp_dev_node_info *data)
255{ 268{
256 int status = __pnp_bios_dev_node_info( data ); 269 int status = __pnp_bios_dev_node_info(data);
257 if ( status ) 270
258 pnpbios_print_status( "dev_node_info", status ); 271 if (status)
272 pnpbios_print_status("dev_node_info", status);
259 return status; 273 return status;
260} 274}
261 275
@@ -273,17 +287,20 @@ int pnp_bios_dev_node_info(struct pnp_dev_node_info *data)
273 * or volatile current (0) config 287 * or volatile current (0) config
274 * Output: *nodenum=next node or 0xff if no more nodes 288 * Output: *nodenum=next node or 0xff if no more nodes
275 */ 289 */
276static int __pnp_bios_get_dev_node(u8 *nodenum, char boot, struct pnp_bios_node *data) 290static int __pnp_bios_get_dev_node(u8 *nodenum, char boot,
291 struct pnp_bios_node *data)
277{ 292{
278 u16 status; 293 u16 status;
279 u16 tmp_nodenum; 294 u16 tmp_nodenum;
295
280 if (!pnp_bios_present()) 296 if (!pnp_bios_present())
281 return PNP_FUNCTION_NOT_SUPPORTED; 297 return PNP_FUNCTION_NOT_SUPPORTED;
282 if ( !boot && pnpbios_dont_use_current_config ) 298 if (!boot && pnpbios_dont_use_current_config)
283 return PNP_FUNCTION_NOT_SUPPORTED; 299 return PNP_FUNCTION_NOT_SUPPORTED;
284 tmp_nodenum = *nodenum; 300 tmp_nodenum = *nodenum;
285 status = call_pnp_bios(PNP_GET_SYS_DEV_NODE, 0, PNP_TS1, 0, PNP_TS2, boot ? 2 : 1, PNP_DS, 0, 301 status = call_pnp_bios(PNP_GET_SYS_DEV_NODE, 0, PNP_TS1, 0, PNP_TS2,
286 &tmp_nodenum, sizeof(tmp_nodenum), data, 65536); 302 boot ? 2 : 1, PNP_DS, 0, &tmp_nodenum,
303 sizeof(tmp_nodenum), data, 65536);
287 *nodenum = tmp_nodenum; 304 *nodenum = tmp_nodenum;
288 return status; 305 return status;
289} 306}
@@ -291,104 +308,66 @@ static int __pnp_bios_get_dev_node(u8 *nodenum, char boot, struct pnp_bios_node
291int pnp_bios_get_dev_node(u8 *nodenum, char boot, struct pnp_bios_node *data) 308int pnp_bios_get_dev_node(u8 *nodenum, char boot, struct pnp_bios_node *data)
292{ 309{
293 int status; 310 int status;
294 status = __pnp_bios_get_dev_node( nodenum, boot, data ); 311
295 if ( status ) 312 status = __pnp_bios_get_dev_node(nodenum, boot, data);
296 pnpbios_print_status( "get_dev_node", status ); 313 if (status)
314 pnpbios_print_status("get_dev_node", status);
297 return status; 315 return status;
298} 316}
299 317
300
301/* 318/*
302 * Call PnP BIOS with function 0x02, "set system device node" 319 * Call PnP BIOS with function 0x02, "set system device node"
303 * Input: *nodenum = desired node, 320 * Input: *nodenum = desired node,
304 * boot = whether to set nonvolatile boot (!=0) 321 * boot = whether to set nonvolatile boot (!=0)
305 * or volatile current (0) config 322 * or volatile current (0) config
306 */ 323 */
307static int __pnp_bios_set_dev_node(u8 nodenum, char boot, struct pnp_bios_node *data) 324static int __pnp_bios_set_dev_node(u8 nodenum, char boot,
325 struct pnp_bios_node *data)
308{ 326{
309 u16 status; 327 u16 status;
328
310 if (!pnp_bios_present()) 329 if (!pnp_bios_present())
311 return PNP_FUNCTION_NOT_SUPPORTED; 330 return PNP_FUNCTION_NOT_SUPPORTED;
312 if ( !boot && pnpbios_dont_use_current_config ) 331 if (!boot && pnpbios_dont_use_current_config)
313 return PNP_FUNCTION_NOT_SUPPORTED; 332 return PNP_FUNCTION_NOT_SUPPORTED;
314 status = call_pnp_bios(PNP_SET_SYS_DEV_NODE, nodenum, 0, PNP_TS1, boot ? 2 : 1, PNP_DS, 0, 0, 333 status = call_pnp_bios(PNP_SET_SYS_DEV_NODE, nodenum, 0, PNP_TS1,
315 data, 65536, NULL, 0); 334 boot ? 2 : 1, PNP_DS, 0, 0, data, 65536, NULL,
335 0);
316 return status; 336 return status;
317} 337}
318 338
319int pnp_bios_set_dev_node(u8 nodenum, char boot, struct pnp_bios_node *data) 339int pnp_bios_set_dev_node(u8 nodenum, char boot, struct pnp_bios_node *data)
320{ 340{
321 int status; 341 int status;
322 status = __pnp_bios_set_dev_node( nodenum, boot, data ); 342
323 if ( status ) { 343 status = __pnp_bios_set_dev_node(nodenum, boot, data);
324 pnpbios_print_status( "set_dev_node", status ); 344 if (status) {
345 pnpbios_print_status("set_dev_node", status);
325 return status; 346 return status;
326 } 347 }
327 if ( !boot ) { /* Update devlist */ 348 if (!boot) { /* Update devlist */
328 status = pnp_bios_get_dev_node( &nodenum, boot, data ); 349 status = pnp_bios_get_dev_node(&nodenum, boot, data);
329 if ( status ) 350 if (status)
330 return status; 351 return status;
331 } 352 }
332 return status; 353 return status;
333} 354}
334 355
335#if needed
336/*
337 * Call PnP BIOS with function 0x03, "get event"
338 */
339static int pnp_bios_get_event(u16 *event)
340{
341 u16 status;
342 if (!pnp_bios_present())
343 return PNP_FUNCTION_NOT_SUPPORTED;
344 status = call_pnp_bios(PNP_GET_EVENT, 0, PNP_TS1, PNP_DS, 0, 0 ,0 ,0,
345 event, sizeof(u16), NULL, 0);
346 return status;
347}
348#endif
349
350#if needed
351/*
352 * Call PnP BIOS with function 0x04, "send message"
353 */
354static int pnp_bios_send_message(u16 message)
355{
356 u16 status;
357 if (!pnp_bios_present())
358 return PNP_FUNCTION_NOT_SUPPORTED;
359 status = call_pnp_bios(PNP_SEND_MESSAGE, message, PNP_DS, 0, 0, 0, 0, 0, 0, 0, 0, 0);
360 return status;
361}
362#endif
363
364/* 356/*
365 * Call PnP BIOS with function 0x05, "get docking station information" 357 * Call PnP BIOS with function 0x05, "get docking station information"
366 */ 358 */
367int pnp_bios_dock_station_info(struct pnp_docking_station_info *data) 359int pnp_bios_dock_station_info(struct pnp_docking_station_info *data)
368{ 360{
369 u16 status; 361 u16 status;
370 if (!pnp_bios_present())
371 return PNP_FUNCTION_NOT_SUPPORTED;
372 status = call_pnp_bios(PNP_GET_DOCKING_STATION_INFORMATION, 0, PNP_TS1, PNP_DS, 0, 0, 0, 0,
373 data, sizeof(struct pnp_docking_station_info), NULL, 0);
374 return status;
375}
376 362
377#if needed
378/*
379 * Call PnP BIOS with function 0x09, "set statically allocated resource
380 * information"
381 */
382static int pnp_bios_set_stat_res(char *info)
383{
384 u16 status;
385 if (!pnp_bios_present()) 363 if (!pnp_bios_present())
386 return PNP_FUNCTION_NOT_SUPPORTED; 364 return PNP_FUNCTION_NOT_SUPPORTED;
387 status = call_pnp_bios(PNP_SET_STATIC_ALLOCED_RES_INFO, 0, PNP_TS1, PNP_DS, 0, 0, 0, 0, 365 status = call_pnp_bios(PNP_GET_DOCKING_STATION_INFORMATION, 0, PNP_TS1,
388 info, *((u16 *) info), 0, 0); 366 PNP_DS, 0, 0, 0, 0, data,
367 sizeof(struct pnp_docking_station_info), NULL,
368 0);
389 return status; 369 return status;
390} 370}
391#endif
392 371
393/* 372/*
394 * Call PnP BIOS with function 0x0a, "get statically allocated resource 373 * Call PnP BIOS with function 0x0a, "get statically allocated resource
@@ -397,36 +376,23 @@ static int pnp_bios_set_stat_res(char *info)
397static int __pnp_bios_get_stat_res(char *info) 376static int __pnp_bios_get_stat_res(char *info)
398{ 377{
399 u16 status; 378 u16 status;
379
400 if (!pnp_bios_present()) 380 if (!pnp_bios_present())
401 return PNP_FUNCTION_NOT_SUPPORTED; 381 return PNP_FUNCTION_NOT_SUPPORTED;
402 status = call_pnp_bios(PNP_GET_STATIC_ALLOCED_RES_INFO, 0, PNP_TS1, PNP_DS, 0, 0, 0, 0, 382 status = call_pnp_bios(PNP_GET_STATIC_ALLOCED_RES_INFO, 0, PNP_TS1,
403 info, 65536, NULL, 0); 383 PNP_DS, 0, 0, 0, 0, info, 65536, NULL, 0);
404 return status; 384 return status;
405} 385}
406 386
407int pnp_bios_get_stat_res(char *info) 387int pnp_bios_get_stat_res(char *info)
408{ 388{
409 int status; 389 int status;
410 status = __pnp_bios_get_stat_res( info );
411 if ( status )
412 pnpbios_print_status( "get_stat_res", status );
413 return status;
414}
415 390
416#if needed 391 status = __pnp_bios_get_stat_res(info);
417/* 392 if (status)
418 * Call PnP BIOS with function 0x0b, "get APM id table" 393 pnpbios_print_status("get_stat_res", status);
419 */
420static int pnp_bios_apm_id_table(char *table, u16 *size)
421{
422 u16 status;
423 if (!pnp_bios_present())
424 return PNP_FUNCTION_NOT_SUPPORTED;
425 status = call_pnp_bios(PNP_GET_APM_ID_TABLE, 0, PNP_TS2, 0, PNP_TS1, PNP_DS, 0, 0,
426 table, *size, size, sizeof(u16));
427 return status; 394 return status;
428} 395}
429#endif
430 396
431/* 397/*
432 * Call PnP BIOS with function 0x40, "get isa pnp configuration structure" 398 * Call PnP BIOS with function 0x40, "get isa pnp configuration structure"
@@ -434,19 +400,22 @@ static int pnp_bios_apm_id_table(char *table, u16 *size)
434static int __pnp_bios_isapnp_config(struct pnp_isa_config_struc *data) 400static int __pnp_bios_isapnp_config(struct pnp_isa_config_struc *data)
435{ 401{
436 u16 status; 402 u16 status;
403
437 if (!pnp_bios_present()) 404 if (!pnp_bios_present())
438 return PNP_FUNCTION_NOT_SUPPORTED; 405 return PNP_FUNCTION_NOT_SUPPORTED;
439 status = call_pnp_bios(PNP_GET_PNP_ISA_CONFIG_STRUC, 0, PNP_TS1, PNP_DS, 0, 0, 0, 0, 406 status = call_pnp_bios(PNP_GET_PNP_ISA_CONFIG_STRUC, 0, PNP_TS1, PNP_DS,
440 data, sizeof(struct pnp_isa_config_struc), NULL, 0); 407 0, 0, 0, 0, data,
408 sizeof(struct pnp_isa_config_struc), NULL, 0);
441 return status; 409 return status;
442} 410}
443 411
444int pnp_bios_isapnp_config(struct pnp_isa_config_struc *data) 412int pnp_bios_isapnp_config(struct pnp_isa_config_struc *data)
445{ 413{
446 int status; 414 int status;
447 status = __pnp_bios_isapnp_config( data ); 415
448 if ( status ) 416 status = __pnp_bios_isapnp_config(data);
449 pnpbios_print_status( "isapnp_config", status ); 417 if (status)
418 pnpbios_print_status("isapnp_config", status);
450 return status; 419 return status;
451} 420}
452 421
@@ -456,19 +425,22 @@ int pnp_bios_isapnp_config(struct pnp_isa_config_struc *data)
 static int __pnp_bios_escd_info(struct escd_info_struc *data)
 {
 	u16 status;
+
 	if (!pnp_bios_present())
 		return ESCD_FUNCTION_NOT_SUPPORTED;
-	status = call_pnp_bios(PNP_GET_ESCD_INFO, 0, PNP_TS1, 2, PNP_TS1, 4, PNP_TS1, PNP_DS,
-			       data, sizeof(struct escd_info_struc), NULL, 0);
+	status = call_pnp_bios(PNP_GET_ESCD_INFO, 0, PNP_TS1, 2, PNP_TS1, 4,
+			       PNP_TS1, PNP_DS, data,
+			       sizeof(struct escd_info_struc), NULL, 0);
 	return status;
 }
 
 int pnp_bios_escd_info(struct escd_info_struc *data)
 {
 	int status;
-	status = __pnp_bios_escd_info( data );
-	if ( status )
-		pnpbios_print_status( "escd_info", status );
+
+	status = __pnp_bios_escd_info(data);
+	if (status)
+		pnpbios_print_status("escd_info", status);
 	return status;
 }
 
@@ -479,57 +451,42 @@ int pnp_bios_escd_info(struct escd_info_struc *data)
 static int __pnp_bios_read_escd(char *data, u32 nvram_base)
 {
 	u16 status;
+
 	if (!pnp_bios_present())
 		return ESCD_FUNCTION_NOT_SUPPORTED;
-	status = call_pnp_bios(PNP_READ_ESCD, 0, PNP_TS1, PNP_TS2, PNP_DS, 0, 0, 0,
-			       data, 65536, __va(nvram_base), 65536);
+	status = call_pnp_bios(PNP_READ_ESCD, 0, PNP_TS1, PNP_TS2, PNP_DS, 0, 0,
+			       0, data, 65536, __va(nvram_base), 65536);
 	return status;
 }
 
 int pnp_bios_read_escd(char *data, u32 nvram_base)
 {
 	int status;
-	status = __pnp_bios_read_escd( data, nvram_base );
-	if ( status )
-		pnpbios_print_status( "read_escd", status );
-	return status;
-}
 
-#if needed
-/*
- * Call PnP BIOS function 0x43, "write ESCD"
- */
-static int pnp_bios_write_escd(char *data, u32 nvram_base)
-{
-	u16 status;
-	if (!pnp_bios_present())
-		return ESCD_FUNCTION_NOT_SUPPORTED;
-	status = call_pnp_bios(PNP_WRITE_ESCD, 0, PNP_TS1, PNP_TS2, PNP_DS, 0, 0, 0,
-			       data, 65536, __va(nvram_base), 65536);
+	status = __pnp_bios_read_escd(data, nvram_base);
+	if (status)
+		pnpbios_print_status("read_escd", status);
 	return status;
 }
-#endif
-
-
-/*
- * Initialization
- */
 
 void pnpbios_calls_init(union pnp_bios_install_struct *header)
 {
 	int i;
+
 	spin_lock_init(&pnp_bios_lock);
 	pnp_bios_callpoint.offset = header->fields.pm16offset;
 	pnp_bios_callpoint.segment = PNP_CS16;
 
 	set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
 	_set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
 	for (i = 0; i < NR_CPUS; i++) {
 		struct desc_struct *gdt = get_cpu_gdt_table(i);
 		if (!gdt)
 			continue;
 		set_base(gdt[GDT_ENTRY_PNPBIOS_CS32], &pnp_bios_callfunc);
-		set_base(gdt[GDT_ENTRY_PNPBIOS_CS16], __va(header->fields.pm16cseg));
-		set_base(gdt[GDT_ENTRY_PNPBIOS_DS], __va(header->fields.pm16dseg));
+		set_base(gdt[GDT_ENTRY_PNPBIOS_CS16],
+			 __va(header->fields.pm16cseg));
+		set_base(gdt[GDT_ENTRY_PNPBIOS_DS],
+			 __va(header->fields.pm16dseg));
 	}
 }
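
Every exported call in bioscalls.c now follows the same two-level shape: a raw __pnp_bios_*() helper returns the BIOS status word, and a thin public wrapper logs any non-zero status through pnpbios_print_status() before passing it up. A minimal standalone sketch of that convention, with hypothetical names (the 0x82 value is only illustrative):

    #include <stdio.h>

    /* Hypothetical raw call: 0 on success, a PnP BIOS status code otherwise. */
    static int __pnp_bios_frob(int arg)
    {
        return arg < 0 ? 0x82 : 0;  /* 0x82 stands in for a failure status */
    }

    /* Thin wrapper mirroring the pattern above: log failures, forward status. */
    static int pnp_bios_frob(int arg)
    {
        int status = __pnp_bios_frob(arg);

        if (status)
            fprintf(stderr, "frob: status 0x%x\n", status);
        return status;
    }

    int main(void)
    {
        return pnp_bios_frob(-1) ? 1 : 0;
    }
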
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index ed112ee16012..3692a099b45f 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -32,7 +32,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
-
+
 /* Change Log
  *
  * Adam Belay - <ambx1@neo.rr.com> - March 16, 2003
@@ -71,14 +71,13 @@
 
 #include "pnpbios.h"
 
-
 /*
  *
  * PnP BIOS INTERFACE
 *
 */
 
-static union pnp_bios_install_struct * pnp_bios_install = NULL;
+static union pnp_bios_install_struct *pnp_bios_install = NULL;
 
 int pnp_bios_present(void)
 {
@@ -101,36 +100,35 @@ static struct completion unload_sem;
 /*
  * (Much of this belongs in a shared routine somewhere)
  */
-
 static int pnp_dock_event(int dock, struct pnp_docking_station_info *info)
 {
-	char *argv [3], **envp, *buf, *scratch;
+	char *argv[3], **envp, *buf, *scratch;
 	int i = 0, value;
 
-	if (!current->fs->root) {
+	if (!current->fs->root)
 		return -EAGAIN;
-	}
-	if (!(envp = kcalloc(20, sizeof (char *), GFP_KERNEL))) {
+	if (!(envp = kcalloc(20, sizeof(char *), GFP_KERNEL)))
 		return -ENOMEM;
-	}
 	if (!(buf = kzalloc(256, GFP_KERNEL))) {
-		kfree (envp);
+		kfree(envp);
 		return -ENOMEM;
 	}
 
-	/* FIXME: if there are actual users of this, it should be integrated into
-	 * the driver core and use the usual infrastructure like sysfs and uevents */
-	argv [0] = "/sbin/pnpbios";
-	argv [1] = "dock";
-	argv [2] = NULL;
+	/* FIXME: if there are actual users of this, it should be
+	 * integrated into the driver core and use the usual infrastructure
+	 * like sysfs and uevents
+	 */
+	argv[0] = "/sbin/pnpbios";
+	argv[1] = "dock";
+	argv[2] = NULL;
 
 	/* minimal command environment */
-	envp [i++] = "HOME=/";
-	envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+	envp[i++] = "HOME=/";
+	envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
 
 #ifdef DEBUG
 	/* hint that policy agent should enter no-stdout debug mode */
-	envp [i++] = "DEBUG=kernel";
+	envp[i++] = "DEBUG=kernel";
 #endif
 	/* extensible set of named bus-specific parameters,
 	 * supporting multiple driver selection algorithms.
@@ -138,33 +136,33 @@ static int pnp_dock_event(int dock, struct pnp_docking_station_info *info)
 	scratch = buf;
 
 	/* action: add, remove */
-	envp [i++] = scratch;
-	scratch += sprintf (scratch, "ACTION=%s", dock?"add":"remove") + 1;
+	envp[i++] = scratch;
+	scratch += sprintf(scratch, "ACTION=%s", dock ? "add" : "remove") + 1;
 
 	/* Report the ident for the dock */
-	envp [i++] = scratch;
-	scratch += sprintf (scratch, "DOCK=%x/%x/%x",
+	envp[i++] = scratch;
+	scratch += sprintf(scratch, "DOCK=%x/%x/%x",
 		info->location_id, info->serial, info->capabilities);
 	envp[i] = NULL;
 
-	value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC);
-	kfree (buf);
-	kfree (envp);
+	value = call_usermodehelper(argv [0], argv, envp, UMH_WAIT_EXEC);
+	kfree(buf);
+	kfree(envp);
 	return 0;
 }
 
 /*
  * Poll the PnP docking at regular intervals
  */
-static int pnp_dock_thread(void * unused)
+static int pnp_dock_thread(void *unused)
 {
 	static struct pnp_docking_station_info now;
 	int docked = -1, d = 0;
+
 	set_freezable();
-	while (!unloading)
-	{
+	while (!unloading) {
 		int status;
 
 		/*
 		 * Poll every 2 seconds
 		 */
@@ -175,30 +173,29 @@ static int pnp_dock_thread(void * unused)
 
 		status = pnp_bios_dock_station_info(&now);
 
-		switch(status)
-		{
+		switch (status) {
 			/*
 			 * No dock to manage
 			 */
 		case PNP_FUNCTION_NOT_SUPPORTED:
 			complete_and_exit(&unload_sem, 0);
 		case PNP_SYSTEM_NOT_DOCKED:
 			d = 0;
 			break;
 		case PNP_SUCCESS:
 			d = 1;
 			break;
 		default:
-			pnpbios_print_status( "pnp_dock_thread", status );
+			pnpbios_print_status("pnp_dock_thread", status);
 			continue;
 		}
-		if(d != docked)
-		{
-			if(pnp_dock_event(d, &now)==0)
-			{
+		if (d != docked) {
+			if (pnp_dock_event(d, &now) == 0) {
 				docked = d;
 #if 0
-				printk(KERN_INFO "PnPBIOS: Docking station %stached\n", docked?"at":"de");
+				printk(KERN_INFO
+				       "PnPBIOS: Docking station %stached\n",
+				       docked ? "at" : "de");
 #endif
 			}
 		}
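
The dock thread above is edge-triggered: it polls the BIOS every 2 seconds but only generates a dock event when the state actually flips. A rough userspace model of that loop (read_dock_state() is a made-up stand-in for pnp_bios_dock_station_info()):

    #include <stdio.h>
    #include <unistd.h>

    /* Fake dock status source; pretend the state flips every third poll. */
    static int read_dock_state(void)
    {
        static int calls;
        return (++calls / 3) & 1;
    }

    int main(void)
    {
        int docked = -1;  /* -1: unknown, so the first poll always reports */
        int i;

        for (i = 0; i < 10; i++) {
            int d = read_dock_state();

            if (d != docked) {  /* edge-triggered: act only on a change */
                printf("dock %stached\n", d ? "at" : "de");
                docked = d;
            }
            sleep(2);  /* poll every 2 seconds, as in the driver */
        }
        return 0;
    }
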
@@ -206,21 +203,21 @@ static int pnp_dock_thread(void * unused)
 	complete_and_exit(&unload_sem, 0);
 }
 
 #endif /* CONFIG_HOTPLUG */
 
-static int pnpbios_get_resources(struct pnp_dev * dev, struct pnp_resource_table * res)
+static int pnpbios_get_resources(struct pnp_dev *dev,
+				 struct pnp_resource_table *res)
 {
 	u8 nodenum = dev->number;
-	struct pnp_bios_node * node;
+	struct pnp_bios_node *node;
 
-	/* just in case */
-	if(!pnpbios_is_dynamic(dev))
+	if (!pnpbios_is_dynamic(dev))
 		return -EPERM;
 
 	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
 	if (!node)
 		return -1;
-	if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_DYNAMIC, node)) {
+	if (pnp_bios_get_dev_node(&nodenum, (char)PNPMODE_DYNAMIC, node)) {
 		kfree(node);
 		return -ENODEV;
 	}
@@ -230,24 +227,24 @@ static int pnpbios_get_resources(struct pnp_dev * dev, struct pnp_resource_table
 	return 0;
 }
 
-static int pnpbios_set_resources(struct pnp_dev * dev, struct pnp_resource_table * res)
+static int pnpbios_set_resources(struct pnp_dev *dev,
+				 struct pnp_resource_table *res)
 {
 	u8 nodenum = dev->number;
-	struct pnp_bios_node * node;
+	struct pnp_bios_node *node;
 	int ret;
 
-	/* just in case */
 	if (!pnpbios_is_dynamic(dev))
 		return -EPERM;
 
 	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
 	if (!node)
 		return -1;
-	if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_DYNAMIC, node)) {
+	if (pnp_bios_get_dev_node(&nodenum, (char)PNPMODE_DYNAMIC, node)) {
 		kfree(node);
 		return -ENODEV;
 	}
-	if(pnpbios_write_resources_to_node(res, node)<0) {
+	if (pnpbios_write_resources_to_node(res, node) < 0) {
 		kfree(node);
 		return -1;
 	}
@@ -258,18 +255,19 @@ static int pnpbios_set_resources(struct pnp_dev * dev, struct pnp_resource_table
 	return ret;
 }
 
-static void pnpbios_zero_data_stream(struct pnp_bios_node * node)
+static void pnpbios_zero_data_stream(struct pnp_bios_node *node)
 {
-	unsigned char * p = (char *)node->data;
-	unsigned char * end = (char *)(node->data + node->size);
+	unsigned char *p = (char *)node->data;
+	unsigned char *end = (char *)(node->data + node->size);
 	unsigned int len;
 	int i;
+
 	while ((char *)p < (char *)end) {
-		if(p[0] & 0x80) { /* large tag */
+		if (p[0] & 0x80) {	/* large tag */
 			len = (p[2] << 8) | p[1];
 			p += 3;
 		} else {
-			if (((p[0]>>3) & 0x0f) == 0x0f)
+			if (((p[0] >> 3) & 0x0f) == 0x0f)
 				return;
 			len = p[0] & 0x07;
 			p += 1;
@@ -278,24 +276,24 @@ static void pnpbios_zero_data_stream(struct pnp_bios_node * node)
 			p[i] = 0;
 		p += len;
 	}
-	printk(KERN_ERR "PnPBIOS: Resource structure did not contain an end tag.\n");
+	printk(KERN_ERR
+	       "PnPBIOS: Resource structure did not contain an end tag.\n");
 }
 
 static int pnpbios_disable_resources(struct pnp_dev *dev)
 {
-	struct pnp_bios_node * node;
+	struct pnp_bios_node *node;
 	u8 nodenum = dev->number;
 	int ret;
 
-	/* just in case */
-	if(dev->flags & PNPBIOS_NO_DISABLE || !pnpbios_is_dynamic(dev))
+	if (dev->flags & PNPBIOS_NO_DISABLE || !pnpbios_is_dynamic(dev))
 		return -EPERM;
 
 	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
 	if (!node)
 		return -ENOMEM;
 
-	if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_DYNAMIC, node)) {
+	if (pnp_bios_get_dev_node(&nodenum, (char)PNPMODE_DYNAMIC, node)) {
 		kfree(node);
 		return -ENODEV;
 	}
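
pnpbios_zero_data_stream() walks the node's resource stream using the standard PnP tag encoding: bit 7 of the first byte selects a large tag (16-bit length in the next two bytes, 3-byte header) or a small tag (3-bit length and 4-bit tag number packed into one byte), with small tag 0x0f marking the end. A self-contained sketch of that walk — the sample stream is invented:

    #include <stdio.h>

    /* Walk a PnP resource stream until the end tag (small tag 0x0f). */
    static void walk_tags(unsigned char *p, unsigned char *end)
    {
        while (p < end) {
            unsigned int len, tag;

            if (p[0] & 0x80) {          /* large tag: 3-byte header */
                tag = p[0];
                len = (p[2] << 8) | p[1];
                p += 3;
            } else {                    /* small tag: 1-byte header */
                tag = (p[0] >> 3) & 0x0f;
                len = p[0] & 0x07;
                p += 1;
                if (tag == 0x0f)        /* SMALL_TAG_END */
                    return;
            }
            printf("tag 0x%x, %u data bytes\n", tag, len);
            p += len;
        }
        fprintf(stderr, "no end tag\n");
    }

    int main(void)
    {
        /* small IRQ tag (0x22: tag 4, len 2), then end tag with checksum */
        unsigned char stream[] = { 0x22, 0x20, 0x00, 0x79, 0x00 };

        walk_tags(stream, stream + sizeof(stream));
        return 0;
    }
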
@@ -311,22 +309,22 @@ static int pnpbios_disable_resources(struct pnp_dev *dev)
 /* PnP Layer support */
 
 struct pnp_protocol pnpbios_protocol = {
 	.name = "Plug and Play BIOS",
 	.get = pnpbios_get_resources,
 	.set = pnpbios_set_resources,
 	.disable = pnpbios_disable_resources,
 };
 
-static int insert_device(struct pnp_dev *dev, struct pnp_bios_node * node)
+static int insert_device(struct pnp_dev *dev, struct pnp_bios_node *node)
 {
-	struct list_head * pos;
-	struct pnp_dev * pnp_dev;
+	struct list_head *pos;
+	struct pnp_dev *pnp_dev;
 	struct pnp_id *dev_id;
 	char id[8];
 
 	/* check if the device is already added */
 	dev->number = node->handle;
-	list_for_each (pos, &pnpbios_protocol.devices){
+	list_for_each(pos, &pnpbios_protocol.devices) {
 		pnp_dev = list_entry(pos, struct pnp_dev, protocol_list);
 		if (dev->number == pnp_dev->number)
 			return -1;
@@ -336,8 +334,8 @@ static int insert_device(struct pnp_dev *dev, struct pnp_bios_node * node)
 	dev_id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
 	if (!dev_id)
 		return -1;
-	pnpid32_to_pnpid(node->eisa_id,id);
-	memcpy(dev_id->id,id,7);
+	pnpid32_to_pnpid(node->eisa_id, id);
+	memcpy(dev_id->id, id, 7);
 	pnp_add_id(dev_id, dev);
 	pnpbios_parse_data_stream(dev, node);
 	dev->active = pnp_is_active(dev);
@@ -375,35 +373,41 @@ static void __init build_devlist(void)
 	if (!node)
 		return;
 
-	for(nodenum=0; nodenum<0xff; ) {
+	for (nodenum = 0; nodenum < 0xff;) {
 		u8 thisnodenum = nodenum;
 		/* eventually we will want to use PNPMODE_STATIC here but for now
 		 * dynamic will help us catch buggy bioses to add to the blacklist.
 		 */
 		if (!pnpbios_dont_use_current_config) {
-			if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_DYNAMIC, node))
+			if (pnp_bios_get_dev_node
+			    (&nodenum, (char)PNPMODE_DYNAMIC, node))
 				break;
 		} else {
-			if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_STATIC, node))
+			if (pnp_bios_get_dev_node
+			    (&nodenum, (char)PNPMODE_STATIC, node))
 				break;
 		}
 		nodes_got++;
-		dev = kzalloc(sizeof (struct pnp_dev), GFP_KERNEL);
+		dev = kzalloc(sizeof(struct pnp_dev), GFP_KERNEL);
 		if (!dev)
 			break;
-		if(insert_device(dev,node)<0)
+		if (insert_device(dev, node) < 0)
 			kfree(dev);
 		else
 			devs++;
 		if (nodenum <= thisnodenum) {
-			printk(KERN_ERR "PnPBIOS: build_devlist: Node number 0x%x is out of sequence following node 0x%x. Aborting.\n", (unsigned int)nodenum, (unsigned int)thisnodenum);
+			printk(KERN_ERR
+			       "PnPBIOS: build_devlist: Node number 0x%x is out of sequence following node 0x%x. Aborting.\n",
+			       (unsigned int)nodenum,
+			       (unsigned int)thisnodenum);
 			break;
 		}
 	}
 	kfree(node);
 
-	printk(KERN_INFO "PnPBIOS: %i node%s reported by PnP BIOS; %i recorded by driver\n",
-		nodes_got, nodes_got != 1 ? "s" : "", devs);
+	printk(KERN_INFO
+	       "PnPBIOS: %i node%s reported by PnP BIOS; %i recorded by driver\n",
+	       nodes_got, nodes_got != 1 ? "s" : "", devs);
 }
@@ -412,8 +416,8 @@ static void __init build_devlist(void)
  *
  */
 
-static int pnpbios_disabled; /* = 0 */
-int pnpbios_dont_use_current_config; /* = 0 */
+static int pnpbios_disabled;
+int pnpbios_dont_use_current_config;
 
 #ifndef MODULE
 static int __init pnpbios_setup(char *str)
@@ -422,9 +426,9 @@ static int __init pnpbios_setup(char *str)
 
 	while ((str != NULL) && (*str != '\0')) {
 		if (strncmp(str, "off", 3) == 0)
-			pnpbios_disabled=1;
+			pnpbios_disabled = 1;
 		if (strncmp(str, "on", 2) == 0)
-			pnpbios_disabled=0;
+			pnpbios_disabled = 0;
 		invert = (strncmp(str, "no-", 3) == 0);
 		if (invert)
 			str += 3;
@@ -453,35 +457,41 @@ static int __init pnpbios_probe_system(void)
 	printk(KERN_INFO "PnPBIOS: Scanning system for PnP BIOS support...\n");
 
 	/*
 	 * Search the defined area (0xf0000-0xffff0) for a valid PnP BIOS
 	 * structure and, if one is found, sets up the selectors and
 	 * entry points
 	 */
-	for (check = (union pnp_bios_install_struct *) __va(0xf0000);
-	     check < (union pnp_bios_install_struct *) __va(0xffff0);
+	for (check = (union pnp_bios_install_struct *)__va(0xf0000);
+	     check < (union pnp_bios_install_struct *)__va(0xffff0);
 	     check = (void *)check + 16) {
 		if (check->fields.signature != PNP_SIGNATURE)
 			continue;
-		printk(KERN_INFO "PnPBIOS: Found PnP BIOS installation structure at 0x%p\n", check);
+		printk(KERN_INFO
+		       "PnPBIOS: Found PnP BIOS installation structure at 0x%p\n",
+		       check);
 		length = check->fields.length;
 		if (!length) {
-			printk(KERN_ERR "PnPBIOS: installation structure is invalid, skipping\n");
+			printk(KERN_ERR
+			       "PnPBIOS: installation structure is invalid, skipping\n");
 			continue;
 		}
 		for (sum = 0, i = 0; i < length; i++)
 			sum += check->chars[i];
 		if (sum) {
-			printk(KERN_ERR "PnPBIOS: installation structure is corrupted, skipping\n");
+			printk(KERN_ERR
+			       "PnPBIOS: installation structure is corrupted, skipping\n");
 			continue;
 		}
 		if (check->fields.version < 0x10) {
-			printk(KERN_WARNING "PnPBIOS: PnP BIOS version %d.%d is not supported\n",
+			printk(KERN_WARNING
+			       "PnPBIOS: PnP BIOS version %d.%d is not supported\n",
 			       check->fields.version >> 4,
 			       check->fields.version & 15);
 			continue;
 		}
-		printk(KERN_INFO "PnPBIOS: PnP BIOS version %d.%d, entry 0x%x:0x%x, dseg 0x%x\n",
-		       check->fields.version >> 4, check->fields.version & 15,
+		printk(KERN_INFO
+		       "PnPBIOS: PnP BIOS version %d.%d, entry 0x%x:0x%x, dseg 0x%x\n",
+		       check->fields.version >> 4, check->fields.version & 15,
 		       check->fields.pm16cseg, check->fields.pm16offset,
 		       check->fields.pm16dseg);
 		pnp_bios_install = check;
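
pnpbios_probe_system() accepts a header only if its signature matches, its declared length is non-zero, and all its bytes sum to zero modulo 256. A standalone approximation of that scan over an in-memory image — the "$PnP" signature value and the length byte at offset 5 follow the published PnP BIOS layout but are assumptions here, and the memcpy() comparison presumes a little-endian host:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define SIG 0x506e5024  /* "$PnP" read as a little-endian 32-bit word */

    /* Scan 16-byte boundaries for a header whose bytes sum to 0 mod 256. */
    static const uint8_t *find_pnp_header(const uint8_t *img, size_t size)
    {
        size_t off;

        for (off = 0; off + 16 <= size; off += 16) {
            uint32_t sig;
            uint8_t sum = 0, len = img[off + 5];  /* length at offset 5 */
            size_t i;

            memcpy(&sig, img + off, 4);
            if (sig != SIG || !len || off + len > size)
                continue;
            for (i = 0; i < len; i++)
                sum += img[off + i];
            if (sum == 0)  /* whole structure checksums to zero */
                return img + off;
        }
        return NULL;
    }

    int main(void)
    {
        uint8_t img[32] = { 0 };
        /* "$PnP", version 1.0, length 8, then a byte chosen so sum == 0 */
        static const uint8_t hdr[8] =
            { 0x24, 0x50, 0x6e, 0x50, 0x10, 0x08, 0x00, 0xb6 };
        const uint8_t *p;

        memcpy(img + 16, hdr, 8);
        p = find_pnp_header(img, sizeof(img));
        if (p)
            printf("header at offset %ld\n", (long)(p - img));
        return 0;
    }
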
@@ -499,25 +509,25 @@ static int __init exploding_pnp_bios(struct dmi_system_id *d)
 }
 
 static struct dmi_system_id pnpbios_dmi_table[] __initdata = {
 	{			/* PnPBIOS GPF on boot */
 	 .callback = exploding_pnp_bios,
 	 .ident = "Higraded P14H",
 	 .matches = {
 		     DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
 		     DMI_MATCH(DMI_BIOS_VERSION, "07.00T"),
 		     DMI_MATCH(DMI_SYS_VENDOR, "Higraded"),
 		     DMI_MATCH(DMI_PRODUCT_NAME, "P14H"),
 		     },
 	 },
 	{			/* PnPBIOS GPF on boot */
 	 .callback = exploding_pnp_bios,
 	 .ident = "ASUS P4P800",
 	 .matches = {
 		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc."),
 		     DMI_MATCH(DMI_BOARD_NAME, "P4P800"),
 		     },
 	 },
-	{ }
+	{}
 };
 
 static int __init pnpbios_init(void)
@@ -533,14 +543,13 @@ static int __init pnpbios_init(void)
 		printk(KERN_INFO "PnPBIOS: Disabled\n");
 		return -ENODEV;
 	}
-
 #ifdef CONFIG_PNPACPI
 	if (!acpi_disabled && !pnpacpi_disabled) {
 		pnpbios_disabled = 1;
 		printk(KERN_INFO "PnPBIOS: Disabled by ACPI PNP\n");
 		return -ENODEV;
 	}
 #endif /* CONFIG_ACPI */
 
 	/* scan the system for pnpbios support */
 	if (!pnpbios_probe_system())
@@ -552,14 +561,16 @@ static int __init pnpbios_init(void)
 	/* read the node info */
 	ret = pnp_bios_dev_node_info(&node_info);
 	if (ret) {
-		printk(KERN_ERR "PnPBIOS: Unable to get node info. Aborting.\n");
+		printk(KERN_ERR
+		       "PnPBIOS: Unable to get node info. Aborting.\n");
 		return ret;
 	}
 
 	/* register with the pnp layer */
 	ret = pnp_register_protocol(&pnpbios_protocol);
 	if (ret) {
-		printk(KERN_ERR "PnPBIOS: Unable to register driver. Aborting.\n");
+		printk(KERN_ERR
+		       "PnPBIOS: Unable to register driver. Aborting.\n");
 		return ret;
 	}
 
diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c
index 8027073f7919..9c8c07701b65 100644
--- a/drivers/pnp/pnpbios/proc.c
+++ b/drivers/pnp/pnpbios/proc.c
@@ -18,9 +18,6 @@
  * The other files are human-readable.
  */
 
-//#include <pcmcia/config.h>
-//#include <pcmcia/k_compat.h>
-
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -37,42 +34,37 @@ static struct proc_dir_entry *proc_pnp = NULL;
 static struct proc_dir_entry *proc_pnp_boot = NULL;
 
 static int proc_read_pnpconfig(char *buf, char **start, off_t pos,
-				int count, int *eof, void *data)
+			       int count, int *eof, void *data)
 {
 	struct pnp_isa_config_struc pnps;
 
 	if (pnp_bios_isapnp_config(&pnps))
 		return -EIO;
 	return snprintf(buf, count,
 			"structure_revision %d\n"
 			"number_of_CSNs %d\n"
 			"ISA_read_data_port 0x%x\n",
-			pnps.revision,
-			pnps.no_csns,
-			pnps.isa_rd_data_port
-	);
+			pnps.revision, pnps.no_csns, pnps.isa_rd_data_port);
 }
 
 static int proc_read_escdinfo(char *buf, char **start, off_t pos,
-				int count, int *eof, void *data)
+			      int count, int *eof, void *data)
 {
 	struct escd_info_struc escd;
 
 	if (pnp_bios_escd_info(&escd))
 		return -EIO;
 	return snprintf(buf, count,
			"min_ESCD_write_size %d\n"
 			"ESCD_size %d\n"
 			"NVRAM_base 0x%x\n",
 			escd.min_escd_write_size,
-			escd.escd_size,
-			escd.nv_storage_base
-	);
+			escd.escd_size, escd.nv_storage_base);
 }
 
 #define MAX_SANE_ESCD_SIZE (32*1024)
 static int proc_read_escd(char *buf, char **start, off_t pos,
-				int count, int *eof, void *data)
+			  int count, int *eof, void *data)
 {
 	struct escd_info_struc escd;
 	char *tmpbuf;
@@ -83,30 +75,36 @@ static int proc_read_escd(char *buf, char **start, off_t pos,
 
 	/* sanity check */
 	if (escd.escd_size > MAX_SANE_ESCD_SIZE) {
-		printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size reported by BIOS escd_info call is too great\n");
+		printk(KERN_ERR
+		       "PnPBIOS: proc_read_escd: ESCD size reported by BIOS escd_info call is too great\n");
 		return -EFBIG;
 	}
 
 	tmpbuf = kzalloc(escd.escd_size, GFP_KERNEL);
-	if (!tmpbuf) return -ENOMEM;
+	if (!tmpbuf)
+		return -ENOMEM;
 
 	if (pnp_bios_read_escd(tmpbuf, escd.nv_storage_base)) {
 		kfree(tmpbuf);
 		return -EIO;
 	}
 
-	escd_size = (unsigned char)(tmpbuf[0]) + (unsigned char)(tmpbuf[1])*256;
+	escd_size =
+	    (unsigned char)(tmpbuf[0]) + (unsigned char)(tmpbuf[1]) * 256;
 
 	/* sanity check */
 	if (escd_size > MAX_SANE_ESCD_SIZE) {
-		printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size reported by BIOS read_escd call is too great\n");
+		printk(KERN_ERR
+		       "PnPBIOS: proc_read_escd: ESCD size reported by BIOS read_escd call is too great\n");
 		return -EFBIG;
 	}
 
 	escd_left_to_read = escd_size - pos;
-	if (escd_left_to_read < 0) escd_left_to_read = 0;
-	if (escd_left_to_read == 0) *eof = 1;
-	n = min(count,escd_left_to_read);
+	if (escd_left_to_read < 0)
+		escd_left_to_read = 0;
+	if (escd_left_to_read == 0)
+		*eof = 1;
+	n = min(count, escd_left_to_read);
 	memcpy(buf, tmpbuf + pos, n);
 	kfree(tmpbuf);
 	*start = buf;
114} 112}
115 113
116static int proc_read_legacyres(char *buf, char **start, off_t pos, 114static int proc_read_legacyres(char *buf, char **start, off_t pos,
117 int count, int *eof, void *data) 115 int count, int *eof, void *data)
118{ 116{
119 /* Assume that the following won't overflow the buffer */ 117 /* Assume that the following won't overflow the buffer */
120 if (pnp_bios_get_stat_res(buf)) 118 if (pnp_bios_get_stat_res(buf))
121 return -EIO; 119 return -EIO;
122 120
123 return count; // FIXME: Return actual length 121 return count; // FIXME: Return actual length
124} 122}
125 123
126static int proc_read_devices(char *buf, char **start, off_t pos, 124static int proc_read_devices(char *buf, char **start, off_t pos,
127 int count, int *eof, void *data) 125 int count, int *eof, void *data)
128{ 126{
129 struct pnp_bios_node *node; 127 struct pnp_bios_node *node;
130 u8 nodenum; 128 u8 nodenum;
@@ -134,9 +132,10 @@ static int proc_read_devices(char *buf, char **start, off_t pos,
 		return 0;
 
 	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
-	if (!node) return -ENOMEM;
+	if (!node)
+		return -ENOMEM;
 
-	for (nodenum=pos; nodenum<0xff; ) {
+	for (nodenum = pos; nodenum < 0xff;) {
 		u8 thisnodenum = nodenum;
 		/* 26 = the number of characters per line sprintf'ed */
 		if ((p - buf + 26) > count)
@@ -148,7 +147,11 @@ static int proc_read_devices(char *buf, char **start, off_t pos,
 			    node->type_code[0], node->type_code[1],
 			    node->type_code[2], node->flags);
 		if (nodenum <= thisnodenum) {
-			printk(KERN_ERR "%s Node number 0x%x is out of sequence following node 0x%x. Aborting.\n", "PnPBIOS: proc_read_devices:", (unsigned int)nodenum, (unsigned int)thisnodenum);
+			printk(KERN_ERR
+			       "%s Node number 0x%x is out of sequence following node 0x%x. Aborting.\n",
+			       "PnPBIOS: proc_read_devices:",
+			       (unsigned int)nodenum,
+			       (unsigned int)thisnodenum);
 			*eof = 1;
 			break;
 		}
@@ -156,12 +159,12 @@ static int proc_read_devices(char *buf, char **start, off_t pos,
 	kfree(node);
 	if (nodenum == 0xff)
 		*eof = 1;
-	*start = (char *)((off_t)nodenum - pos);
+	*start = (char *)((off_t) nodenum - pos);
 	return p - buf;
 }
 
 static int proc_read_node(char *buf, char **start, off_t pos,
-				int count, int *eof, void *data)
+			  int count, int *eof, void *data)
 {
 	struct pnp_bios_node *node;
 	int boot = (long)data >> 8;
@@ -169,7 +172,8 @@ static int proc_read_node(char *buf, char **start, off_t pos,
 	int len;
 
 	node = kzalloc(node_info.max_node_size, GFP_KERNEL);
-	if (!node) return -ENOMEM;
+	if (!node)
+		return -ENOMEM;
 	if (pnp_bios_get_dev_node(&nodenum, boot, node)) {
 		kfree(node);
 		return -EIO;
@@ -180,8 +184,8 @@ static int proc_read_node(char *buf, char **start, off_t pos,
 	return len;
 }
 
-static int proc_write_node(struct file *file, const char __user *buf,
-			unsigned long count, void *data)
+static int proc_write_node(struct file *file, const char __user * buf,
+			   unsigned long count, void *data)
 {
 	struct pnp_bios_node *node;
 	int boot = (long)data >> 8;
@@ -208,12 +212,12 @@ static int proc_write_node(struct file *file, const char __user *buf,
 		goto out;
 	}
 	ret = count;
-out:
+      out:
 	kfree(node);
 	return ret;
 }
 
-int pnpbios_interface_attach_device(struct pnp_bios_node * node)
+int pnpbios_interface_attach_device(struct pnp_bios_node *node)
 {
 	char name[3];
 	struct proc_dir_entry *ent;
@@ -222,7 +226,7 @@ int pnpbios_interface_attach_device(struct pnp_bios_node * node)
 
 	if (!proc_pnp)
 		return -EIO;
-	if ( !pnpbios_dont_use_current_config ) {
+	if (!pnpbios_dont_use_current_config) {
 		ent = create_proc_entry(name, 0, proc_pnp);
 		if (ent) {
 			ent->read_proc = proc_read_node;
@@ -237,7 +241,7 @@ int pnpbios_interface_attach_device(struct pnp_bios_node * node)
 		if (ent) {
 			ent->read_proc = proc_read_node;
 			ent->write_proc = proc_write_node;
-			ent->data = (void *)(long)(node->handle+0x100);
+			ent->data = (void *)(long)(node->handle + 0x100);
 			return 0;
 		}
 
@@ -249,7 +253,7 @@ int pnpbios_interface_attach_device(struct pnp_bios_node * node)
  * work and the pnpbios_dont_use_current_config flag
  * should already have been set to the appropriate value
  */
-int __init pnpbios_proc_init( void )
+int __init pnpbios_proc_init(void)
 {
 	proc_pnp = proc_mkdir("pnp", proc_bus);
 	if (!proc_pnp)
@@ -258,10 +262,13 @@ int __init pnpbios_proc_init( void )
 	if (!proc_pnp_boot)
 		return -EIO;
 	create_proc_read_entry("devices", 0, proc_pnp, proc_read_devices, NULL);
-	create_proc_read_entry("configuration_info", 0, proc_pnp, proc_read_pnpconfig, NULL);
-	create_proc_read_entry("escd_info", 0, proc_pnp, proc_read_escdinfo, NULL);
+	create_proc_read_entry("configuration_info", 0, proc_pnp,
+			       proc_read_pnpconfig, NULL);
+	create_proc_read_entry("escd_info", 0, proc_pnp, proc_read_escdinfo,
+			       NULL);
 	create_proc_read_entry("escd", S_IRUSR, proc_pnp, proc_read_escd, NULL);
-	create_proc_read_entry("legacy_device_resources", 0, proc_pnp, proc_read_legacyres, NULL);
+	create_proc_read_entry("legacy_device_resources", 0, proc_pnp,
+			       proc_read_legacyres, NULL);
 
 	return 0;
 }
@@ -274,9 +281,9 @@ void __exit pnpbios_proc_exit(void)
 	if (!proc_pnp)
 		return;
 
-	for (i=0; i<0xff; i++) {
+	for (i = 0; i < 0xff; i++) {
 		sprintf(name, "%02x", i);
-		if ( !pnpbios_dont_use_current_config )
+		if (!pnpbios_dont_use_current_config)
 			remove_proc_entry(name, proc_pnp);
 		remove_proc_entry(name, proc_pnp_boot);
 	}
@@ -287,6 +294,4 @@ void __exit pnpbios_proc_exit(void)
 	remove_proc_entry("devices", proc_pnp);
 	remove_proc_entry("boot", proc_pnp);
 	remove_proc_entry("pnp", proc_bus);
-
-	return;
 }
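
All of the proc handlers above implement the legacy read_proc contract: produce at most count bytes describing offset pos, flag *eof once the data is exhausted, and return the byte count. A userspace model of that contract (model_read_proc() is a hypothetical name, not a kernel API):

    #include <stdio.h>
    #include <string.h>

    /* Fill buf with up to count bytes starting at pos; set *eof when the
     * backing data runs out; return the number of bytes produced. */
    static int model_read_proc(char *buf, long pos, int count, int *eof,
                               const char *data, int datalen)
    {
        int left = datalen - (int)pos;

        if (left <= 0) {
            *eof = 1;
            return 0;
        }
        if (left <= count)
            *eof = 1;  /* this call drains the remainder */
        if (count < left)
            left = count;
        memcpy(buf, data + pos, left);
        return left;
    }

    int main(void)
    {
        const char data[] = "structure_revision 1\n";
        char buf[8];
        int eof = 0;
        long pos = 0;

        while (!eof) {
            int n = model_read_proc(buf, pos, sizeof(buf), &eof,
                                    data, sizeof(data) - 1);
            fwrite(buf, 1, n, stdout);
            pos += n;
        }
        return 0;
    }
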
diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c
index 3c2ab8394e3f..04ecd7b67230 100644
--- a/drivers/pnp/pnpbios/rsparser.c
+++ b/drivers/pnp/pnpbios/rsparser.c
@@ -1,6 +1,5 @@
 /*
  * rsparser.c - parses and encodes pnpbios resource data streams
- *
 */
 
 #include <linux/ctype.h>
6#include <linux/ctype.h> 5#include <linux/ctype.h>
@@ -12,8 +11,10 @@
12#ifdef CONFIG_PCI 11#ifdef CONFIG_PCI
13#include <linux/pci.h> 12#include <linux/pci.h>
14#else 13#else
15inline void pcibios_penalize_isa_irq(int irq, int active) {} 14inline void pcibios_penalize_isa_irq(int irq, int active)
16#endif /* CONFIG_PCI */ 15{
16}
17#endif /* CONFIG_PCI */
17 18
18#include "pnpbios.h" 19#include "pnpbios.h"
19 20
@@ -52,75 +53,88 @@ inline void pcibios_penalize_isa_irq(int irq, int active) {}
  * Allocated Resources
  */
 
-static void
-pnpbios_parse_allocated_irqresource(struct pnp_resource_table * res, int irq)
+static void pnpbios_parse_allocated_irqresource(struct pnp_resource_table *res,
+						int irq)
 {
 	int i = 0;
-	while (!(res->irq_resource[i].flags & IORESOURCE_UNSET) && i < PNP_MAX_IRQ) i++;
+
+	while (!(res->irq_resource[i].flags & IORESOURCE_UNSET)
+	       && i < PNP_MAX_IRQ)
+		i++;
 	if (i < PNP_MAX_IRQ) {
 		res->irq_resource[i].flags = IORESOURCE_IRQ;	// Also clears _UNSET flag
 		if (irq == -1) {
 			res->irq_resource[i].flags |= IORESOURCE_DISABLED;
 			return;
 		}
 		res->irq_resource[i].start =
-		res->irq_resource[i].end = (unsigned long) irq;
+		    res->irq_resource[i].end = (unsigned long)irq;
 		pcibios_penalize_isa_irq(irq, 1);
 	}
 }
 
-static void
-pnpbios_parse_allocated_dmaresource(struct pnp_resource_table * res, int dma)
+static void pnpbios_parse_allocated_dmaresource(struct pnp_resource_table *res,
+						int dma)
 {
 	int i = 0;
+
 	while (i < PNP_MAX_DMA &&
-		!(res->dma_resource[i].flags & IORESOURCE_UNSET))
+	       !(res->dma_resource[i].flags & IORESOURCE_UNSET))
 		i++;
 	if (i < PNP_MAX_DMA) {
 		res->dma_resource[i].flags = IORESOURCE_DMA;	// Also clears _UNSET flag
 		if (dma == -1) {
 			res->dma_resource[i].flags |= IORESOURCE_DISABLED;
 			return;
 		}
 		res->dma_resource[i].start =
-		res->dma_resource[i].end = (unsigned long) dma;
+		    res->dma_resource[i].end = (unsigned long)dma;
 	}
 }
 
-static void
-pnpbios_parse_allocated_ioresource(struct pnp_resource_table * res, int io, int len)
+static void pnpbios_parse_allocated_ioresource(struct pnp_resource_table *res,
+					       int io, int len)
 {
 	int i = 0;
-	while (!(res->port_resource[i].flags & IORESOURCE_UNSET) && i < PNP_MAX_PORT) i++;
+
+	while (!(res->port_resource[i].flags & IORESOURCE_UNSET)
+	       && i < PNP_MAX_PORT)
+		i++;
 	if (i < PNP_MAX_PORT) {
 		res->port_resource[i].flags = IORESOURCE_IO;	// Also clears _UNSET flag
-		if (len <= 0 || (io + len -1) >= 0x10003) {
+		if (len <= 0 || (io + len - 1) >= 0x10003) {
 			res->port_resource[i].flags |= IORESOURCE_DISABLED;
 			return;
 		}
-		res->port_resource[i].start = (unsigned long) io;
+		res->port_resource[i].start = (unsigned long)io;
 		res->port_resource[i].end = (unsigned long)(io + len - 1);
 	}
 }
 
-static void
-pnpbios_parse_allocated_memresource(struct pnp_resource_table * res, int mem, int len)
+static void pnpbios_parse_allocated_memresource(struct pnp_resource_table *res,
+						int mem, int len)
 {
 	int i = 0;
-	while (!(res->mem_resource[i].flags & IORESOURCE_UNSET) && i < PNP_MAX_MEM) i++;
+
+	while (!(res->mem_resource[i].flags & IORESOURCE_UNSET)
+	       && i < PNP_MAX_MEM)
+		i++;
 	if (i < PNP_MAX_MEM) {
 		res->mem_resource[i].flags = IORESOURCE_MEM;	// Also clears _UNSET flag
 		if (len <= 0) {
 			res->mem_resource[i].flags |= IORESOURCE_DISABLED;
 			return;
 		}
-		res->mem_resource[i].start = (unsigned long) mem;
+		res->mem_resource[i].start = (unsigned long)mem;
 		res->mem_resource[i].end = (unsigned long)(mem + len - 1);
 	}
 }
 
-static unsigned char *
-pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, struct pnp_resource_table * res)
+static unsigned char *pnpbios_parse_allocated_resource_data(unsigned char *p,
+							    unsigned char *end,
+							    struct
							    pnp_resource_table
+							    *res)
 {
 	unsigned int len, tag;
 	int io, size, mask, i;
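
The four parsers above share one insertion idiom: scan the fixed-size table for the first slot still flagged IORESOURCE_UNSET, claim it, and silently drop the resource if the table is full. (Note that only the DMA variant bounds-checks i before indexing; the IRQ, port, and memory variants test the flags first.) A reduced sketch of the idiom with invented names:

    #include <stdio.h>

    #define MAX_IRQ 2
    #define UNSET   0x1  /* stands in for IORESOURCE_UNSET */

    struct slot { unsigned int flags; unsigned long start, end; };

    /* First-free-slot insertion: claim the first UNSET slot, or drop. */
    static void add_irq(struct slot *tbl, int irq)
    {
        int i = 0;

        while (i < MAX_IRQ && !(tbl[i].flags & UNSET))
            i++;
        if (i < MAX_IRQ) {
            tbl[i].flags = 0;  /* claimed; also clears UNSET */
            tbl[i].start = tbl[i].end = (unsigned long)irq;
        }
    }

    int main(void)
    {
        struct slot tbl[MAX_IRQ] = { { UNSET, 0, 0 }, { UNSET, 0, 0 } };

        add_irq(tbl, 5);
        add_irq(tbl, 10);
        add_irq(tbl, 11);  /* dropped: table full */
        printf("%lu %lu\n", tbl[0].start, tbl[1].start);  /* 5 10 */
        return 0;
    }
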
@@ -134,12 +148,12 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
 	while ((char *)p < (char *)end) {
 
 		/* determine the type of tag */
 		if (p[0] & LARGE_TAG) {	/* large tag */
 			len = (p[2] << 8) | p[1];
 			tag = p[0];
 		} else {	/* small tag */
 			len = p[0] & 0x07;
-			tag = ((p[0]>>3) & 0x0f);
+			tag = ((p[0] >> 3) & 0x0f);
 		}
 
 		switch (tag) {
@@ -147,8 +161,8 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
 		case LARGE_TAG_MEM:
 			if (len != 9)
 				goto len_err;
-			io = *(short *) &p[4];
-			size = *(short *) &p[10];
+			io = *(short *)&p[4];
+			size = *(short *)&p[10];
 			pnpbios_parse_allocated_memresource(res, io, size);
 			break;
 
@@ -163,16 +177,16 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
 		case LARGE_TAG_MEM32:
 			if (len != 17)
 				goto len_err;
-			io = *(int *) &p[4];
-			size = *(int *) &p[16];
+			io = *(int *)&p[4];
+			size = *(int *)&p[16];
 			pnpbios_parse_allocated_memresource(res, io, size);
 			break;
 
 		case LARGE_TAG_FIXEDMEM32:
 			if (len != 9)
 				goto len_err;
-			io = *(int *) &p[4];
-			size = *(int *) &p[8];
+			io = *(int *)&p[4];
+			size = *(int *)&p[8];
 			pnpbios_parse_allocated_memresource(res, io, size);
 			break;
 
@@ -180,9 +194,10 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
 			if (len < 2 || len > 3)
 				goto len_err;
 			io = -1;
-			mask= p[1] + p[2]*256;
-			for (i=0;i<16;i++, mask=mask>>1)
-				if(mask & 0x01) io=i;
+			mask = p[1] + p[2] * 256;
+			for (i = 0; i < 16; i++, mask = mask >> 1)
+				if (mask & 0x01)
+					io = i;
 			pnpbios_parse_allocated_irqresource(res, io);
 			break;
 
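
The small IRQ tag carries a 16-bit mask assembled from p[1] and p[2]; the decode loop above keeps the highest set bit, so a multi-bit mask resolves to the largest IRQ number, and an empty mask leaves io at -1, which the caller records as a disabled resource. The loop in isolation:

    #include <stdio.h>

    /* Return the highest set bit of a 16-bit IRQ mask, or -1 if empty. */
    static int irq_from_mask(unsigned int mask)
    {
        int i, io = -1;

        for (i = 0; i < 16; i++, mask = mask >> 1)
            if (mask & 0x01)
                io = i;
        return io;
    }

    int main(void)
    {
        printf("%d\n", irq_from_mask(0x0020));  /* bit 5 only -> 5 */
        printf("%d\n", irq_from_mask(0x0028));  /* bits 3 and 5 -> 5 */
        printf("%d\n", irq_from_mask(0x0000));  /* empty -> -1 */
        return 0;
    }
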
@@ -191,15 +206,16 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
 				goto len_err;
 			io = -1;
 			mask = p[1];
-			for (i=0;i<8;i++, mask = mask>>1)
-				if(mask & 0x01) io=i;
+			for (i = 0; i < 8; i++, mask = mask >> 1)
+				if (mask & 0x01)
+					io = i;
 			pnpbios_parse_allocated_dmaresource(res, io);
 			break;
 
 		case SMALL_TAG_PORT:
 			if (len != 7)
 				goto len_err;
-			io = p[2] + p[3] *256;
+			io = p[2] + p[3] * 256;
 			size = p[7];
 			pnpbios_parse_allocated_ioresource(res, io, size);
 			break;
@@ -218,12 +234,14 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
 
 		case SMALL_TAG_END:
 			p = p + 2;
 			return (unsigned char *)p;
 			break;
 
-		default: /* an unkown tag */
-		len_err:
-			printk(KERN_ERR "PnPBIOS: Unknown tag '0x%x', length '%d'.\n", tag, len);
+		default:	/* an unkown tag */
+		      len_err:
+			printk(KERN_ERR
+			       "PnPBIOS: Unknown tag '0x%x', length '%d'.\n",
+			       tag, len);
 			break;
 		}
 
@@ -234,20 +252,21 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
 		p += len + 1;
 	}
 
-	printk(KERN_ERR "PnPBIOS: Resource structure does not contain an end tag.\n");
+	printk(KERN_ERR
+	       "PnPBIOS: Resource structure does not contain an end tag.\n");
 
 	return NULL;
 }
 
-
 /*
  * Resource Configuration Options
 */
 
-static void
-pnpbios_parse_mem_option(unsigned char *p, int size, struct pnp_option *option)
+static void pnpbios_parse_mem_option(unsigned char *p, int size,
+				     struct pnp_option *option)
 {
-	struct pnp_mem * mem;
+	struct pnp_mem *mem;
+
 	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
@@ -256,14 +275,14 @@ pnpbios_parse_mem_option(unsigned char *p, int size, struct pnp_option *option)
 	mem->align = (p[9] << 8) | p[8];
 	mem->size = ((p[11] << 8) | p[10]) << 8;
 	mem->flags = p[3];
-	pnp_register_mem_resource(option,mem);
-	return;
+	pnp_register_mem_resource(option, mem);
 }
 
-static void
-pnpbios_parse_mem32_option(unsigned char *p, int size, struct pnp_option *option)
+static void pnpbios_parse_mem32_option(unsigned char *p, int size,
+				       struct pnp_option *option)
 {
-	struct pnp_mem * mem;
+	struct pnp_mem *mem;
+
 	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
@@ -272,14 +291,13 @@ pnpbios_parse_mem32_option(unsigned char *p, int size, struct pnp_option *option
 	mem->align = (p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12];
 	mem->size = (p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16];
 	mem->flags = p[3];
-	pnp_register_mem_resource(option,mem);
-	return;
+	pnp_register_mem_resource(option, mem);
 }
 
-static void
-pnpbios_parse_fixed_mem32_option(unsigned char *p, int size, struct pnp_option *option)
+static void pnpbios_parse_fixed_mem32_option(unsigned char *p, int size,
+					     struct pnp_option *option)
 {
-	struct pnp_mem * mem;
+	struct pnp_mem *mem;
 	mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
 	if (!mem)
 		return;
@@ -287,14 +305,13 @@ pnpbios_parse_fixed_mem32_option(unsigned char *p, int size, struct pnp_option *
 	mem->size = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8];
 	mem->align = 0;
 	mem->flags = p[3];
-	pnp_register_mem_resource(option,mem);
-	return;
+	pnp_register_mem_resource(option, mem);
 }
 
-static void
-pnpbios_parse_irq_option(unsigned char *p, int size, struct pnp_option *option)
+static void pnpbios_parse_irq_option(unsigned char *p, int size,
+				     struct pnp_option *option)
 {
-	struct pnp_irq * irq;
+	struct pnp_irq *irq;
 	unsigned long bits;
 
 	irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL);
@@ -306,27 +323,27 @@ pnpbios_parse_irq_option(unsigned char *p, int size, struct pnp_option *option)
 		irq->flags = p[3];
 	else
 		irq->flags = IORESOURCE_IRQ_HIGHEDGE;
-	pnp_register_irq_resource(option,irq);
-	return;
+	pnp_register_irq_resource(option, irq);
 }
 
-static void
-pnpbios_parse_dma_option(unsigned char *p, int size, struct pnp_option *option)
+static void pnpbios_parse_dma_option(unsigned char *p, int size,
+				     struct pnp_option *option)
 {
-	struct pnp_dma * dma;
+	struct pnp_dma *dma;
+
 	dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL);
 	if (!dma)
 		return;
 	dma->map = p[1];
 	dma->flags = p[2];
-	pnp_register_dma_resource(option,dma);
-	return;
+	pnp_register_dma_resource(option, dma);
 }
 
-static void
-pnpbios_parse_port_option(unsigned char *p, int size, struct pnp_option *option)
+static void pnpbios_parse_port_option(unsigned char *p, int size,
+				      struct pnp_option *option)
 {
-	struct pnp_port * port;
+	struct pnp_port *port;
+
 	port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
 	if (!port)
 		return;
@@ -335,14 +352,14 @@ pnpbios_parse_port_option(unsigned char *p, int size, struct pnp_option *option)
 	port->align = p[6];
 	port->size = p[7];
 	port->flags = p[1] ? PNP_PORT_FLAG_16BITADDR : 0;
-	pnp_register_port_resource(option,port);
-	return;
+	pnp_register_port_resource(option, port);
 }
 
-static void
-pnpbios_parse_fixed_port_option(unsigned char *p, int size, struct pnp_option *option)
+static void pnpbios_parse_fixed_port_option(unsigned char *p, int size,
+					    struct pnp_option *option)
 {
-	struct pnp_port * port;
+	struct pnp_port *port;
+
 	port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
 	if (!port)
 		return;
@@ -350,12 +367,12 @@ pnpbios_parse_fixed_port_option(unsigned char *p, int size, struct pnp_option *o
 	port->size = p[3];
 	port->align = 0;
 	port->flags = PNP_PORT_FLAG_FIXED;
-	pnp_register_port_resource(option,port);
-	return;
+	pnp_register_port_resource(option, port);
 }
 
-static unsigned char *
-pnpbios_parse_resource_option_data(unsigned char * p, unsigned char * end, struct pnp_dev *dev)
+static unsigned char *pnpbios_parse_resource_option_data(unsigned char *p,
+							 unsigned char *end,
+							 struct pnp_dev *dev)
 {
 	unsigned int len, tag;
 	int priority = 0;
@@ -371,12 +388,12 @@ pnpbios_parse_resource_option_data(unsigned char * p, unsigned char * end, struc
 	while ((char *)p < (char *)end) {
 
 		/* determine the type of tag */
 		if (p[0] & LARGE_TAG) {	/* large tag */
 			len = (p[2] << 8) | p[1];
 			tag = p[0];
 		} else {	/* small tag */
 			len = p[0] & 0x07;
-			tag = ((p[0]>>3) & 0x0f);
+			tag = ((p[0] >> 3) & 0x0f);
 		}
 
 		switch (tag) {
@@ -442,16 +459,19 @@ pnpbios_parse_resource_option_data(unsigned char * p, unsigned char * end, struc
 			if (len != 0)
 				goto len_err;
 			if (option_independent == option)
-				printk(KERN_WARNING "PnPBIOS: Missing SMALL_TAG_STARTDEP tag\n");
+				printk(KERN_WARNING
+				       "PnPBIOS: Missing SMALL_TAG_STARTDEP tag\n");
 			option = option_independent;
 			break;
 
 		case SMALL_TAG_END:
 			return p + 2;
 
-		default: /* an unkown tag */
-		len_err:
-			printk(KERN_ERR "PnPBIOS: Unknown tag '0x%x', length '%d'.\n", tag, len);
+		default:	/* an unkown tag */
+		      len_err:
+			printk(KERN_ERR
+			       "PnPBIOS: Unknown tag '0x%x', length '%d'.\n",
+			       tag, len);
 			break;
 		}
 
@@ -462,19 +482,18 @@ pnpbios_parse_resource_option_data(unsigned char * p, unsigned char * end, struc
 		p += len + 1;
 	}
 
-	printk(KERN_ERR "PnPBIOS: Resource structure does not contain an end tag.\n");
+	printk(KERN_ERR
+	       "PnPBIOS: Resource structure does not contain an end tag.\n");
 
 	return NULL;
 }
 
-
 /*
  * Compatible Device IDs
 */
 
 #define HEX(id,a) hex[((id)>>a) & 15]
 #define CHAR(id,a) (0x40 + (((id)>>a) & 31))
-//
 
 void pnpid32_to_pnpid(u32 id, char *str)
 {
@@ -483,21 +502,20 @@ void pnpid32_to_pnpid(u32 id, char *str)
 	id = be32_to_cpu(id);
 	str[0] = CHAR(id, 26);
 	str[1] = CHAR(id, 21);
-	str[2] = CHAR(id,16);
+	str[2] = CHAR(id, 16);
 	str[3] = HEX(id, 12);
 	str[4] = HEX(id, 8);
 	str[5] = HEX(id, 4);
 	str[6] = HEX(id, 0);
 	str[7] = '\0';
-
-	return;
 }
-//
+
 #undef CHAR
 #undef HEX
 
-static unsigned char *
-pnpbios_parse_compatible_ids(unsigned char *p, unsigned char *end, struct pnp_dev *dev)
+static unsigned char *pnpbios_parse_compatible_ids(unsigned char *p,
+						   unsigned char *end,
+						   struct pnp_dev *dev)
 {
 	int len, tag;
 	char id[8];
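
pnpid32_to_pnpid() expands a 32-bit EISA ID — three 5-bit compressed-ASCII vendor letters ('A' encoded as 1) followed by four hex nibbles — after byte-swapping it with be32_to_cpu(), since the node stores the ID big-endian. A standalone mirror of the HEX/CHAR macros (the uppercase hex table is an assumption):

    #include <stdio.h>
    #include <stdint.h>

    /* Expand a CPU-order EISA ID into its seven-character string form. */
    static void eisa_id_to_string(uint32_t id, char *str)
    {
        static const char hex[] = "0123456789ABCDEF";

        str[0] = 0x40 + ((id >> 26) & 31);  /* three compressed letters */
        str[1] = 0x40 + ((id >> 21) & 31);
        str[2] = 0x40 + ((id >> 16) & 31);
        str[3] = hex[(id >> 12) & 15];      /* four hex digits */
        str[4] = hex[(id >> 8) & 15];
        str[5] = hex[(id >> 4) & 15];
        str[6] = hex[id & 15];
        str[7] = '\0';
    }

    int main(void)
    {
        char str[8];

        /* 0x41d00c01: 'P'=16, 'N'=14, 'P'=16 in the top bits, then 0x0c01 */
        eisa_id_to_string(0x41d00c01, str);
        printf("%s\n", str);  /* PNP0C01 */
        return 0;
    }
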
@@ -509,40 +527,45 @@ pnpbios_parse_compatible_ids(unsigned char *p, unsigned char *end, struct pnp_de
509 while ((char *)p < (char *)end) { 527 while ((char *)p < (char *)end) {
510 528
511 /* determine the type of tag */ 529 /* determine the type of tag */
512 if (p[0] & LARGE_TAG) { /* large tag */ 530 if (p[0] & LARGE_TAG) { /* large tag */
513 len = (p[2] << 8) | p[1]; 531 len = (p[2] << 8) | p[1];
514 tag = p[0]; 532 tag = p[0];
515 } else { /* small tag */ 533 } else { /* small tag */
516 len = p[0] & 0x07; 534 len = p[0] & 0x07;
517 tag = ((p[0]>>3) & 0x0f); 535 tag = ((p[0] >> 3) & 0x0f);
518 } 536 }
519 537
520 switch (tag) { 538 switch (tag) {
521 539
522 case LARGE_TAG_ANSISTR: 540 case LARGE_TAG_ANSISTR:
523 strncpy(dev->name, p + 3, len >= PNP_NAME_LEN ? PNP_NAME_LEN - 2 : len); 541 strncpy(dev->name, p + 3,
524 dev->name[len >= PNP_NAME_LEN ? PNP_NAME_LEN - 1 : len] = '\0'; 542 len >= PNP_NAME_LEN ? PNP_NAME_LEN - 2 : len);
543 dev->name[len >=
544 PNP_NAME_LEN ? PNP_NAME_LEN - 1 : len] = '\0';
525 break; 545 break;
526 546
527 case SMALL_TAG_COMPATDEVID: /* compatible ID */ 547 case SMALL_TAG_COMPATDEVID: /* compatible ID */
528 if (len != 4) 548 if (len != 4)
529 goto len_err; 549 goto len_err;
530 dev_id = kzalloc(sizeof (struct pnp_id), GFP_KERNEL); 550 dev_id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
531 if (!dev_id) 551 if (!dev_id)
532 return NULL; 552 return NULL;
533 pnpid32_to_pnpid(p[1] | p[2] << 8 | p[3] << 16 | p[4] << 24,id); 553 pnpid32_to_pnpid(p[1] | p[2] << 8 | p[3] << 16 | p[4] <<
554 24, id);
534 memcpy(&dev_id->id, id, 7); 555 memcpy(&dev_id->id, id, 7);
535 pnp_add_id(dev_id, dev); 556 pnp_add_id(dev_id, dev);
536 break; 557 break;
537 558
538 case SMALL_TAG_END: 559 case SMALL_TAG_END:
539 p = p + 2; 560 p = p + 2;
540 return (unsigned char *)p; 561 return (unsigned char *)p;
541 break; 562 break;
542 563
543			default: /* an unknown tag */ 564			default: /* an unknown tag */
544 len_err: 565 len_err:
545 printk(KERN_ERR "PnPBIOS: Unknown tag '0x%x', length '%d'.\n", tag, len); 566 printk(KERN_ERR
567 "PnPBIOS: Unknown tag '0x%x', length '%d'.\n",
568 tag, len);
546 break; 569 break;
547 } 570 }
548 571
@@ -553,33 +576,34 @@ pnpbios_parse_compatible_ids(unsigned char *p, unsigned char *end, struct pnp_de
553 p += len + 1; 576 p += len + 1;
554 } 577 }
555 578
556 printk(KERN_ERR "PnPBIOS: Resource structure does not contain an end tag.\n"); 579 printk(KERN_ERR
580 "PnPBIOS: Resource structure does not contain an end tag.\n");
557 581
558 return NULL; 582 return NULL;
559} 583}
560 584
561
562/* 585/*
563 * Allocated Resource Encoding 586 * Allocated Resource Encoding
564 */ 587 */
565 588
566static void pnpbios_encode_mem(unsigned char *p, struct resource * res) 589static void pnpbios_encode_mem(unsigned char *p, struct resource *res)
567{ 590{
568 unsigned long base = res->start; 591 unsigned long base = res->start;
569 unsigned long len = res->end - res->start + 1; 592 unsigned long len = res->end - res->start + 1;
593
570 p[4] = (base >> 8) & 0xff; 594 p[4] = (base >> 8) & 0xff;
571 p[5] = ((base >> 8) >> 8) & 0xff; 595 p[5] = ((base >> 8) >> 8) & 0xff;
572 p[6] = (base >> 8) & 0xff; 596 p[6] = (base >> 8) & 0xff;
573 p[7] = ((base >> 8) >> 8) & 0xff; 597 p[7] = ((base >> 8) >> 8) & 0xff;
574 p[10] = (len >> 8) & 0xff; 598 p[10] = (len >> 8) & 0xff;
575 p[11] = ((len >> 8) >> 8) & 0xff; 599 p[11] = ((len >> 8) >> 8) & 0xff;
576 return;
577} 600}
578 601
579static void pnpbios_encode_mem32(unsigned char *p, struct resource * res) 602static void pnpbios_encode_mem32(unsigned char *p, struct resource *res)
580{ 603{
581 unsigned long base = res->start; 604 unsigned long base = res->start;
582 unsigned long len = res->end - res->start + 1; 605 unsigned long len = res->end - res->start + 1;
606
583 p[4] = base & 0xff; 607 p[4] = base & 0xff;
584 p[5] = (base >> 8) & 0xff; 608 p[5] = (base >> 8) & 0xff;
585 p[6] = (base >> 16) & 0xff; 609 p[6] = (base >> 16) & 0xff;
@@ -592,12 +616,13 @@ static void pnpbios_encode_mem32(unsigned char *p, struct resource * res)
592 p[17] = (len >> 8) & 0xff; 616 p[17] = (len >> 8) & 0xff;
593 p[18] = (len >> 16) & 0xff; 617 p[18] = (len >> 16) & 0xff;
594 p[19] = (len >> 24) & 0xff; 618 p[19] = (len >> 24) & 0xff;
595 return;
596} 619}
597 620
598static void pnpbios_encode_fixed_mem32(unsigned char *p, struct resource * res) 621static void pnpbios_encode_fixed_mem32(unsigned char *p, struct resource *res)
599{ unsigned long base = res->start; 622{
623 unsigned long base = res->start;
600 unsigned long len = res->end - res->start + 1; 624 unsigned long len = res->end - res->start + 1;
625
601 p[4] = base & 0xff; 626 p[4] = base & 0xff;
602 p[5] = (base >> 8) & 0xff; 627 p[5] = (base >> 8) & 0xff;
603 p[6] = (base >> 16) & 0xff; 628 p[6] = (base >> 16) & 0xff;
@@ -606,50 +631,52 @@ static void pnpbios_encode_fixed_mem32(unsigned char *p, struct resource * res)
606 p[9] = (len >> 8) & 0xff; 631 p[9] = (len >> 8) & 0xff;
607 p[10] = (len >> 16) & 0xff; 632 p[10] = (len >> 16) & 0xff;
608 p[11] = (len >> 24) & 0xff; 633 p[11] = (len >> 24) & 0xff;
609 return;
610} 634}
611 635
612static void pnpbios_encode_irq(unsigned char *p, struct resource * res) 636static void pnpbios_encode_irq(unsigned char *p, struct resource *res)
613{ 637{
614 unsigned long map = 0; 638 unsigned long map = 0;
639
615 map = 1 << res->start; 640 map = 1 << res->start;
616 p[1] = map & 0xff; 641 p[1] = map & 0xff;
617 p[2] = (map >> 8) & 0xff; 642 p[2] = (map >> 8) & 0xff;
618 return;
619} 643}
620 644
621static void pnpbios_encode_dma(unsigned char *p, struct resource * res) 645static void pnpbios_encode_dma(unsigned char *p, struct resource *res)
622{ 646{
623 unsigned long map = 0; 647 unsigned long map = 0;
648
624 map = 1 << res->start; 649 map = 1 << res->start;
625 p[1] = map & 0xff; 650 p[1] = map & 0xff;
626 return;
627} 651}
628 652
629static void pnpbios_encode_port(unsigned char *p, struct resource * res) 653static void pnpbios_encode_port(unsigned char *p, struct resource *res)
630{ 654{
631 unsigned long base = res->start; 655 unsigned long base = res->start;
632 unsigned long len = res->end - res->start + 1; 656 unsigned long len = res->end - res->start + 1;
657
633 p[2] = base & 0xff; 658 p[2] = base & 0xff;
634 p[3] = (base >> 8) & 0xff; 659 p[3] = (base >> 8) & 0xff;
635 p[4] = base & 0xff; 660 p[4] = base & 0xff;
636 p[5] = (base >> 8) & 0xff; 661 p[5] = (base >> 8) & 0xff;
637 p[7] = len & 0xff; 662 p[7] = len & 0xff;
638 return;
639} 663}
640 664
641static void pnpbios_encode_fixed_port(unsigned char *p, struct resource * res) 665static void pnpbios_encode_fixed_port(unsigned char *p, struct resource *res)
642{ 666{
643 unsigned long base = res->start; 667 unsigned long base = res->start;
644 unsigned long len = res->end - res->start + 1; 668 unsigned long len = res->end - res->start + 1;
669
645 p[1] = base & 0xff; 670 p[1] = base & 0xff;
646 p[2] = (base >> 8) & 0xff; 671 p[2] = (base >> 8) & 0xff;
647 p[3] = len & 0xff; 672 p[3] = len & 0xff;
648 return;
649} 673}
650 674
651static unsigned char * 675static unsigned char *pnpbios_encode_allocated_resource_data(unsigned char *p,
652pnpbios_encode_allocated_resource_data(unsigned char * p, unsigned char * end, struct pnp_resource_table * res) 676 unsigned char *end,
677 struct
678 pnp_resource_table
679 *res)
653{ 680{
654 unsigned int len, tag; 681 unsigned int len, tag;
655 int port = 0, irq = 0, dma = 0, mem = 0; 682 int port = 0, irq = 0, dma = 0, mem = 0;
@@ -660,12 +687,12 @@ pnpbios_encode_allocated_resource_data(unsigned char * p, unsigned char * end, s
660 while ((char *)p < (char *)end) { 687 while ((char *)p < (char *)end) {
661 688
662 /* determine the type of tag */ 689 /* determine the type of tag */
663 if (p[0] & LARGE_TAG) { /* large tag */ 690 if (p[0] & LARGE_TAG) { /* large tag */
664 len = (p[2] << 8) | p[1]; 691 len = (p[2] << 8) | p[1];
665 tag = p[0]; 692 tag = p[0];
666 } else { /* small tag */ 693 } else { /* small tag */
667 len = p[0] & 0x07; 694 len = p[0] & 0x07;
668 tag = ((p[0]>>3) & 0x0f); 695 tag = ((p[0] >> 3) & 0x0f);
669 } 696 }
670 697
671 switch (tag) { 698 switch (tag) {
@@ -725,12 +752,14 @@ pnpbios_encode_allocated_resource_data(unsigned char * p, unsigned char * end, s
725 752
726 case SMALL_TAG_END: 753 case SMALL_TAG_END:
727 p = p + 2; 754 p = p + 2;
728 return (unsigned char *)p; 755 return (unsigned char *)p;
729 break; 756 break;
730 757
731			default: /* an unknown tag */ 758			default: /* an unknown tag */
732 len_err: 759 len_err:
733 printk(KERN_ERR "PnPBIOS: Unknown tag '0x%x', length '%d'.\n", tag, len); 760 printk(KERN_ERR
761 "PnPBIOS: Unknown tag '0x%x', length '%d'.\n",
762 tag, len);
734 break; 763 break;
735 } 764 }
736 765
@@ -741,52 +770,52 @@ pnpbios_encode_allocated_resource_data(unsigned char * p, unsigned char * end, s
741 p += len + 1; 770 p += len + 1;
742 } 771 }
743 772
744 printk(KERN_ERR "PnPBIOS: Resource structure does not contain an end tag.\n"); 773 printk(KERN_ERR
774 "PnPBIOS: Resource structure does not contain an end tag.\n");
745 775
746 return NULL; 776 return NULL;
747} 777}
748 778
749
750/* 779/*
751 * Core Parsing Functions 780 * Core Parsing Functions
752 */ 781 */
753 782
754int 783int pnpbios_parse_data_stream(struct pnp_dev *dev, struct pnp_bios_node *node)
755pnpbios_parse_data_stream(struct pnp_dev *dev, struct pnp_bios_node * node)
756{ 784{
757 unsigned char * p = (char *)node->data; 785 unsigned char *p = (char *)node->data;
758 unsigned char * end = (char *)(node->data + node->size); 786 unsigned char *end = (char *)(node->data + node->size);
759 p = pnpbios_parse_allocated_resource_data(p,end,&dev->res); 787
788 p = pnpbios_parse_allocated_resource_data(p, end, &dev->res);
760 if (!p) 789 if (!p)
761 return -EIO; 790 return -EIO;
762 p = pnpbios_parse_resource_option_data(p,end,dev); 791 p = pnpbios_parse_resource_option_data(p, end, dev);
763 if (!p) 792 if (!p)
764 return -EIO; 793 return -EIO;
765 p = pnpbios_parse_compatible_ids(p,end,dev); 794 p = pnpbios_parse_compatible_ids(p, end, dev);
766 if (!p) 795 if (!p)
767 return -EIO; 796 return -EIO;
768 return 0; 797 return 0;
769} 798}
770 799
771int 800int pnpbios_read_resources_from_node(struct pnp_resource_table *res,
772pnpbios_read_resources_from_node(struct pnp_resource_table *res, 801 struct pnp_bios_node *node)
773 struct pnp_bios_node * node)
774{ 802{
775 unsigned char * p = (char *)node->data; 803 unsigned char *p = (char *)node->data;
776 unsigned char * end = (char *)(node->data + node->size); 804 unsigned char *end = (char *)(node->data + node->size);
777 p = pnpbios_parse_allocated_resource_data(p,end,res); 805
806 p = pnpbios_parse_allocated_resource_data(p, end, res);
778 if (!p) 807 if (!p)
779 return -EIO; 808 return -EIO;
780 return 0; 809 return 0;
781} 810}
782 811
783int 812int pnpbios_write_resources_to_node(struct pnp_resource_table *res,
784pnpbios_write_resources_to_node(struct pnp_resource_table *res, 813 struct pnp_bios_node *node)
785 struct pnp_bios_node * node)
786{ 814{
787 unsigned char * p = (char *)node->data; 815 unsigned char *p = (char *)node->data;
788 unsigned char * end = (char *)(node->data + node->size); 816 unsigned char *end = (char *)(node->data + node->size);
789 p = pnpbios_encode_allocated_resource_data(p,end,res); 817
818 p = pnpbios_encode_allocated_resource_data(p, end, res);
790 if (!p) 819 if (!p)
791 return -EIO; 820 return -EIO;
792 return 0; 821 return 0;
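All three walkers in this file share the ISA PnP tag format: bit 7 of the first byte marks a large tag (the tag byte itself, followed by a 16-bit little-endian length), otherwise the byte holds a small tag in bits 6..3 and the length in bits 2..0, and the cursor advances past a 3- or 1-byte header plus the payload. A minimal standalone sketch of that decoding step (constants per the PnP spec; a sketch, not the kernel code itself):

#include <stddef.h>

#define LARGE_TAG 0x80

/* Decode one tag header; returns a pointer past the tag's payload,
 * or NULL if the buffer is exhausted. */
static unsigned char *next_tag(unsigned char *p, unsigned char *end,
                               unsigned int *tag, unsigned int *len)
{
        if (p >= end)
                return NULL;
        if (p[0] & LARGE_TAG) {                 /* 3-byte header */
                if (p + 3 > end)
                        return NULL;
                *tag = p[0];
                *len = (p[2] << 8) | p[1];      /* little-endian length */
                return p + 3 + *len;
        }
        *tag = (p[0] >> 3) & 0x0f;              /* 1-byte header */
        *len = p[0] & 0x07;
        return p + 1 + *len;
}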
diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c
index 7c3236690cc3..90755d4cdb9f 100644
--- a/drivers/pnp/quirks.c
+++ b/drivers/pnp/quirks.c
@@ -19,7 +19,6 @@
19#include <linux/io.h> 19#include <linux/io.h>
20#include "base.h" 20#include "base.h"
21 21
22
23static void quirk_awe32_resources(struct pnp_dev *dev) 22static void quirk_awe32_resources(struct pnp_dev *dev)
24{ 23{
25 struct pnp_port *port, *port2, *port3; 24 struct pnp_port *port, *port2, *port3;
@@ -31,7 +30,7 @@ static void quirk_awe32_resources(struct pnp_dev *dev)
31 * two extra ports (at offset 0x400 and 0x800 from the one given) by 30 * two extra ports (at offset 0x400 and 0x800 from the one given) by
32 * hand. 31 * hand.
33 */ 32 */
34 for ( ; res ; res = res->next ) { 33 for (; res; res = res->next) {
35 port2 = pnp_alloc(sizeof(struct pnp_port)); 34 port2 = pnp_alloc(sizeof(struct pnp_port));
36 if (!port2) 35 if (!port2)
37 return; 36 return;
@@ -58,18 +57,19 @@ static void quirk_cmi8330_resources(struct pnp_dev *dev)
58 struct pnp_option *res = dev->dependent; 57 struct pnp_option *res = dev->dependent;
59 unsigned long tmp; 58 unsigned long tmp;
60 59
61 for ( ; res ; res = res->next ) { 60 for (; res; res = res->next) {
62 61
63 struct pnp_irq *irq; 62 struct pnp_irq *irq;
64 struct pnp_dma *dma; 63 struct pnp_dma *dma;
65 64
66 for( irq = res->irq; irq; irq = irq->next ) { // Valid irqs are 5, 7, 10 65 for (irq = res->irq; irq; irq = irq->next) { // Valid irqs are 5, 7, 10
67 tmp = 0x04A0; 66 tmp = 0x04A0;
68 bitmap_copy(irq->map, &tmp, 16); // 0000 0100 1010 0000 67 bitmap_copy(irq->map, &tmp, 16); // 0000 0100 1010 0000
69 } 68 }
70 69
71 for( dma = res->dma; dma; dma = dma->next ) // Valid 8bit dma channels are 1,3 70 for (dma = res->dma; dma; dma = dma->next) // Valid 8bit dma channels are 1,3
72 if( ( dma->flags & IORESOURCE_DMA_TYPE_MASK ) == IORESOURCE_DMA_8BIT ) 71 if ((dma->flags & IORESOURCE_DMA_TYPE_MASK) ==
72 IORESOURCE_DMA_8BIT)
73 dma->map = 0x000A; 73 dma->map = 0x000A;
74 } 74 }
75 printk(KERN_INFO "pnp: CMI8330 quirk - fixing interrupts and dma\n"); 75 printk(KERN_INFO "pnp: CMI8330 quirk - fixing interrupts and dma\n");
@@ -79,7 +79,7 @@ static void quirk_sb16audio_resources(struct pnp_dev *dev)
79{ 79{
80 struct pnp_port *port; 80 struct pnp_port *port;
81 struct pnp_option *res = dev->dependent; 81 struct pnp_option *res = dev->dependent;
82 int changed = 0; 82 int changed = 0;
83 83
84 /* 84 /*
85 * The default range on the mpu port for these devices is 0x388-0x388. 85 * The default range on the mpu port for these devices is 0x388-0x388.
@@ -87,24 +87,24 @@ static void quirk_sb16audio_resources(struct pnp_dev *dev)
87 * auto-configured. 87 * auto-configured.
88 */ 88 */
89 89
90 for( ; res ; res = res->next ) { 90 for (; res; res = res->next) {
91 port = res->port; 91 port = res->port;
92 if(!port) 92 if (!port)
93 continue; 93 continue;
94 port = port->next; 94 port = port->next;
95 if(!port) 95 if (!port)
96 continue; 96 continue;
97 port = port->next; 97 port = port->next;
98 if(!port) 98 if (!port)
99 continue; 99 continue;
100 if(port->min != port->max) 100 if (port->min != port->max)
101 continue; 101 continue;
102 port->max += 0x70; 102 port->max += 0x70;
103 changed = 1; 103 changed = 1;
104 } 104 }
105 if(changed) 105 if (changed)
106 printk(KERN_INFO "pnp: SB audio device quirk - increasing port range\n"); 106 printk(KERN_INFO
107 return; 107 "pnp: SB audio device quirk - increasing port range\n");
108} 108}
109 109
110static int quirk_smc_fir_enabled(struct pnp_dev *dev) 110static int quirk_smc_fir_enabled(struct pnp_dev *dev)
@@ -124,7 +124,7 @@ static int quirk_smc_fir_enabled(struct pnp_dev *dev)
124 outb(bank, firbase + 7); 124 outb(bank, firbase + 7);
125 125
126 high = inb(firbase + 0); 126 high = inb(firbase + 0);
127 low = inb(firbase + 1); 127 low = inb(firbase + 1);
128 chip = inb(firbase + 2); 128 chip = inb(firbase + 2);
129 129
130 /* This corresponds to the check in smsc_ircc_present() */ 130 /* This corresponds to the check in smsc_ircc_present() */
@@ -153,8 +153,8 @@ static void quirk_smc_enable(struct pnp_dev *dev)
153 */ 153 */
154 dev_err(&dev->dev, "%s not responding at SIR 0x%lx, FIR 0x%lx; " 154 dev_err(&dev->dev, "%s not responding at SIR 0x%lx, FIR 0x%lx; "
155 "auto-configuring\n", dev->id->id, 155 "auto-configuring\n", dev->id->id,
156 (unsigned long) pnp_port_start(dev, 0), 156 (unsigned long)pnp_port_start(dev, 0),
157 (unsigned long) pnp_port_start(dev, 1)); 157 (unsigned long)pnp_port_start(dev, 1));
158 158
159 pnp_disable_dev(dev); 159 pnp_disable_dev(dev);
160 pnp_init_resource_table(&dev->res); 160 pnp_init_resource_table(&dev->res);
@@ -162,8 +162,8 @@ static void quirk_smc_enable(struct pnp_dev *dev)
162 pnp_activate_dev(dev); 162 pnp_activate_dev(dev);
163 if (quirk_smc_fir_enabled(dev)) { 163 if (quirk_smc_fir_enabled(dev)) {
164 dev_err(&dev->dev, "responds at SIR 0x%lx, FIR 0x%lx\n", 164 dev_err(&dev->dev, "responds at SIR 0x%lx, FIR 0x%lx\n",
165 (unsigned long) pnp_port_start(dev, 0), 165 (unsigned long)pnp_port_start(dev, 0),
166 (unsigned long) pnp_port_start(dev, 1)); 166 (unsigned long)pnp_port_start(dev, 1));
167 return; 167 return;
168 } 168 }
169 169
@@ -175,8 +175,8 @@ static void quirk_smc_enable(struct pnp_dev *dev)
175 */ 175 */
176 dev_err(&dev->dev, "not responding at SIR 0x%lx, FIR 0x%lx; " 176 dev_err(&dev->dev, "not responding at SIR 0x%lx, FIR 0x%lx; "
177 "swapping SIR/FIR and reconfiguring\n", 177 "swapping SIR/FIR and reconfiguring\n",
178 (unsigned long) pnp_port_start(dev, 0), 178 (unsigned long)pnp_port_start(dev, 0),
179 (unsigned long) pnp_port_start(dev, 1)); 179 (unsigned long)pnp_port_start(dev, 1));
180 180
181 /* 181 /*
182 * Clear IORESOURCE_AUTO so pnp_activate_dev() doesn't reassign 182 * Clear IORESOURCE_AUTO so pnp_activate_dev() doesn't reassign
@@ -200,8 +200,8 @@ static void quirk_smc_enable(struct pnp_dev *dev)
200 200
201 if (quirk_smc_fir_enabled(dev)) { 201 if (quirk_smc_fir_enabled(dev)) {
202 dev_err(&dev->dev, "responds at SIR 0x%lx, FIR 0x%lx\n", 202 dev_err(&dev->dev, "responds at SIR 0x%lx, FIR 0x%lx\n",
203 (unsigned long) pnp_port_start(dev, 0), 203 (unsigned long)pnp_port_start(dev, 0),
204 (unsigned long) pnp_port_start(dev, 1)); 204 (unsigned long)pnp_port_start(dev, 1));
205 return; 205 return;
206 } 206 }
207 207
@@ -209,7 +209,6 @@ static void quirk_smc_enable(struct pnp_dev *dev)
209 "email bjorn.helgaas@hp.com\n"); 209 "email bjorn.helgaas@hp.com\n");
210} 210}
211 211
212
213/* 212/*
214 * PnP Quirks 213 * PnP Quirks
215 * Cards or devices that need some tweaking due to incomplete resource info 214 * Cards or devices that need some tweaking due to incomplete resource info
@@ -217,21 +216,21 @@ static void quirk_smc_enable(struct pnp_dev *dev)
217 216
218static struct pnp_fixup pnp_fixups[] = { 217static struct pnp_fixup pnp_fixups[] = {
219 /* Soundblaster awe io port quirk */ 218 /* Soundblaster awe io port quirk */
220 { "CTL0021", quirk_awe32_resources }, 219 {"CTL0021", quirk_awe32_resources},
221 { "CTL0022", quirk_awe32_resources }, 220 {"CTL0022", quirk_awe32_resources},
222 { "CTL0023", quirk_awe32_resources }, 221 {"CTL0023", quirk_awe32_resources},
223 /* CMI 8330 interrupt and dma fix */ 222 /* CMI 8330 interrupt and dma fix */
224 { "@X@0001", quirk_cmi8330_resources }, 223 {"@X@0001", quirk_cmi8330_resources},
225 /* Soundblaster audio device io port range quirk */ 224 /* Soundblaster audio device io port range quirk */
226 { "CTL0001", quirk_sb16audio_resources }, 225 {"CTL0001", quirk_sb16audio_resources},
227 { "CTL0031", quirk_sb16audio_resources }, 226 {"CTL0031", quirk_sb16audio_resources},
228 { "CTL0041", quirk_sb16audio_resources }, 227 {"CTL0041", quirk_sb16audio_resources},
229 { "CTL0042", quirk_sb16audio_resources }, 228 {"CTL0042", quirk_sb16audio_resources},
230 { "CTL0043", quirk_sb16audio_resources }, 229 {"CTL0043", quirk_sb16audio_resources},
231 { "CTL0044", quirk_sb16audio_resources }, 230 {"CTL0044", quirk_sb16audio_resources},
232 { "CTL0045", quirk_sb16audio_resources }, 231 {"CTL0045", quirk_sb16audio_resources},
233 { "SMCf010", quirk_smc_enable }, 232 {"SMCf010", quirk_smc_enable},
234 { "" } 233 {""}
235}; 234};
236 235
237void pnp_fixup_device(struct pnp_dev *dev) 236void pnp_fixup_device(struct pnp_dev *dev)
@@ -239,9 +238,8 @@ void pnp_fixup_device(struct pnp_dev *dev)
239 int i = 0; 238 int i = 0;
240 239
241 while (*pnp_fixups[i].id) { 240 while (*pnp_fixups[i].id) {
242 if (compare_pnp_id(dev->id,pnp_fixups[i].id)) { 241 if (compare_pnp_id(dev->id, pnp_fixups[i].id)) {
243 pnp_dbg("Calling quirk for %s", 242 pnp_dbg("Calling quirk for %s", dev->dev.bus_id);
244 dev->dev.bus_id);
245 pnp_fixups[i].quirk_function(dev); 243 pnp_fixups[i].quirk_function(dev);
246 } 244 }
247 i++; 245 i++;
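pnp_fixup_device() scans pnp_fixups[] until it reaches the empty-string sentinel ({""}), applying every quirk whose ID matches, so a single device can receive several fixups. The same sentinel-terminated table idiom in isolation (a hedged userspace sketch with hypothetical names; the kernel uses compare_pnp_id() rather than strcmp()):

#include <string.h>

struct fixup {
        const char *id;
        void (*quirk)(void *dev);
};

static void apply_fixups(const struct fixup *table, const char *id, void *dev)
{
        int i;

        /* *table[i].id is '\0' (false) at the { "" } sentinel */
        for (i = 0; *table[i].id; i++)
                if (!strcmp(table[i].id, id))
                        table[i].quirk(dev);
}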
diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c
index a685fbec4604..ea6ec14a0559 100644
--- a/drivers/pnp/resource.c
+++ b/drivers/pnp/resource.c
@@ -3,7 +3,6 @@
3 * 3 *
4 * based on isapnp.c resource management (c) Jaroslav Kysela <perex@suse.cz> 4 * based on isapnp.c resource management (c) Jaroslav Kysela <perex@suse.cz>
5 * Copyright 2003 Adam Belay <ambx1@neo.rr.com> 5 * Copyright 2003 Adam Belay <ambx1@neo.rr.com>
6 *
7 */ 6 */
8 7
9#include <linux/module.h> 8#include <linux/module.h>
@@ -20,21 +19,19 @@
20#include <linux/pnp.h> 19#include <linux/pnp.h>
21#include "base.h" 20#include "base.h"
22 21
23static int pnp_reserve_irq[16] = { [0 ... 15] = -1 }; /* reserve (don't use) some IRQ */ 22static int pnp_reserve_irq[16] = {[0 ... 15] = -1 }; /* reserve (don't use) some IRQ */
24static int pnp_reserve_dma[8] = { [0 ... 7] = -1 }; /* reserve (don't use) some DMA */ 23static int pnp_reserve_dma[8] = {[0 ... 7] = -1 }; /* reserve (don't use) some DMA */
25static int pnp_reserve_io[16] = { [0 ... 15] = -1 }; /* reserve (don't use) some I/O region */ 24static int pnp_reserve_io[16] = {[0 ... 15] = -1 }; /* reserve (don't use) some I/O region */
26static int pnp_reserve_mem[16] = { [0 ... 15] = -1 }; /* reserve (don't use) some memory region */ 25static int pnp_reserve_mem[16] = {[0 ... 15] = -1 }; /* reserve (don't use) some memory region */
27
28 26
29/* 27/*
30 * option registration 28 * option registration
31 */ 29 */
32 30
33static struct pnp_option * pnp_build_option(int priority) 31static struct pnp_option *pnp_build_option(int priority)
34{ 32{
35 struct pnp_option *option = pnp_alloc(sizeof(struct pnp_option)); 33 struct pnp_option *option = pnp_alloc(sizeof(struct pnp_option));
36 34
37 /* check if pnp_alloc ran out of memory */
38 if (!option) 35 if (!option)
39 return NULL; 36 return NULL;
40 37
@@ -46,9 +43,10 @@ static struct pnp_option * pnp_build_option(int priority)
46 return option; 43 return option;
47} 44}
48 45
49struct pnp_option * pnp_register_independent_option(struct pnp_dev *dev) 46struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev)
50{ 47{
51 struct pnp_option *option; 48 struct pnp_option *option;
49
52 if (!dev) 50 if (!dev)
53 return NULL; 51 return NULL;
54 52
@@ -61,9 +59,11 @@ struct pnp_option * pnp_register_independent_option(struct pnp_dev *dev)
61 return option; 59 return option;
62} 60}
63 61
64struct pnp_option * pnp_register_dependent_option(struct pnp_dev *dev, int priority) 62struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev,
63 int priority)
65{ 64{
66 struct pnp_option *option; 65 struct pnp_option *option;
66
67 if (!dev) 67 if (!dev)
68 return NULL; 68 return NULL;
69 69
@@ -82,6 +82,7 @@ struct pnp_option * pnp_register_dependent_option(struct pnp_dev *dev, int prior
82int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data) 82int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data)
83{ 83{
84 struct pnp_irq *ptr; 84 struct pnp_irq *ptr;
85
85 if (!option) 86 if (!option)
86 return -EINVAL; 87 return -EINVAL;
87 if (!data) 88 if (!data)
@@ -110,6 +111,7 @@ int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data)
110int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data) 111int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data)
111{ 112{
112 struct pnp_dma *ptr; 113 struct pnp_dma *ptr;
114
113 if (!option) 115 if (!option)
114 return -EINVAL; 116 return -EINVAL;
115 if (!data) 117 if (!data)
@@ -129,6 +131,7 @@ int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data)
129int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data) 131int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data)
130{ 132{
131 struct pnp_port *ptr; 133 struct pnp_port *ptr;
134
132 if (!option) 135 if (!option)
133 return -EINVAL; 136 return -EINVAL;
134 if (!data) 137 if (!data)
@@ -148,6 +151,7 @@ int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data)
148int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data) 151int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data)
149{ 152{
150 struct pnp_mem *ptr; 153 struct pnp_mem *ptr;
154
151 if (!option) 155 if (!option)
152 return -EINVAL; 156 return -EINVAL;
153 if (!data) 157 if (!data)
@@ -222,7 +226,6 @@ void pnp_free_option(struct pnp_option *option)
222 } 226 }
223} 227}
224 228
225
226/* 229/*
227 * resource validity checking 230 * resource validity checking
228 */ 231 */
@@ -236,11 +239,12 @@ void pnp_free_option(struct pnp_option *option)
236#define cannot_compare(flags) \ 239#define cannot_compare(flags) \
237((flags) & (IORESOURCE_UNSET | IORESOURCE_DISABLED)) 240((flags) & (IORESOURCE_UNSET | IORESOURCE_DISABLED))
238 241
239int pnp_check_port(struct pnp_dev * dev, int idx) 242int pnp_check_port(struct pnp_dev *dev, int idx)
240{ 243{
241 int tmp; 244 int tmp;
242 struct pnp_dev *tdev; 245 struct pnp_dev *tdev;
243 resource_size_t *port, *end, *tport, *tend; 246 resource_size_t *port, *end, *tport, *tend;
247
244 port = &dev->res.port_resource[idx].start; 248 port = &dev->res.port_resource[idx].start;
245 end = &dev->res.port_resource[idx].end; 249 end = &dev->res.port_resource[idx].end;
246 250
@@ -250,8 +254,8 @@ int pnp_check_port(struct pnp_dev * dev, int idx)
250 254
251 /* check if the resource is already in use, skip if the 255 /* check if the resource is already in use, skip if the
252 * device is active because it itself may be in use */ 256 * device is active because it itself may be in use */
253 if(!dev->active) { 257 if (!dev->active) {
254 if (__check_region(&ioport_resource, *port, length(port,end))) 258 if (__check_region(&ioport_resource, *port, length(port, end)))
255 return 0; 259 return 0;
256 } 260 }
257 261
@@ -259,7 +263,7 @@ int pnp_check_port(struct pnp_dev * dev, int idx)
259 for (tmp = 0; tmp < 8; tmp++) { 263 for (tmp = 0; tmp < 8; tmp++) {
260 int rport = pnp_reserve_io[tmp << 1]; 264 int rport = pnp_reserve_io[tmp << 1];
261 int rend = pnp_reserve_io[(tmp << 1) + 1] + rport - 1; 265 int rend = pnp_reserve_io[(tmp << 1) + 1] + rport - 1;
262 if (ranged_conflict(port,end,&rport,&rend)) 266 if (ranged_conflict(port, end, &rport, &rend))
263 return 0; 267 return 0;
264 } 268 }
265 269
@@ -268,7 +272,7 @@ int pnp_check_port(struct pnp_dev * dev, int idx)
268 if (dev->res.port_resource[tmp].flags & IORESOURCE_IO) { 272 if (dev->res.port_resource[tmp].flags & IORESOURCE_IO) {
269 tport = &dev->res.port_resource[tmp].start; 273 tport = &dev->res.port_resource[tmp].start;
270 tend = &dev->res.port_resource[tmp].end; 274 tend = &dev->res.port_resource[tmp].end;
271 if (ranged_conflict(port,end,tport,tend)) 275 if (ranged_conflict(port, end, tport, tend))
272 return 0; 276 return 0;
273 } 277 }
274 } 278 }
@@ -279,11 +283,12 @@ int pnp_check_port(struct pnp_dev * dev, int idx)
279 continue; 283 continue;
280 for (tmp = 0; tmp < PNP_MAX_PORT; tmp++) { 284 for (tmp = 0; tmp < PNP_MAX_PORT; tmp++) {
281 if (tdev->res.port_resource[tmp].flags & IORESOURCE_IO) { 285 if (tdev->res.port_resource[tmp].flags & IORESOURCE_IO) {
282 if (cannot_compare(tdev->res.port_resource[tmp].flags)) 286 if (cannot_compare
287 (tdev->res.port_resource[tmp].flags))
283 continue; 288 continue;
284 tport = &tdev->res.port_resource[tmp].start; 289 tport = &tdev->res.port_resource[tmp].start;
285 tend = &tdev->res.port_resource[tmp].end; 290 tend = &tdev->res.port_resource[tmp].end;
286 if (ranged_conflict(port,end,tport,tend)) 291 if (ranged_conflict(port, end, tport, tend))
287 return 0; 292 return 0;
288 } 293 }
289 } 294 }
@@ -292,11 +297,12 @@ int pnp_check_port(struct pnp_dev * dev, int idx)
292 return 1; 297 return 1;
293} 298}
294 299
295int pnp_check_mem(struct pnp_dev * dev, int idx) 300int pnp_check_mem(struct pnp_dev *dev, int idx)
296{ 301{
297 int tmp; 302 int tmp;
298 struct pnp_dev *tdev; 303 struct pnp_dev *tdev;
299 resource_size_t *addr, *end, *taddr, *tend; 304 resource_size_t *addr, *end, *taddr, *tend;
305
300 addr = &dev->res.mem_resource[idx].start; 306 addr = &dev->res.mem_resource[idx].start;
301 end = &dev->res.mem_resource[idx].end; 307 end = &dev->res.mem_resource[idx].end;
302 308
@@ -306,8 +312,8 @@ int pnp_check_mem(struct pnp_dev * dev, int idx)
306 312
307 /* check if the resource is already in use, skip if the 313 /* check if the resource is already in use, skip if the
308 * device is active because it itself may be in use */ 314 * device is active because it itself may be in use */
309 if(!dev->active) { 315 if (!dev->active) {
310 if (check_mem_region(*addr, length(addr,end))) 316 if (check_mem_region(*addr, length(addr, end)))
311 return 0; 317 return 0;
312 } 318 }
313 319
@@ -315,7 +321,7 @@ int pnp_check_mem(struct pnp_dev * dev, int idx)
315 for (tmp = 0; tmp < 8; tmp++) { 321 for (tmp = 0; tmp < 8; tmp++) {
316 int raddr = pnp_reserve_mem[tmp << 1]; 322 int raddr = pnp_reserve_mem[tmp << 1];
317 int rend = pnp_reserve_mem[(tmp << 1) + 1] + raddr - 1; 323 int rend = pnp_reserve_mem[(tmp << 1) + 1] + raddr - 1;
318 if (ranged_conflict(addr,end,&raddr,&rend)) 324 if (ranged_conflict(addr, end, &raddr, &rend))
319 return 0; 325 return 0;
320 } 326 }
321 327
@@ -324,7 +330,7 @@ int pnp_check_mem(struct pnp_dev * dev, int idx)
324 if (dev->res.mem_resource[tmp].flags & IORESOURCE_MEM) { 330 if (dev->res.mem_resource[tmp].flags & IORESOURCE_MEM) {
325 taddr = &dev->res.mem_resource[tmp].start; 331 taddr = &dev->res.mem_resource[tmp].start;
326 tend = &dev->res.mem_resource[tmp].end; 332 tend = &dev->res.mem_resource[tmp].end;
327 if (ranged_conflict(addr,end,taddr,tend)) 333 if (ranged_conflict(addr, end, taddr, tend))
328 return 0; 334 return 0;
329 } 335 }
330 } 336 }
@@ -335,11 +341,12 @@ int pnp_check_mem(struct pnp_dev * dev, int idx)
335 continue; 341 continue;
336 for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) { 342 for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) {
337 if (tdev->res.mem_resource[tmp].flags & IORESOURCE_MEM) { 343 if (tdev->res.mem_resource[tmp].flags & IORESOURCE_MEM) {
338 if (cannot_compare(tdev->res.mem_resource[tmp].flags)) 344 if (cannot_compare
345 (tdev->res.mem_resource[tmp].flags))
339 continue; 346 continue;
340 taddr = &tdev->res.mem_resource[tmp].start; 347 taddr = &tdev->res.mem_resource[tmp].start;
341 tend = &tdev->res.mem_resource[tmp].end; 348 tend = &tdev->res.mem_resource[tmp].end;
342 if (ranged_conflict(addr,end,taddr,tend)) 349 if (ranged_conflict(addr, end, taddr, tend))
343 return 0; 350 return 0;
344 } 351 }
345 } 352 }
@@ -353,11 +360,11 @@ static irqreturn_t pnp_test_handler(int irq, void *dev_id)
353 return IRQ_HANDLED; 360 return IRQ_HANDLED;
354} 361}
355 362
356int pnp_check_irq(struct pnp_dev * dev, int idx) 363int pnp_check_irq(struct pnp_dev *dev, int idx)
357{ 364{
358 int tmp; 365 int tmp;
359 struct pnp_dev *tdev; 366 struct pnp_dev *tdev;
360 resource_size_t * irq = &dev->res.irq_resource[idx].start; 367 resource_size_t *irq = &dev->res.irq_resource[idx].start;
361 368
362 /* if the resource doesn't exist, don't complain about it */ 369 /* if the resource doesn't exist, don't complain about it */
363 if (cannot_compare(dev->res.irq_resource[idx].flags)) 370 if (cannot_compare(dev->res.irq_resource[idx].flags))
@@ -394,9 +401,9 @@ int pnp_check_irq(struct pnp_dev * dev, int idx)
394 401
395 /* check if the resource is already in use, skip if the 402 /* check if the resource is already in use, skip if the
396 * device is active because it itself may be in use */ 403 * device is active because it itself may be in use */
397 if(!dev->active) { 404 if (!dev->active) {
398 if (request_irq(*irq, pnp_test_handler, 405 if (request_irq(*irq, pnp_test_handler,
399 IRQF_DISABLED|IRQF_PROBE_SHARED, "pnp", NULL)) 406 IRQF_DISABLED | IRQF_PROBE_SHARED, "pnp", NULL))
400 return 0; 407 return 0;
401 free_irq(*irq, NULL); 408 free_irq(*irq, NULL);
402 } 409 }
@@ -407,7 +414,8 @@ int pnp_check_irq(struct pnp_dev * dev, int idx)
407 continue; 414 continue;
408 for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) { 415 for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) {
409 if (tdev->res.irq_resource[tmp].flags & IORESOURCE_IRQ) { 416 if (tdev->res.irq_resource[tmp].flags & IORESOURCE_IRQ) {
410 if (cannot_compare(tdev->res.irq_resource[tmp].flags)) 417 if (cannot_compare
418 (tdev->res.irq_resource[tmp].flags))
411 continue; 419 continue;
412 if ((tdev->res.irq_resource[tmp].start == *irq)) 420 if ((tdev->res.irq_resource[tmp].start == *irq))
413 return 0; 421 return 0;
@@ -418,12 +426,12 @@ int pnp_check_irq(struct pnp_dev * dev, int idx)
418 return 1; 426 return 1;
419} 427}
420 428
421int pnp_check_dma(struct pnp_dev * dev, int idx) 429int pnp_check_dma(struct pnp_dev *dev, int idx)
422{ 430{
423#ifndef CONFIG_IA64 431#ifndef CONFIG_IA64
424 int tmp; 432 int tmp;
425 struct pnp_dev *tdev; 433 struct pnp_dev *tdev;
426 resource_size_t * dma = &dev->res.dma_resource[idx].start; 434 resource_size_t *dma = &dev->res.dma_resource[idx].start;
427 435
428 /* if the resource doesn't exist, don't complain about it */ 436 /* if the resource doesn't exist, don't complain about it */
429 if (cannot_compare(dev->res.dma_resource[idx].flags)) 437 if (cannot_compare(dev->res.dma_resource[idx].flags))
@@ -449,7 +457,7 @@ int pnp_check_dma(struct pnp_dev * dev, int idx)
449 457
450 /* check if the resource is already in use, skip if the 458 /* check if the resource is already in use, skip if the
451 * device is active because it itself may be in use */ 459 * device is active because it itself may be in use */
452 if(!dev->active) { 460 if (!dev->active) {
453 if (request_dma(*dma, "pnp")) 461 if (request_dma(*dma, "pnp"))
454 return 0; 462 return 0;
455 free_dma(*dma); 463 free_dma(*dma);
@@ -461,7 +469,8 @@ int pnp_check_dma(struct pnp_dev * dev, int idx)
461 continue; 469 continue;
462 for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) { 470 for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) {
463 if (tdev->res.dma_resource[tmp].flags & IORESOURCE_DMA) { 471 if (tdev->res.dma_resource[tmp].flags & IORESOURCE_DMA) {
464 if (cannot_compare(tdev->res.dma_resource[tmp].flags)) 472 if (cannot_compare
473 (tdev->res.dma_resource[tmp].flags))
465 continue; 474 continue;
466 if ((tdev->res.dma_resource[tmp].start == *dma)) 475 if ((tdev->res.dma_resource[tmp].start == *dma))
467 return 0; 476 return 0;
@@ -471,30 +480,18 @@ int pnp_check_dma(struct pnp_dev * dev, int idx)
471 480
472 return 1; 481 return 1;
473#else 482#else
474 /* IA64 hasn't legacy DMA */ 483 /* IA64 does not have legacy DMA */
475 return 0; 484 return 0;
476#endif 485#endif
477} 486}
478 487
479
480#if 0
481EXPORT_SYMBOL(pnp_register_dependent_option);
482EXPORT_SYMBOL(pnp_register_independent_option);
483EXPORT_SYMBOL(pnp_register_irq_resource);
484EXPORT_SYMBOL(pnp_register_dma_resource);
485EXPORT_SYMBOL(pnp_register_port_resource);
486EXPORT_SYMBOL(pnp_register_mem_resource);
487#endif /* 0 */
488
489
490/* format is: pnp_reserve_irq=irq1[,irq2] .... */ 488/* format is: pnp_reserve_irq=irq1[,irq2] .... */
491
492static int __init pnp_setup_reserve_irq(char *str) 489static int __init pnp_setup_reserve_irq(char *str)
493{ 490{
494 int i; 491 int i;
495 492
496 for (i = 0; i < 16; i++) 493 for (i = 0; i < 16; i++)
497 if (get_option(&str,&pnp_reserve_irq[i]) != 2) 494 if (get_option(&str, &pnp_reserve_irq[i]) != 2)
498 break; 495 break;
499 return 1; 496 return 1;
500} 497}
@@ -502,13 +499,12 @@ static int __init pnp_setup_reserve_irq(char *str)
502__setup("pnp_reserve_irq=", pnp_setup_reserve_irq); 499__setup("pnp_reserve_irq=", pnp_setup_reserve_irq);
503 500
504/* format is: pnp_reserve_dma=dma1[,dma2] .... */ 501/* format is: pnp_reserve_dma=dma1[,dma2] .... */
505
506static int __init pnp_setup_reserve_dma(char *str) 502static int __init pnp_setup_reserve_dma(char *str)
507{ 503{
508 int i; 504 int i;
509 505
510 for (i = 0; i < 8; i++) 506 for (i = 0; i < 8; i++)
511 if (get_option(&str,&pnp_reserve_dma[i]) != 2) 507 if (get_option(&str, &pnp_reserve_dma[i]) != 2)
512 break; 508 break;
513 return 1; 509 return 1;
514} 510}
@@ -516,13 +512,12 @@ static int __init pnp_setup_reserve_dma(char *str)
516__setup("pnp_reserve_dma=", pnp_setup_reserve_dma); 512__setup("pnp_reserve_dma=", pnp_setup_reserve_dma);
517 513
518/* format is: pnp_reserve_io=io1,size1[,io2,size2] .... */ 514/* format is: pnp_reserve_io=io1,size1[,io2,size2] .... */
519
520static int __init pnp_setup_reserve_io(char *str) 515static int __init pnp_setup_reserve_io(char *str)
521{ 516{
522 int i; 517 int i;
523 518
524 for (i = 0; i < 16; i++) 519 for (i = 0; i < 16; i++)
525 if (get_option(&str,&pnp_reserve_io[i]) != 2) 520 if (get_option(&str, &pnp_reserve_io[i]) != 2)
526 break; 521 break;
527 return 1; 522 return 1;
528} 523}
@@ -530,13 +525,12 @@ static int __init pnp_setup_reserve_io(char *str)
530__setup("pnp_reserve_io=", pnp_setup_reserve_io); 525__setup("pnp_reserve_io=", pnp_setup_reserve_io);
531 526
532/* format is: pnp_reserve_mem=mem1,size1[,mem2,size2] .... */ 527/* format is: pnp_reserve_mem=mem1,size1[,mem2,size2] .... */
533
534static int __init pnp_setup_reserve_mem(char *str) 528static int __init pnp_setup_reserve_mem(char *str)
535{ 529{
536 int i; 530 int i;
537 531
538 for (i = 0; i < 16; i++) 532 for (i = 0; i < 16; i++)
539 if (get_option(&str,&pnp_reserve_mem[i]) != 2) 533 if (get_option(&str, &pnp_reserve_mem[i]) != 2)
540 break; 534 break;
541 return 1; 535 return 1;
542} 536}
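The four pnp_reserve_* arrays are filled as flat value lists by get_option() and, for I/O and memory, consumed as start,size pairs: pnp_check_port() reads pnp_reserve_io[tmp << 1] as the start and computes the inclusive end as start + size - 1, so booting with pnp_reserve_io=0x2f8,8 excludes ports 0x2f8-0x2ff. A hedged userspace sketch of that pair expansion (the [0 ... 15] range designator is the same GCC extension used above):

#include <stdio.h>

static int reserve_io[16] = { [0 ... 15] = -1 };        /* start,size pairs */

int main(void)
{
        int i;

        reserve_io[0] = 0x2f8;          /* as if pnp_reserve_io=0x2f8,8 */
        reserve_io[1] = 8;

        for (i = 0; i < 8; i++) {
                int start = reserve_io[i << 1];
                int end = reserve_io[(i << 1) + 1] + start - 1;

                if (start < 0)
                        continue;       /* pair never set on the command line */
                printf("reserved 0x%x-0x%x\n", start, end);
        }
        return 0;
}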
diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c
index 946a0dcd627d..13c608f5fb30 100644
--- a/drivers/pnp/support.c
+++ b/drivers/pnp/support.c
@@ -1,8 +1,7 @@
1/* 1/*
2 * support.c - provides standard pnp functions for the use of pnp protocol drivers, 2 * support.c - standard functions for the use of pnp protocol drivers
3 * 3 *
4 * Copyright 2003 Adam Belay <ambx1@neo.rr.com> 4 * Copyright 2003 Adam Belay <ambx1@neo.rr.com>
5 *
6 */ 5 */
7 6
8#include <linux/module.h> 7#include <linux/module.h>
@@ -11,22 +10,18 @@
11#include "base.h" 10#include "base.h"
12 11
13/** 12/**
14 * pnp_is_active - Determines if a device is active based on its current resources 13 * pnp_is_active - Determines if a device is active based on its current
14 * resources
15 * @dev: pointer to the desired PnP device 15 * @dev: pointer to the desired PnP device
16 *
17 */ 16 */
18 17int pnp_is_active(struct pnp_dev *dev)
19int pnp_is_active(struct pnp_dev * dev)
20{ 18{
21 if (!pnp_port_start(dev, 0) && pnp_port_len(dev, 0) <= 1 && 19 if (!pnp_port_start(dev, 0) && pnp_port_len(dev, 0) <= 1 &&
22 !pnp_mem_start(dev, 0) && pnp_mem_len(dev, 0) <= 1 && 20 !pnp_mem_start(dev, 0) && pnp_mem_len(dev, 0) <= 1 &&
23 pnp_irq(dev, 0) == -1 && 21 pnp_irq(dev, 0) == -1 && pnp_dma(dev, 0) == -1)
24 pnp_dma(dev, 0) == -1) 22 return 0;
25 return 0;
26 else 23 else
27 return 1; 24 return 1;
28} 25}
29 26
30
31
32EXPORT_SYMBOL(pnp_is_active); 27EXPORT_SYMBOL(pnp_is_active);
diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c
index a8a95540b1ef..a06f980b3ac9 100644
--- a/drivers/pnp/system.c
+++ b/drivers/pnp/system.c
@@ -16,13 +16,14 @@
16 16
17static const struct pnp_device_id pnp_dev_table[] = { 17static const struct pnp_device_id pnp_dev_table[] = {
18 /* General ID for reserving resources */ 18 /* General ID for reserving resources */
19 { "PNP0c02", 0 }, 19 {"PNP0c02", 0},
20 /* memory controller */ 20 /* memory controller */
21 { "PNP0c01", 0 }, 21 {"PNP0c01", 0},
22 { "", 0 } 22 {"", 0}
23}; 23};
24 24
25static void reserve_range(const char *pnpid, resource_size_t start, resource_size_t end, int port) 25static void reserve_range(const char *pnpid, resource_size_t start,
26 resource_size_t end, int port)
26{ 27{
27 struct resource *res; 28 struct resource *res;
28 char *regionid; 29 char *regionid;
@@ -32,9 +33,9 @@ static void reserve_range(const char *pnpid, resource_size_t start, resource_siz
32 return; 33 return;
33 snprintf(regionid, 16, "pnp %s", pnpid); 34 snprintf(regionid, 16, "pnp %s", pnpid);
34 if (port) 35 if (port)
35 res = request_region(start, end-start+1, regionid); 36 res = request_region(start, end - start + 1, regionid);
36 else 37 else
37 res = request_mem_region(start, end-start+1, regionid); 38 res = request_mem_region(start, end - start + 1, regionid);
38 if (res == NULL) 39 if (res == NULL)
39 kfree(regionid); 40 kfree(regionid);
40 else 41 else
@@ -44,11 +45,10 @@ static void reserve_range(const char *pnpid, resource_size_t start, resource_siz
44 * example do reserve stuff they know about too, so we may well 45 * example do reserve stuff they know about too, so we may well
45 * have double reservations. 46 * have double reservations.
46 */ 47 */
47 printk(KERN_INFO 48 printk(KERN_INFO "pnp: %s: %s range 0x%llx-0x%llx %s reserved\n",
48 "pnp: %s: %s range 0x%llx-0x%llx %s reserved\n", 49 pnpid, port ? "ioport" : "iomem",
49 pnpid, port ? "ioport" : "iomem", 50 (unsigned long long)start, (unsigned long long)end,
50 (unsigned long long)start, (unsigned long long)end, 51 NULL != res ? "has been" : "could not be");
51 NULL != res ? "has been" : "could not be");
52} 52}
53 53
54static void reserve_resources_of_dev(const struct pnp_dev *dev) 54static void reserve_resources_of_dev(const struct pnp_dev *dev)
@@ -74,7 +74,7 @@ static void reserve_resources_of_dev(const struct pnp_dev *dev)
74 continue; /* invalid */ 74 continue; /* invalid */
75 75
76 reserve_range(dev->dev.bus_id, pnp_port_start(dev, i), 76 reserve_range(dev->dev.bus_id, pnp_port_start(dev, i),
77 pnp_port_end(dev, i), 1); 77 pnp_port_end(dev, i), 1);
78 } 78 }
79 79
80 for (i = 0; i < PNP_MAX_MEM; i++) { 80 for (i = 0; i < PNP_MAX_MEM; i++) {
@@ -82,24 +82,22 @@ static void reserve_resources_of_dev(const struct pnp_dev *dev)
82 continue; 82 continue;
83 83
84 reserve_range(dev->dev.bus_id, pnp_mem_start(dev, i), 84 reserve_range(dev->dev.bus_id, pnp_mem_start(dev, i),
85 pnp_mem_end(dev, i), 0); 85 pnp_mem_end(dev, i), 0);
86 } 86 }
87
88 return;
89} 87}
90 88
91static int system_pnp_probe(struct pnp_dev * dev, const struct pnp_device_id *dev_id) 89static int system_pnp_probe(struct pnp_dev *dev,
90 const struct pnp_device_id *dev_id)
92{ 91{
93 reserve_resources_of_dev(dev); 92 reserve_resources_of_dev(dev);
94 return 0; 93 return 0;
95} 94}
96 95
97static struct pnp_driver system_pnp_driver = { 96static struct pnp_driver system_pnp_driver = {
98 .name = "system", 97 .name = "system",
99 .id_table = pnp_dev_table, 98 .id_table = pnp_dev_table,
100 .flags = PNP_DRIVER_RES_DO_NOT_CHANGE, 99 .flags = PNP_DRIVER_RES_DO_NOT_CHANGE,
101 .probe = system_pnp_probe, 100 .probe = system_pnp_probe,
102 .remove = NULL,
103}; 101};
104 102
105static int __init pnp_system_init(void) 103static int __init pnp_system_init(void)
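reserve_range() converts the inclusive [start, end] pair into the length that request_region()/request_mem_region() expect (end - start + 1), and frees regionid only when the request fails: on success the resource tree keeps referencing the string, so it must stay allocated. That ownership pattern as a hedged kernel-style sketch (not the driver itself):

#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/slab.h>

static void reserve_named(resource_size_t start, resource_size_t end)
{
        struct resource *res;
        char *name = kasprintf(GFP_KERNEL, "pnp 0x%llx",
                               (unsigned long long)start);

        if (!name)
                return;
        /* inclusive bounds -> length */
        res = request_region(start, end - start + 1, name);
        if (!res)
                kfree(name);    /* nothing references the name on failure */
        /* on success 'name' must outlive the resource; do not free it */
}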
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 7ede9e725360..d3a33aa2696f 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -15,34 +15,36 @@ rtc-core-$(CONFIG_RTC_INTF_DEV) += rtc-dev.o
15rtc-core-$(CONFIG_RTC_INTF_PROC) += rtc-proc.o 15rtc-core-$(CONFIG_RTC_INTF_PROC) += rtc-proc.o
16rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o 16rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o
17 17
18# Keep the list ordered.
19
20obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o
21obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o
22obj-$(CONFIG_RTC_DRV_BFIN) += rtc-bfin.o
18obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o 23obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o
19obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o 24obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o
20obj-$(CONFIG_RTC_DRV_ISL1208) += rtc-isl1208.o
21obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o
22obj-$(CONFIG_RTC_DRV_AT32AP700X) += rtc-at32ap700x.o
23obj-$(CONFIG_RTC_DRV_DS1307) += rtc-ds1307.o 25obj-$(CONFIG_RTC_DRV_DS1307) += rtc-ds1307.o
26obj-$(CONFIG_RTC_DRV_DS1553) += rtc-ds1553.o
24obj-$(CONFIG_RTC_DRV_DS1672) += rtc-ds1672.o 27obj-$(CONFIG_RTC_DRV_DS1672) += rtc-ds1672.o
25obj-$(CONFIG_RTC_DRV_DS1742) += rtc-ds1742.o 28obj-$(CONFIG_RTC_DRV_DS1742) += rtc-ds1742.o
29obj-$(CONFIG_RTC_DRV_EP93XX) += rtc-ep93xx.o
30obj-$(CONFIG_RTC_DRV_ISL1208) += rtc-isl1208.o
31obj-$(CONFIG_RTC_DRV_M41T80) += rtc-m41t80.o
32obj-$(CONFIG_RTC_DRV_M48T59) += rtc-m48t59.o
33obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o
34obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o
35obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o
26obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o 36obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o
27obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o 37obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o
28obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o 38obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o
39obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o
40obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o
41obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o
29obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o 42obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o
30obj-$(CONFIG_RTC_DRV_S3C) += rtc-s3c.o 43obj-$(CONFIG_RTC_DRV_S3C) += rtc-s3c.o
31obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o
32obj-$(CONFIG_RTC_DRV_M41T80) += rtc-m41t80.o
33obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o
34obj-$(CONFIG_RTC_DRV_DS1553) += rtc-ds1553.o
35obj-$(CONFIG_RTC_DRV_STK17TA8) += rtc-stk17ta8.o
36obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o
37obj-$(CONFIG_RTC_DRV_EP93XX) += rtc-ep93xx.o
38obj-$(CONFIG_RTC_DRV_SA1100) += rtc-sa1100.o 44obj-$(CONFIG_RTC_DRV_SA1100) += rtc-sa1100.o
39obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o
40obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o
41obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o
42obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o
43obj-$(CONFIG_RTC_DRV_V3020) += rtc-v3020.o
44obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o
45obj-$(CONFIG_RTC_DRV_SH) += rtc-sh.o 45obj-$(CONFIG_RTC_DRV_SH) += rtc-sh.o
46obj-$(CONFIG_RTC_DRV_BFIN) += rtc-bfin.o 46obj-$(CONFIG_RTC_DRV_STK17TA8) += rtc-stk17ta8.o
47obj-$(CONFIG_RTC_DRV_M48T59) += rtc-m48t59.o 47obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o
48obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o 48obj-$(CONFIG_RTC_DRV_V3020) += rtc-v3020.o
49obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o
50obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 5158a625671f..db6f3f0d8982 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -352,7 +352,7 @@ read_rtc:
352 /* oscillator fault? clear flag, and warn */ 352 /* oscillator fault? clear flag, and warn */
353 if (ds1307->regs[DS1307_REG_CONTROL] & DS1338_BIT_OSF) { 353 if (ds1307->regs[DS1307_REG_CONTROL] & DS1338_BIT_OSF) {
354 i2c_smbus_write_byte_data(client, DS1307_REG_CONTROL, 354 i2c_smbus_write_byte_data(client, DS1307_REG_CONTROL,
355 ds1307->regs[DS1337_REG_CONTROL] 355 ds1307->regs[DS1307_REG_CONTROL]
356 & ~DS1338_BIT_OSF); 356 & ~DS1338_BIT_OSF);
357 dev_warn(&client->dev, "SET TIME!\n"); 357 dev_warn(&client->dev, "SET TIME!\n");
358 goto read_rtc; 358 goto read_rtc;
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index f10d3facecbe..8288b6b2bf2b 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -258,7 +258,8 @@ static const struct rtc_class_ops stk17ta8_rtc_ops = {
258 .ioctl = stk17ta8_rtc_ioctl, 258 .ioctl = stk17ta8_rtc_ioctl,
259}; 259};
260 260
261static ssize_t stk17ta8_nvram_read(struct kobject *kobj, char *buf, 261static ssize_t stk17ta8_nvram_read(struct kobject *kobj,
262 struct bin_attribute *attr, char *buf,
262 loff_t pos, size_t size) 263 loff_t pos, size_t size)
263{ 264{
264 struct platform_device *pdev = 265 struct platform_device *pdev =
@@ -272,7 +273,8 @@ static ssize_t stk17ta8_nvram_read(struct kobject *kobj, char *buf,
272 return count; 273 return count;
273} 274}
274 275
275static ssize_t stk17ta8_nvram_write(struct kobject *kobj, char *buf, 276static ssize_t stk17ta8_nvram_write(struct kobject *kobj,
277 struct bin_attribute *attr, char *buf,
276 loff_t pos, size_t size) 278 loff_t pos, size_t size)
277{ 279{
278 struct platform_device *pdev = 280 struct platform_device *pdev =
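The stk17ta8 hunks track a sysfs API change: bin_attribute ->read/->write callbacks gained a struct bin_attribute * parameter ahead of the buffer, so the callback no longer has to recover the attribute from the kobject. A minimal callback against the updated prototype (a hedged sketch with a hypothetical backing buffer):

#include <linux/kobject.h>
#include <linux/string.h>
#include <linux/sysfs.h>
#include <linux/types.h>

static char nvram[128];         /* hypothetical NVRAM shadow */

static ssize_t nvram_read(struct kobject *kobj, struct bin_attribute *attr,
                          char *buf, loff_t pos, size_t size)
{
        size_t count = size;

        if (pos >= sizeof(nvram))
                return 0;
        if (count > sizeof(nvram) - pos)
                count = sizeof(nvram) - pos;
        memcpy(buf, nvram + pos, count);
        return count;
}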
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 7071ff8da63e..5cf48123e0ef 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -28,7 +28,7 @@
28#include <asm/hardware.h> 28#include <asm/hardware.h>
29 29
30#include <asm/arch/regs-gpio.h> 30#include <asm/arch/regs-gpio.h>
31#include <asm/arch/regs-spi.h> 31#include <asm/plat-s3c24xx/regs-spi.h>
32#include <asm/arch/spi.h> 32#include <asm/arch/spi.h>
33 33
34struct s3c24xx_spi { 34struct s3c24xx_spi {
diff --git a/drivers/video/chipsfb.c b/drivers/video/chipsfb.c
index f48e8c534c87..6796ba62c3c6 100644
--- a/drivers/video/chipsfb.c
+++ b/drivers/video/chipsfb.c
@@ -24,6 +24,7 @@
24#include <linux/delay.h> 24#include <linux/delay.h>
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/fb.h> 26#include <linux/fb.h>
27#include <linux/pm.h>
27#include <linux/init.h> 28#include <linux/init.h>
28#include <linux/pci.h> 29#include <linux/pci.h>
29#include <linux/console.h> 30#include <linux/console.h>
@@ -458,7 +459,7 @@ static int chipsfb_pci_suspend(struct pci_dev *pdev, pm_message_t state)
458 459
459 if (state.event == pdev->dev.power.power_state.event) 460 if (state.event == pdev->dev.power.power_state.event)
460 return 0; 461 return 0;
461 if (state.event != PM_SUSPEND_MEM) 462 if (state.event != PM_EVENT_SUSPEND)
462 goto done; 463 goto done;
463 464
464 acquire_console_sem(); 465 acquire_console_sem();
diff --git a/drivers/video/tgafb.c b/drivers/video/tgafb.c
index 89facb73edfc..d292a37ec7d6 100644
--- a/drivers/video/tgafb.c
+++ b/drivers/video/tgafb.c
@@ -849,7 +849,7 @@ tgafb_clut_imageblit(struct fb_info *info, const struct fb_image *image)
849 u32 *palette = ((u32 *)info->pseudo_palette); 849 u32 *palette = ((u32 *)info->pseudo_palette);
850 unsigned long pos, line_length, i, j; 850 unsigned long pos, line_length, i, j;
851 const unsigned char *data; 851 const unsigned char *data;
852 void *regs_base, *fb_base; 852 void __iomem *regs_base, *fb_base;
853 853
854 dx = image->dx; 854 dx = image->dx;
855 dy = image->dy; 855 dy = image->dy;
diff --git a/drivers/w1/masters/ds1wm.c b/drivers/w1/masters/ds1wm.c
index 763bc73e5070..4b696641ce33 100644
--- a/drivers/w1/masters/ds1wm.c
+++ b/drivers/w1/masters/ds1wm.c
@@ -85,7 +85,7 @@ static struct {
85}; 85};
86 86
87struct ds1wm_data { 87struct ds1wm_data {
88 void *map; 88 void __iomem *map;
89 int bus_shift; /* # of shifts to calc register offsets */ 89 int bus_shift; /* # of shifts to calc register offsets */
90 struct platform_device *pdev; 90 struct platform_device *pdev;
91 struct ds1wm_platform_data *pdata; 91 struct ds1wm_platform_data *pdata;
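The tgafb and ds1wm hunks add the sparse __iomem address-space annotation to MMIO pointers. Such pointers are opaque cookies: they must not be dereferenced directly, only handed to the I/O accessors. The idiom, as a minimal hedged sketch:

#include <linux/io.h>
#include <linux/types.h>

static u32 read_reg(void __iomem *base, unsigned int offset)
{
        /* readl()/writel() (or ioread32()/iowrite32()) are the only
         * legitimate ways to touch an __iomem pointer */
        return readl(base + offset);
}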
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 9e943fbce81b..227d53b12a5c 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -782,8 +782,8 @@ static int process_msg(void)
782 msg->u.watch.vec = split(body, msg->hdr.len, 782 msg->u.watch.vec = split(body, msg->hdr.len,
783 &msg->u.watch.vec_size); 783 &msg->u.watch.vec_size);
784 if (IS_ERR(msg->u.watch.vec)) { 784 if (IS_ERR(msg->u.watch.vec)) {
785 kfree(msg);
786 err = PTR_ERR(msg->u.watch.vec); 785 err = PTR_ERR(msg->u.watch.vec);
786 kfree(msg);
787 goto out; 787 goto out;
788 } 788 }
789 789
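The xenbus_xs.c change is a use-after-free fix: the old order freed msg and then dereferenced msg->u.watch.vec to extract the error code. Anything needed from an object must be read before the containing allocation is released. A minimal kernel-style sketch of the corrected ordering (types and names hypothetical):

#include <linux/err.h>
#include <linux/slab.h>

struct reply {
        void *vec;
};

static int consume(struct reply *msg)
{
        int err = 0;

        if (IS_ERR(msg->vec))
                err = PTR_ERR(msg->vec);        /* read first... */
        kfree(msg);                             /* ...free last */
        return err;
}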
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 68579a0ed3f0..639a32c3c9c1 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -580,7 +580,7 @@ static int ext2_check_descriptors (struct super_block * sb)
580 return 0; 580 return 0;
581 } 581 }
582 if (le32_to_cpu(gdp->bg_inode_table) < first_block || 582 if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
583 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group > 583 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
584 last_block) 584 last_block)
585 { 585 {
586 ext2_error (sb, "ext2_check_descriptors", 586 ext2_error (sb, "ext2_check_descriptors",
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f0614e3f1fe8..22cfdd61c060 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1221,7 +1221,7 @@ static int ext3_check_descriptors (struct super_block * sb)
1221 return 0; 1221 return 0;
1222 } 1222 }
1223 if (le32_to_cpu(gdp->bg_inode_table) < first_block || 1223 if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
1224 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group > 1224 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
1225 last_block) 1225 last_block)
1226 { 1226 {
1227 ext3_error (sb, "ext3_check_descriptors", 1227 ext3_error (sb, "ext3_check_descriptors",
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 75adbb64e028..4550b83ab1c9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1283,7 +1283,7 @@ static int ext4_check_descriptors (struct super_block * sb)
1283 } 1283 }
1284 inode_table = ext4_inode_table(sb, gdp); 1284 inode_table = ext4_inode_table(sb, gdp);
1285 if (inode_table < first_block || 1285 if (inode_table < first_block ||
1286 inode_table + sbi->s_itb_per_group > last_block) 1286 inode_table + sbi->s_itb_per_group - 1 > last_block)
1287 { 1287 {
1288 ext4_error (sb, "ext4_check_descriptors", 1288 ext4_error (sb, "ext4_check_descriptors",
1289 "Inode table for group %d" 1289 "Inode table for group %d"
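The identical ext2/ext3/ext4 change fixes an off-by-one: a table of s_itb_per_group blocks starting at inode_table occupies blocks inode_table through inode_table + s_itb_per_group - 1 inclusive, so a table ending exactly at last_block was wrongly rejected before. For example, with inode_table = 100, s_itb_per_group = 4 and last_block = 103, the old test computed 104 > 103 and flagged a valid group. The containment test in isolation (assuming nblocks >= 1):

/* blocks start .. start + nblocks - 1 must lie inside
 * the inclusive window [first, last] */
static int range_fits(unsigned long start, unsigned long nblocks,
                      unsigned long first, unsigned long last)
{
        return start >= first && start + nblocks - 1 <= last;
}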
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index b3efa4536cc5..a21e4bc5444b 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -335,10 +335,10 @@ static void nlmsvc_freegrantargs(struct nlm_rqst *call)
335/* 335/*
336 * Deferred lock request handling for non-blocking lock 336 * Deferred lock request handling for non-blocking lock
337 */ 337 */
338static u32 338static __be32
339nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block) 339nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
340{ 340{
341 u32 status = nlm_lck_denied_nolocks; 341 __be32 status = nlm_lck_denied_nolocks;
342 342
343 block->b_flags |= B_QUEUED; 343 block->b_flags |= B_QUEUED;
344 344
@@ -352,7 +352,7 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
352 status = nlm_drop_reply; 352 status = nlm_drop_reply;
353 } 353 }
354 dprintk("lockd: nlmsvc_defer_lock_rqst block %p flags %d status %d\n", 354 dprintk("lockd: nlmsvc_defer_lock_rqst block %p flags %d status %d\n",
355 block, block->b_flags, status); 355 block, block->b_flags, ntohl(status));
356 356
357 return status; 357 return status;
358} 358}
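The lockd change makes the wire endianness of NLM status codes explicit: __be32 values are big-endian on every host, which is why the dprintk() now converts with ntohl() before printing. A small userspace illustration of why the raw value is misleading on little-endian machines:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t wire = htonl(4);       /* a status code in wire order */

        /* on little-endian, the raw word prints as 67108864 (0x04000000);
         * ntohl() recovers the human-readable 4 */
        printf("raw %u, host %u\n", (unsigned)wire, (unsigned)ntohl(wire));
        return 0;
}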
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b3d55c6747fd..8ef0964179bc 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2450,7 +2450,7 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2450} 2450}
2451 2451
2452static void 2452static void
2453nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, int nfserr, 2453nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
2454 struct nfsd4_secinfo *secinfo) 2454 struct nfsd4_secinfo *secinfo)
2455{ 2455{
2456 int i = 0; 2456 int i = 0;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index b6f12593c39d..981027d1187b 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1042,7 +1042,8 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1042 pos = I_UNFM_NUM(&s_ih); 1042 pos = I_UNFM_NUM(&s_ih);
1043 1043
1044 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) { 1044 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) {
1045 __u32 *unfm, block; 1045 __le32 *unfm;
1046 __u32 block;
1046 1047
1047 /* Each unformatted block deletion may involve one additional 1048 /* Each unformatted block deletion may involve one additional
1048 * bitmap block into the transaction, thereby the initial 1049 * bitmap block into the transaction, thereby the initial
@@ -1052,7 +1053,7 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1052 break; 1053 break;
1053 } 1054 }
1054 1055
1055 unfm = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1; 1056 unfm = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1;
1056 block = get_block_num(unfm, 0); 1057 block = get_block_num(unfm, 0);
1057 1058
1058 if (block != 0) { 1059 if (block != 0) {
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 3b07f26d984d..7b941abbcde0 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -320,7 +320,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
320 320
321 if (sizemask != sizeof(sigset_t) || 321 if (sizemask != sizeof(sigset_t) ||
322 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 322 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
323 return error = -EINVAL; 323 return -EINVAL;
324 sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); 324 sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
325 signotset(&sigmask); 325 signotset(&sigmask);
326 326
diff --git a/fs/timerfd.c b/fs/timerfd.c
index af9eca5c0230..61983f3b107c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -95,7 +95,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
95{ 95{
96 struct timerfd_ctx *ctx = file->private_data; 96 struct timerfd_ctx *ctx = file->private_data;
97 ssize_t res; 97 ssize_t res;
98 u32 ticks = 0; 98 u64 ticks = 0;
99 DECLARE_WAITQUEUE(wait, current); 99 DECLARE_WAITQUEUE(wait, current);
100 100
101 if (count < sizeof(ticks)) 101 if (count < sizeof(ticks))
@@ -130,7 +130,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
130 * callback to avoid DoS attacks specifying a very 130 * callback to avoid DoS attacks specifying a very
131 * short timer period. 131 * short timer period.
132 */ 132 */
133 ticks = (u32) 133 ticks = (u64)
134 hrtimer_forward(&ctx->tmr, 134 hrtimer_forward(&ctx->tmr,
135 hrtimer_cb_get_time(&ctx->tmr), 135 hrtimer_cb_get_time(&ctx->tmr),
136 ctx->tintv); 136 ctx->tintv);
@@ -140,7 +140,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
140 } 140 }
141 spin_unlock_irq(&ctx->wqh.lock); 141 spin_unlock_irq(&ctx->wqh.lock);
142 if (ticks) 142 if (ticks)
143 res = put_user(ticks, buf) ? -EFAULT: sizeof(ticks); 143 res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
144 return res; 144 return res;
145} 145}
146 146
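
Widening ticks from u32 to u64 changes the read() ABI: a timerfd now hands userspace an 8-byte expiration count, and the `count < sizeof(ticks)` test means shorter buffers fail with -EINVAL. A hedged userspace sketch of the read side (fd creation omitted; this kernel predates the later timerfd_create()/timerfd_settime() split):

    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    static int drain_expirations(int tfd)
    {
            uint64_t ticks;         /* must be 8 bytes to match the kernel */
            ssize_t n = read(tfd, &ticks, sizeof(ticks));

            if (n != sizeof(ticks))
                    return -1;
            printf("timer expired %llu times\n", (unsigned long long)ticks);
            return 0;
    }
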
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 141cf15067c2..42319d75aaab 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -139,7 +139,7 @@ STATIC int xfs_inumbers_fmt_compat(
139 long count, 139 long count,
140 long *written) 140 long *written)
141{ 141{
142 compat_xfs_inogrp_t *p32 = ubuffer; 142 compat_xfs_inogrp_t __user *p32 = ubuffer;
143 long i; 143 long i;
144 144
145 for (i = 0; i < count; i++) { 145 for (i = 0; i < count; i++) {
@@ -444,7 +444,7 @@ xfs_compat_ioctl(
444 case XFS_IOC_FSINUMBERS_32: 444 case XFS_IOC_FSINUMBERS_32:
445 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq); 445 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq);
446 return xfs_ioc_bulkstat_compat(XFS_BHVTOI(VNHEAD(vp))->i_mount, 446 return xfs_ioc_bulkstat_compat(XFS_BHVTOI(VNHEAD(vp))->i_mount,
447 cmd, (void*)arg); 447 cmd, (void __user*)arg);
448 case XFS_IOC_FD_TO_HANDLE_32: 448 case XFS_IOC_FD_TO_HANDLE_32:
449 case XFS_IOC_PATH_TO_HANDLE_32: 449 case XFS_IOC_PATH_TO_HANDLE_32:
450 case XFS_IOC_PATH_TO_FSHANDLE_32: 450 case XFS_IOC_PATH_TO_FSHANDLE_32:
diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index 3a0cbeb03fa1..9e71201000d5 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -324,7 +324,7 @@ static inline int fls64(unsigned long x)
324{ 324{
325 unsigned long t, a, r; 325 unsigned long t, a, r;
326 326
327 t = __kernel_cmpbge (x, 0x0101010101010101); 327 t = __kernel_cmpbge (x, 0x0101010101010101UL);
328 a = __flsm1_tab[t]; 328 a = __flsm1_tab[t];
329 t = __kernel_extbl (x, a); 329 t = __kernel_extbl (x, a);
330 r = a*8 + __flsm1_tab[t] + (x != 0); 330 r = a*8 + __flsm1_tab[t] + (x != 0);
diff --git a/include/asm-arm/unaligned.h b/include/asm-arm/unaligned.h
index 795b9e5b9e6a..8431f6eed5c6 100644
--- a/include/asm-arm/unaligned.h
+++ b/include/asm-arm/unaligned.h
@@ -60,24 +60,24 @@ extern int __bug_unaligned_x(const void *ptr);
60 __get_unaligned_4_be((__p+4))) 60 __get_unaligned_4_be((__p+4)))
61 61
62#define __get_unaligned_le(ptr) \ 62#define __get_unaligned_le(ptr) \
63 ({ \ 63 ((__force typeof(*(ptr)))({ \
64 const __u8 *__p = (const __u8 *)(ptr); \ 64 const __u8 *__p = (const __u8 *)(ptr); \
65 __builtin_choose_expr(sizeof(*(ptr)) == 1, *__p, \ 65 __builtin_choose_expr(sizeof(*(ptr)) == 1, *__p, \
66 __builtin_choose_expr(sizeof(*(ptr)) == 2, __get_unaligned_2_le(__p), \ 66 __builtin_choose_expr(sizeof(*(ptr)) == 2, __get_unaligned_2_le(__p), \
67 __builtin_choose_expr(sizeof(*(ptr)) == 4, __get_unaligned_4_le(__p), \ 67 __builtin_choose_expr(sizeof(*(ptr)) == 4, __get_unaligned_4_le(__p), \
68 __builtin_choose_expr(sizeof(*(ptr)) == 8, __get_unaligned_8_le(__p), \ 68 __builtin_choose_expr(sizeof(*(ptr)) == 8, __get_unaligned_8_le(__p), \
69 (void)__bug_unaligned_x(__p))))); \ 69 (void)__bug_unaligned_x(__p))))); \
70 }) 70 }))
71 71
72#define __get_unaligned_be(ptr) \ 72#define __get_unaligned_be(ptr) \
73 ({ \ 73 ((__force typeof(*(ptr)))({ \
74 const __u8 *__p = (const __u8 *)(ptr); \ 74 const __u8 *__p = (const __u8 *)(ptr); \
75 __builtin_choose_expr(sizeof(*(ptr)) == 1, *__p, \ 75 __builtin_choose_expr(sizeof(*(ptr)) == 1, *__p, \
76 __builtin_choose_expr(sizeof(*(ptr)) == 2, __get_unaligned_2_be(__p), \ 76 __builtin_choose_expr(sizeof(*(ptr)) == 2, __get_unaligned_2_be(__p), \
77 __builtin_choose_expr(sizeof(*(ptr)) == 4, __get_unaligned_4_be(__p), \ 77 __builtin_choose_expr(sizeof(*(ptr)) == 4, __get_unaligned_4_be(__p), \
78 __builtin_choose_expr(sizeof(*(ptr)) == 8, __get_unaligned_8_be(__p), \ 78 __builtin_choose_expr(sizeof(*(ptr)) == 8, __get_unaligned_8_be(__p), \
79 (void)__bug_unaligned_x(__p))))); \ 79 (void)__bug_unaligned_x(__p))))); \
80 }) 80 }))
81 81
82 82
83static inline void __put_unaligned_2_le(__u32 __v, register __u8 *__p) 83static inline void __put_unaligned_2_le(__u32 __v, register __u8 *__p)
@@ -131,15 +131,16 @@ static inline void __put_unaligned_8_be(const unsigned long long __v, register _
131 */ 131 */
132#define __put_unaligned_le(val,ptr) \ 132#define __put_unaligned_le(val,ptr) \
133 ({ \ 133 ({ \
134 (void)sizeof(*(ptr) = (val)); \
134 switch (sizeof(*(ptr))) { \ 135 switch (sizeof(*(ptr))) { \
135 case 1: \ 136 case 1: \
136 *(ptr) = (val); \ 137 *(ptr) = (val); \
137 break; \ 138 break; \
138 case 2: __put_unaligned_2_le((val),(__u8 *)(ptr)); \ 139 case 2: __put_unaligned_2_le((__force u16)(val),(__u8 *)(ptr)); \
139 break; \ 140 break; \
140 case 4: __put_unaligned_4_le((val),(__u8 *)(ptr)); \ 141 case 4: __put_unaligned_4_le((__force u32)(val),(__u8 *)(ptr)); \
141 break; \ 142 break; \
142 case 8: __put_unaligned_8_le((val),(__u8 *)(ptr)); \ 143 case 8: __put_unaligned_8_le((__force u64)(val),(__u8 *)(ptr)); \
143 break; \ 144 break; \
144 default: __bug_unaligned_x(ptr); \ 145 default: __bug_unaligned_x(ptr); \
145 break; \ 146 break; \
@@ -149,15 +150,16 @@ static inline void __put_unaligned_8_be(const unsigned long long __v, register _
149 150
150#define __put_unaligned_be(val,ptr) \ 151#define __put_unaligned_be(val,ptr) \
151 ({ \ 152 ({ \
153 (void)sizeof(*(ptr) = (val)); \
152 switch (sizeof(*(ptr))) { \ 154 switch (sizeof(*(ptr))) { \
153 case 1: \ 155 case 1: \
154 *(ptr) = (val); \ 156 *(ptr) = (val); \
155 break; \ 157 break; \
156 case 2: __put_unaligned_2_be((val),(__u8 *)(ptr)); \ 158 case 2: __put_unaligned_2_be((__force u16)(val),(__u8 *)(ptr)); \
157 break; \ 159 break; \
158 case 4: __put_unaligned_4_be((val),(__u8 *)(ptr)); \ 160 case 4: __put_unaligned_4_be((__force u32)(val),(__u8 *)(ptr)); \
159 break; \ 161 break; \
160 case 8: __put_unaligned_8_be((val),(__u8 *)(ptr)); \ 162 case 8: __put_unaligned_8_be((__force u64)(val),(__u8 *)(ptr)); \
161 break; \ 163 break; \
162 default: __bug_unaligned_x(ptr); \ 164 default: __bug_unaligned_x(ptr); \
163 break; \ 165 break; \
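
Two fixes meet in this file: the `((__force typeof(*(ptr)))(...))` wrapper makes the get macros return the pointer's own (possibly endian-annotated) type, and the new `(void)sizeof(*(ptr) = (val));` line is a compile-time type check. sizeof never evaluates its operand, so no store is emitted, but the compiler still type-checks the assignment. A standalone sketch of that trick in plain C:

    #include <stdio.h>

    /* Type-check "can val be assigned through ptr?" without generating
     * code: the assignment sits in sizeof's unevaluated context. */
    #define CHECK_ASSIGNABLE(ptr, val) ((void)sizeof(*(ptr) = (val)))

    int main(void)
    {
            int x = 1;

            CHECK_ASSIGNABLE(&x, 42);       /* compiles: int = int */
            /* CHECK_ASSIGNABLE(&x, "no");     diagnosed at compile time */
            printf("x still %d: the checked assignment never ran\n", x);
            return 0;
    }
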
diff --git a/include/asm-m68k/raw_io.h b/include/asm-m68k/raw_io.h
index 91c623f0994c..d9eb9834ccc8 100644
--- a/include/asm-m68k/raw_io.h
+++ b/include/asm-m68k/raw_io.h
@@ -36,15 +36,15 @@ extern void __iounmap(void *addr, unsigned long size);
36#define in_be32(addr) \ 36#define in_be32(addr) \
37 ({ u32 __v = (*(__force volatile u32 *) (addr)); __v; }) 37 ({ u32 __v = (*(__force volatile u32 *) (addr)); __v; })
38#define in_le16(addr) \ 38#define in_le16(addr) \
39 ({ u16 __v = le16_to_cpu(*(__force volatile u16 *) (addr)); __v; }) 39 ({ u16 __v = le16_to_cpu(*(__force volatile __le16 *) (addr)); __v; })
40#define in_le32(addr) \ 40#define in_le32(addr) \
41 ({ u32 __v = le32_to_cpu(*(__force volatile u32 *) (addr)); __v; }) 41 ({ u32 __v = le32_to_cpu(*(__force volatile __le32 *) (addr)); __v; })
42 42
43#define out_8(addr,b) (void)((*(__force volatile u8 *) (addr)) = (b)) 43#define out_8(addr,b) (void)((*(__force volatile u8 *) (addr)) = (b))
44#define out_be16(addr,w) (void)((*(__force volatile u16 *) (addr)) = (w)) 44#define out_be16(addr,w) (void)((*(__force volatile u16 *) (addr)) = (w))
45#define out_be32(addr,l) (void)((*(__force volatile u32 *) (addr)) = (l)) 45#define out_be32(addr,l) (void)((*(__force volatile u32 *) (addr)) = (l))
46#define out_le16(addr,w) (void)((*(__force volatile u16 *) (addr)) = cpu_to_le16(w)) 46#define out_le16(addr,w) (void)((*(__force volatile __le16 *) (addr)) = cpu_to_le16(w))
47#define out_le32(addr,l) (void)((*(__force volatile u32 *) (addr)) = cpu_to_le32(l)) 47#define out_le32(addr,l) (void)((*(__force volatile __le32 *) (addr)) = cpu_to_le32(l))
48 48
49#define raw_inb in_8 49#define raw_inb in_8
50#define raw_inw in_be16 50#define raw_inw in_be16
diff --git a/include/asm-m68knommu/hardirq.h b/include/asm-m68knommu/hardirq.h
index 980075bab792..bfad28149a49 100644
--- a/include/asm-m68knommu/hardirq.h
+++ b/include/asm-m68knommu/hardirq.h
@@ -22,4 +22,6 @@ typedef struct {
22# error HARDIRQ_BITS is too low! 22# error HARDIRQ_BITS is too low!
23#endif 23#endif
24 24
25void ack_bad_irq(unsigned int irq);
26
25#endif /* __M68K_HARDIRQ_H */ 27#endif /* __M68K_HARDIRQ_H */
diff --git a/include/asm-m68knommu/machdep.h b/include/asm-m68knommu/machdep.h
index 6ce28f8e0ead..2b75a300df4f 100644
--- a/include/asm-m68knommu/machdep.h
+++ b/include/asm-m68knommu/machdep.h
@@ -48,6 +48,5 @@ extern char *mach_sysrq_xlate;
48 48
49extern void config_BSP(char *command, int len); 49extern void config_BSP(char *command, int len);
50extern void (*mach_tick)(void); 50extern void (*mach_tick)(void);
51extern void (*mach_trap_init)(void);
52 51
53#endif /* _M68KNOMMU_MACHDEP_H */ 52#endif /* _M68KNOMMU_MACHDEP_H */
diff --git a/include/asm-mips/edac.h b/include/asm-mips/edac.h
new file mode 100644
index 000000000000..83719eee2d13
--- /dev/null
+++ b/include/asm-mips/edac.h
@@ -0,0 +1,35 @@
1#ifndef ASM_EDAC_H
2#define ASM_EDAC_H
3
4/* ECC atomic, DMA, SMP and interrupt safe scrub function */
5
6static inline void atomic_scrub(void *va, u32 size)
7{
8 unsigned long *virt_addr = va;
9 unsigned long temp;
10 u32 i;
11
12 for (i = 0; i < size / sizeof(unsigned long); i++, virt_addr++) {
13
14 /*
15 * Very carefully read and write to memory atomically
16 * so we are interrupt, DMA and SMP safe.
17 *
18 * Intel: asm("lock; addl $0, %0"::"m"(*virt_addr));
19 */
20
21 __asm__ __volatile__ (
22 " .set mips3 \n"
23 "1: ll %0, %1 # atomic_add \n"
24 " ll %0, %1 # atomic_add \n"
25 " addu %0, $0 \n"
26 " sc %0, %1 \n"
27 " beqz %0, 1b \n"
28 " .set mips0 \n"
29 : "=&r" (temp), "=m" (*virt_addr)
30 : "m" (*virt_addr));
31
32 }
33}
34
35#endif
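
The scrubber works because ll/sc is MIPS's load-linked/store-conditional pair: ll reads the word and arms a reservation, addu adds zero, sc writes the unchanged value back only if nothing else touched the location, and beqz retries on failure; the atomic rewrite regenerates the word's ECC check bits. A portable sketch of the same idea using a GCC atomic builtin (an assumption for illustration; the kernel does this with per-architecture asm):

    #include <stddef.h>
    #include <stdint.h>

    /* Scrub a region by atomically adding 0 to every word: an atomic
     * read-modify-write that leaves the value intact but rewrites memory,
     * refreshing the ECC bits. */
    static void scrub_region(void *va, uint32_t size)
    {
            unsigned long *p = va;
            uint32_t i;

            for (i = 0; i < size / sizeof(unsigned long); i++, p++)
                    __sync_fetch_and_add(p, 0UL);
    }
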
diff --git a/include/asm-powerpc/bug.h b/include/asm-powerpc/bug.h
index f6fa39474846..a248b8bd4d7c 100644
--- a/include/asm-powerpc/bug.h
+++ b/include/asm-powerpc/bug.h
@@ -79,7 +79,7 @@
79 _EMIT_BUG_ENTRY \ 79 _EMIT_BUG_ENTRY \
80 : : "i" (__FILE__), "i" (__LINE__), "i" (0), \ 80 : : "i" (__FILE__), "i" (__LINE__), "i" (0), \
81 "i" (sizeof(struct bug_entry)), \ 81 "i" (sizeof(struct bug_entry)), \
82 "r" ((long)(x))); \ 82 "r" ((__force long)(x))); \
83 } \ 83 } \
84} while (0) 84} while (0)
85 85
diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h
index 10c51f457d48..236a9210e5fc 100644
--- a/include/asm-powerpc/page.h
+++ b/include/asm-powerpc/page.h
@@ -190,7 +190,6 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr,
190extern int page_is_ram(unsigned long pfn); 190extern int page_is_ram(unsigned long pfn);
191 191
192struct vm_area_struct; 192struct vm_area_struct;
193extern const char *arch_vma_name(struct vm_area_struct *vma);
194 193
195#include <asm-generic/memory_model.h> 194#include <asm-generic/memory_model.h>
196#endif /* __ASSEMBLY__ */ 195#endif /* __ASSEMBLY__ */
diff --git a/include/asm-x86_64/uaccess.h b/include/asm-x86_64/uaccess.h
index 9df30b939c4e..f4ce8768ad44 100644
--- a/include/asm-x86_64/uaccess.h
+++ b/include/asm-x86_64/uaccess.h
@@ -100,7 +100,7 @@ struct exception_table_entry
100 case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break; \ 100 case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break; \
101 default: __get_user_bad(); break; \ 101 default: __get_user_bad(); break; \
102 } \ 102 } \
103 (x) = (typeof(*(ptr)))__val_gu; \ 103 (x) = (__force typeof(*(ptr)))__val_gu; \
104 __ret_gu; \ 104 __ret_gu; \
105}) 105})
106 106
@@ -192,7 +192,7 @@ struct __large_struct { unsigned long buf[100]; };
192 int __gu_err; \ 192 int __gu_err; \
193 unsigned long __gu_val; \ 193 unsigned long __gu_val; \
194 __get_user_size(__gu_val,(ptr),(size),__gu_err); \ 194 __get_user_size(__gu_val,(ptr),(size),__gu_err); \
195 (x) = (typeof(*(ptr)))__gu_val; \ 195 (x) = (__force typeof(*(ptr)))__gu_val; \
196 __gu_err; \ 196 __gu_err; \
197}) 197})
198 198
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 12a1291855e2..86f9a3a6137d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -15,8 +15,8 @@
15# define __acquire(x) __context__(x,1) 15# define __acquire(x) __context__(x,1)
16# define __release(x) __context__(x,-1) 16# define __release(x) __context__(x,-1)
17# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) 17# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
18extern void __chk_user_ptr(const void __user *); 18extern void __chk_user_ptr(const volatile void __user *);
19extern void __chk_io_ptr(const void __iomem *); 19extern void __chk_io_ptr(const volatile void __iomem *);
20#else 20#else
21# define __user 21# define __user
22# define __kernel 22# define __kernel
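
Making these checker hooks take `const volatile` pointers lets volatile-qualified `__user`/`__iomem` pointers pass through without spurious qualifier warnings; the functions are never defined, since sparse only type-checks the call. A minimal sketch of how the address-space annotation behaves under sparse, mirroring the kernel's own definitions:

    #ifdef __CHECKER__
    # define __user __attribute__((noderef, address_space(1)))
    #else
    # define __user
    #endif

    /* sparse rejects dereferencing a noderef pointer or mixing address
     * spaces; a real compiler sees a plain pointer. */
    static int peek_flag(int __user *uptr)
    {
            /* return *uptr;   <-- sparse: dereference of noderef expression */
            (void)uptr;
            return 0;
    }
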
diff --git a/include/linux/device.h b/include/linux/device.h
index d9f0a57f5a2f..3a38d1f70cb7 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -551,6 +551,9 @@ extern void put_device(struct device * dev);
551/* drivers/base/power/shutdown.c */ 551/* drivers/base/power/shutdown.c */
552extern void device_shutdown(void); 552extern void device_shutdown(void);
553 553
554/* drivers/base/sys.c */
555extern void sysdev_shutdown(void);
556
554 557
555/* drivers/base/firmware.c */ 558/* drivers/base/firmware.c */
556extern int __must_check firmware_register(struct kset *); 559extern int __must_check firmware_register(struct kset *);
diff --git a/include/linux/lguest.h b/include/linux/lguest.h
index 500aace21ca7..e76c151c7129 100644
--- a/include/linux/lguest.h
+++ b/include/linux/lguest.h
@@ -27,18 +27,38 @@
27#define LG_CLOCK_MIN_DELTA 100UL 27#define LG_CLOCK_MIN_DELTA 100UL
28#define LG_CLOCK_MAX_DELTA ULONG_MAX 28#define LG_CLOCK_MAX_DELTA ULONG_MAX
29 29
30/*G:031 First, how does our Guest contact the Host to ask for privileged
31 * operations? There are two ways: the direct way is to make a "hypercall",
32 * to make requests of the Host Itself.
33 *
34 * Our hypercall mechanism uses the highest unused trap code (traps 32 and
35 * above are used by real hardware interrupts). Seventeen hypercalls are
36 * available: the hypercall number is put in the %eax register, and the
37 * arguments (when required) are placed in %edx, %ebx and %ecx. If a return
38 * value makes sense, it's returned in %eax.
39 *
40 * Grossly invalid calls result in Sudden Death at the hands of the vengeful
41 * Host, rather than returning failure. This reflects Winston Churchill's
42 * definition of a gentleman: "someone who is only rude intentionally". */
30#define LGUEST_TRAP_ENTRY 0x1F 43#define LGUEST_TRAP_ENTRY 0x1F
31 44
32static inline unsigned long 45static inline unsigned long
33hcall(unsigned long call, 46hcall(unsigned long call,
34 unsigned long arg1, unsigned long arg2, unsigned long arg3) 47 unsigned long arg1, unsigned long arg2, unsigned long arg3)
35{ 48{
49 /* "int" is the Intel instruction to trigger a trap. */
36 asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) 50 asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
 51 /* The call is in %eax (aka "a"), and it can be overwritten */
37 : "=a"(call) 52 : "=a"(call)
 53 /* The call and its arguments are in %eax, %edx, %ebx & %ecx */
38 : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3) 54 : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
55 /* "memory" means this might write somewhere in memory.
56 * This isn't true for all calls, but it's safe to tell
57 * gcc that it might happen so it doesn't get clever. */
39 : "memory"); 58 : "memory");
40 return call; 59 return call;
41} 60}
61/*:*/
42 62
43void async_hcall(unsigned long call, 63void async_hcall(unsigned long call,
44 unsigned long arg1, unsigned long arg2, unsigned long arg3); 64 unsigned long arg1, unsigned long arg2, unsigned long arg3);
@@ -52,31 +72,40 @@ struct hcall_ring
52 u32 eax, edx, ebx, ecx; 72 u32 eax, edx, ebx, ecx;
53}; 73};
54 74
 55/* All the good stuff happens here: guest registers it with LGUEST_INIT */ 75/*G:032 The second method of communicating with the Host is via "struct
76 * lguest_data". The Guest's very first hypercall is to tell the Host where
77 * this is, and then the Guest and Host both publish information in it. :*/
56struct lguest_data 78struct lguest_data
57{ 79{
58/* Fields which change during running: */ 80 /* 512 == enabled (same as eflags in normal hardware). The Guest
59 /* 512 == enabled (same as eflags) */ 81 * changes interrupts so often that a hypercall is too slow. */
60 unsigned int irq_enabled; 82 unsigned int irq_enabled;
61 /* Interrupts blocked by guest. */ 83 /* Fine-grained interrupt disabling by the Guest */
62 DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS); 84 DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS);
63 85
64 /* Virtual address of page fault. */ 86 /* The Host writes the virtual address of the last page fault here,
87 * which saves the Guest a hypercall. CR2 is the native register where
88 * this address would normally be found. */
65 unsigned long cr2; 89 unsigned long cr2;
66 90
67 /* Async hypercall ring. 0xFF == done, 0 == pending. */ 91 /* Async hypercall ring. Instead of directly making hypercalls, we can
92 * place them in here for processing the next time the Host wants.
93 * This batching can be quite efficient. */
94
95 /* 0xFF == done (set by Host), 0 == pending (set by Guest). */
68 u8 hcall_status[LHCALL_RING_SIZE]; 96 u8 hcall_status[LHCALL_RING_SIZE];
97 /* The actual registers for the hypercalls. */
69 struct hcall_ring hcalls[LHCALL_RING_SIZE]; 98 struct hcall_ring hcalls[LHCALL_RING_SIZE];
70 99
71/* Fields initialized by the hypervisor at boot: */ 100/* Fields initialized by the Host at boot: */
72 /* Memory not to try to access */ 101 /* Memory not to try to access */
73 unsigned long reserve_mem; 102 unsigned long reserve_mem;
74 /* ID of this guest (used by network driver to set ethernet address) */ 103 /* ID of this Guest (used by network driver to set ethernet address) */
75 u16 guestid; 104 u16 guestid;
76 /* KHz for the TSC clock. */ 105 /* KHz for the TSC clock. */
77 u32 tsc_khz; 106 u32 tsc_khz;
78 107
79/* Fields initialized by the guest at boot: */ 108/* Fields initialized by the Guest at boot: */
80 /* Instruction range to suppress interrupts even if enabled */ 109 /* Instruction range to suppress interrupts even if enabled */
81 unsigned long noirq_start, noirq_end; 110 unsigned long noirq_start, noirq_end;
82}; 111};
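
The ring protocol sketched in those comments is simple: the Guest claims a slot whose status byte reads 0xFF (done), fills in the register block, then flips the status to 0 (pending) so the Host executes it later. A sketch of the Guest side under those rules (the cursor variable and fallback policy are assumptions, not the kernel's exact code):

    #include <linux/lguest.h>

    static unsigned int next_slot;      /* assumed ring cursor */

    static int queue_async_hcall(struct lguest_data *d,
                                 u32 eax, u32 edx, u32 ebx, u32 ecx)
    {
            if (d->hcall_status[next_slot] != 0xFF)
                    return -1;      /* ring full; caller could trap directly */

            d->hcalls[next_slot].eax = eax;
            d->hcalls[next_slot].edx = edx;
            d->hcalls[next_slot].ebx = ebx;
            d->hcalls[next_slot].ecx = ecx;
            wmb();          /* arguments must land before the status flip */
            d->hcall_status[next_slot] = 0;
            next_slot = (next_slot + 1) % LHCALL_RING_SIZE;
            return 0;
    }
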
diff --git a/include/linux/lguest_bus.h b/include/linux/lguest_bus.h
index c9b4e05fee49..d27853ddc644 100644
--- a/include/linux/lguest_bus.h
+++ b/include/linux/lguest_bus.h
@@ -15,11 +15,14 @@ struct lguest_device {
15 void *private; 15 void *private;
16}; 16};
17 17
18/* By convention, each device can use irq index+1 if it wants to. */ 18/*D:380 Since interrupt numbers are arbitrary, we use a convention: each device
19 * can use the interrupt number corresponding to its index. The +1 is because
20 * interrupt 0 is not usable (it's actually the timer interrupt). */
19static inline int lgdev_irq(const struct lguest_device *dev) 21static inline int lgdev_irq(const struct lguest_device *dev)
20{ 22{
21 return dev->index + 1; 23 return dev->index + 1;
22} 24}
25/*:*/
23 26
24/* dma args must not be vmalloced! */ 27/* dma args must not be vmalloced! */
25void lguest_send_dma(unsigned long key, struct lguest_dma *dma); 28void lguest_send_dma(unsigned long key, struct lguest_dma *dma);
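
With that convention a driver never probes for its interrupt line; it derives it from the bus index when registering a handler. A hedged fragment of the expected usage (the handler and name are placeholders):

    #include <linux/interrupt.h>
    #include <linux/lguest_bus.h>

    static int example_setup_irq(struct lguest_device *lgdev,
                                 irq_handler_t handler)
    {
            /* index+1 per the comment above; flags and name are the
             * driver's choice */
            return request_irq(lgdev_irq(lgdev), handler, 0,
                               "lguest-example", lgdev);
    }
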
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
index 0ba414a40c80..641670579446 100644
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -9,14 +9,45 @@
9/* How many devices? Assume each one wants up to two dma arrays per device. */ 9/* How many devices? Assume each one wants up to two dma arrays per device. */
10#define LGUEST_MAX_DEVICES (LGUEST_MAX_DMA/2) 10#define LGUEST_MAX_DEVICES (LGUEST_MAX_DMA/2)
11 11
12/*D:200
13 * Lguest I/O
14 *
15 * The lguest I/O mechanism is the only way Guests can talk to devices. There
16 * are two hypercalls involved: SEND_DMA for output and BIND_DMA for input. In
17 * each case, "struct lguest_dma" describes the buffer: this contains 16
18 * addr/len pairs, and if there are fewer buffer elements the len array is
19 * terminated with a 0.
20 *
21 * I/O is organized by keys: BIND_DMA attaches buffers to a particular key, and
 22 * SEND_DMA transfers to buffers bound to a particular key. By convention, keys
23 * correspond to a physical address within the device's page. This means that
24 * devices will never accidentally end up with the same keys, and allows the
 25 * Host to use The Futex Trick (as we'll see later in our journey).
26 *
27 * SEND_DMA simply indicates a key to send to, and the physical address of the
28 * "struct lguest_dma" to send. The Host will write the number of bytes
29 * transferred into the "struct lguest_dma"'s used_len member.
30 *
31 * BIND_DMA indicates a key to bind to, a pointer to an array of "struct
32 * lguest_dma"s ready for receiving, the size of that array, and an interrupt
33 * to trigger when data is received. The Host will only allow transfers into
34 * buffers with a used_len of zero: it then sets used_len to the number of
35 * bytes transferred and triggers the interrupt for the Guest to process the
36 * new input. */
12struct lguest_dma 37struct lguest_dma
13{ 38{
14 /* 0 if free to be used, filled by hypervisor. */ 39 /* 0 if free to be used, filled by the Host. */
15 u32 used_len; 40 u32 used_len;
16 unsigned long addr[LGUEST_MAX_DMA_SECTIONS]; 41 unsigned long addr[LGUEST_MAX_DMA_SECTIONS];
17 u16 len[LGUEST_MAX_DMA_SECTIONS]; 42 u16 len[LGUEST_MAX_DMA_SECTIONS];
18}; 43};
44/*:*/
19 45
46/*D:460 This is the layout of a block device memory page. The Launcher sets up
47 * the num_sectors initially to tell the Guest the size of the disk. The Guest
48 * puts the type, sector and length of the request in the first three fields,
49 * then DMAs to the Host. The Host processes the request, sets up the result,
50 * then DMAs back to the Guest. */
20struct lguest_block_page 51struct lguest_block_page
21{ 52{
22 /* 0 is a read, 1 is a write. */ 53 /* 0 is a read, 1 is a write. */
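
Following the rules in the comment above, building a "struct lguest_dma" means writing guest-physical addresses and lengths, zero-terminating lists shorter than 16 entries, and leaving used_len at 0 for the Host to fill in. A sketch for a two-buffer transfer (the helper is hypothetical; __pa() supplies the physical addresses the Host expects):

    #include <asm/page.h>
    #include <linux/types.h>
    #include <linux/lguest_launcher.h>

    static void fill_two_buf_dma(struct lguest_dma *dma,
                                 void *buf1, u16 len1, void *buf2, u16 len2)
    {
            dma->used_len = 0;      /* Host writes the byte count here */
            dma->addr[0] = __pa(buf1);
            dma->len[0] = len1;
            dma->addr[1] = __pa(buf2);
            dma->len[1] = len2;
            dma->len[2] = 0;        /* a zero length terminates a short list */
    }
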
@@ -28,27 +59,47 @@ struct lguest_block_page
28 u32 num_sectors; /* Disk length = num_sectors * 512 */ 59 u32 num_sectors; /* Disk length = num_sectors * 512 */
29}; 60};
30 61
31/* There is a shared page of these. */ 62/*D:520 The network device is basically a memory page where all the Guests on
63 * the network publish their MAC (ethernet) addresses: it's an array of "struct
64 * lguest_net": */
32struct lguest_net 65struct lguest_net
33{ 66{
34 /* Simply the mac address (with multicast bit meaning promisc). */ 67 /* Simply the mac address (with multicast bit meaning promisc). */
35 unsigned char mac[6]; 68 unsigned char mac[6];
36}; 69};
70/*:*/
37 71
38/* Where the Host expects the Guest to SEND_DMA console output to. */ 72/* Where the Host expects the Guest to SEND_DMA console output to. */
39#define LGUEST_CONSOLE_DMA_KEY 0 73#define LGUEST_CONSOLE_DMA_KEY 0
40 74
41/* We have a page of these descriptors in the lguest_device page. */ 75/*D:010
76 * Drivers
77 *
78 * The Guest needs devices to do anything useful. Since we don't let it touch
79 * real devices (think of the damage it could do!) we provide virtual devices.
80 * We could emulate a PCI bus with various devices on it, but that is a fairly
81 * complex burden for the Host and suboptimal for the Guest, so we have our own
82 * "lguest" bus and simple drivers.
83 *
84 * Devices are described by an array of LGUEST_MAX_DEVICES of these structs,
85 * placed by the Launcher just above the top of physical memory:
86 */
42struct lguest_device_desc { 87struct lguest_device_desc {
88 /* The device type: console, network, disk etc. */
43 u16 type; 89 u16 type;
44#define LGUEST_DEVICE_T_CONSOLE 1 90#define LGUEST_DEVICE_T_CONSOLE 1
45#define LGUEST_DEVICE_T_NET 2 91#define LGUEST_DEVICE_T_NET 2
46#define LGUEST_DEVICE_T_BLOCK 3 92#define LGUEST_DEVICE_T_BLOCK 3
47 93
 94 /* The specific features of this device: these depend on the device type
95 * except for LGUEST_DEVICE_F_RANDOMNESS. */
48 u16 features; 96 u16 features;
49#define LGUEST_NET_F_NOCSUM 0x4000 /* Don't bother checksumming */ 97#define LGUEST_NET_F_NOCSUM 0x4000 /* Don't bother checksumming */
50#define LGUEST_DEVICE_F_RANDOMNESS 0x8000 /* IRQ is fairly random */ 98#define LGUEST_DEVICE_F_RANDOMNESS 0x8000 /* IRQ is fairly random */
51 99
100 /* This is how the Guest reports status of the device: the Host can set
101 * LGUEST_DEVICE_S_REMOVED to indicate removal, but the rest are only
102 * ever manipulated by the Guest, and only ever set. */
52 u16 status; 103 u16 status;
53/* 256 and above are device specific. */ 104/* 256 and above are device specific. */
54#define LGUEST_DEVICE_S_ACKNOWLEDGE 1 /* We have seen device. */ 105#define LGUEST_DEVICE_S_ACKNOWLEDGE 1 /* We have seen device. */
@@ -58,9 +109,12 @@ struct lguest_device_desc {
58#define LGUEST_DEVICE_S_REMOVED_ACK 16 /* Driver has been told. */ 109#define LGUEST_DEVICE_S_REMOVED_ACK 16 /* Driver has been told. */
59#define LGUEST_DEVICE_S_FAILED 128 /* Something actually failed */ 110#define LGUEST_DEVICE_S_FAILED 128 /* Something actually failed */
60 111
112 /* Each device exists somewhere in Guest physical memory, over some
113 * number of pages. */
61 u16 num_pages; 114 u16 num_pages;
62 u32 pfn; 115 u32 pfn;
63}; 116};
117/*:*/
64 118
65/* Write command first word is a request. */ 119/* Write command first word is a request. */
66enum lguest_req 120enum lguest_req
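
The status byte is a one-way handshake: the Guest only ever ORs bits in as it progresses, and the Host's sole move is to set LGUEST_DEVICE_S_REMOVED. A sketch of the Guest-side progression (LGUEST_DEVICE_S_DRIVER and LGUEST_DEVICE_S_DRIVER_OK are bits elided from this hunk, so treat those names as assumed):

    #include <linux/lguest_launcher.h>

    static void example_bring_up(struct lguest_device_desc *desc)
    {
            desc->status |= LGUEST_DEVICE_S_ACKNOWLEDGE; /* we have seen it */
            /* ... match and initialize a driver ... */
            desc->status |= LGUEST_DEVICE_S_DRIVER;      /* assumed: bound */
            desc->status |= LGUEST_DEVICE_S_DRIVER_OK;   /* assumed: live */
    }
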
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c456c3a1c28e..3e9e8fec5a41 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1246,7 +1246,7 @@ void drop_slab(void);
1246extern int randomize_va_space; 1246extern int randomize_va_space;
1247#endif 1247#endif
1248 1248
1249__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma); 1249const char * arch_vma_name(struct vm_area_struct *vma);
1250 1250
1251#endif /* __KERNEL__ */ 1251#endif /* __KERNEL__ */
1252#endif /* _LINUX_MM_H */ 1252#endif /* _LINUX_MM_H */
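
Dropping __attribute__((weak)) from the declaration is the real fix here: a weak attribute on a prototype propagates to every definition that includes the header, so an architecture's intended strong override would itself be emitted weak and the linker could keep either copy. The working pattern keeps the declaration plain and marks only the generic default weak, as in this sketch:

    struct vm_area_struct;

    /* header: ordinary declaration, no attribute */
    const char *arch_vma_name(struct vm_area_struct *vma);

    /* common code: weak default, replaced wholesale when an architecture
     * provides its own strong definition */
    __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
    {
            return NULL;
    }
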
diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h
index 90ae8b474cb8..37e933c9987d 100644
--- a/include/linux/netfilter/xt_connlimit.h
+++ b/include/linux/netfilter/xt_connlimit.h
@@ -5,8 +5,8 @@ struct xt_connlimit_data;
5 5
6struct xt_connlimit_info { 6struct xt_connlimit_info {
7 union { 7 union {
8 u_int32_t v4_mask; 8 __be32 v4_mask;
9 u_int32_t v6_mask[4]; 9 __be32 v6_mask[4];
10 }; 10 };
11 unsigned int limit, inverse; 11 unsigned int limit, inverse;
12 12
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index 66edb2293184..16b46aace349 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -1,7 +1,6 @@
1/* 1/*
2 * Linux Plug and Play Support 2 * Linux Plug and Play Support
3 * Copyright by Adam Belay <ambx1@neo.rr.com> 3 * Copyright by Adam Belay <ambx1@neo.rr.com>
4 *
5 */ 4 */
6 5
7#ifndef _LINUX_PNP_H 6#ifndef _LINUX_PNP_H
@@ -23,7 +22,6 @@
23struct pnp_protocol; 22struct pnp_protocol;
24struct pnp_dev; 23struct pnp_dev;
25 24
26
27/* 25/*
28 * Resource Management 26 * Resource Management
29 */ 27 */
@@ -73,37 +71,37 @@ struct pnp_dev;
73#define PNP_PORT_FLAG_FIXED (1<<1) 71#define PNP_PORT_FLAG_FIXED (1<<1)
74 72
75struct pnp_port { 73struct pnp_port {
76 unsigned short min; /* min base number */ 74 unsigned short min; /* min base number */
77 unsigned short max; /* max base number */ 75 unsigned short max; /* max base number */
78 unsigned char align; /* align boundary */ 76 unsigned char align; /* align boundary */
79 unsigned char size; /* size of range */ 77 unsigned char size; /* size of range */
80 unsigned char flags; /* port flags */ 78 unsigned char flags; /* port flags */
81 unsigned char pad; /* pad */ 79 unsigned char pad; /* pad */
82 struct pnp_port *next; /* next port */ 80 struct pnp_port *next; /* next port */
83}; 81};
84 82
85#define PNP_IRQ_NR 256 83#define PNP_IRQ_NR 256
86struct pnp_irq { 84struct pnp_irq {
87 DECLARE_BITMAP(map, PNP_IRQ_NR); /* bitmaks for IRQ lines */ 85 DECLARE_BITMAP(map, PNP_IRQ_NR); /* bitmask for IRQ lines */
88 unsigned char flags; /* IRQ flags */ 86 unsigned char flags; /* IRQ flags */
89 unsigned char pad; /* pad */ 87 unsigned char pad; /* pad */
90 struct pnp_irq *next; /* next IRQ */ 88 struct pnp_irq *next; /* next IRQ */
91}; 89};
92 90
93struct pnp_dma { 91struct pnp_dma {
94 unsigned char map; /* bitmask for DMA channels */ 92 unsigned char map; /* bitmask for DMA channels */
95 unsigned char flags; /* DMA flags */ 93 unsigned char flags; /* DMA flags */
96 struct pnp_dma *next; /* next port */ 94 struct pnp_dma *next; /* next port */
97}; 95};
98 96
99struct pnp_mem { 97struct pnp_mem {
100 unsigned int min; /* min base number */ 98 unsigned int min; /* min base number */
101 unsigned int max; /* max base number */ 99 unsigned int max; /* max base number */
102 unsigned int align; /* align boundary */ 100 unsigned int align; /* align boundary */
103 unsigned int size; /* size of range */ 101 unsigned int size; /* size of range */
104 unsigned char flags; /* memory flags */ 102 unsigned char flags; /* memory flags */
105 unsigned char pad; /* pad */ 103 unsigned char pad; /* pad */
106 struct pnp_mem *next; /* next memory resource */ 104 struct pnp_mem *next; /* next memory resource */
107}; 105};
108 106
109#define PNP_RES_PRIORITY_PREFERRED 0 107#define PNP_RES_PRIORITY_PREFERRED 0
@@ -127,7 +125,6 @@ struct pnp_resource_table {
127 struct resource irq_resource[PNP_MAX_IRQ]; 125 struct resource irq_resource[PNP_MAX_IRQ];
128}; 126};
129 127
130
131/* 128/*
 132 * Device Management 129 * Device Management
133 */ 130 */
@@ -139,14 +136,14 @@ struct pnp_card {
139 struct list_head protocol_list; /* node in protocol's list of cards */ 136 struct list_head protocol_list; /* node in protocol's list of cards */
140 struct list_head devices; /* devices attached to the card */ 137 struct list_head devices; /* devices attached to the card */
141 138
142 struct pnp_protocol * protocol; 139 struct pnp_protocol *protocol;
143 struct pnp_id * id; /* contains supported EISA IDs*/ 140 struct pnp_id *id; /* contains supported EISA IDs */
144 141
145 char name[PNP_NAME_LEN]; /* contains a human-readable name */ 142 char name[PNP_NAME_LEN]; /* contains a human-readable name */
146 unsigned char pnpver; /* Plug & Play version */ 143 unsigned char pnpver; /* Plug & Play version */
147 unsigned char productver; /* product version */ 144 unsigned char productver; /* product version */
148 unsigned int serial; /* serial number */ 145 unsigned int serial; /* serial number */
149 unsigned char checksum; /* if zero - checksum passed */ 146 unsigned char checksum; /* if zero - checksum passed */
150 struct proc_dir_entry *procdir; /* directory entry in /proc/bus/isapnp */ 147 struct proc_dir_entry *procdir; /* directory entry in /proc/bus/isapnp */
151}; 148};
152 149
@@ -159,18 +156,18 @@ struct pnp_card {
159 (card) = global_to_pnp_card((card)->global_list.next)) 156 (card) = global_to_pnp_card((card)->global_list.next))
160 157
161struct pnp_card_link { 158struct pnp_card_link {
162 struct pnp_card * card; 159 struct pnp_card *card;
163 struct pnp_card_driver * driver; 160 struct pnp_card_driver *driver;
164 void * driver_data; 161 void *driver_data;
165 pm_message_t pm_state; 162 pm_message_t pm_state;
166}; 163};
167 164
168static inline void *pnp_get_card_drvdata (struct pnp_card_link *pcard) 165static inline void *pnp_get_card_drvdata(struct pnp_card_link *pcard)
169{ 166{
170 return pcard->driver_data; 167 return pcard->driver_data;
171} 168}
172 169
173static inline void pnp_set_card_drvdata (struct pnp_card_link *pcard, void *data) 170static inline void pnp_set_card_drvdata(struct pnp_card_link *pcard, void *data)
174{ 171{
175 pcard->driver_data = data; 172 pcard->driver_data = data;
176} 173}
@@ -186,22 +183,22 @@ struct pnp_dev {
186 struct list_head card_list; /* node in card's list of devices */ 183 struct list_head card_list; /* node in card's list of devices */
187 struct list_head rdev_list; /* node in cards list of requested devices */ 184 struct list_head rdev_list; /* node in cards list of requested devices */
188 185
189 struct pnp_protocol * protocol; 186 struct pnp_protocol *protocol;
190 struct pnp_card * card; /* card the device is attached to, none if NULL */ 187 struct pnp_card *card; /* card the device is attached to, none if NULL */
191 struct pnp_driver * driver; 188 struct pnp_driver *driver;
192 struct pnp_card_link * card_link; 189 struct pnp_card_link *card_link;
193 190
194 struct pnp_id * id; /* supported EISA IDs*/ 191 struct pnp_id *id; /* supported EISA IDs */
195 192
196 int active; 193 int active;
197 int capabilities; 194 int capabilities;
198 struct pnp_option * independent; 195 struct pnp_option *independent;
199 struct pnp_option * dependent; 196 struct pnp_option *dependent;
200 struct pnp_resource_table res; 197 struct pnp_resource_table res;
201 198
202 char name[PNP_NAME_LEN]; /* contains a human-readable name */ 199 char name[PNP_NAME_LEN]; /* contains a human-readable name */
203 unsigned short regs; /* ISAPnP: supported registers */ 200 unsigned short regs; /* ISAPnP: supported registers */
204 int flags; /* used by protocols */ 201 int flags; /* used by protocols */
205 struct proc_dir_entry *procent; /* device entry in /proc/bus/isapnp */ 202 struct proc_dir_entry *procent; /* device entry in /proc/bus/isapnp */
206 void *data; 203 void *data;
207}; 204};
@@ -220,19 +217,19 @@ struct pnp_dev {
220 (dev) = card_to_pnp_dev((dev)->card_list.next)) 217 (dev) = card_to_pnp_dev((dev)->card_list.next))
221#define pnp_dev_name(dev) (dev)->name 218#define pnp_dev_name(dev) (dev)->name
222 219
223static inline void *pnp_get_drvdata (struct pnp_dev *pdev) 220static inline void *pnp_get_drvdata(struct pnp_dev *pdev)
224{ 221{
225 return dev_get_drvdata(&pdev->dev); 222 return dev_get_drvdata(&pdev->dev);
226} 223}
227 224
228static inline void pnp_set_drvdata (struct pnp_dev *pdev, void *data) 225static inline void pnp_set_drvdata(struct pnp_dev *pdev, void *data)
229{ 226{
230 dev_set_drvdata(&pdev->dev, data); 227 dev_set_drvdata(&pdev->dev, data);
231} 228}
232 229
233struct pnp_fixup { 230struct pnp_fixup {
234 char id[7]; 231 char id[7];
235 void (*quirk_function)(struct pnp_dev *dev); /* fixup function */ 232 void (*quirk_function) (struct pnp_dev * dev); /* fixup function */
236}; 233};
237 234
238/* config parameters */ 235/* config parameters */
@@ -269,7 +266,6 @@ extern struct pnp_protocol pnpbios_protocol;
269#define pnp_device_is_pnpbios(dev) 0 266#define pnp_device_is_pnpbios(dev) 0
270#endif 267#endif
271 268
272
273/* status */ 269/* status */
274#define PNP_READY 0x0000 270#define PNP_READY 0x0000
275#define PNP_ATTACHED 0x0001 271#define PNP_ATTACHED 0x0001
@@ -287,17 +283,17 @@ extern struct pnp_protocol pnpbios_protocol;
287 283
288struct pnp_id { 284struct pnp_id {
289 char id[PNP_ID_LEN]; 285 char id[PNP_ID_LEN];
290 struct pnp_id * next; 286 struct pnp_id *next;
291}; 287};
292 288
293struct pnp_driver { 289struct pnp_driver {
294 char * name; 290 char *name;
295 const struct pnp_device_id *id_table; 291 const struct pnp_device_id *id_table;
296 unsigned int flags; 292 unsigned int flags;
297 int (*probe) (struct pnp_dev *dev, const struct pnp_device_id *dev_id); 293 int (*probe) (struct pnp_dev *dev, const struct pnp_device_id *dev_id);
298 void (*remove) (struct pnp_dev *dev); 294 void (*remove) (struct pnp_dev *dev);
299 int (*suspend) (struct pnp_dev *dev, pm_message_t state); 295 int (*suspend) (struct pnp_dev *dev, pm_message_t state);
300 int (*resume) (struct pnp_dev *dev); 296 int (*resume) (struct pnp_dev *dev);
301 struct device_driver driver; 297 struct device_driver driver;
302}; 298};
303 299
@@ -305,13 +301,14 @@ struct pnp_driver {
305 301
306struct pnp_card_driver { 302struct pnp_card_driver {
307 struct list_head global_list; 303 struct list_head global_list;
308 char * name; 304 char *name;
309 const struct pnp_card_device_id *id_table; 305 const struct pnp_card_device_id *id_table;
310 unsigned int flags; 306 unsigned int flags;
311 int (*probe) (struct pnp_card_link *card, const struct pnp_card_device_id *card_id); 307 int (*probe) (struct pnp_card_link *card,
308 const struct pnp_card_device_id *card_id);
312 void (*remove) (struct pnp_card_link *card); 309 void (*remove) (struct pnp_card_link *card);
313 int (*suspend) (struct pnp_card_link *card, pm_message_t state); 310 int (*suspend) (struct pnp_card_link *card, pm_message_t state);
314 int (*resume) (struct pnp_card_link *card); 311 int (*resume) (struct pnp_card_link *card);
315 struct pnp_driver link; 312 struct pnp_driver link;
316}; 313};
317 314
@@ -321,29 +318,28 @@ struct pnp_card_driver {
321#define PNP_DRIVER_RES_DO_NOT_CHANGE 0x0001 /* do not change the state of the device */ 318#define PNP_DRIVER_RES_DO_NOT_CHANGE 0x0001 /* do not change the state of the device */
322#define PNP_DRIVER_RES_DISABLE 0x0003 /* ensure the device is disabled */ 319#define PNP_DRIVER_RES_DISABLE 0x0003 /* ensure the device is disabled */
323 320
324
325/* 321/*
326 * Protocol Management 322 * Protocol Management
327 */ 323 */
328 324
329struct pnp_protocol { 325struct pnp_protocol {
330 struct list_head protocol_list; 326 struct list_head protocol_list;
331 char * name; 327 char *name;
332 328
333 /* resource control functions */ 329 /* resource control functions */
334 int (*get)(struct pnp_dev *dev, struct pnp_resource_table *res); 330 int (*get) (struct pnp_dev *dev, struct pnp_resource_table *res);
335 int (*set)(struct pnp_dev *dev, struct pnp_resource_table *res); 331 int (*set) (struct pnp_dev *dev, struct pnp_resource_table *res);
336 int (*disable)(struct pnp_dev *dev); 332 int (*disable) (struct pnp_dev *dev);
337 333
338 /* protocol specific suspend/resume */ 334 /* protocol specific suspend/resume */
339 int (*suspend)(struct pnp_dev *dev, pm_message_t state); 335 int (*suspend) (struct pnp_dev * dev, pm_message_t state);
340 int (*resume)(struct pnp_dev *dev); 336 int (*resume) (struct pnp_dev * dev);
341 337
342 /* used by pnp layer only (look but don't touch) */ 338 /* used by pnp layer only (look but don't touch) */
343 unsigned char number; /* protocol number*/ 339 unsigned char number; /* protocol number */
344 struct device dev; /* link to driver model */ 340 struct device dev; /* link to driver model */
345 struct list_head cards; 341 struct list_head cards;
346 struct list_head devices; 342 struct list_head devices;
347}; 343};
348 344
349#define to_pnp_protocol(n) list_entry(n, struct pnp_protocol, protocol_list) 345#define to_pnp_protocol(n) list_entry(n, struct pnp_protocol, protocol_list)
@@ -356,7 +352,6 @@ struct pnp_protocol {
356 (dev) != protocol_to_pnp_dev(&(protocol)->devices); \ 352 (dev) != protocol_to_pnp_dev(&(protocol)->devices); \
357 (dev) = protocol_to_pnp_dev((dev)->protocol_list.next)) 353 (dev) = protocol_to_pnp_dev((dev)->protocol_list.next))
358 354
359
360extern struct bus_type pnp_bus_type; 355extern struct bus_type pnp_bus_type;
361 356
362#if defined(CONFIG_PNP) 357#if defined(CONFIG_PNP)
@@ -376,21 +371,25 @@ void pnp_remove_card(struct pnp_card *card);
376int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev); 371int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev);
377void pnp_remove_card_device(struct pnp_dev *dev); 372void pnp_remove_card_device(struct pnp_dev *dev);
378int pnp_add_card_id(struct pnp_id *id, struct pnp_card *card); 373int pnp_add_card_id(struct pnp_id *id, struct pnp_card *card);
379struct pnp_dev * pnp_request_card_device(struct pnp_card_link *clink, const char * id, struct pnp_dev * from); 374struct pnp_dev *pnp_request_card_device(struct pnp_card_link *clink,
380void pnp_release_card_device(struct pnp_dev * dev); 375 const char *id, struct pnp_dev *from);
381int pnp_register_card_driver(struct pnp_card_driver * drv); 376void pnp_release_card_device(struct pnp_dev *dev);
382void pnp_unregister_card_driver(struct pnp_card_driver * drv); 377int pnp_register_card_driver(struct pnp_card_driver *drv);
378void pnp_unregister_card_driver(struct pnp_card_driver *drv);
383extern struct list_head pnp_cards; 379extern struct list_head pnp_cards;
384 380
385/* resource management */ 381/* resource management */
386struct pnp_option * pnp_register_independent_option(struct pnp_dev *dev); 382struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev);
387struct pnp_option * pnp_register_dependent_option(struct pnp_dev *dev, int priority); 383struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev,
384 int priority);
388int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data); 385int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data);
389int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data); 386int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data);
390int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data); 387int pnp_register_port_resource(struct pnp_option *option,
388 struct pnp_port *data);
391int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data); 389int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data);
392void pnp_init_resource_table(struct pnp_resource_table *table); 390void pnp_init_resource_table(struct pnp_resource_table *table);
393int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, int mode); 391int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res,
392 int mode);
394int pnp_auto_config_dev(struct pnp_dev *dev); 393int pnp_auto_config_dev(struct pnp_dev *dev);
395int pnp_validate_config(struct pnp_dev *dev); 394int pnp_validate_config(struct pnp_dev *dev);
396int pnp_start_dev(struct pnp_dev *dev); 395int pnp_start_dev(struct pnp_dev *dev);
@@ -398,11 +397,11 @@ int pnp_stop_dev(struct pnp_dev *dev);
398int pnp_activate_dev(struct pnp_dev *dev); 397int pnp_activate_dev(struct pnp_dev *dev);
399int pnp_disable_dev(struct pnp_dev *dev); 398int pnp_disable_dev(struct pnp_dev *dev);
400void pnp_resource_change(struct resource *resource, resource_size_t start, 399void pnp_resource_change(struct resource *resource, resource_size_t start,
401 resource_size_t size); 400 resource_size_t size);
402 401
403/* protocol helpers */ 402/* protocol helpers */
404int pnp_is_active(struct pnp_dev * dev); 403int pnp_is_active(struct pnp_dev *dev);
405int compare_pnp_id(struct pnp_id * pos, const char * id); 404int compare_pnp_id(struct pnp_id *pos, const char *id);
406int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev); 405int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev);
407int pnp_register_driver(struct pnp_driver *drv); 406int pnp_register_driver(struct pnp_driver *drv);
408void pnp_unregister_driver(struct pnp_driver *drv); 407void pnp_unregister_driver(struct pnp_driver *drv);
@@ -415,23 +414,24 @@ static inline void pnp_unregister_protocol(struct pnp_protocol *protocol) { }
415static inline int pnp_init_device(struct pnp_dev *dev) { return -ENODEV; } 414static inline int pnp_init_device(struct pnp_dev *dev) { return -ENODEV; }
416static inline int pnp_add_device(struct pnp_dev *dev) { return -ENODEV; } 415static inline int pnp_add_device(struct pnp_dev *dev) { return -ENODEV; }
417static inline int pnp_device_attach(struct pnp_dev *pnp_dev) { return -ENODEV; } 416static inline int pnp_device_attach(struct pnp_dev *pnp_dev) { return -ENODEV; }
418static inline void pnp_device_detach(struct pnp_dev *pnp_dev) { ; } 417static inline void pnp_device_detach(struct pnp_dev *pnp_dev) { }
418
419#define pnp_platform_devices 0 419#define pnp_platform_devices 0
420 420
421/* multidevice card support */ 421/* multidevice card support */
422static inline int pnp_add_card(struct pnp_card *card) { return -ENODEV; } 422static inline int pnp_add_card(struct pnp_card *card) { return -ENODEV; }
423static inline void pnp_remove_card(struct pnp_card *card) { ; } 423static inline void pnp_remove_card(struct pnp_card *card) { }
424static inline int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev) { return -ENODEV; } 424static inline int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev) { return -ENODEV; }
425static inline void pnp_remove_card_device(struct pnp_dev *dev) { ; } 425static inline void pnp_remove_card_device(struct pnp_dev *dev) { }
426static inline int pnp_add_card_id(struct pnp_id *id, struct pnp_card *card) { return -ENODEV; } 426static inline int pnp_add_card_id(struct pnp_id *id, struct pnp_card *card) { return -ENODEV; }
427static inline struct pnp_dev * pnp_request_card_device(struct pnp_card_link *clink, const char * id, struct pnp_dev * from) { return NULL; } 427static inline struct pnp_dev *pnp_request_card_device(struct pnp_card_link *clink, const char *id, struct pnp_dev *from) { return NULL; }
428static inline void pnp_release_card_device(struct pnp_dev * dev) { ; } 428static inline void pnp_release_card_device(struct pnp_dev *dev) { }
429static inline int pnp_register_card_driver(struct pnp_card_driver * drv) { return -ENODEV; } 429static inline int pnp_register_card_driver(struct pnp_card_driver *drv) { return -ENODEV; }
430static inline void pnp_unregister_card_driver(struct pnp_card_driver * drv) { ; } 430static inline void pnp_unregister_card_driver(struct pnp_card_driver *drv) { }
431 431
432/* resource management */ 432/* resource management */
433static inline struct pnp_option * pnp_register_independent_option(struct pnp_dev *dev) { return NULL; } 433static inline struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev) { return NULL; }
434static inline struct pnp_option * pnp_register_dependent_option(struct pnp_dev *dev, int priority) { return NULL; } 434static inline struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev, int priority) { return NULL; }
435static inline int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data) { return -ENODEV; } 435static inline int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data) { return -ENODEV; }
436static inline int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data) { return -ENODEV; } 436static inline int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data) { return -ENODEV; }
437static inline int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data) { return -ENODEV; } 437static inline int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data) { return -ENODEV; }
@@ -444,20 +444,17 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; }
444static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } 444static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; }
445static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } 445static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; }
446static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; } 446static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; }
447static inline void pnp_resource_change(struct resource *resource, 447static inline void pnp_resource_change(struct resource *resource, resource_size_t start, resource_size_t size) { }
448 resource_size_t start,
449 resource_size_t size) { }
450 448
451/* protocol helpers */ 449/* protocol helpers */
452static inline int pnp_is_active(struct pnp_dev * dev) { return 0; } 450static inline int pnp_is_active(struct pnp_dev *dev) { return 0; }
453static inline int compare_pnp_id(struct pnp_id * pos, const char * id) { return -ENODEV; } 451static inline int compare_pnp_id(struct pnp_id *pos, const char *id) { return -ENODEV; }
454static inline int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev) { return -ENODEV; } 452static inline int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev) { return -ENODEV; }
455static inline int pnp_register_driver(struct pnp_driver *drv) { return -ENODEV; } 453static inline int pnp_register_driver(struct pnp_driver *drv) { return -ENODEV; }
456static inline void pnp_unregister_driver(struct pnp_driver *drv) { ; } 454static inline void pnp_unregister_driver(struct pnp_driver *drv) { }
457 455
458#endif /* CONFIG_PNP */ 456#endif /* CONFIG_PNP */
459 457
460
461#define pnp_err(format, arg...) printk(KERN_ERR "pnp: " format "\n" , ## arg) 458#define pnp_err(format, arg...) printk(KERN_ERR "pnp: " format "\n" , ## arg)
462#define pnp_info(format, arg...) printk(KERN_INFO "pnp: " format "\n" , ## arg) 459#define pnp_info(format, arg...) printk(KERN_INFO "pnp: " format "\n" , ## arg)
463#define pnp_warn(format, arg...) printk(KERN_WARNING "pnp: " format "\n" , ## arg) 460#define pnp_warn(format, arg...) printk(KERN_WARNING "pnp: " format "\n" , ## arg)
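
Most of the pnp.h churn above is Lindent-style cleanup, but the CONFIG_PNP=n branch shows the convention worth noting: every entry point gets a static inline stub returning -ENODEV (or doing nothing), so callers build unchanged and the dead calls compile away. A minimal sketch of the pattern with a hypothetical subsystem:

    #include <linux/errno.h>

    struct foo_dev;

    #if defined(CONFIG_FOO)
    int foo_register(struct foo_dev *dev); /* real version lives in foo.c */
    #else
    static inline int foo_register(struct foo_dev *dev) { return -ENODEV; }
    #endif
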
diff --git a/include/linux/pnpbios.h b/include/linux/pnpbios.h
index 0a282ac1f6b2..329192adc9dd 100644
--- a/include/linux/pnpbios.h
+++ b/include/linux/pnpbios.h
@@ -99,32 +99,32 @@
99 99
100#pragma pack(1) 100#pragma pack(1)
101struct pnp_dev_node_info { 101struct pnp_dev_node_info {
102 __u16 no_nodes; 102 __u16 no_nodes;
103 __u16 max_node_size; 103 __u16 max_node_size;
104}; 104};
105struct pnp_docking_station_info { 105struct pnp_docking_station_info {
106 __u32 location_id; 106 __u32 location_id;
107 __u32 serial; 107 __u32 serial;
108 __u16 capabilities; 108 __u16 capabilities;
109}; 109};
110struct pnp_isa_config_struc { 110struct pnp_isa_config_struc {
111 __u8 revision; 111 __u8 revision;
112 __u8 no_csns; 112 __u8 no_csns;
113 __u16 isa_rd_data_port; 113 __u16 isa_rd_data_port;
114 __u16 reserved; 114 __u16 reserved;
115}; 115};
116struct escd_info_struc { 116struct escd_info_struc {
117 __u16 min_escd_write_size; 117 __u16 min_escd_write_size;
118 __u16 escd_size; 118 __u16 escd_size;
119 __u32 nv_storage_base; 119 __u32 nv_storage_base;
120}; 120};
121struct pnp_bios_node { 121struct pnp_bios_node {
122 __u16 size; 122 __u16 size;
123 __u8 handle; 123 __u8 handle;
124 __u32 eisa_id; 124 __u32 eisa_id;
125 __u8 type_code[3]; 125 __u8 type_code[3];
126 __u16 flags; 126 __u16 flags;
127 __u8 data[0]; 127 __u8 data[0];
128}; 128};
129#pragma pack() 129#pragma pack()
130 130
@@ -133,22 +133,16 @@ struct pnp_bios_node {
133/* non-exported */ 133/* non-exported */
134extern struct pnp_dev_node_info node_info; 134extern struct pnp_dev_node_info node_info;
135 135
136extern int pnp_bios_dev_node_info (struct pnp_dev_node_info *data); 136extern int pnp_bios_dev_node_info(struct pnp_dev_node_info *data);
137extern int pnp_bios_get_dev_node (u8 *nodenum, char config, struct pnp_bios_node *data); 137extern int pnp_bios_get_dev_node(u8 *nodenum, char config,
138extern int pnp_bios_set_dev_node (u8 nodenum, char config, struct pnp_bios_node *data); 138 struct pnp_bios_node *data);
139extern int pnp_bios_get_stat_res (char *info); 139extern int pnp_bios_set_dev_node(u8 nodenum, char config,
140extern int pnp_bios_isapnp_config (struct pnp_isa_config_struc *data); 140 struct pnp_bios_node *data);
141extern int pnp_bios_escd_info (struct escd_info_struc *data); 141extern int pnp_bios_get_stat_res(char *info);
142extern int pnp_bios_read_escd (char *data, u32 nvram_base); 142extern int pnp_bios_isapnp_config(struct pnp_isa_config_struc *data);
143extern int pnp_bios_escd_info(struct escd_info_struc *data);
144extern int pnp_bios_read_escd(char *data, u32 nvram_base);
143extern int pnp_bios_dock_station_info(struct pnp_docking_station_info *data); 145extern int pnp_bios_dock_station_info(struct pnp_docking_station_info *data);
144#define needed 0
145#if needed
146extern int pnp_bios_get_event (u16 *message);
147extern int pnp_bios_send_message (u16 message);
148extern int pnp_bios_set_stat_res (char *info);
149extern int pnp_bios_apm_id_table (char *table, u16 *size);
150extern int pnp_bios_write_escd (char *data, u32 nvram_base);
151#endif
152 146
153#endif /* CONFIG_PNPBIOS */ 147#endif /* CONFIG_PNPBIOS */
154 148
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index e8e6da394c92..618f93c32b7f 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -125,6 +125,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
125static inline void register_nosave_region(unsigned long b, unsigned long e) 125static inline void register_nosave_region(unsigned long b, unsigned long e)
126{ 126{
127} 127}
128static inline void register_nosave_region_late(unsigned long b, unsigned long e)
129{
130}
128#endif 131#endif
129 132
130#endif /* _LINUX_SWSUSP_H */ 133#endif /* _LINUX_SWSUSP_H */
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 040dae5f0c9e..c48e390f4b0f 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -35,7 +35,7 @@ union nf_conntrack_address {
35union nf_conntrack_man_proto 35union nf_conntrack_man_proto
36{ 36{
37 /* Add other protocols here. */ 37 /* Add other protocols here. */
38 u_int16_t all; 38 __be16 all;
39 39
40 struct { 40 struct {
41 __be16 port; 41 __be16 port;
@@ -73,7 +73,7 @@ struct nf_conntrack_tuple
73 union nf_conntrack_address u3; 73 union nf_conntrack_address u3;
74 union { 74 union {
75 /* Add other protocols here. */ 75 /* Add other protocols here. */
76 u_int16_t all; 76 __be16 all;
77 77
78 struct { 78 struct {
79 __be16 port; 79 __be16 port;
diff --git a/include/xen/page.h b/include/xen/page.h
index 1df6c1930578..c0c8fcb27899 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -4,6 +4,7 @@
4#include <linux/pfn.h> 4#include <linux/pfn.h>
5 5
6#include <asm/uaccess.h> 6#include <asm/uaccess.h>
7#include <asm/pgtable.h>
7 8
8#include <xen/features.h> 9#include <xen/features.h>
9 10
diff --git a/init/initramfs.c b/init/initramfs.c
index 00eff7a11085..1db02a0025db 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -133,7 +133,7 @@ static __initdata loff_t this_header, next_header;
133 133
134static __initdata int dry_run; 134static __initdata int dry_run;
135 135
136static inline void eat(unsigned n) 136static inline void __init eat(unsigned n)
137{ 137{
138 victim += n; 138 victim += n;
139 this_header += n; 139 this_header += n;
diff --git a/kernel/kmod.c b/kernel/kmod.c
index beedbdc64608..9809cc1f33d6 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -351,11 +351,11 @@ static inline void register_pm_notifier_callback(void) {}
351 351
352/** 352/**
353 * call_usermodehelper_setup - prepare to call a usermode helper 353 * call_usermodehelper_setup - prepare to call a usermode helper
354 * @path - path to usermode executable 354 * @path: path to usermode executable
355 * @argv - arg vector for process 355 * @argv: arg vector for process
356 * @envp - environment for process 356 * @envp: environment for process
357 * 357 *
358 * Returns either NULL on allocation failure, or a subprocess_info 358 * Returns either %NULL on allocation failure, or a subprocess_info
359 * structure. This should be passed to call_usermodehelper_exec to 359 * structure. This should be passed to call_usermodehelper_exec to
360 * exec the process and free the structure. 360 * exec the process and free the structure.
361 */ 361 */
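
Beyond the kernel-doc colon fixes, the API documented here splits helper launch into two steps so callers can adjust the subprocess_info between allocation and exec. A hedged usage sketch (the binary path is hypothetical, and UMH_WAIT_PROC is assumed from this kernel's enum umh_wait):

    #include <linux/errno.h>
    #include <linux/kmod.h>

    static int run_example_helper(void)
    {
            char *argv[] = { "/sbin/example-helper", NULL };
            char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
                             NULL };
            struct subprocess_info *info;

            info = call_usermodehelper_setup(argv[0], argv, envp);
            if (!info)
                    return -ENOMEM;
            /* exec consumes 'info' whether it succeeds or fails */
            return call_usermodehelper_exec(info, UMH_WAIT_PROC);
    }
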
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 324ac0188ce1..eb72255b5c86 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -216,6 +216,7 @@ int hibernation_platform_enter(void)
                  * sleep state after all
                  */
                 error = hibernation_ops->prepare();
+                sysdev_shutdown();
                 if (!error)
                         error = hibernation_ops->enter();
         } else {
diff --git a/kernel/sys.c b/kernel/sys.c
index 08562f419768..14f8adcfffd9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -804,6 +804,7 @@ static void kernel_restart_prepare(char *cmd)
         blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
         system_state = SYSTEM_RESTART;
         device_shutdown();
+        sysdev_shutdown();
 }
 
 /**
@@ -860,6 +861,7 @@ void kernel_shutdown_prepare(enum system_states state)
 void kernel_halt(void)
 {
         kernel_shutdown_prepare(SYSTEM_HALT);
+        sysdev_shutdown();
         printk(KERN_EMERG "System halted.\n");
         machine_halt();
 }
@@ -876,6 +878,7 @@ void kernel_power_off(void)
         kernel_shutdown_prepare(SYSTEM_POWER_OFF);
         if (pm_power_off_prepare)
                 pm_power_off_prepare();
+        sysdev_shutdown();
         printk(KERN_EMERG "Power down.\n");
         machine_power_off();
 }
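The kernel/sys.c and kernel/power/disk.c hunks make every shutdown path call sysdev_shutdown() explicitly, after device_shutdown() (and, on the hibernation path, after the platform ->prepare() hook). A toy sketch of the ordering that results on the restart path, with stubs standing in for the kernel functions; the rationale in the comments is an inference, not taken from the patch:

    /* Illustrative only: the sequence these hunks establish. */
    #include <stdio.h>

    static void device_shutdown(void) { puts("device_shutdown: ordinary devices"); }
    static void sysdev_shutdown(void) { puts("sysdev_shutdown: core system devices"); }

    static void kernel_restart_prepare(void)
    {
            device_shutdown(); /* leaf/ordinary devices first */
            sysdev_shutdown(); /* system devices last, presumably because they
                                * may still be needed while the rest shut down */
    }

    int main(void) { kernel_restart_prepare(); return 0; }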
diff --git a/mm/migrate.c b/mm/migrate.c
index 34d8ada053e4..37c73b902008 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -49,9 +49,8 @@ int isolate_lru_page(struct page *page, struct list_head *pagelist)
                 struct zone *zone = page_zone(page);
 
                 spin_lock_irq(&zone->lru_lock);
-                if (PageLRU(page)) {
+                if (PageLRU(page) && get_page_unless_zero(page)) {
                         ret = 0;
-                        get_page(page);
                         ClearPageLRU(page);
                         if (PageActive(page))
                                 del_page_from_active_list(zone, page);
@@ -632,18 +631,35 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
                         goto unlock;
                 wait_on_page_writeback(page);
         }
-
         /*
-         * Establish migration ptes or remove ptes
+         * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
+         * we cannot notice that anon_vma is freed while we migrates a page.
+         * This rcu_read_lock() delays freeing anon_vma pointer until the end
+         * of migration. File cache pages are no problem because of page_lock()
+         */
+        rcu_read_lock();
+        /*
+         * This is a corner case handling.
+         * When a new swap-cache is read into, it is linked to LRU
+         * and treated as swapcache but has no rmap yet.
+         * Calling try_to_unmap() against a page->mapping==NULL page is
+         * BUG. So handle it here.
          */
+        if (!page->mapping)
+                goto rcu_unlock;
+        /* Establish migration ptes or remove ptes */
         try_to_unmap(page, 1);
+
         if (!page_mapped(page))
                 rc = move_to_new_page(newpage, page);
 
         if (rc)
                 remove_migration_ptes(page, page);
+rcu_unlock:
+        rcu_read_unlock();
 
 unlock:
+
         unlock_page(page);
 
         if (rc != -EAGAIN) {
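In isolate_lru_page(), taking the reference with get_page_unless_zero() instead of an unconditional get_page() means a page whose refcount has already reached zero, i.e. one on its way to being freed, is never resurrected. A self-contained sketch of that compare-and-swap pattern, using C11 atomics rather than the kernel's actual implementation:

    /* Sketch of the get_page_unless_zero() idea: take a reference only
     * if the object is not already being freed.  The kernel version
     * lives in <linux/mm.h>; this is a userspace approximation. */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct page { atomic_int _count; };

    static bool get_page_unless_zero(struct page *p)
    {
            int c = atomic_load(&p->_count);
            while (c != 0) {
                    /* try c -> c+1; retry if another CPU raced with us */
                    if (atomic_compare_exchange_weak(&p->_count, &c, c + 1))
                            return true;
            }
            return false; /* refcount already hit zero: do not touch the page */
    }

    int main(void)
    {
            struct page live = { 1 }, dying = { 0 };
            printf("live:  %d\n", get_page_unless_zero(&live));  /* 1 */
            printf("dying: %d\n", get_page_unless_zero(&dying)); /* 0 */
            return 0;
    }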
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 40954fb81598..6d3550ca0282 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2775,11 +2775,11 @@ unsigned long __meminit __absent_pages_in_range(int nid,
         if (i == -1)
                 return 0;
 
+        prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
+
         /* Account for ranges before physical memory on this node */
         if (early_node_map[i].start_pfn > range_start_pfn)
-                hole_pages = early_node_map[i].start_pfn - range_start_pfn;
-
-        prev_end_pfn = early_node_map[i].start_pfn;
+                hole_pages = prev_end_pfn - range_start_pfn;
 
         /* Find all holes for the zone within the node */
         for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
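The __absent_pages_in_range() change clamps the first region's start PFN to the end of the queried range before using it, so a node whose memory begins beyond the range can no longer yield a hole count larger than the range itself. A worked example with invented PFNs:

    /* Query range is PFNs [100, 200); the node's first present memory
     * starts at PFN 250, entirely beyond the range. */
    #include <stdio.h>

    #define min(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
            unsigned long range_start_pfn = 100, range_end_pfn = 200;
            unsigned long first_start_pfn = 250;

            /* old code: hole = 250 - 100 = 150, more pages than the range holds */
            unsigned long old_hole = first_start_pfn - range_start_pfn;

            /* new code: clamp to the range end first, hole = 200 - 100 = 100 */
            unsigned long prev_end_pfn = min(first_start_pfn, range_end_pfn);
            unsigned long new_hole = prev_end_pfn - range_start_pfn;

            printf("old=%lu new=%lu (range is only %lu pages)\n",
                   old_hole, new_hole, range_end_pfn - range_start_pfn);
            return 0;
    }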
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 420bbb9955e9..5c18595b7616 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -112,9 +112,9 @@ static int br_handle_local_finish(struct sk_buff *skb)
  */
 static inline int is_link_local(const unsigned char *dest)
 {
-        const u16 *a = (const u16 *) dest;
-        static const u16 *const b = (const u16 *const ) br_group_address;
-        static const u16 m = __constant_cpu_to_be16(0xfff0);
+        __be16 *a = (__be16 *)dest;
+        static const __be16 *b = (const __be16 *)br_group_address;
+        static const __be16 m = __constant_cpu_to_be16(0xfff0);
 
         return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
 }
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index e848d8d6292f..deab27facbad 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -77,7 +77,8 @@ static inline unsigned int
 hash_by_src(const struct nf_conntrack_tuple *tuple)
 {
         /* Original src, to ensure we map it consistently if poss. */
-        return jhash_3words((__force u32)tuple->src.u3.ip, tuple->src.u.all,
+        return jhash_3words((__force u32)tuple->src.u3.ip,
+                            (__force u32)tuple->src.u.all,
                             tuple->dst.protonum, 0) % nf_nat_htable_size;
 }
 
83 84
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 0f45427e5fdc..76ec59ae524d 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -192,7 +192,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum)
                 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
                    ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip
                    : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
-        u_int16_t all
+        __be16 all
                 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
                    ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
                    : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index df30976f6dfd..ca774d8e3be3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -385,7 +385,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
 
 static int
 ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
-            int *type, int *code, int *msg, __be32 *info, int offset)
+            int *type, int *code, int *msg, __u32 *info, int offset)
 {
         struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
         struct ip6_tnl *t;
@@ -435,7 +435,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
         if ((*code) == ICMPV6_HDR_FIELD)
                 teli = parse_tlv_tnl_enc_lim(skb, skb->data);
 
-        if (teli && teli == ntohl(*info) - 2) {
+        if (teli && teli == *info - 2) {
                 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
                 if (tel->encap_limit == 0) {
                         if (net_ratelimit())
@@ -452,7 +452,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
                 }
                 break;
         case ICMPV6_PKT_TOOBIG:
-                mtu = ntohl(*info) - offset;
+                mtu = *info - offset;
                 if (mtu < IPV6_MIN_MTU)
                         mtu = IPV6_MIN_MTU;
                 t->dev->mtu = mtu;
@@ -478,12 +478,12 @@ out:
 
 static int
 ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-           int type, int code, int offset, __u32 info)
+           int type, int code, int offset, __be32 info)
 {
         int rel_msg = 0;
         int rel_type = type;
         int rel_code = code;
-        __u32 rel_info = info;
+        __u32 rel_info = ntohl(info);
         int err;
         struct sk_buff *skb2;
         struct iphdr *eiph;
@@ -564,10 +564,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                         goto out;
 
                 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
-                rel_info = htonl(rel_info);
         }
 
-        icmp_send(skb2, rel_type, rel_code, rel_info);
+        icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
 
 out:
         kfree_skb(skb2);
@@ -576,12 +575,12 @@ out:
 
 static int
 ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-           int type, int code, int offset, __u32 info)
+           int type, int code, int offset, __be32 info)
 {
         int rel_msg = 0;
         int rel_type = type;
         int rel_code = code;
-        __u32 rel_info = info;
+        __u32 rel_info = ntohl(info);
         int err;
 
         err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
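Taken together, the ip6_tunnel.c hunks enforce one convention: the ICMPv6 info word is converted from wire order exactly once on entry (rel_info = ntohl(info)), all arithmetic and comparisons happen in host order, and htonl() is applied only at the icmp_send() boundary. A userspace sketch of the same discipline; the function name and the sample numbers are invented for illustration:

    /* Convert wire-order data to host order once on entry, compute in
     * host order, convert back once at the edge. */
    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t handle_pkt_toobig(uint32_t info_be, uint32_t offset)
    {
            uint32_t mtu = ntohl(info_be) - offset; /* host order from here on */
            if (mtu < 1280)                         /* IPV6_MIN_MTU */
                    mtu = 1280;
            return htonl(mtu);                      /* wire order at the edge */
    }

    int main(void)
    {
            /* e.g. reported MTU 1500, 40 bytes of encapsulation overhead */
            printf("new mtu: %u\n", ntohl(handle_pkt_toobig(htonl(1500), 40)));
            return 0;
    }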
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 0f8304b0246b..7b0a95abe934 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2540,7 +2540,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
         sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
         sel.sport = ((struct sockaddr_in *)(sa + 1))->sin_port;
         if (sel.sport)
-                sel.sport_mask = ~0;
+                sel.sport_mask = htons(0xffff);
 
         /* set destination address info of selector */
         sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1],
@@ -2549,7 +2549,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
         sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
         sel.dport = ((struct sockaddr_in *)(sa + 1))->sin_port;
         if (sel.dport)
-                sel.dport_mask = ~0;
+                sel.dport_mask = htons(0xffff);
 
         rq = (struct sadb_x_ipsecrequest *)(pol + 1);
 
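The af_key change is behaviour-preserving: for an all-ones 16-bit mask, ~0 truncated to 16 bits and htons(0xffff) store identical bytes, since an all-ones pattern reads the same in either byte order; the new form simply gives the port-mask field a value of the byte-order-annotated type that sparse accepts. A two-line demonstration:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint16_t a = (uint16_t)~0;  /* 0xffff, whatever the byte order */
            uint16_t b = htons(0xffff); /* also 0xffff: all-ones is order-independent */
            printf("%04x %04x equal=%d\n", a, b, a == b);
            return 0;
    }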
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index aa086c83af80..0fe11889ce14 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -79,7 +79,8 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
         a = jhash2(tuple->src.u3.all, ARRAY_SIZE(tuple->src.u3.all),
                    (tuple->src.l3num << 16) | tuple->dst.protonum);
         b = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
-                   (tuple->src.u.all << 16) | tuple->dst.u.all);
+                   ((__force __u16)tuple->src.u.all << 16) |
+                   (__force __u16)tuple->dst.u.all);
 
         return jhash_2words(a, b, rnd) % size;
 }
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 1aa6229ca99f..eb6695dcd73b 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -80,7 +80,7 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
 
         return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
                       (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
-                       tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) %
+                       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) %
                       nf_ct_expect_hsize;
 }
 
@@ -259,8 +259,8 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
         }
 
         if (src) {
-                exp->tuple.src.u.all = (__force u16)*src;
-                exp->mask.src.u.all = 0xFFFF;
+                exp->tuple.src.u.all = *src;
+                exp->mask.src.u.all = htons(0xFFFF);
         } else {
                 exp->tuple.src.u.all = 0;
                 exp->mask.src.u.all = 0;
@@ -272,7 +272,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
         memset((void *)&exp->tuple.dst.u3 + len, 0x00,
                sizeof(exp->tuple.dst.u3) - len);
 
-        exp->tuple.dst.u.all = (__force u16)*dst;
+        exp->tuple.dst.u.all = *dst;
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
 
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index ca10df40784f..96aa637c0932 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -39,7 +39,7 @@ static int nf_ct_helper_vmalloc;
 static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
 {
         return (((tuple->src.l3num << 8) | tuple->dst.protonum) ^
-                tuple->src.u.all) % nf_ct_helper_hsize;
+                (__force __u16)tuple->src.u.all) % nf_ct_helper_hsize;
 }
 
 struct nf_conntrack_helper *
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 3335dd5be962..06cff1d13690 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -42,13 +42,13 @@ struct xt_connlimit_data {
 static u_int32_t connlimit_rnd;
 static bool connlimit_rnd_inited;
 
-static inline unsigned int connlimit_iphash(u_int32_t addr)
+static inline unsigned int connlimit_iphash(__be32 addr)
 {
         if (unlikely(!connlimit_rnd_inited)) {
                 get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
                 connlimit_rnd_inited = true;
         }
-        return jhash_1word(addr, connlimit_rnd) & 0xFF;
+        return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF;
 }
 
 static inline unsigned int
@@ -66,7 +66,7 @@ connlimit_iphash6(const union nf_conntrack_address *addr,
         for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
                 res.ip6[i] = addr->ip6[i] & mask->ip6[i];
 
-        return jhash2(res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF;
+        return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF;
 }
 
 static inline bool already_closed(const struct nf_conn *conn)
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index 04b677ae8dae..74f9b14c012f 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -21,6 +21,7 @@ static bool u32_match_it(const struct xt_u32 *data,
         unsigned int nnums;
         unsigned int nvals;
         unsigned int i;
+        __be32 n;
         u_int32_t pos;
         u_int32_t val;
         u_int32_t at;
@@ -38,9 +39,9 @@ static bool u32_match_it(const struct xt_u32 *data,
                 if (skb->len < 4 || pos > skb->len - 4)
                         return false;
 
-                ret = skb_copy_bits(skb, pos, &val, sizeof(val));
+                ret = skb_copy_bits(skb, pos, &n, sizeof(n));
                 BUG_ON(ret < 0);
-                val = ntohl(val);
+                val = ntohl(n);
                 nnums = ct->nnums;
 
                 /* Inner loop runs over "&", "<<", ">>" and "@" operands */
@@ -65,10 +66,10 @@ static bool u32_match_it(const struct xt_u32 *data,
                             pos > skb->len - at - 4)
                                 return false;
 
-                        ret = skb_copy_bits(skb, at + pos, &val,
-                                            sizeof(val));
+                        ret = skb_copy_bits(skb, at + pos, &n,
+                                            sizeof(n));
                         BUG_ON(ret < 0);
-                        val = ntohl(val);
+                        val = ntohl(n);
                         break;
                 }
         }
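The xt_u32 hunks stop reusing the host-order scratch variable val as the target of skb_copy_bits(); the packet data now lands in a dedicated wire-order __be32 n and is converted once with ntohl(), so each variable carries exactly one byte order. The same pattern in portable userspace form, with memcpy standing in for skb_copy_bits() and the packet bytes invented:

    /* Fetch a 32-bit big-endian word from a packet buffer into a
     * wire-order temporary, then convert once; memcpy also avoids
     * unaligned access. */
    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint32_t read_be32(const unsigned char *pkt, size_t pos)
    {
            uint32_t n;                   /* wire order, like __be32 n above */
            memcpy(&n, pkt + pos, sizeof(n));
            return ntohl(n);              /* host order for the match logic */
    }

    int main(void)
    {
            /* IPv4-ish header bytes: version/IHL 0x45, TOS 0, length 0x0054 */
            const unsigned char pkt[] = { 0x45, 0x00, 0x00, 0x54 };
            printf("word at 0: 0x%08x\n", read_be32(pkt, 0)); /* 0x45000054 */
            return 0;
    }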
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 372b24466dc7..d6667f7bc85e 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -71,7 +71,7 @@ struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
         struct rb_node *p, *parent, **pp;
 
         _enter("%p{%x},%x,%hx,",
-               rx, key_serial(key), trans->debug_id, ntohl(service_id));
+               rx, key_serial(key), trans->debug_id, ntohs(service_id));
 
         if (rx->trans == trans && rx->bundle) {
                 atomic_inc(&rx->bundle->usage);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 64b9b8c743c4..12ff5da8160e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -131,13 +131,13 @@ static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len)
         case AF_INET:
                 snprintf(buf, len, "%u.%u.%u.%u, port=%u",
                          NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
-                         htons(((struct sockaddr_in *) addr)->sin_port));
+                         ntohs(((struct sockaddr_in *) addr)->sin_port));
                 break;
 
         case AF_INET6:
                 snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
                          NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
-                         htons(((struct sockaddr_in6 *) addr)->sin6_port));
+                         ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
                 break;
 
         default:
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 35d5ba1d4f42..ce2659836374 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -72,10 +72,8 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w,
                           u32 pos, u32 mask, u32 val)
 {
         val = (val & mask) << pos;
-        val = htonl(val);
-        mask = htonl(mask << pos);
-        m->hdr[w] &= ~mask;
-        m->hdr[w] |= val;
+        m->hdr[w] &= ~htonl(mask << pos);
+        m->hdr[w] |= htonl(val);
 }
 
 /*
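The condensed msg_set_bits() performs the identical read-modify-write as the four-statement version it replaces: clear the field in wire order with ~htonl(mask << pos), then OR in the already shifted-and-masked value, with the byte swaps folded into the two statements. A standalone check with invented header contents:

    /* Verify the condensed logic against a concrete example. */
    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    static void msg_set_bits(uint32_t *hdr, uint32_t pos, uint32_t mask, uint32_t val)
    {
            val = (val & mask) << pos;
            *hdr &= ~htonl(mask << pos); /* clear the field, in wire order */
            *hdr |= htonl(val);          /* insert the new value */
    }

    int main(void)
    {
            uint32_t w = htonl(0x12345678);
            msg_set_bits(&w, 8, 0xff, 0xab); /* set bits 8..15 to 0xab */
            printf("0x%08x\n", ntohl(w));    /* prints 0x1234ab78 */
            return 0;
    }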