Diffstat (limited to 'drivers')
-rw-r--r--  drivers/acpi/Kconfig                   |    3
-rw-r--r--  drivers/base/power/shutdown.c          |    2
-rw-r--r--  drivers/block/lguest_blk.c             |  169
-rw-r--r--  drivers/char/Kconfig                   |    1
-rw-r--r--  drivers/char/hpet.c                    |    2
-rw-r--r--  drivers/char/hvc_lguest.c              |   80
-rw-r--r--  drivers/edac/Kconfig                   |    4
-rw-r--r--  drivers/edac/edac_mc.c                 |   64
-rw-r--r--  drivers/edac/edac_mc_sysfs.c           |   19
-rw-r--r--  drivers/edac/edac_module.h             |    8
-rw-r--r--  drivers/edac/edac_pci.c                |  162
-rw-r--r--  drivers/edac/edac_pci_sysfs.c          |  297
-rw-r--r--  drivers/edac/i3000_edac.c              |    2
-rw-r--r--  drivers/i2c/chips/ds1682.c             |    3
-rw-r--r--  drivers/ide/pci/scc_pata.c             |    4
-rw-r--r--  drivers/ieee1394/raw1394.c             |    2
-rw-r--r--  drivers/lguest/Makefile                |   12
-rw-r--r--  drivers/lguest/README                  |   47
-rw-r--r--  drivers/lguest/core.c                  |  357
-rw-r--r--  drivers/lguest/hypercalls.c            |  127
-rw-r--r--  drivers/lguest/interrupts_and_traps.c  |  205
-rw-r--r--  drivers/lguest/io.c                    |  265
-rw-r--r--  drivers/lguest/lg.h                    |   44
-rw-r--r--  drivers/lguest/lguest.c                |  490
-rw-r--r--  drivers/lguest/lguest_asm.S            |   71
-rw-r--r--  drivers/lguest/lguest_bus.c            |   75
-rw-r--r--  drivers/lguest/lguest_user.c           |  166
-rw-r--r--  drivers/lguest/page_tables.c           |  329
-rw-r--r--  drivers/lguest/segments.c              |  126
-rw-r--r--  drivers/lguest/switcher.S              |  284
-rw-r--r--  drivers/media/video/Kconfig            |    4
-rw-r--r--  drivers/mtd/maps/Kconfig               |    2
-rw-r--r--  drivers/net/ax88796.c                  |    2
-rw-r--r--  drivers/net/cxgb3/cxgb3_offload.c      |    2
-rw-r--r--  drivers/net/lguest_net.c               |  237
-rw-r--r--  drivers/net/lib8390.c                  |    9
-rw-r--r--  drivers/net/pppol2tp.c                 |    4
-rw-r--r--  drivers/pnp/card.c                     |  166
-rw-r--r--  drivers/pnp/core.c                     |   50
-rw-r--r--  drivers/pnp/driver.c                   |   70
-rw-r--r--  drivers/pnp/interface.c                |  217
-rw-r--r--  drivers/pnp/isapnp/compat.c            |   39
-rw-r--r--  drivers/pnp/isapnp/core.c              |  332
-rw-r--r--  drivers/pnp/isapnp/proc.c              |   21
-rw-r--r--  drivers/pnp/manager.c                  |  144
-rw-r--r--  drivers/pnp/pnpacpi/core.c             |   98
-rw-r--r--  drivers/pnp/pnpacpi/rsparser.c         |  441
-rw-r--r--  drivers/pnp/pnpbios/bioscalls.c        |  339
-rw-r--r--  drivers/pnp/pnpbios/core.c             |  257
-rw-r--r--  drivers/pnp/pnpbios/proc.c             |  107
-rw-r--r--  drivers/pnp/pnpbios/rsparser.c         |  349
-rw-r--r--  drivers/pnp/quirks.c                   |   80
-rw-r--r--  drivers/pnp/resource.c                 |  102
-rw-r--r--  drivers/pnp/support.c                  |   17
-rw-r--r--  drivers/pnp/system.c                   |   40
-rw-r--r--  drivers/rtc/Makefile                   |   42
-rw-r--r--  drivers/rtc/rtc-ds1307.c               |    2
-rw-r--r--  drivers/rtc/rtc-stk17ta8.c             |    6
-rw-r--r--  drivers/spi/spi_s3c24xx.c              |    2
-rw-r--r--  drivers/video/chipsfb.c                |    3
-rw-r--r--  drivers/video/tgafb.c                  |    2
-rw-r--r--  drivers/w1/masters/ds1wm.c             |    2
-rw-r--r--  drivers/xen/xenbus/xenbus_xs.c         |    2
63 files changed, 4734 insertions(+), 1876 deletions(-)
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 251344cb29ae..22b401b2e088 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -11,9 +11,6 @@ menuconfig ACPI
 	depends on PCI
 	depends on PM
 	select PNP
-	# for sleep
-	select HOTPLUG_CPU if X86 && SMP
-	select SUSPEND_SMP if X86 && SMP
 	default y
 	---help---
 	  Advanced Configuration and Power Interface (ACPI) support for
diff --git a/drivers/base/power/shutdown.c b/drivers/base/power/shutdown.c
index a47ee1b70d20..56e8eaaac012 100644
--- a/drivers/base/power/shutdown.c
+++ b/drivers/base/power/shutdown.c
@@ -44,7 +44,5 @@ void device_shutdown(void)
 			dev->driver->shutdown(dev);
 		}
 	}
-
-	sysdev_shutdown();
 }
 
diff --git a/drivers/block/lguest_blk.c b/drivers/block/lguest_blk.c
index 5b79d0724171..93e3c4001bf5 100644
--- a/drivers/block/lguest_blk.c
+++ b/drivers/block/lguest_blk.c
@@ -1,6 +1,12 @@
-/* A simple block driver for lguest.
+/*D:400
+ * The Guest block driver
  *
- * Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
+ * This is a simple block driver, which appears as /dev/lgba, lgbb, lgbc etc.
+ * The mechanism is simple: we place the information about the request in the
+ * device page, then use SEND_DMA (containing the data for a write, or an empty
+ * "ping" DMA for a read).
+:*/
+/* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -25,27 +31,50 @@
 
 static char next_block_index = 'a';
 
+/*D:420 Here is the structure which holds all the information we need about
+ * each Guest block device.
+ *
+ * I'm sure at this stage, you're wondering "hey, where was the adventure I was
+ * promised?" and thinking "Rusty sucks, I shall say nasty things about him on
+ * my blog". I think Real adventures have boring bits, too, and you're in the
+ * middle of one. But it gets better. Just not quite yet. */
 struct blockdev
 {
+	/* The block queue infrastructure wants a spinlock: it is held while it
+	 * calls our block request function. We grab it in our interrupt
+	 * handler so the responses don't mess with new requests. */
 	spinlock_t lock;
 
-	/* The disk structure for the kernel. */
+	/* The disk structure registered with kernel. */
 	struct gendisk *disk;
 
-	/* The major number for this disk. */
+	/* The major device number for this disk, and the interrupt. We only
+	 * really keep them here for completeness; we'd need them if we
+	 * supported device unplugging. */
 	int major;
 	int irq;
 
+	/* The physical address of this device's memory page */
 	unsigned long phys_addr;
-	/* The mapped block page. */
+	/* The mapped memory page for convenient acces. */
 	struct lguest_block_page *lb_page;
 
-	/* We only have a single request outstanding at a time. */
+	/* We only have a single request outstanding at a time: this is it. */
 	struct lguest_dma dma;
 	struct request *req;
 };
 
-/* Jens gave me this nice helper to end all chunks of a request. */
+/*D:495 We originally used end_request() throughout the driver, but it turns
+ * out that end_request() is deprecated, and doesn't actually end the request
+ * (which seems like a good reason to deprecate it!). It simply ends the first
+ * bio. So if we had 3 bios in a "struct request" we would do all 3,
+ * end_request(), do 2, end_request(), do 1 and end_request(): twice as much
+ * work as we needed to do.
+ *
+ * This reinforced to me that I do not understand the block layer.
+ *
+ * Nonetheless, Jens Axboe gave me this nice helper to end all chunks of a
+ * request. This improved disk speed by 130%. */
 static void end_entire_request(struct request *req, int uptodate)
 {
 	if (end_that_request_first(req, uptodate, req->hard_nr_sectors))
@@ -55,30 +84,62 @@ static void end_entire_request(struct request *req, int uptodate)
 	end_that_request_last(req, uptodate);
 }
 
+/* I'm told there are only two stories in the world worth telling: love and
+ * hate. So there used to be a love scene here like this:
+ *
+ *  Launcher: We could make beautiful I/O together, you and I.
+ *  Guest: My, that's a big disk!
+ *
+ * Unfortunately, it was just too raunchy for our otherwise-gentle tale. */
+
+/*D:490 This is the interrupt handler, called when a block read or write has
+ * been completed for us. */
 static irqreturn_t lgb_irq(int irq, void *_bd)
 {
+	/* We handed our "struct blockdev" as the argument to request_irq(), so
+	 * it is passed through to us here. This tells us which device we're
+	 * dealing with in case we have more than one. */
 	struct blockdev *bd = _bd;
 	unsigned long flags;
 
+	/* We weren't doing anything? Strange, but could happen if we shared
+	 * interrupts (we don't!). */
 	if (!bd->req) {
 		pr_debug("No work!\n");
 		return IRQ_NONE;
 	}
 
+	/* Not done yet? That's equally strange. */
 	if (!bd->lb_page->result) {
 		pr_debug("No result!\n");
 		return IRQ_NONE;
 	}
 
+	/* We have to grab the lock before ending the request. */
 	spin_lock_irqsave(&bd->lock, flags);
+	/* "result" is 1 for success, 2 for failure: end_entire_request() wants
+	 * to know whether this succeeded or not. */
 	end_entire_request(bd->req, bd->lb_page->result == 1);
+	/* Clear out request, it's done. */
 	bd->req = NULL;
+	/* Reset incoming DMA for next time. */
 	bd->dma.used_len = 0;
+	/* Ready for more reads or writes */
 	blk_start_queue(bd->disk->queue);
 	spin_unlock_irqrestore(&bd->lock, flags);
+
+	/* The interrupt was for us, we dealt with it. */
 	return IRQ_HANDLED;
 }
 
+/*D:480 The block layer's "struct request" contains a number of "struct bio"s,
+ * each of which contains "struct bio_vec"s, each of which contains a page, an
+ * offset and a length.
+ *
+ * Fortunately there are iterators to help us walk through the "struct
+ * request". Even more fortunately, there were plenty of places to steal the
+ * code from. We pack the "struct request" into our "struct lguest_dma" and
+ * return the total length. */
 static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma)
 {
 	unsigned int i = 0, idx, len = 0;
@@ -87,8 +148,13 @@ static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma)
 	rq_for_each_bio(bio, req) {
 		struct bio_vec *bvec;
 		bio_for_each_segment(bvec, bio, idx) {
+			/* We told the block layer not to give us too many. */
 			BUG_ON(i == LGUEST_MAX_DMA_SECTIONS);
+			/* If we had a zero-length segment, it would look like
+			 * the end of the data referred to by the "struct
+			 * lguest_dma", so make sure that doesn't happen. */
 			BUG_ON(!bvec->bv_len);
+			/* Convert page & offset to a physical address */
 			dma->addr[i] = page_to_phys(bvec->bv_page)
 				+ bvec->bv_offset;
 			dma->len[i] = bvec->bv_len;
@@ -96,26 +162,39 @@ static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma)
 			i++;
 		}
 	}
+	/* If the array isn't full, we mark the end with a 0 length */
 	if (i < LGUEST_MAX_DMA_SECTIONS)
 		dma->len[i] = 0;
 	return len;
 }
 
+/* This creates an empty DMA, useful for prodding the Host without sending data
+ * (ie. when we want to do a read) */
 static void empty_dma(struct lguest_dma *dma)
 {
 	dma->len[0] = 0;
 }
 
+/*D:470 Setting up a request is fairly easy: */
 static void setup_req(struct blockdev *bd,
 		      int type, struct request *req, struct lguest_dma *dma)
 {
+	/* The type is 1 (write) or 0 (read). */
 	bd->lb_page->type = type;
+	/* The sector on disk where the read or write starts. */
 	bd->lb_page->sector = req->sector;
+	/* The result is initialized to 0 (unfinished). */
 	bd->lb_page->result = 0;
+	/* The current request (so we can end it in the interrupt handler). */
 	bd->req = req;
+	/* The number of bytes: returned as a side-effect of req_to_dma(),
+	 * which packs the block layer's "struct request" into our "struct
+	 * lguest_dma" */
 	bd->lb_page->bytes = req_to_dma(req, dma);
 }
 
+/*D:450 Write is pretty straightforward: we pack the request into a "struct
+ * lguest_dma", then use SEND_DMA to send the request. */
 static void do_write(struct blockdev *bd, struct request *req)
 {
 	struct lguest_dma send;
@@ -126,6 +205,9 @@ static void do_write(struct blockdev *bd, struct request *req)
 	lguest_send_dma(bd->phys_addr, &send);
 }
 
+/* Read is similar to write, except we pack the request into our receive
+ * "struct lguest_dma" and send through an empty DMA just to tell the Host that
+ * there's a request pending. */
 static void do_read(struct blockdev *bd, struct request *req)
 {
 	struct lguest_dma ping;
@@ -137,21 +219,30 @@ static void do_read(struct blockdev *bd, struct request *req)
 	lguest_send_dma(bd->phys_addr, &ping);
 }
 
+/*D:440 This where requests come in: we get handed the request queue and are
+ * expected to pull a "struct request" off it until we've finished them or
+ * we're waiting for a reply: */
 static void do_lgb_request(struct request_queue *q)
 {
 	struct blockdev *bd;
 	struct request *req;
 
 again:
+	/* This sometimes returns NULL even on the very first time around. I
+	 * wonder if it's something to do with letting elves handle the request
+	 * queue... */
 	req = elv_next_request(q);
 	if (!req)
 		return;
 
+	/* We attached the struct blockdev to the disk: get it back */
 	bd = req->rq_disk->private_data;
-	/* Sometimes we get repeated requests after blk_stop_queue. */
+	/* Sometimes we get repeated requests after blk_stop_queue(), but we
+	 * can only handle one at a time. */
 	if (bd->req)
 		return;
 
+	/* We only do reads and writes: no tricky business! */
 	if (!blk_fs_request(req)) {
 		pr_debug("Got non-command 0x%08x\n", req->cmd_type);
 		req->errors++;
@@ -164,20 +255,31 @@ again:
 	else
 		do_read(bd, req);
 
-	/* Wait for interrupt to tell us it's done. */
+	/* We've put out the request, so stop any more coming in until we get
+	 * an interrupt, which takes us to lgb_irq() to re-enable the queue. */
 	blk_stop_queue(q);
 }
 
+/*D:430 This is the "struct block_device_operations" we attach to the disk at
+ * the end of lguestblk_probe(). It doesn't seem to want much. */
 static struct block_device_operations lguestblk_fops = {
 	.owner = THIS_MODULE,
 };
 
+/*D:425 Setting up a disk device seems to involve a lot of code. I'm not sure
+ * quite why. I do know that the IDE code sent two or three of the maintainers
+ * insane, perhaps this is the fringe of the same disease?
+ *
+ * As in the console code, the probe function gets handed the generic
+ * lguest_device from lguest_bus.c: */
 static int lguestblk_probe(struct lguest_device *lgdev)
 {
 	struct blockdev *bd;
 	int err;
 	int irqflags = IRQF_SHARED;
 
+	/* First we allocate our own "struct blockdev" and initialize the easy
+	 * fields. */
 	bd = kmalloc(sizeof(*bd), GFP_KERNEL);
 	if (!bd)
 		return -ENOMEM;
@@ -187,59 +289,100 @@ static int lguestblk_probe(struct lguest_device *lgdev)
 	bd->req = NULL;
 	bd->dma.used_len = 0;
 	bd->dma.len[0] = 0;
+	/* The descriptor in the lguest_devices array provided by the Host
+	 * gives the Guest the physical page number of the device's page. */
 	bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT);
 
+	/* We use lguest_map() to get a pointer to the device page */
 	bd->lb_page = lguest_map(bd->phys_addr, 1);
 	if (!bd->lb_page) {
 		err = -ENOMEM;
 		goto out_free_bd;
 	}
 
+	/* We need a major device number: 0 means "assign one dynamically". */
 	bd->major = register_blkdev(0, "lguestblk");
 	if (bd->major < 0) {
 		err = bd->major;
 		goto out_unmap;
 	}
 
+	/* This allocates a "struct gendisk" where we pack all the information
+	 * about the disk which the rest of Linux sees. We ask for one minor
+	 * number; I do wonder if we should be asking for more. */
 	bd->disk = alloc_disk(1);
 	if (!bd->disk) {
 		err = -ENOMEM;
 		goto out_unregister_blkdev;
 	}
 
+	/* Every disk needs a queue for requests to come in: we set up the
+	 * queue with a callback function (the core of our driver) and the lock
+	 * to use. */
 	bd->disk->queue = blk_init_queue(do_lgb_request, &bd->lock);
 	if (!bd->disk->queue) {
 		err = -ENOMEM;
 		goto out_put_disk;
 	}
 
-	/* We can only handle a certain number of sg entries */
+	/* We can only handle a certain number of pointers in our SEND_DMA
+	 * call, so we set that with blk_queue_max_hw_segments(). This is not
+	 * to be confused with blk_queue_max_phys_segments() of course! I
+	 * know, who could possibly confuse the two?
+	 *
+	 * Well, it's simple to tell them apart: this one seems to work and the
+	 * other one didn't. */
 	blk_queue_max_hw_segments(bd->disk->queue, LGUEST_MAX_DMA_SECTIONS);
-	/* Buffers must not cross page boundaries */
+
+	/* Due to technical limitations of our Host (and simple coding) we
+	 * can't have a single buffer which crosses a page boundary. Tell it
+	 * here. This means that our maximum request size is 16
+	 * (LGUEST_MAX_DMA_SECTIONS) pages. */
 	blk_queue_segment_boundary(bd->disk->queue, PAGE_SIZE-1);
 
+	/* We name our disk: this becomes the device name when udev does its
+	 * magic thing and creates the device node, such as /dev/lgba.
+	 * next_block_index is a global which starts at 'a'. Unfortunately
+	 * this simple increment logic means that the 27th disk will be called
+	 * "/dev/lgb{". In that case, I recommend having at least 29 disks, so
+	 * your /dev directory will be balanced. */
 	sprintf(bd->disk->disk_name, "lgb%c", next_block_index++);
+
+	/* We look to the device descriptor again to see if this device's
+	 * interrupts are expected to be random. If they are, we tell the irq
+	 * subsystem. At the moment this bit is always set. */
 	if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS)
 		irqflags |= IRQF_SAMPLE_RANDOM;
+
+	/* Now we have the name and irqflags, we can request the interrupt; we
+	 * give it the "struct blockdev" we have set up to pass to lgb_irq()
+	 * when there is an interrupt. */
 	err = request_irq(bd->irq, lgb_irq, irqflags, bd->disk->disk_name, bd);
 	if (err)
 		goto out_cleanup_queue;
 
+	/* We bind our one-entry DMA pool to the key for this block device so
+	 * the Host can reply to our requests. The key is equal to the
+	 * physical address of the device's page, which is conveniently
+	 * unique. */
 	err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq);
 	if (err)
 		goto out_free_irq;
 
+	/* We finish our disk initialization and add the disk to the system. */
 	bd->disk->major = bd->major;
 	bd->disk->first_minor = 0;
 	bd->disk->private_data = bd;
 	bd->disk->fops = &lguestblk_fops;
-	/* This is initialized to the disk size by the other end. */
+	/* This is initialized to the disk size by the Launcher. */
 	set_capacity(bd->disk, bd->lb_page->num_sectors);
 	add_disk(bd->disk);
 
 	printk(KERN_INFO "%s: device %i at major %d\n",
 	       bd->disk->disk_name, lgdev->index, bd->major);
 
+	/* We don't need to keep the "struct blockdev" around, but if we ever
+	 * implemented device removal, we'd need this. */
 	lgdev->private = bd;
 	return 0;
 
@@ -258,6 +401,8 @@ out_free_bd:
 	return err;
 }
 
+/*D:410 The boilerplate code for registering the lguest block driver is just
+ * like the console: */
 static struct lguest_driver lguestblk_drv = {
 	.name = "lguestblk",
 	.owner = THIS_MODULE,
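
For orientation while reading the driver above: everything the Guest and Host exchange goes through five fields of the shared device page. A minimal sketch of that page as lguest_blk.c uses it — the field names are taken from the accesses in the hunks above (type, sector, bytes, result, num_sectors), but the types and ordering here are assumptions; the authoritative definition lives in the lguest headers, not in this patch:

	/* Sketch only: the shared block-device page as the driver above uses it.
	 * Types and field order are assumed; names come from the driver. */
	struct lguest_block_page {
		u32 type;		/* set by setup_req(): 1 = write, 0 = read */
		u32 bytes;		/* total request length, from req_to_dma() */
		u64 sector;		/* starting sector, copied from req->sector */
		u32 result;		/* 0 = pending, 1 = success, 2 = failure */
		u32 num_sectors;	/* disk size; filled in by the Launcher */
	};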
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index acdbcdc3e457..b391776e5bf3 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -130,6 +130,7 @@ config ROCKETPORT
 config CYCLADES
 	tristate "Cyclades async mux support"
 	depends on SERIAL_NONSTANDARD && (PCI || ISA)
+	select FW_LOADER
 	---help---
 	  This driver supports Cyclades Z and Y multiserial boards.
 	  You would need something like this to connect more than two modems to
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 9a2694e5f8b9..77bf4aa217a8 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -73,7 +73,7 @@ static struct clocksource clocksource_hpet = {
 	.name		= "hpet",
 	.rating		= 250,
 	.read		= read_hpet,
-	.mask		= 0xffffffffffffffff,
+	.mask		= CLOCKSOURCE_MASK(64),
 	.mult		= 0, /*to be caluclated*/
 	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
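
The one-line change above swaps a 64-bit literal for the equivalent macro. Assuming the <linux/clocksource.h> definition of this era (quoted from memory, so verify against the tree), the macro builds an all-ones mask for an N-bit counter, which makes CLOCKSOURCE_MASK(64) identical to the 0xffffffffffffffff it replaces:

	/* Assumed definition from <linux/clocksource.h> of this era:
	 * an all-ones mask for a 'bits'-wide cycle counter. */
	#define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL << (bits)) - 1) : -1)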
diff --git a/drivers/char/hvc_lguest.c b/drivers/char/hvc_lguest.c
index e7b889e404a7..feeccbaec438 100644
--- a/drivers/char/hvc_lguest.c
+++ b/drivers/char/hvc_lguest.c
@@ -1,6 +1,22 @@
-/* Simple console for lguest.
+/*D:300
+ * The Guest console driver
  *
- * Copyright (C) 2006 Rusty Russell, IBM Corporation
+ * This is a trivial console driver: we use lguest's DMA mechanism to send
+ * bytes out, and register a DMA buffer to receive bytes in. It is assumed to
+ * be present and available from the very beginning of boot.
+ *
+ * Writing console drivers is one of the few remaining Dark Arts in Linux.
+ * Fortunately for us, the path of virtual consoles has been well-trodden by
+ * the PowerPC folks, who wrote "hvc_console.c" to generically support any
+ * virtual console. We use that infrastructure which only requires us to write
+ * the basic put_chars and get_chars functions and call the right register
+ * functions.
+:*/
+
+/*M:002 The console can be flooded: while the Guest is processing input the
+ * Host can send more. Buffering in the Host could alleviate this, but it is a
+ * difficult problem in general. :*/
+/* Copyright (C) 2006 Rusty Russell, IBM Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -21,49 +37,81 @@
 #include <linux/lguest_bus.h>
 #include "hvc_console.h"
 
+/*D:340 This is our single console input buffer, with associated "struct
+ * lguest_dma" referring to it. Note the 0-terminated length array, and the
+ * use of physical address for the buffer itself. */
 static char inbuf[256];
 static struct lguest_dma cons_input = { .used_len = 0,
 					.addr[0] = __pa(inbuf),
 					.len[0] = sizeof(inbuf),
 					.len[1] = 0 };
 
+/*D:310 The put_chars() callback is pretty straightforward.
+ *
+ * First we put the pointer and length in a "struct lguest_dma": we only have
+ * one pointer, so we set the second length to 0. Then we use SEND_DMA to send
+ * the data to (Host) buffers attached to the console key. Usually a device's
+ * key is a physical address within the device's memory, but because the
+ * console device doesn't have any associated physical memory, we use the
+ * LGUEST_CONSOLE_DMA_KEY constant (aka 0). */
 static int put_chars(u32 vtermno, const char *buf, int count)
 {
 	struct lguest_dma dma;
 
-	/* FIXME: what if it's over a page boundary? */
+	/* FIXME: DMA buffers in a "struct lguest_dma" are not allowed
+	 * to go over page boundaries. This never seems to happen,
+	 * but if it did we'd need to fix this code. */
 	dma.len[0] = count;
 	dma.len[1] = 0;
 	dma.addr[0] = __pa(buf);
 
 	lguest_send_dma(LGUEST_CONSOLE_DMA_KEY, &dma);
+	/* We're expected to return the amount of data we wrote: all of it. */
 	return count;
 }
 
+/*D:350 get_chars() is the callback from the hvc_console infrastructure when
+ * an interrupt is received.
+ *
+ * Firstly we see if our buffer has been filled: if not, we return. The rest
+ * of the code deals with the fact that the hvc_console() infrastructure only
+ * asks us for 16 bytes at a time. We keep a "cons_offset" variable for
+ * partially-read buffers. */
 static int get_chars(u32 vtermno, char *buf, int count)
 {
 	static int cons_offset;
 
+	/* Nothing left to see here... */
 	if (!cons_input.used_len)
 		return 0;
 
+	/* You want more than we have to give? Well, try wanting less! */
 	if (cons_input.used_len - cons_offset < count)
 		count = cons_input.used_len - cons_offset;
 
+	/* Copy across to their buffer and increment offset. */
 	memcpy(buf, inbuf + cons_offset, count);
 	cons_offset += count;
+
+	/* Finished? Zero offset, and reset cons_input so Host will use it
+	 * again. */
 	if (cons_offset == cons_input.used_len) {
 		cons_offset = 0;
 		cons_input.used_len = 0;
 	}
 	return count;
 }
+/*:*/
 
 static struct hv_ops lguest_cons = {
 	.get_chars = get_chars,
 	.put_chars = put_chars,
 };
 
+/*D:320 Console drivers are initialized very early so boot messages can go
+ * out. At this stage, the console is output-only. Our driver checks we're a
+ * Guest, and if so hands hvc_instantiate() the console number (0), priority
+ * (0), and the struct hv_ops containing the put_chars() function. */
 static int __init cons_init(void)
 {
 	if (strcmp(paravirt_ops.name, "lguest") != 0)
@@ -73,21 +121,46 @@ static int __init cons_init(void)
 }
 console_initcall(cons_init);
 
+/*D:370 To set up and manage our virtual console, we call hvc_alloc() and
+ * stash the result in the private pointer of the "struct lguest_device".
+ * Since we never remove the console device we never need this pointer again,
+ * but using ->private is considered good form, and you never know who's going
+ * to copy your driver.
+ *
+ * Once the console is set up, we bind our input buffer ready for input. */
 static int lguestcons_probe(struct lguest_device *lgdev)
 {
 	int err;
 
+	/* The first argument of hvc_alloc() is the virtual console number, so
+	 * we use zero. The second argument is the interrupt number.
+	 *
+	 * The third argument is a "struct hv_ops" containing the put_chars()
+	 * and get_chars() pointers. The final argument is the output buffer
+	 * size: we use 256 and expect the Host to have room for us to send
+	 * that much. */
 	lgdev->private = hvc_alloc(0, lgdev_irq(lgdev), &lguest_cons, 256);
 	if (IS_ERR(lgdev->private))
 		return PTR_ERR(lgdev->private);
 
+	/* We bind a single DMA buffer at key LGUEST_CONSOLE_DMA_KEY.
+	 * "cons_input" is that statically-initialized global DMA buffer we saw
+	 * above, and we also give the interrupt we want. */
 	err = lguest_bind_dma(LGUEST_CONSOLE_DMA_KEY, &cons_input, 1,
 			      lgdev_irq(lgdev));
 	if (err)
 		printk("lguest console: failed to bind buffer.\n");
 	return err;
 }
+/* Note the use of lgdev_irq() for the interrupt number. We tell hvc_alloc()
+ * to expect input when this interrupt is triggered, and then tell
+ * lguest_bind_dma() that is the interrupt to send us when input comes in. */
 
+/*D:360 From now on the console driver follows standard Guest driver form:
+ * register_lguest_driver() registers the device type and probe function, and
+ * the probe function sets up the device.
+ *
+ * The standard "struct lguest_driver": */
 static struct lguest_driver lguestcons_drv = {
 	.name = "lguestcons",
 	.owner = THIS_MODULE,
@@ -95,6 +168,7 @@ static struct lguest_driver lguestcons_drv = {
 	.probe = lguestcons_probe,
 };
 
+/* The standard init function */
 static int __init hvc_lguest_init(void)
 {
 	return register_lguest_driver(&lguestcons_drv);
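
The hunks above never show the middle of cons_init(); per the D:320 comment it simply hands hvc_instantiate() the console number (0), the priority (0), and the ops table. A sketch of the whole function under that assumption (the elided body is inferred, not quoted from the patch):

	static int __init cons_init(void)
	{
		/* Only register a console if we are actually running under lguest. */
		if (strcmp(paravirt_ops.name, "lguest") != 0)
			return 0;

		return hvc_instantiate(0, 0, &lguest_cons);
	}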
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 1724c41d2414..98b6b4fb4257 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -8,7 +8,7 @@ menuconfig EDAC
 	bool "EDAC - error detection and reporting (EXPERIMENTAL)"
 	depends on HAS_IOMEM
 	depends on EXPERIMENTAL
-	depends on X86 || MIPS || PPC
+	depends on X86 || PPC
 	help
 	  EDAC is designed to report errors in the core system.
 	  These are low-level errors that are reported in the CPU or
@@ -126,7 +126,7 @@ config EDAC_I5000
 config EDAC_PASEMI
 	tristate "PA Semi PWRficient"
 	depends on EDAC_MM_EDAC && PCI
-	depends on PPC
+	depends on PPC_PASEMI
 	help
 	  Support for error detection and correction on PA Semi
 	  PWRficient.
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 4471be362599..063a1bffe38b 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -214,6 +214,13 @@ void edac_mc_free(struct mem_ctl_info *mci)
 }
 EXPORT_SYMBOL_GPL(edac_mc_free);
 
+
+/*
+ * find_mci_by_dev
+ *
+ *	scan list of controllers looking for the one that manages
+ *	the 'dev' device
+ */
 static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 {
 	struct mem_ctl_info *mci;
@@ -268,12 +275,6 @@ static void edac_mc_workq_function(struct work_struct *work_req)
 	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
 		mci->edac_check(mci);
 
-	/*
-	 * FIXME: temp place holder for PCI checks,
-	 * goes away when we break out PCI
-	 */
-	edac_pci_do_parity_check();
-
 	mutex_unlock(&mem_ctls_mutex);
 
 	/* Reschedule */
@@ -314,36 +315,55 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 {
 	int status;
 
-	/* if not running POLL, leave now */
-	if (mci->op_state == OP_RUNNING_POLL) {
-		status = cancel_delayed_work(&mci->work);
-		if (status == 0) {
-			debugf0("%s() not canceled, flush the queue\n",
-				__func__);
+	status = cancel_delayed_work(&mci->work);
+	if (status == 0) {
+		debugf0("%s() not canceled, flush the queue\n",
+			__func__);
 
-			/* workq instance might be running, wait for it */
-			flush_workqueue(edac_workqueue);
-		}
+		/* workq instance might be running, wait for it */
+		flush_workqueue(edac_workqueue);
 	}
 }
 
 /*
- * edac_reset_delay_period
+ * edac_mc_reset_delay_period(unsigned long value)
+ *
+ *	user space has updated our poll period value, need to
+ *	reset our workq delays
  */
-static void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value)
+void edac_mc_reset_delay_period(int value)
 {
-	/* cancel the current workq request */
-	edac_mc_workq_teardown(mci);
+	struct mem_ctl_info *mci;
+	struct list_head *item;
 
-	/* lock the list of devices for the new setup */
 	mutex_lock(&mem_ctls_mutex);
 
-	/* restart the workq request, with new delay value */
-	edac_mc_workq_setup(mci, value);
+	/* scan the list and turn off all workq timers, doing so under lock
+	 */
+	list_for_each(item, &mc_devices) {
+		mci = list_entry(item, struct mem_ctl_info, link);
+
+		if (mci->op_state == OP_RUNNING_POLL)
+			cancel_delayed_work(&mci->work);
+	}
+
+	mutex_unlock(&mem_ctls_mutex);
+
+
+	/* re-walk the list, and reset the poll delay */
+	mutex_lock(&mem_ctls_mutex);
+
+	list_for_each(item, &mc_devices) {
+		mci = list_entry(item, struct mem_ctl_info, link);
+
+		edac_mc_workq_setup(mci, (unsigned long) value);
+	}
 
 	mutex_unlock(&mem_ctls_mutex);
 }
 
+
+
 /* Return 0 on success, 1 on failure.
  * Before calling this function, caller must
  * assign a unique value to mci->mc_idx.
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index cd090b0677a7..4a0576bd06fc 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -122,6 +122,23 @@ static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
 	return count;
 }
 
+/*
+ * mc poll_msec time value
+ */
+static ssize_t poll_msec_int_store(void *ptr, const char *buffer, size_t count)
+{
+	int *value = (int *)ptr;
+
+	if (isdigit(*buffer)) {
+		*value = simple_strtoul(buffer, NULL, 0);
+
+		/* notify edac_mc engine to reset the poll period */
+		edac_mc_reset_delay_period(*value);
+	}
+
+	return count;
+}
+
 
 /* EDAC sysfs CSROW data structures and methods
  */
@@ -704,7 +721,7 @@ MEMCTRL_ATTR(edac_mc_log_ce,
 	     S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
 
 MEMCTRL_ATTR(edac_mc_poll_msec,
-	     S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
+	     S_IRUGO | S_IWUSR, memctrl_int_show, poll_msec_int_store);
 
 /* Base Attributes of the memory ECC object */
 static struct memctrl_dev_attribute *memctrl_attr[] = {
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index a2134dfc3cc6..cbc419c8ebc1 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -52,6 +52,8 @@ extern void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
 extern void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev);
 extern void edac_device_reset_delay_period(struct edac_device_ctl_info
 					   *edac_dev, unsigned long value);
+extern void edac_mc_reset_delay_period(int value);
+
 extern void *edac_align_ptr(void *ptr, unsigned size);
 
 /*
@@ -64,6 +66,10 @@ extern int edac_sysfs_pci_setup(void);
 extern void edac_sysfs_pci_teardown(void);
 extern int edac_pci_get_check_errors(void);
 extern int edac_pci_get_poll_msec(void);
+extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
+extern void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg);
+extern void edac_pci_handle_npe(struct edac_pci_ctl_info *pci,
+				const char *msg);
 #else	/* CONFIG_PCI */
 /* pre-process these away */
 #define edac_pci_do_parity_check()
@@ -72,6 +78,8 @@ extern int edac_pci_get_poll_msec(void);
 #define edac_sysfs_pci_teardown()
 #define edac_pci_get_check_errors()
 #define edac_pci_get_poll_msec()
+#define edac_pci_handle_pe()
+#define edac_pci_handle_npe()
 #endif	/* CONFIG_PCI */
 
 #endif	/* __EDAC_MODULE_H__ */
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
index d9cd5e048cee..5dee9f50414b 100644
--- a/drivers/edac/edac_pci.c
+++ b/drivers/edac/edac_pci.c
@@ -31,20 +31,12 @@
 static DEFINE_MUTEX(edac_pci_ctls_mutex);
 static struct list_head edac_pci_list = LIST_HEAD_INIT(edac_pci_list);
 
-static inline void edac_lock_pci_list(void)
-{
-	mutex_lock(&edac_pci_ctls_mutex);
-}
-
-static inline void edac_unlock_pci_list(void)
-{
-	mutex_unlock(&edac_pci_ctls_mutex);
-}
-
 /*
- * The alloc() and free() functions for the 'edac_pci' control info
- * structure. The chip driver will allocate one of these for each
- * edac_pci it is going to control/register with the EDAC CORE.
+ * edac_pci_alloc_ctl_info
+ *
+ *	The alloc() function for the 'edac_pci' control info
+ *	structure. The chip driver will allocate one of these for each
+ *	edac_pci it is going to control/register with the EDAC CORE.
  */
 struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
 						const char *edac_pci_name)
@@ -53,47 +45,59 @@ struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
 	void *pvt;
 	unsigned int size;
 
+	debugf1("%s()\n", __func__);
+
 	pci = (struct edac_pci_ctl_info *)0;
 	pvt = edac_align_ptr(&pci[1], sz_pvt);
 	size = ((unsigned long)pvt) + sz_pvt;
 
-	if ((pci = kzalloc(size, GFP_KERNEL)) == NULL)
+	/* Alloc the needed control struct memory */
+	pci = kzalloc(size, GFP_KERNEL);
+	if (pci == NULL)
 		return NULL;
 
+	/* Now much private space */
 	pvt = sz_pvt ? ((char *)pci) + ((unsigned long)pvt) : NULL;
 
 	pci->pvt_info = pvt;
-
 	pci->op_state = OP_ALLOC;
 
 	snprintf(pci->name, strlen(edac_pci_name) + 1, "%s", edac_pci_name);
 
 	return pci;
 }
-
 EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info);
 
 /*
  * edac_pci_free_ctl_info()
- *	frees the memory allocated by edac_pci_alloc_ctl_info() function
+ *
+ *	Last action on the pci control structure.
+ *
+ *	call the remove sysfs informaton, which will unregister
+ *	this control struct's kobj. When that kobj's ref count
+ *	goes to zero, its release function will be call and then
+ *	kfree() the memory.
  */
 void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci)
 {
-	kfree(pci);
-}
+	debugf1("%s()\n", __func__);
 
+	edac_pci_remove_sysfs(pci);
+}
 EXPORT_SYMBOL_GPL(edac_pci_free_ctl_info);
 
 /*
  * find_edac_pci_by_dev()
  * 	scans the edac_pci list for a specific 'struct device *'
+ *
+ *	return NULL if not found, or return control struct pointer
  */
 static struct edac_pci_ctl_info *find_edac_pci_by_dev(struct device *dev)
 {
 	struct edac_pci_ctl_info *pci;
 	struct list_head *item;
 
-	debugf3("%s()\n", __func__);
+	debugf1("%s()\n", __func__);
 
 	list_for_each(item, &edac_pci_list) {
 		pci = list_entry(item, struct edac_pci_ctl_info, link);
@@ -118,10 +122,13 @@ static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci)
 	struct list_head *item, *insert_before;
 	struct edac_pci_ctl_info *rover;
 
+	debugf1("%s()\n", __func__);
+
 	insert_before = &edac_pci_list;
 
 	/* Determine if already on the list */
-	if (unlikely((rover = find_edac_pci_by_dev(pci->dev)) != NULL))
+	rover = find_edac_pci_by_dev(pci->dev);
+	if (unlikely(rover != NULL))
 		goto fail0;
 
 	/* Insert in ascending order by 'pci_idx', so find position */
@@ -157,6 +164,8 @@ fail1:
 
 /*
  * complete_edac_pci_list_del
+ *
+ *	RCU completion callback to indicate item is deleted
  */
 static void complete_edac_pci_list_del(struct rcu_head *head)
 {
@@ -169,6 +178,8 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
 
 /*
  * del_edac_pci_from_global_list
+ *
+ *	remove the PCI control struct from the global list
  */
 static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci)
 {
@@ -207,35 +218,52 @@ struct edac_pci_ctl_info *edac_pci_find(int idx)
 
 	return NULL;
 }
-
 EXPORT_SYMBOL_GPL(edac_pci_find);
 
 /*
  * edac_pci_workq_function()
- *	performs the operation scheduled by a workq request
+ *
+ *	periodic function that performs the operation
+ *	scheduled by a workq request, for a given PCI control struct
  */
 static void edac_pci_workq_function(struct work_struct *work_req)
 {
 	struct delayed_work *d_work = (struct delayed_work *)work_req;
 	struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work);
+	int msec;
+	unsigned long delay;
 
-	edac_lock_pci_list();
+	debugf3("%s() checking\n", __func__);
 
-	if ((pci->op_state == OP_RUNNING_POLL) &&
-	    (pci->edac_check != NULL) && (edac_pci_get_check_errors()))
-		pci->edac_check(pci);
+	mutex_lock(&edac_pci_ctls_mutex);
 
-	edac_unlock_pci_list();
+	if (pci->op_state == OP_RUNNING_POLL) {
+		/* we might be in POLL mode, but there may NOT be a poll func
+		 */
+		if ((pci->edac_check != NULL) && edac_pci_get_check_errors())
+			pci->edac_check(pci);
+
+		/* if we are on a one second period, then use round */
+		msec = edac_pci_get_poll_msec();
+		if (msec == 1000)
+			delay = round_jiffies(msecs_to_jiffies(msec));
+		else
+			delay = msecs_to_jiffies(msec);
+
+		/* Reschedule only if we are in POLL mode */
+		queue_delayed_work(edac_workqueue, &pci->work, delay);
+	}
 
-	/* Reschedule */
-	queue_delayed_work(edac_workqueue, &pci->work,
-			msecs_to_jiffies(edac_pci_get_poll_msec()));
+	mutex_unlock(&edac_pci_ctls_mutex);
 }
 
 /*
  * edac_pci_workq_setup()
  *	initialize a workq item for this edac_pci instance
  *	passing in the new delay period in msec
+ *
+ *	locking model:
+ *		called when 'edac_pci_ctls_mutex' is locked
  */
 static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci,
 				 unsigned int msec)
@@ -255,6 +283,8 @@ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci)
 {
 	int status;
 
+	debugf0("%s()\n", __func__);
+
 	status = cancel_delayed_work(&pci->work);
 	if (status == 0)
 		flush_workqueue(edac_workqueue);
@@ -262,19 +292,25 @@ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci)
 
 /*
  * edac_pci_reset_delay_period
+ *
+ *	called with a new period value for the workq period
+ *	a) stop current workq timer
+ *	b) restart workq timer with new value
  */
 void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
 				 unsigned long value)
 {
-	edac_lock_pci_list();
+	debugf0("%s()\n", __func__);
 
 	edac_pci_workq_teardown(pci);
 
+	/* need to lock for the setup */
+	mutex_lock(&edac_pci_ctls_mutex);
+
 	edac_pci_workq_setup(pci, value);
 
-	edac_unlock_pci_list();
+	mutex_unlock(&edac_pci_ctls_mutex);
 }
-
 EXPORT_SYMBOL_GPL(edac_pci_reset_delay_period);
 
 /*
@@ -294,14 +330,13 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
 	debugf0("%s()\n", __func__);
 
 	pci->pci_idx = edac_idx;
+	pci->start_time = jiffies;
 
-	edac_lock_pci_list();
+	mutex_lock(&edac_pci_ctls_mutex);
 
 	if (add_edac_pci_to_global_list(pci))
 		goto fail0;
 
-	pci->start_time = jiffies;
-
 	if (edac_pci_create_sysfs(pci)) {
 		edac_pci_printk(pci, KERN_WARNING,
 				"failed to create sysfs pci\n");
@@ -323,16 +358,16 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
 		pci->ctl_name,
 		dev_name(pci), edac_op_state_to_string(pci->op_state));
 
-	edac_unlock_pci_list();
+	mutex_unlock(&edac_pci_ctls_mutex);
 	return 0;
 
+	/* error unwind stack */
 fail1:
 	del_edac_pci_from_global_list(pci);
 fail0:
-	edac_unlock_pci_list();
+	mutex_unlock(&edac_pci_ctls_mutex);
 	return 1;
 }
-
 EXPORT_SYMBOL_GPL(edac_pci_add_device);
 
 /*
@@ -354,22 +389,25 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
 
 	debugf0("%s()\n", __func__);
 
-	edac_lock_pci_list();
+	mutex_lock(&edac_pci_ctls_mutex);
 
-	if ((pci = find_edac_pci_by_dev(dev)) == NULL) {
-		edac_unlock_pci_list();
+	/* ensure the control struct is on the global list
+	 * if not, then leave
+	 */
+	pci = find_edac_pci_by_dev(dev);
+	if (pci == NULL) {
+		mutex_unlock(&edac_pci_ctls_mutex);
 		return NULL;
 	}
 
 	pci->op_state = OP_OFFLINE;
 
-	edac_pci_workq_teardown(pci);
-
-	edac_pci_remove_sysfs(pci);
-
 	del_edac_pci_from_global_list(pci);
 
-	edac_unlock_pci_list();
+	mutex_unlock(&edac_pci_ctls_mutex);
+
+	/* stop the workq timer */
+	edac_pci_workq_teardown(pci);
 
 	edac_printk(KERN_INFO, EDAC_PCI,
 		"Removed device %d for %s %s: DEV %s\n",
@@ -377,14 +415,20 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
 
 	return pci;
 }
-
 EXPORT_SYMBOL_GPL(edac_pci_del_device);
 
+/*
+ * edac_pci_generic_check
+ *
+ *	a Generic parity check API
+ */
 void edac_pci_generic_check(struct edac_pci_ctl_info *pci)
 {
+	debugf4("%s()\n", __func__);
 	edac_pci_do_parity_check();
 }
 
+/* free running instance index counter */
 static int edac_pci_idx;
 #define EDAC_PCI_GENCTL_NAME	"EDAC PCI controller"
 
@@ -392,6 +436,17 @@ struct edac_pci_gen_data {
 	int edac_idx;
 };
 
+/*
+ * edac_pci_create_generic_ctl
+ *
+ *	A generic constructor for a PCI parity polling device
+ *	Some systems have more than one domain of PCI busses.
+ *	For systems with one domain, then this API will
+ *	provide for a generic poller.
+ *
+ *	This routine calls the edac_pci_alloc_ctl_info() for
+ *	the generic device, with default values
+ */
 struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
 						const char *mod_name)
 {
@@ -421,13 +476,18 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
 
 	return pci;
 }
-
 EXPORT_SYMBOL_GPL(edac_pci_create_generic_ctl);
 
+/*
+ * edac_pci_release_generic_ctl
+ *
+ *	The release function of a generic EDAC PCI polling device
+ */
 void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci)
 {
+	debugf0("%s() pci mod=%s\n", __func__, pci->mod_name);
+
 	edac_pci_del_device(pci->dev);
 	edac_pci_free_ctl_info(pci);
 }
-
 EXPORT_SYMBOL_GPL(edac_pci_release_generic_ctl);
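
To make the generic-poller API above concrete: a chipset driver with a single PCI domain only needs the create/release pair. A hedged sketch of such a caller — the "example" names are invented for illustration; the two edac_pci_* calls and their signatures are the ones exported in this patch:

	static struct edac_pci_ctl_info *example_pci_ctl;

	static int example_edac_probe(struct pci_dev *pdev)
	{
		/* Allocates a generic control struct, registers it, and starts
		 * the parity-check poller with default values. */
		example_pci_ctl = edac_pci_create_generic_ctl(&pdev->dev, "example_edac");
		return example_pci_ctl ? 0 : -ENODEV;
	}

	static void example_edac_remove(struct pci_dev *pdev)
	{
		/* Unregisters the device and frees the control struct. */
		edac_pci_release_generic_ctl(example_pci_ctl);
	}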
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index fac94cae2c3d..69f5dddabddf 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -13,22 +13,25 @@
 #include "edac_core.h"
 #include "edac_module.h"
 
+/* Turn off this whole feature if PCI is not configured */
 #ifdef CONFIG_PCI
 
 #define EDAC_PCI_SYMLINK	"device"
 
-static int check_pci_errors;		/* default YES check PCI parity */
-static int edac_pci_panic_on_pe;	/* default no panic on PCI Parity */
-static int edac_pci_log_pe = 1;		/* log PCI parity errors */
+/* data variables exported via sysfs */
+static int check_pci_errors;		/* default NO check PCI parity */
+static int edac_pci_panic_on_pe;	/* default NO panic on PCI Parity */
+static int edac_pci_log_pe = 1;		/* log PCI parity errors */
 static int edac_pci_log_npe = 1;	/* log PCI non-parity error errors */
+static int edac_pci_poll_msec = 1000;	/* one second workq period */
+
 static atomic_t pci_parity_count = ATOMIC_INIT(0);
 static atomic_t pci_nonparity_count = ATOMIC_INIT(0);
-static int edac_pci_poll_msec = 1000;
 
-static struct kobject edac_pci_kobj;	/* /sys/devices/system/edac/pci */
-static struct completion edac_pci_kobj_complete;
+static struct kobject edac_pci_top_main_kobj;
 static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0);
 
+/* getter functions for the data variables */
 int edac_pci_get_check_errors(void)
 {
 	return check_pci_errors;
@@ -74,17 +77,22 @@ static void edac_pci_instance_release(struct kobject *kobj)
74{ 77{
75 struct edac_pci_ctl_info *pci; 78 struct edac_pci_ctl_info *pci;
76 79
77 debugf1("%s()\n", __func__); 80 debugf0("%s()\n", __func__);
78 81
82 /* Form pointer to containing struct, the pci control struct */
79 pci = to_instance(kobj); 83 pci = to_instance(kobj);
80 complete(&pci->kobj_complete); 84
85 /* decrement reference count on top main kobj */
86 kobject_put(&edac_pci_top_main_kobj);
87
88 kfree(pci); /* Free the control struct */
81} 89}
82 90
83/* instance specific attribute structure */ 91/* instance specific attribute structure */
84struct instance_attribute { 92struct instance_attribute {
85 struct attribute attr; 93 struct attribute attr;
86 ssize_t(*show) (struct edac_pci_ctl_info *, char *); 94 ssize_t(*show) (struct edac_pci_ctl_info *, char *);
87 ssize_t(*store) (struct edac_pci_ctl_info *, const char *, size_t); 95 ssize_t(*store) (struct edac_pci_ctl_info *, const char *, size_t);
88}; 96};
89 97
90/* Function to 'show' fields from the edac_pci 'instance' structure */ 98/* Function to 'show' fields from the edac_pci 'instance' structure */
@@ -112,6 +120,7 @@ static ssize_t edac_pci_instance_store(struct kobject *kobj,
112 return -EIO; 120 return -EIO;
113} 121}
114 122
 123/* sysfs_ops table */
115static struct sysfs_ops pci_instance_ops = { 124static struct sysfs_ops pci_instance_ops = {
116 .show = edac_pci_instance_show, 125 .show = edac_pci_instance_show,
117 .store = edac_pci_instance_store 126 .store = edac_pci_instance_store
@@ -134,48 +143,82 @@ static struct instance_attribute *pci_instance_attr[] = {
134 NULL 143 NULL
135}; 144};
136 145
137/* the ktype for pci instance */ 146/* the ktype for a pci instance */
138static struct kobj_type ktype_pci_instance = { 147static struct kobj_type ktype_pci_instance = {
139 .release = edac_pci_instance_release, 148 .release = edac_pci_instance_release,
140 .sysfs_ops = &pci_instance_ops, 149 .sysfs_ops = &pci_instance_ops,
141 .default_attrs = (struct attribute **)pci_instance_attr, 150 .default_attrs = (struct attribute **)pci_instance_attr,
142}; 151};
143 152
153/*
154 * edac_pci_create_instance_kobj
155 *
156 * construct one EDAC PCI instance's kobject for use
157 */
144static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx) 158static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
145{ 159{
160 struct kobject *main_kobj;
146 int err; 161 int err;
147 162
148 pci->kobj.parent = &edac_pci_kobj; 163 debugf0("%s()\n", __func__);
164
165 /* Set the parent and the instance's ktype */
166 pci->kobj.parent = &edac_pci_top_main_kobj;
149 pci->kobj.ktype = &ktype_pci_instance; 167 pci->kobj.ktype = &ktype_pci_instance;
150 168
151 err = kobject_set_name(&pci->kobj, "pci%d", idx); 169 err = kobject_set_name(&pci->kobj, "pci%d", idx);
152 if (err) 170 if (err)
153 return err; 171 return err;
154 172
173 /* First bump the ref count on the top main kobj, which will
 174 * track the number of PCI instances we have; the nested
 175 * references keep the module loaded while any instance exists
176 */
177 main_kobj = kobject_get(&edac_pci_top_main_kobj);
178 if (!main_kobj) {
179 err = -ENODEV;
180 goto error_out;
181 }
182
183 /* And now register this new kobject under the main kobj */
155 err = kobject_register(&pci->kobj); 184 err = kobject_register(&pci->kobj);
156 if (err != 0) { 185 if (err != 0) {
157 debugf2("%s() failed to register instance pci%d\n", 186 debugf2("%s() failed to register instance pci%d\n",
158 __func__, idx); 187 __func__, idx);
159 return err; 188 kobject_put(&edac_pci_top_main_kobj);
189 goto error_out;
160 } 190 }
161 191
162 debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx); 192 debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx);
163 193
164 return 0; 194 return 0;
195
 196 /* Error unwind stack */
197error_out:
198 return err;
165} 199}
166 200
167static void 201/*
168edac_pci_delete_instance_kobj(struct edac_pci_ctl_info *pci, int idx) 202 * edac_pci_unregister_sysfs_instance_kobj
203 *
204 * unregister the kobj for the EDAC PCI instance
205 */
206void edac_pci_unregister_sysfs_instance_kobj(struct edac_pci_ctl_info *pci)
169{ 207{
170 init_completion(&pci->kobj_complete); 208 debugf0("%s()\n", __func__);
209
210 /* Unregister the instance kobject and allow its release
 211 * function to release the main reference count and then
212 * kfree the memory
213 */
171 kobject_unregister(&pci->kobj); 214 kobject_unregister(&pci->kobj);
172 wait_for_completion(&pci->kobj_complete);
173} 215}
174 216
175/***************************** EDAC PCI sysfs root **********************/ 217/***************************** EDAC PCI sysfs root **********************/
176#define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj) 218#define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj)
177#define to_edacpci_attr(a) container_of(a, struct edac_pci_attr, attr) 219#define to_edacpci_attr(a) container_of(a, struct edac_pci_attr, attr)
178 220
221/* simple show/store functions for attributes */
179static ssize_t edac_pci_int_show(void *ptr, char *buffer) 222static ssize_t edac_pci_int_show(void *ptr, char *buffer)
180{ 223{
181 int *value = ptr; 224 int *value = ptr;
@@ -267,118 +310,189 @@ static struct edac_pci_dev_attribute *edac_pci_attr[] = {
267 NULL, 310 NULL,
268}; 311};
269 312
270/* No memory to release */ 313/*
271static void edac_pci_release(struct kobject *kobj) 314 * edac_pci_release_main_kobj
315 *
316 * This release function is called when the reference count to the
317 * passed kobj goes to zero.
318 *
319 * This kobj is the 'main' kobject that EDAC PCI instances
 320 * link to, and which thus provides proper nesting of reference counts
321 */
322static void edac_pci_release_main_kobj(struct kobject *kobj)
272{ 323{
273 struct edac_pci_ctl_info *pci;
274 324
275 pci = to_edacpci(kobj); 325 debugf0("%s() here to module_put(THIS_MODULE)\n", __func__);
276 326
277 debugf1("%s()\n", __func__); 327 /* last reference to top EDAC PCI kobject has been removed,
278 complete(&pci->kobj_complete); 328 * NOW release our ref count on the core module
329 */
330 module_put(THIS_MODULE);
279} 331}
280 332
281static struct kobj_type ktype_edac_pci = { 333/* ktype struct for the EDAC PCI main kobj */
282 .release = edac_pci_release, 334static struct kobj_type ktype_edac_pci_main_kobj = {
335 .release = edac_pci_release_main_kobj,
283 .sysfs_ops = &edac_pci_sysfs_ops, 336 .sysfs_ops = &edac_pci_sysfs_ops,
284 .default_attrs = (struct attribute **)edac_pci_attr, 337 .default_attrs = (struct attribute **)edac_pci_attr,
285}; 338};
286 339
287/** 340/**
288 * edac_sysfs_pci_setup() 341 * edac_pci_main_kobj_setup()
289 * 342 *
290 * setup the sysfs for EDAC PCI attributes 343 * setup the sysfs for EDAC PCI attributes
291 * assumes edac_class has already been initialized 344 * assumes edac_class has already been initialized
292 */ 345 */
293int edac_pci_register_main_kobj(void) 346int edac_pci_main_kobj_setup(void)
294{ 347{
295 int err; 348 int err;
296 struct sysdev_class *edac_class; 349 struct sysdev_class *edac_class;
297 350
298 debugf1("%s()\n", __func__); 351 debugf0("%s()\n", __func__);
352
353 /* check and count if we have already created the main kobject */
354 if (atomic_inc_return(&edac_pci_sysfs_refcount) != 1)
355 return 0;
299 356
357 /* First time, so create the main kobject and its
 358 * controls and attributes
359 */
300 edac_class = edac_get_edac_class(); 360 edac_class = edac_get_edac_class();
301 if (edac_class == NULL) { 361 if (edac_class == NULL) {
302 debugf1("%s() no edac_class\n", __func__); 362 debugf1("%s() no edac_class\n", __func__);
303 return -ENODEV; 363 err = -ENODEV;
364 goto decrement_count_fail;
304 } 365 }
305 366
306 edac_pci_kobj.ktype = &ktype_edac_pci; 367 /* Need the kobject hook ups, and name setting */
368 edac_pci_top_main_kobj.ktype = &ktype_edac_pci_main_kobj;
369 edac_pci_top_main_kobj.parent = &edac_class->kset.kobj;
307 370
308 edac_pci_kobj.parent = &edac_class->kset.kobj; 371 err = kobject_set_name(&edac_pci_top_main_kobj, "pci");
309
310 err = kobject_set_name(&edac_pci_kobj, "pci");
311 if (err) 372 if (err)
312 return err; 373 goto decrement_count_fail;
374
375 /* Bump the reference count on this module to ensure the
 376 * module isn't unloaded until we deconstruct the top
377 * level main kobj for EDAC PCI
378 */
379 if (!try_module_get(THIS_MODULE)) {
380 debugf1("%s() try_module_get() failed\n", __func__);
381 err = -ENODEV;
382 goto decrement_count_fail;
383 }
313 384
 314 /* Instantiate the pci object */ 385 /* Instantiate the pci object */
315 /* FIXME: maybe new sysdev_create_subdir() */ 386 /* FIXME: maybe new sysdev_create_subdir() */
316 err = kobject_register(&edac_pci_kobj); 387 err = kobject_register(&edac_pci_top_main_kobj);
317
318 if (err) { 388 if (err) {
319 debugf1("Failed to register '.../edac/pci'\n"); 389 debugf1("Failed to register '.../edac/pci'\n");
320 return err; 390 goto kobject_register_fail;
321 } 391 }
322 392
 393 /* From this point on, releasing the top level kobject
 394 * for EDAC PCI requires edac_pci_main_kobj_teardown(),
 395 * so that resources are cleaned up properly
396 */
323 debugf1("Registered '.../edac/pci' kobject\n"); 397 debugf1("Registered '.../edac/pci' kobject\n");
324 398
325 return 0; 399 return 0;
400
 401 /* Error unwind stack */
402kobject_register_fail:
403 module_put(THIS_MODULE);
404
405decrement_count_fail:
 406 /* if we exit via this error path, there is nothing to tear down */
407 atomic_dec(&edac_pci_sysfs_refcount);
408
409 return err;
326} 410}
327 411
328/* 412/*
329 * edac_pci_unregister_main_kobj() 413 * edac_pci_main_kobj_teardown()
330 * 414 *
 331 * 415 * if no longer needed, remove the top level EDAC PCI
416 * kobject with its controls and attributes
332 */ 417 */
333void edac_pci_unregister_main_kobj(void) 418static void edac_pci_main_kobj_teardown(void)
334{ 419{
335 debugf0("%s()\n", __func__); 420 debugf0("%s()\n", __func__);
336 init_completion(&edac_pci_kobj_complete); 421
337 kobject_unregister(&edac_pci_kobj); 422 /* Decrement the count and only if no more controller instances
338 wait_for_completion(&edac_pci_kobj_complete); 423 * are connected perform the unregisteration of the top level
424 * main kobj
425 */
426 if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
427 debugf0("%s() called kobject_unregister on main kobj\n",
428 __func__);
429 kobject_unregister(&edac_pci_top_main_kobj);
430 }
339} 431}
340 432
433/*
434 *
435 * edac_pci_create_sysfs
436 *
437 * Create the controls/attributes for the specified EDAC PCI device
438 */
341int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci) 439int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci)
342{ 440{
343 int err; 441 int err;
344 struct kobject *edac_kobj = &pci->kobj; 442 struct kobject *edac_kobj = &pci->kobj;
345 443
346 if (atomic_inc_return(&edac_pci_sysfs_refcount) == 1) { 444 debugf0("%s() idx=%d\n", __func__, pci->pci_idx);
347 err = edac_pci_register_main_kobj();
348 if (err) {
349 atomic_dec(&edac_pci_sysfs_refcount);
350 return err;
351 }
352 }
353 445
354 err = edac_pci_create_instance_kobj(pci, pci->pci_idx); 446 /* create the top main EDAC PCI kobject, IF needed */
355 if (err) { 447 err = edac_pci_main_kobj_setup();
356 if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) 448 if (err)
357 edac_pci_unregister_main_kobj(); 449 return err;
358 }
359 450
360 debugf0("%s() idx=%d\n", __func__, pci->pci_idx); 451 /* Create this instance's kobject under the MAIN kobject */
452 err = edac_pci_create_instance_kobj(pci, pci->pci_idx);
453 if (err)
454 goto unregister_cleanup;
361 455
362 err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK); 456 err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK);
363 if (err) { 457 if (err) {
364 debugf0("%s() sysfs_create_link() returned err= %d\n", 458 debugf0("%s() sysfs_create_link() returned err= %d\n",
365 __func__, err); 459 __func__, err);
366 return err; 460 goto symlink_fail;
367 } 461 }
368 462
369 return 0; 463 return 0;
464
465 /* Error unwind stack */
466symlink_fail:
467 edac_pci_unregister_sysfs_instance_kobj(pci);
468
469unregister_cleanup:
470 edac_pci_main_kobj_teardown();
471
472 return err;
370} 473}
371 474
475/*
476 * edac_pci_remove_sysfs
477 *
478 * remove the controls and attributes for this EDAC PCI device
479 */
372void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci) 480void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci)
373{ 481{
374 debugf0("%s()\n", __func__); 482 debugf0("%s() index=%d\n", __func__, pci->pci_idx);
375
376 edac_pci_delete_instance_kobj(pci, pci->pci_idx);
377 483
484 /* Remove the symlink */
378 sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK); 485 sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK);
379 486
380 if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) 487 /* remove this PCI instance's sysfs entries */
381 edac_pci_unregister_main_kobj(); 488 edac_pci_unregister_sysfs_instance_kobj(pci);
489
490 /* Call the main unregister function, which will determine
491 * if this 'pci' is the last instance.
492 * If it is, the main kobject will be unregistered as a result
493 */
494 debugf0("%s() calling edac_pci_main_kobj_teardown()\n", __func__);
495 edac_pci_main_kobj_teardown();
382} 496}
383 497
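[Editor's note: taken together, edac_pci_create_sysfs() and edac_pci_remove_sysfs()
implement a "first caller builds, last caller tears down" discipline around
edac_pci_sysfs_refcount. A condensed sketch of just that pattern follows; the
names are shortened and build_shared_object()/destroy_shared_object() are
hypothetical stand-ins for the kobject work.]

	static atomic_t refcount = ATOMIC_INIT(0);

	/* Hypothetical placeholders for the real kobject work */
	static int build_shared_object(void) { return 0; }
	static void destroy_shared_object(void) { }

	static int setup_once(void)
	{
		/* Only the first caller builds the shared object */
		if (atomic_inc_return(&refcount) != 1)
			return 0;
		if (build_shared_object() != 0) {
			/* Failed: undo our count so a later caller retries */
			atomic_dec(&refcount);
			return -ENODEV;
		}
		return 0;
	}

	static void teardown_last(void)
	{
		/* Only the last caller destroys the shared object */
		if (atomic_dec_return(&refcount) == 0)
			destroy_shared_object();
	}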
384/************************ PCI error handling *************************/ 498/************************ PCI error handling *************************/
@@ -414,13 +528,14 @@ static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
414 return status; 528 return status;
415} 529}
416 530
417typedef void (*pci_parity_check_fn_t) (struct pci_dev * dev);
418 531
419/* Clear any PCI parity errors logged by this device. */ 532/* Clear any PCI parity errors logged by this device. */
420static void edac_pci_dev_parity_clear(struct pci_dev *dev) 533static void edac_pci_dev_parity_clear(struct pci_dev *dev)
421{ 534{
422 u8 header_type; 535 u8 header_type;
423 536
537 debugf0("%s()\n", __func__);
538
424 get_pci_parity_status(dev, 0); 539 get_pci_parity_status(dev, 0);
425 540
426 /* read the device TYPE, looking for bridges */ 541 /* read the device TYPE, looking for bridges */
@@ -433,17 +548,28 @@ static void edac_pci_dev_parity_clear(struct pci_dev *dev)
433/* 548/*
434 * PCI Parity polling 549 * PCI Parity polling
435 * 550 *
 551 * Function to retrieve the current parity status
552 * and decode it
553 *
436 */ 554 */
437static void edac_pci_dev_parity_test(struct pci_dev *dev) 555static void edac_pci_dev_parity_test(struct pci_dev *dev)
438{ 556{
557 unsigned long flags;
439 u16 status; 558 u16 status;
440 u8 header_type; 559 u8 header_type;
441 560
442 /* read the STATUS register on this device 561 /* stop any interrupts until we can acquire the status */
443 */ 562 local_irq_save(flags);
563
564 /* read the STATUS register on this device */
444 status = get_pci_parity_status(dev, 0); 565 status = get_pci_parity_status(dev, 0);
445 566
446 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id); 567 /* read the device TYPE, looking for bridges */
568 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
569
570 local_irq_restore(flags);
571
572 debugf4("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
447 573
448 /* check the status reg for errors */ 574 /* check the status reg for errors */
449 if (status) { 575 if (status) {
@@ -471,16 +597,14 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev)
471 } 597 }
472 } 598 }
473 599
474 /* read the device TYPE, looking for bridges */
475 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
476 600
477 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id); 601 debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id);
478 602
479 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { 603 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
480 /* On bridges, need to examine secondary status register */ 604 /* On bridges, need to examine secondary status register */
481 status = get_pci_parity_status(dev, 1); 605 status = get_pci_parity_status(dev, 1);
482 606
483 debugf2("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id); 607 debugf4("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id);
484 608
485 /* check the secondary status reg for errors */ 609 /* check the secondary status reg for errors */
486 if (status) { 610 if (status) {
@@ -510,9 +634,12 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev)
510 } 634 }
511} 635}
512 636
637/* reduce some complexity in definition of the iterator */
638typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
639
513/* 640/*
514 * pci_dev parity list iterator 641 * pci_dev parity list iterator
515 * Scan the PCI device list for one iteration, looking for SERRORs 642 * Scan the PCI device list for one pass, looking for SERRORs
516 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices 643 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
517 */ 644 */
518static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) 645static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
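[Editor's note: the iterator's body falls outside the hunks shown here; as a later
comment in this patch notes, it walks the global device list with pci_get_device(),
which can sleep. A sketch of what such an iterator looks like, illustrative only
rather than the exact file contents:]

	static void parity_iterator_sketch(pci_parity_check_fn_t fn)
	{
		struct pci_dev *dev = NULL;

		/* pci_get_device() takes a reference on the device it
		 * returns and drops the reference on the 'from' argument,
		 * so the loop needs no explicit pci_dev_put(). */
		while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)))
			fn(dev);
	}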
@@ -535,22 +662,22 @@ static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
535 */ 662 */
536void edac_pci_do_parity_check(void) 663void edac_pci_do_parity_check(void)
537{ 664{
538 unsigned long flags;
539 int before_count; 665 int before_count;
540 666
541 debugf3("%s()\n", __func__); 667 debugf3("%s()\n", __func__);
542 668
669 /* if policy has PCI check off, leave now */
543 if (!check_pci_errors) 670 if (!check_pci_errors)
544 return; 671 return;
545 672
546 before_count = atomic_read(&pci_parity_count); 673 before_count = atomic_read(&pci_parity_count);
547 674
548 /* scan all PCI devices looking for a Parity Error on devices and 675 /* scan all PCI devices looking for a Parity Error on devices and
549 * bridges 676 * bridges.
677 * The iterator calls pci_get_device() which might sleep, thus
678 * we cannot disable interrupts in this scan.
550 */ 679 */
551 local_irq_save(flags);
552 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); 680 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
553 local_irq_restore(flags);
554 681
555 /* Only if operator has selected panic on PCI Error */ 682 /* Only if operator has selected panic on PCI Error */
556 if (edac_pci_get_panic_on_pe()) { 683 if (edac_pci_get_panic_on_pe()) {
@@ -560,6 +687,12 @@ void edac_pci_do_parity_check(void)
560 } 687 }
561} 688}
562 689
690/*
691 * edac_pci_clear_parity_errors
692 *
693 * function to perform an iteration over the PCI devices
 694 * and clear their current status
695 */
563void edac_pci_clear_parity_errors(void) 696void edac_pci_clear_parity_errors(void)
564{ 697{
565 /* Clear any PCI bus parity errors that devices initially have logged 698 /* Clear any PCI bus parity errors that devices initially have logged
@@ -567,6 +700,12 @@ void edac_pci_clear_parity_errors(void)
567 */ 700 */
568 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); 701 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
569} 702}
703
704/*
705 * edac_pci_handle_pe
706 *
707 * Called to handle a PARITY ERROR event
708 */
570void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg) 709void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg)
571{ 710{
572 711
@@ -584,9 +723,14 @@ void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg)
584 */ 723 */
585 edac_pci_do_parity_check(); 724 edac_pci_do_parity_check();
586} 725}
587
588EXPORT_SYMBOL_GPL(edac_pci_handle_pe); 726EXPORT_SYMBOL_GPL(edac_pci_handle_pe);
589 727
728
729/*
730 * edac_pci_handle_npe
731 *
732 * Called to handle a NON-PARITY ERROR event
733 */
590void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg) 734void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg)
591{ 735{
592 736
@@ -604,7 +748,6 @@ void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg)
604 */ 748 */
605 edac_pci_do_parity_check(); 749 edac_pci_do_parity_check();
606} 750}
607
608EXPORT_SYMBOL_GPL(edac_pci_handle_npe); 751EXPORT_SYMBOL_GPL(edac_pci_handle_npe);
609 752
610/* 753/*
diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c
index 0ecfdc432f87..e895f9f887ab 100644
--- a/drivers/edac/i3000_edac.c
+++ b/drivers/edac/i3000_edac.c
@@ -275,7 +275,7 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx)
275 unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2]; 275 unsigned char *c0dra = dra, *c1dra = &dra[I3000_RANKS_PER_CHANNEL / 2];
276 unsigned char *c0drb = drb, *c1drb = &drb[I3000_RANKS_PER_CHANNEL]; 276 unsigned char *c0drb = drb, *c1drb = &drb[I3000_RANKS_PER_CHANNEL];
277 unsigned long mchbar; 277 unsigned long mchbar;
278 void *window; 278 void __iomem *window;
279 279
280 debugf0("MC: %s()\n", __func__); 280 debugf0("MC: %s()\n", __func__);
281 281
diff --git a/drivers/i2c/chips/ds1682.c b/drivers/i2c/chips/ds1682.c
index 5879f0f25495..9e94542c18a2 100644
--- a/drivers/i2c/chips/ds1682.c
+++ b/drivers/i2c/chips/ds1682.c
@@ -75,7 +75,8 @@ static ssize_t ds1682_show(struct device *dev, struct device_attribute *attr,
75 /* Special case: the 32 bit regs are time values with 1/4s 75 /* Special case: the 32 bit regs are time values with 1/4s
76 * resolution, scale them up to milliseconds */ 76 * resolution, scale them up to milliseconds */
77 if (sattr->nr == 4) 77 if (sattr->nr == 4)
78 return sprintf(buf, "%llu\n", ((u64) le32_to_cpu(val)) * 250); 78 return sprintf(buf, "%llu\n",
79 ((unsigned long long)le32_to_cpu(val)) * 250);
79 80
80 /* Format the output string and return # of bytes */ 81 /* Format the output string and return # of bytes */
81 return sprintf(buf, "%li\n", (long)le32_to_cpu(val)); 82 return sprintf(buf, "%li\n", (long)le32_to_cpu(val));
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index f668d235e6be..bf19ddfa6cda 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -551,8 +551,8 @@ static int setup_mmio_scc (struct pci_dev *dev, const char *name)
551 unsigned long dma_base = pci_resource_start(dev, 1); 551 unsigned long dma_base = pci_resource_start(dev, 1);
552 unsigned long ctl_size = pci_resource_len(dev, 0); 552 unsigned long ctl_size = pci_resource_len(dev, 0);
553 unsigned long dma_size = pci_resource_len(dev, 1); 553 unsigned long dma_size = pci_resource_len(dev, 1);
554 void *ctl_addr; 554 void __iomem *ctl_addr;
555 void *dma_addr; 555 void __iomem *dma_addr;
556 int i; 556 int i;
557 557
558 for (i = 0; i < MAX_HWIFS; i++) { 558 for (i = 0; i < MAX_HWIFS; i++) {
diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c
index 336e5ff4cfcf..cadf0479cce5 100644
--- a/drivers/ieee1394/raw1394.c
+++ b/drivers/ieee1394/raw1394.c
@@ -2677,7 +2677,7 @@ static long raw1394_iso_xmit_recv_packets32(struct file *file, unsigned int cmd,
2677 struct raw1394_iso_packets32 __user *arg) 2677 struct raw1394_iso_packets32 __user *arg)
2678{ 2678{
2679 compat_uptr_t infos32; 2679 compat_uptr_t infos32;
2680 void *infos; 2680 void __user *infos;
2681 long err = -EFAULT; 2681 long err = -EFAULT;
2682 struct raw1394_iso_packets __user *dst = compat_alloc_user_space(sizeof(struct raw1394_iso_packets)); 2682 struct raw1394_iso_packets __user *dst = compat_alloc_user_space(sizeof(struct raw1394_iso_packets));
2683 2683
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile
index 55382c7d799c..e5047471c334 100644
--- a/drivers/lguest/Makefile
+++ b/drivers/lguest/Makefile
@@ -5,3 +5,15 @@ obj-$(CONFIG_LGUEST_GUEST) += lguest.o lguest_asm.o lguest_bus.o
5obj-$(CONFIG_LGUEST) += lg.o 5obj-$(CONFIG_LGUEST) += lg.o
6lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \ 6lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \
7 segments.o io.o lguest_user.o switcher.o 7 segments.o io.o lguest_user.o switcher.o
8
9Preparation Preparation!: PREFIX=P
10Guest: PREFIX=G
11Drivers: PREFIX=D
12Launcher: PREFIX=L
13Host: PREFIX=H
14Switcher: PREFIX=S
15Mastery: PREFIX=M
16Beer:
17 @for f in Preparation Guest Drivers Launcher Host Switcher Mastery; do echo "{==- $$f -==}"; make -s $$f; done; echo "{==-==}"
18Preparation Preparation! Guest Drivers Launcher Host Switcher Mastery:
19 @sh ../../Documentation/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'`
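[Editor's note: these targets feed Documentation/lguest/extract one PREFIX letter
per part of the commentary (P, G, D, L, H, S, M, matching the seven parts listed
in the README below). Assuming a tree with these files in place, something like

	make -C drivers/lguest Guest

should print part II, and the Beer target loops over all seven parts in order.]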
diff --git a/drivers/lguest/README b/drivers/lguest/README
new file mode 100644
index 000000000000..b7db39a64c66
--- /dev/null
+++ b/drivers/lguest/README
@@ -0,0 +1,47 @@
1Welcome, friend reader, to lguest.
2
3Lguest is an adventure, with you, the reader, as Hero. I can't think of many
45000-line projects which offer both such capability and glimpses of future
5potential; it is an exciting time to be delving into the source!
6
7But be warned; this is an arduous journey of several hours or more! And as we
8know, all true Heroes are driven by a Noble Goal. Thus I offer a Beer (or
9equivalent) to anyone I meet who has completed this documentation.
10
11So get comfortable and keep your wits about you (both quick and humorous).
12Along your way to the Noble Goal, you will also gain masterly insight into
13lguest, and hypervisors and x86 virtualization in general.
14
15Our Quest is in seven parts: (best read with C highlighting turned on)
16
17I) Preparation
18 - In which our potential hero is flown quickly over the landscape for a
19 taste of its scope. Suitable for the armchair coders and other such
20 persons of faint constitution.
21
22II) Guest
23 - Where we encounter the first tantalising wisps of code, and come to
24 understand the details of the life of a Guest kernel.
25
26III) Drivers
 27 - Whereby the Guest finds its voice and becomes useful, and our
28 understanding of the Guest is completed.
29
30IV) Launcher
31 - Where we trace back to the creation of the Guest, and thus begin our
32 understanding of the Host.
33
34V) Host
35 - Where we master the Host code, through a long and tortuous journey.
36 Indeed, it is here that our hero is tested in the Bit of Despair.
37
38VI) Switcher
39 - Where our understanding of the intertwined nature of Guests and Hosts
40 is completed.
41
42VII) Mastery
43 - Where our fully fledged hero grapples with the Great Question:
44 "What next?"
45
46make Preparation!
47Rusty Russell.
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index ce909ec57499..0a46e8837d9a 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -1,5 +1,8 @@
1/* World's simplest hypervisor, to test paravirt_ops and show 1/*P:400 This contains run_guest() which actually calls into the Host<->Guest
2 * unbelievers that virtualization is the future. Plus, it's fun! */ 2 * Switcher and analyzes the return, such as determining if the Guest wants the
3 * Host to do something. This file also contains useful helper routines, and a
4 * couple of non-obvious setup and teardown pieces which were implemented after
5 * days of debugging pain. :*/
3#include <linux/module.h> 6#include <linux/module.h>
4#include <linux/stringify.h> 7#include <linux/stringify.h>
5#include <linux/stddef.h> 8#include <linux/stddef.h>
@@ -61,11 +64,33 @@ static struct lguest_pages *lguest_pages(unsigned int cpu)
61 (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]); 64 (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
62} 65}
63 66
67/*H:010 We need to set up the Switcher at a high virtual address. Remember the
68 * Switcher is a few hundred bytes of assembler code which actually changes the
69 * CPU to run the Guest, and then changes back to the Host when a trap or
70 * interrupt happens.
71 *
72 * The Switcher code must be at the same virtual address in the Guest as the
73 * Host since it will be running as the switchover occurs.
74 *
75 * Trying to map memory at a particular address is an unusual thing to do, so
76 * it's not a simple one-liner. We also set up the per-cpu parts of the
77 * Switcher here.
78 */
64static __init int map_switcher(void) 79static __init int map_switcher(void)
65{ 80{
66 int i, err; 81 int i, err;
67 struct page **pagep; 82 struct page **pagep;
68 83
84 /*
85 * Map the Switcher in to high memory.
86 *
87 * It turns out that if we choose the address 0xFFC00000 (4MB under the
88 * top virtual address), it makes setting up the page tables really
89 * easy.
90 */
91
92 /* We allocate an array of "struct page"s. map_vm_area() wants the
93 * pages in this form, rather than just an array of pointers. */
69 switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, 94 switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES,
70 GFP_KERNEL); 95 GFP_KERNEL);
71 if (!switcher_page) { 96 if (!switcher_page) {
@@ -73,6 +98,8 @@ static __init int map_switcher(void)
73 goto out; 98 goto out;
74 } 99 }
75 100
101 /* Now we actually allocate the pages. The Guest will see these pages,
102 * so we make sure they're zeroed. */
76 for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { 103 for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
77 unsigned long addr = get_zeroed_page(GFP_KERNEL); 104 unsigned long addr = get_zeroed_page(GFP_KERNEL);
78 if (!addr) { 105 if (!addr) {
@@ -82,6 +109,9 @@ static __init int map_switcher(void)
82 switcher_page[i] = virt_to_page(addr); 109 switcher_page[i] = virt_to_page(addr);
83 } 110 }
84 111
112 /* Now we reserve the "virtual memory area" we want: 0xFFC00000
113 * (SWITCHER_ADDR). We might not get it in theory, but in practice
114 * it's worked so far. */
85 switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, 115 switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,
86 VM_ALLOC, SWITCHER_ADDR, VMALLOC_END); 116 VM_ALLOC, SWITCHER_ADDR, VMALLOC_END);
87 if (!switcher_vma) { 117 if (!switcher_vma) {
@@ -90,49 +120,105 @@ static __init int map_switcher(void)
90 goto free_pages; 120 goto free_pages;
91 } 121 }
92 122
123 /* This code actually sets up the pages we've allocated to appear at
124 * SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the
125 * kind of pages we're mapping (kernel pages), and a pointer to our
126 * array of struct pages. It increments that pointer, but we don't
127 * care. */
93 pagep = switcher_page; 128 pagep = switcher_page;
94 err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep); 129 err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep);
95 if (err) { 130 if (err) {
96 printk("lguest: map_vm_area failed: %i\n", err); 131 printk("lguest: map_vm_area failed: %i\n", err);
97 goto free_vma; 132 goto free_vma;
98 } 133 }
134
135 /* Now the switcher is mapped at the right address, we can't fail!
136 * Copy in the compiled-in Switcher code (from switcher.S). */
99 memcpy(switcher_vma->addr, start_switcher_text, 137 memcpy(switcher_vma->addr, start_switcher_text,
100 end_switcher_text - start_switcher_text); 138 end_switcher_text - start_switcher_text);
101 139
102 /* Fix up IDT entries to point into copied text. */ 140 /* Most of the switcher.S doesn't care that it's been moved; on Intel,
141 * jumps are relative, and it doesn't access any references to external
142 * code or data.
143 *
144 * The only exception is the interrupt handlers in switcher.S: their
145 * addresses are placed in a table (default_idt_entries), so we need to
146 * update the table with the new addresses. switcher_offset() is a
147 * convenience function which returns the distance between the builtin
148 * switcher code and the high-mapped copy we just made. */
103 for (i = 0; i < IDT_ENTRIES; i++) 149 for (i = 0; i < IDT_ENTRIES; i++)
104 default_idt_entries[i] += switcher_offset(); 150 default_idt_entries[i] += switcher_offset();
105 151
152 /*
153 * Set up the Switcher's per-cpu areas.
154 *
155 * Each CPU gets two pages of its own within the high-mapped region
156 * (aka. "struct lguest_pages"). Much of this can be initialized now,
157 * but some depends on what Guest we are running (which is set up in
158 * copy_in_guest_info()).
159 */
106 for_each_possible_cpu(i) { 160 for_each_possible_cpu(i) {
161 /* lguest_pages() returns this CPU's two pages. */
107 struct lguest_pages *pages = lguest_pages(i); 162 struct lguest_pages *pages = lguest_pages(i);
163 /* This is a convenience pointer to make the code fit one
164 * statement to a line. */
108 struct lguest_ro_state *state = &pages->state; 165 struct lguest_ro_state *state = &pages->state;
109 166
110 /* These fields are static: rest done in copy_in_guest_info */ 167 /* The Global Descriptor Table: the Host has a different one
168 * for each CPU. We keep a descriptor for the GDT which says
169 * where it is and how big it is (the size is actually the last
170 * byte, not the size, hence the "-1"). */
111 state->host_gdt_desc.size = GDT_SIZE-1; 171 state->host_gdt_desc.size = GDT_SIZE-1;
112 state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); 172 state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
173
174 /* All CPUs on the Host use the same Interrupt Descriptor
175 * Table, so we just use store_idt(), which gets this CPU's IDT
176 * descriptor. */
113 store_idt(&state->host_idt_desc); 177 store_idt(&state->host_idt_desc);
178
179 /* The descriptors for the Guest's GDT and IDT can be filled
180 * out now, too. We copy the GDT & IDT into ->guest_gdt and
181 * ->guest_idt before actually running the Guest. */
114 state->guest_idt_desc.size = sizeof(state->guest_idt)-1; 182 state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
115 state->guest_idt_desc.address = (long)&state->guest_idt; 183 state->guest_idt_desc.address = (long)&state->guest_idt;
116 state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; 184 state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
117 state->guest_gdt_desc.address = (long)&state->guest_gdt; 185 state->guest_gdt_desc.address = (long)&state->guest_gdt;
186
187 /* We know where we want the stack to be when the Guest enters
 188 * the switcher: in pages->regs. The stack grows downwards, so
189 * we start it at the end of that structure. */
118 state->guest_tss.esp0 = (long)(&pages->regs + 1); 190 state->guest_tss.esp0 = (long)(&pages->regs + 1);
191 /* And this is the GDT entry to use for the stack: we keep a
192 * couple of special LGUEST entries. */
119 state->guest_tss.ss0 = LGUEST_DS; 193 state->guest_tss.ss0 = LGUEST_DS;
120 /* No I/O for you! */ 194
 195 /* x86 can have a fine-grained bitmap which indicates what I/O
196 * ports the process can use. We set it to the end of our
197 * structure, meaning "none". */
121 state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); 198 state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);
199
200 /* Some GDT entries are the same across all Guests, so we can
201 * set them up now. */
122 setup_default_gdt_entries(state); 202 setup_default_gdt_entries(state);
203 /* Most IDT entries are the same for all Guests, too.*/
123 setup_default_idt_entries(state, default_idt_entries); 204 setup_default_idt_entries(state, default_idt_entries);
124 205
125 /* Setup LGUEST segments on all cpus */ 206 /* The Host needs to be able to use the LGUEST segments on this
207 * CPU, too, so put them in the Host GDT. */
126 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; 208 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
127 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; 209 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
128 } 210 }
129 211
130 /* Initialize entry point into switcher. */ 212 /* In the Switcher, we want the %cs segment register to use the
213 * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
214 * it will be undisturbed when we switch. To change %cs and jump we
215 * need this structure to feed to Intel's "lcall" instruction. */
131 lguest_entry.offset = (long)switch_to_guest + switcher_offset(); 216 lguest_entry.offset = (long)switch_to_guest + switcher_offset();
132 lguest_entry.segment = LGUEST_CS; 217 lguest_entry.segment = LGUEST_CS;
133 218
134 printk(KERN_INFO "lguest: mapped switcher at %p\n", 219 printk(KERN_INFO "lguest: mapped switcher at %p\n",
135 switcher_vma->addr); 220 switcher_vma->addr);
221 /* And we succeeded... */
136 return 0; 222 return 0;
137 223
138free_vma: 224free_vma:
@@ -146,35 +232,58 @@ free_some_pages:
146out: 232out:
147 return err; 233 return err;
148} 234}
235/*:*/
149 236
237/* Cleaning up the mapping when the module is unloaded is almost...
238 * too easy. */
150static void unmap_switcher(void) 239static void unmap_switcher(void)
151{ 240{
152 unsigned int i; 241 unsigned int i;
153 242
243 /* vunmap() undoes *both* map_vm_area() and __get_vm_area(). */
154 vunmap(switcher_vma->addr); 244 vunmap(switcher_vma->addr);
245 /* Now we just need to free the pages we copied the switcher into */
155 for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) 246 for (i = 0; i < TOTAL_SWITCHER_PAGES; i++)
156 __free_pages(switcher_page[i], 0); 247 __free_pages(switcher_page[i], 0);
157} 248}
158 249
159/* IN/OUT insns: enough to get us past boot-time probing. */ 250/*H:130 Our Guest is usually so well behaved; it never tries to do things it
251 * isn't allowed to. Unfortunately, "struct paravirt_ops" isn't quite
252 * complete, because it doesn't contain replacements for the Intel I/O
253 * instructions. As a result, the Guest sometimes fumbles across one during
254 * the boot process as it probes for various things which are usually attached
255 * to a PC.
256 *
257 * When the Guest uses one of these instructions, we get trap #13 (General
258 * Protection Fault) and come here. We see if it's one of those troublesome
259 * instructions and skip over it. We return true if we did. */
160static int emulate_insn(struct lguest *lg) 260static int emulate_insn(struct lguest *lg)
161{ 261{
162 u8 insn; 262 u8 insn;
163 unsigned int insnlen = 0, in = 0, shift = 0; 263 unsigned int insnlen = 0, in = 0, shift = 0;
264 /* The eip contains the *virtual* address of the Guest's instruction:
265 * guest_pa just subtracts the Guest's page_offset. */
164 unsigned long physaddr = guest_pa(lg, lg->regs->eip); 266 unsigned long physaddr = guest_pa(lg, lg->regs->eip);
165 267
166 /* This only works for addresses in linear mapping... */ 268 /* The guest_pa() function only works for Guest kernel addresses, but
269 * that's all we're trying to do anyway. */
167 if (lg->regs->eip < lg->page_offset) 270 if (lg->regs->eip < lg->page_offset)
168 return 0; 271 return 0;
272
273 /* Decoding x86 instructions is icky. */
169 lgread(lg, &insn, physaddr, 1); 274 lgread(lg, &insn, physaddr, 1);
170 275
171 /* Operand size prefix means it's actually for ax. */ 276 /* 0x66 is an "operand prefix". It means it's using the upper 16 bits
277 of the eax register. */
172 if (insn == 0x66) { 278 if (insn == 0x66) {
173 shift = 16; 279 shift = 16;
280 /* The instruction is 1 byte so far, read the next byte. */
174 insnlen = 1; 281 insnlen = 1;
175 lgread(lg, &insn, physaddr + insnlen, 1); 282 lgread(lg, &insn, physaddr + insnlen, 1);
176 } 283 }
177 284
285 /* We can ignore the lower bit for the moment and decode the 4 opcodes
286 * we need to emulate. */
178 switch (insn & 0xFE) { 287 switch (insn & 0xFE) {
179 case 0xE4: /* in <next byte>,%al */ 288 case 0xE4: /* in <next byte>,%al */
180 insnlen += 2; 289 insnlen += 2;
@@ -191,9 +300,13 @@ static int emulate_insn(struct lguest *lg)
191 insnlen += 1; 300 insnlen += 1;
192 break; 301 break;
193 default: 302 default:
303 /* OK, we don't know what this is, can't emulate. */
194 return 0; 304 return 0;
195 } 305 }
196 306
307 /* If it was an "IN" instruction, they expect the result to be read
308 * into %eax, so we change %eax. We always return all-ones, which
309 * traditionally means "there's nothing there". */
197 if (in) { 310 if (in) {
 198 /* Lower bit tells us whether it's a 16 or 32 bit access */ 311
199 if (insn & 0x1) 312 if (insn & 0x1)
@@ -201,28 +314,46 @@ static int emulate_insn(struct lguest *lg)
201 else 314 else
202 lg->regs->eax |= (0xFFFF << shift); 315 lg->regs->eax |= (0xFFFF << shift);
203 } 316 }
317 /* Finally, we've "done" the instruction, so move past it. */
204 lg->regs->eip += insnlen; 318 lg->regs->eip += insnlen;
319 /* Success! */
205 return 1; 320 return 1;
206} 321}
207 322/*:*/
323
324/*L:305
325 * Dealing With Guest Memory.
326 *
327 * When the Guest gives us (what it thinks is) a physical address, we can use
328 * the normal copy_from_user() & copy_to_user() on that address: remember,
329 * Guest physical == Launcher virtual.
330 *
331 * But we can't trust the Guest: it might be trying to access the Launcher
332 * code. We have to check that the range is below the pfn_limit the Launcher
333 * gave us. We have to make sure that addr + len doesn't give us a false
334 * positive by overflowing, too. */
208int lguest_address_ok(const struct lguest *lg, 335int lguest_address_ok(const struct lguest *lg,
209 unsigned long addr, unsigned long len) 336 unsigned long addr, unsigned long len)
210{ 337{
211 return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); 338 return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr);
212} 339}
213 340
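[Editor's note: the second clause of the test above is the overflow guard. A
concrete illustration of the case it rejects, with hypothetical 32-bit values:]

	u32 addr = 0xFFFFF000, len = 0x2000;
	/* addr + len wraps around to 0x1000, so (addr + len) / PAGE_SIZE
	 * would be far below pfn_limit; but the wrap also makes
	 * addr + len < addr, which is exactly what the second clause
	 * catches, so the access is refused. */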
214/* Just like get_user, but don't let guest access lguest binary. */ 341/* This is a convenient routine to get a 32-bit value from the Guest (a very
 342 * common operation). Here we can see how useful the kill_guest() routine we
343 * met in the Launcher can be: we return a random value (0) instead of needing
344 * to return an error. */
215u32 lgread_u32(struct lguest *lg, unsigned long addr) 345u32 lgread_u32(struct lguest *lg, unsigned long addr)
216{ 346{
217 u32 val = 0; 347 u32 val = 0;
218 348
219 /* Don't let them access lguest binary */ 349 /* Don't let them access lguest binary. */
220 if (!lguest_address_ok(lg, addr, sizeof(val)) 350 if (!lguest_address_ok(lg, addr, sizeof(val))
221 || get_user(val, (u32 __user *)addr) != 0) 351 || get_user(val, (u32 __user *)addr) != 0)
222 kill_guest(lg, "bad read address %#lx", addr); 352 kill_guest(lg, "bad read address %#lx", addr);
223 return val; 353 return val;
224} 354}
225 355
356/* Same thing for writing a value. */
226void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val) 357void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val)
227{ 358{
228 if (!lguest_address_ok(lg, addr, sizeof(val)) 359 if (!lguest_address_ok(lg, addr, sizeof(val))
@@ -230,6 +361,9 @@ void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val)
230 kill_guest(lg, "bad write address %#lx", addr); 361 kill_guest(lg, "bad write address %#lx", addr);
231} 362}
232 363
364/* This routine is more generic, and copies a range of Guest bytes into a
365 * buffer. If the copy_from_user() fails, we fill the buffer with zeroes, so
366 * the caller doesn't end up using uninitialized kernel memory. */
233void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes) 367void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
234{ 368{
235 if (!lguest_address_ok(lg, addr, bytes) 369 if (!lguest_address_ok(lg, addr, bytes)
@@ -240,6 +374,7 @@ void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
240 } 374 }
241} 375}
242 376
377/* Similarly, our generic routine to copy into a range of Guest bytes. */
243void lgwrite(struct lguest *lg, unsigned long addr, const void *b, 378void lgwrite(struct lguest *lg, unsigned long addr, const void *b,
244 unsigned bytes) 379 unsigned bytes)
245{ 380{
@@ -247,6 +382,7 @@ void lgwrite(struct lguest *lg, unsigned long addr, const void *b,
247 || copy_to_user((void __user *)addr, b, bytes) != 0) 382 || copy_to_user((void __user *)addr, b, bytes) != 0)
248 kill_guest(lg, "bad write address %#lx len %u", addr, bytes); 383 kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
249} 384}
385/* (end of memory access helper routines) :*/
250 386
251static void set_ts(void) 387static void set_ts(void)
252{ 388{
@@ -257,54 +393,108 @@ static void set_ts(void)
257 write_cr0(cr0|8); 393 write_cr0(cr0|8);
258} 394}
259 395
396/*S:010
397 * We are getting close to the Switcher.
398 *
399 * Remember that each CPU has two pages which are visible to the Guest when it
400 * runs on that CPU. This has to contain the state for that Guest: we copy the
401 * state in just before we run the Guest.
402 *
403 * Each Guest has "changed" flags which indicate what has changed in the Guest
404 * since it last ran. We saw this set in interrupts_and_traps.c and
405 * segments.c.
406 */
260static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages) 407static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
261{ 408{
409 /* Copying all this data can be quite expensive. We usually run the
410 * same Guest we ran last time (and that Guest hasn't run anywhere else
411 * meanwhile). If that's not the case, we pretend everything in the
412 * Guest has changed. */
262 if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) { 413 if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {
263 __get_cpu_var(last_guest) = lg; 414 __get_cpu_var(last_guest) = lg;
264 lg->last_pages = pages; 415 lg->last_pages = pages;
265 lg->changed = CHANGED_ALL; 416 lg->changed = CHANGED_ALL;
266 } 417 }
267 418
268 /* These are pretty cheap, so we do them unconditionally. */ 419 /* These copies are pretty cheap, so we do them unconditionally: */
420 /* Save the current Host top-level page directory. */
269 pages->state.host_cr3 = __pa(current->mm->pgd); 421 pages->state.host_cr3 = __pa(current->mm->pgd);
422 /* Set up the Guest's page tables to see this CPU's pages (and no
423 * other CPU's pages). */
270 map_switcher_in_guest(lg, pages); 424 map_switcher_in_guest(lg, pages);
425 /* Set up the two "TSS" members which tell the CPU what stack to use
 426 * for traps which go directly into the Guest (ie. traps at privilege
427 * level 1). */
271 pages->state.guest_tss.esp1 = lg->esp1; 428 pages->state.guest_tss.esp1 = lg->esp1;
272 pages->state.guest_tss.ss1 = lg->ss1; 429 pages->state.guest_tss.ss1 = lg->ss1;
273 430
274 /* Copy direct trap entries. */ 431 /* Copy direct-to-Guest trap entries. */
275 if (lg->changed & CHANGED_IDT) 432 if (lg->changed & CHANGED_IDT)
276 copy_traps(lg, pages->state.guest_idt, default_idt_entries); 433 copy_traps(lg, pages->state.guest_idt, default_idt_entries);
277 434
278 /* Copy all GDT entries but the TSS. */ 435 /* Copy all GDT entries which the Guest can change. */
279 if (lg->changed & CHANGED_GDT) 436 if (lg->changed & CHANGED_GDT)
280 copy_gdt(lg, pages->state.guest_gdt); 437 copy_gdt(lg, pages->state.guest_gdt);
281 /* If only the TLS entries have changed, copy them. */ 438 /* If only the TLS entries have changed, copy them. */
282 else if (lg->changed & CHANGED_GDT_TLS) 439 else if (lg->changed & CHANGED_GDT_TLS)
283 copy_gdt_tls(lg, pages->state.guest_gdt); 440 copy_gdt_tls(lg, pages->state.guest_gdt);
284 441
442 /* Mark the Guest as unchanged for next time. */
285 lg->changed = 0; 443 lg->changed = 0;
286} 444}
287 445
446/* Finally: the code to actually call into the Switcher to run the Guest. */
288static void run_guest_once(struct lguest *lg, struct lguest_pages *pages) 447static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
289{ 448{
449 /* This is a dummy value we need for GCC's sake. */
290 unsigned int clobber; 450 unsigned int clobber;
291 451
452 /* Copy the guest-specific information into this CPU's "struct
453 * lguest_pages". */
292 copy_in_guest_info(lg, pages); 454 copy_in_guest_info(lg, pages);
293 455
294 /* Put eflags on stack, lcall does rest: suitable for iret return. */ 456 /* Now: we push the "eflags" register on the stack, then do an "lcall".
457 * This is how we change from using the kernel code segment to using
458 * the dedicated lguest code segment, as well as jumping into the
459 * Switcher.
460 *
461 * The lcall also pushes the old code segment (KERNEL_CS) onto the
462 * stack, then the address of this call. This stack layout happens to
463 * exactly match the stack of an interrupt... */
295 asm volatile("pushf; lcall *lguest_entry" 464 asm volatile("pushf; lcall *lguest_entry"
465 /* This is how we tell GCC that %eax ("a") and %ebx ("b")
466 * are changed by this routine. The "=" means output. */
296 : "=a"(clobber), "=b"(clobber) 467 : "=a"(clobber), "=b"(clobber)
468 /* %eax contains the pages pointer. ("0" refers to the
469 * 0-th argument above, ie "a"). %ebx contains the
470 * physical address of the Guest's top-level page
471 * directory. */
297 : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir)) 472 : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
473 /* We tell gcc that all these registers could change,
474 * which means we don't have to save and restore them in
475 * the Switcher. */
298 : "memory", "%edx", "%ecx", "%edi", "%esi"); 476 : "memory", "%edx", "%ecx", "%edi", "%esi");
299} 477}
478/*:*/
300 479
 480/*H:030 Let's jump straight to the main loop which runs the Guest.
481 * Remember, this is called by the Launcher reading /dev/lguest, and we keep
482 * going around and around until something interesting happens. */
301int run_guest(struct lguest *lg, unsigned long __user *user) 483int run_guest(struct lguest *lg, unsigned long __user *user)
302{ 484{
485 /* We stop running once the Guest is dead. */
303 while (!lg->dead) { 486 while (!lg->dead) {
487 /* We need to initialize this, otherwise gcc complains. It's
488 * not (yet) clever enough to see that it's initialized when we
489 * need it. */
304 unsigned int cr2 = 0; /* Damn gcc */ 490 unsigned int cr2 = 0; /* Damn gcc */
305 491
306 /* Hypercalls first: we might have been out to userspace */ 492 /* First we run any hypercalls the Guest wants done: either in
493 * the hypercall ring in "struct lguest_data", or directly by
494 * using int 31 (LGUEST_TRAP_ENTRY). */
307 do_hypercalls(lg); 495 do_hypercalls(lg);
496 /* It's possible the Guest did a SEND_DMA hypercall to the
497 * Launcher, in which case we return from the read() now. */
308 if (lg->dma_is_pending) { 498 if (lg->dma_is_pending) {
309 if (put_user(lg->pending_dma, user) || 499 if (put_user(lg->pending_dma, user) ||
310 put_user(lg->pending_key, user+1)) 500 put_user(lg->pending_key, user+1))
@@ -312,6 +502,7 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
312 return sizeof(unsigned long)*2; 502 return sizeof(unsigned long)*2;
313 } 503 }
314 504
505 /* Check for signals */
315 if (signal_pending(current)) 506 if (signal_pending(current))
316 return -ERESTARTSYS; 507 return -ERESTARTSYS;
317 508
@@ -319,77 +510,154 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
319 if (lg->break_out) 510 if (lg->break_out)
320 return -EAGAIN; 511 return -EAGAIN;
321 512
513 /* Check if there are any interrupts which can be delivered
 514 * now: if so, this sets up the handler to be executed when we
515 * next run the Guest. */
322 maybe_do_interrupt(lg); 516 maybe_do_interrupt(lg);
323 517
518 /* All long-lived kernel loops need to check with this horrible
519 * thing called the freezer. If the Host is trying to suspend,
520 * it stops us. */
324 try_to_freeze(); 521 try_to_freeze();
325 522
523 /* Just make absolutely sure the Guest is still alive. One of
524 * those hypercalls could have been fatal, for example. */
326 if (lg->dead) 525 if (lg->dead)
327 break; 526 break;
328 527
528 /* If the Guest asked to be stopped, we sleep. The Guest's
529 * clock timer or LHCALL_BREAK from the Waker will wake us. */
329 if (lg->halted) { 530 if (lg->halted) {
330 set_current_state(TASK_INTERRUPTIBLE); 531 set_current_state(TASK_INTERRUPTIBLE);
331 schedule(); 532 schedule();
332 continue; 533 continue;
333 } 534 }
334 535
536 /* OK, now we're ready to jump into the Guest. First we put up
537 * the "Do Not Disturb" sign: */
335 local_irq_disable(); 538 local_irq_disable();
336 539
337 /* Even if *we* don't want FPU trap, guest might... */ 540 /* Remember the awfully-named TS bit? If the Guest has asked
541 * to set it we set it now, so we can trap and pass that trap
542 * to the Guest if it uses the FPU. */
338 if (lg->ts) 543 if (lg->ts)
339 set_ts(); 544 set_ts();
340 545
341 /* Don't let Guest do SYSENTER: we can't handle it. */ 546 /* SYSENTER is an optimized way of doing system calls. We
547 * can't allow it because it always jumps to privilege level 0.
548 * A normal Guest won't try it because we don't advertise it in
549 * CPUID, but a malicious Guest (or malicious Guest userspace
550 * program) could, so we tell the CPU to disable it before
551 * running the Guest. */
342 if (boot_cpu_has(X86_FEATURE_SEP)) 552 if (boot_cpu_has(X86_FEATURE_SEP))
343 wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); 553 wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
344 554
555 /* Now we actually run the Guest. It will pop back out when
556 * something interesting happens, and we can examine its
557 * registers to see what it was doing. */
345 run_guest_once(lg, lguest_pages(raw_smp_processor_id())); 558 run_guest_once(lg, lguest_pages(raw_smp_processor_id()));
346 559
347 /* Save cr2 now if we page-faulted. */ 560 /* The "regs" pointer contains two extra entries which are not
561 * really registers: a trap number which says what interrupt or
562 * trap made the switcher code come back, and an error code
563 * which some traps set. */
564
565 /* If the Guest page faulted, then the cr2 register will tell
566 * us the bad virtual address. We have to grab this now,
567 * because once we re-enable interrupts an interrupt could
568 * fault and thus overwrite cr2, or we could even move off to a
569 * different CPU. */
348 if (lg->regs->trapnum == 14) 570 if (lg->regs->trapnum == 14)
349 cr2 = read_cr2(); 571 cr2 = read_cr2();
572 /* Similarly, if we took a trap because the Guest used the FPU,
573 * we have to restore the FPU it expects to see. */
350 else if (lg->regs->trapnum == 7) 574 else if (lg->regs->trapnum == 7)
351 math_state_restore(); 575 math_state_restore();
352 576
577 /* Restore SYSENTER if it's supposed to be on. */
353 if (boot_cpu_has(X86_FEATURE_SEP)) 578 if (boot_cpu_has(X86_FEATURE_SEP))
354 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 579 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
580
581 /* Now we're ready to be interrupted or moved to other CPUs */
355 local_irq_enable(); 582 local_irq_enable();
356 583
584 /* OK, so what happened? */
357 switch (lg->regs->trapnum) { 585 switch (lg->regs->trapnum) {
358 case 13: /* We've intercepted a GPF. */ 586 case 13: /* We've intercepted a GPF. */
587 /* Check if this was one of those annoying IN or OUT
588 * instructions which we need to emulate. If so, we
589 * just go back into the Guest after we've done it. */
359 if (lg->regs->errcode == 0) { 590 if (lg->regs->errcode == 0) {
360 if (emulate_insn(lg)) 591 if (emulate_insn(lg))
361 continue; 592 continue;
362 } 593 }
363 break; 594 break;
364 case 14: /* We've intercepted a page fault. */ 595 case 14: /* We've intercepted a page fault. */
596 /* The Guest accessed a virtual address that wasn't
597 * mapped. This happens a lot: we don't actually set
598 * up most of the page tables for the Guest at all when
599 * we start: as it runs it asks for more and more, and
600 * we set them up as required. In this case, we don't
601 * even tell the Guest that the fault happened.
602 *
603 * The errcode tells whether this was a read or a
604 * write, and whether kernel or userspace code. */
365 if (demand_page(lg, cr2, lg->regs->errcode)) 605 if (demand_page(lg, cr2, lg->regs->errcode))
366 continue; 606 continue;
367 607
368 /* If lguest_data is NULL, this won't hurt. */ 608 /* OK, it's really not there (or not OK): the Guest
609 * needs to know. We write out the cr2 value so it
610 * knows where the fault occurred.
611 *
612 * Note that if the Guest were really messed up, this
613 * could happen before it's done the INITIALIZE
614 * hypercall, so lg->lguest_data will be NULL, so
615 * &lg->lguest_data->cr2 will be address 8. Writing
616 * into that address won't hurt the Host at all,
617 * though. */
369 if (put_user(cr2, &lg->lguest_data->cr2)) 618 if (put_user(cr2, &lg->lguest_data->cr2))
370 kill_guest(lg, "Writing cr2"); 619 kill_guest(lg, "Writing cr2");
371 break; 620 break;
372 case 7: /* We've intercepted a Device Not Available fault. */ 621 case 7: /* We've intercepted a Device Not Available fault. */
373 /* If they don't want to know, just absorb it. */ 622 /* If the Guest doesn't want to know, we already
623 * restored the Floating Point Unit, so we just
624 * continue without telling it. */
374 if (!lg->ts) 625 if (!lg->ts)
375 continue; 626 continue;
376 break; 627 break;
377 case 32 ... 255: /* Real interrupt, fall thru */ 628 case 32 ... 255:
629 /* These values mean a real interrupt occurred, in
630 * which case the Host handler has already been run.
631 * We just do a friendly check if another process
632 * should now be run, then fall through to loop
633 * around: */
378 cond_resched(); 634 cond_resched();
379 case LGUEST_TRAP_ENTRY: /* Handled at top of loop */ 635 case LGUEST_TRAP_ENTRY: /* Handled at top of loop */
380 continue; 636 continue;
381 } 637 }
382 638
639 /* If we get here, it's a trap the Guest wants to know
640 * about. */
383 if (deliver_trap(lg, lg->regs->trapnum)) 641 if (deliver_trap(lg, lg->regs->trapnum))
384 continue; 642 continue;
385 643
644 /* If the Guest doesn't have a handler (either it hasn't
645 * registered any yet, or it's one of the faults we don't let
646 * it handle), it dies with a cryptic error message. */
386 kill_guest(lg, "unhandled trap %li at %#lx (%#lx)", 647 kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
387 lg->regs->trapnum, lg->regs->eip, 648 lg->regs->trapnum, lg->regs->eip,
388 lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode); 649 lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode);
389 } 650 }
651 /* The Guest is dead => "No such file or directory" */
390 return -ENOENT; 652 return -ENOENT;
391} 653}
392 654
655/* Now we can look at each of the routines this calls, in increasing order of
656 * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
657 * deliver_trap() and demand_page(). After all those, we'll be ready to
658 * examine the Switcher, and our philosophical understanding of the Host/Guest
659 * duality will be complete. :*/
660
393int find_free_guest(void) 661int find_free_guest(void)
394{ 662{
395 unsigned int i; 663 unsigned int i;
@@ -407,55 +675,96 @@ static void adjust_pge(void *on)
407 write_cr4(read_cr4() & ~X86_CR4_PGE); 675 write_cr4(read_cr4() & ~X86_CR4_PGE);
408} 676}
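(The hunk above only catches the tail of adjust_pge(); reconstructed from its two call sites below — argument 0 clears the bit, 1 sets it — the whole helper is roughly this sketch, not a verbatim quote:)

static void adjust_pge(void *on)
{
	/* Runs on one CPU: set or clear the global-pages bit in CR4. */
	if (on)
		write_cr4(read_cr4() | X86_CR4_PGE);
	else
		write_cr4(read_cr4() & ~X86_CR4_PGE);
}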
409 677
678/*H:000
679 * Welcome to the Host!
680 *
681 * By this point your brain has been tickled by the Guest code and numbed by
682 * the Launcher code; prepare for it to be stretched by the Host code. This is
683 * the heart. Let's begin at the initialization routine for the Host's lg
684 * module.
685 */
410static int __init init(void) 686static int __init init(void)
411{ 687{
412 int err; 688 int err;
413 689
690 /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */
414 if (paravirt_enabled()) { 691 if (paravirt_enabled()) {
415 printk("lguest is afraid of %s\n", paravirt_ops.name); 692 printk("lguest is afraid of %s\n", paravirt_ops.name);
416 return -EPERM; 693 return -EPERM;
417 } 694 }
418 695
696 /* First we put the Switcher up in very high virtual memory. */
419 err = map_switcher(); 697 err = map_switcher();
420 if (err) 698 if (err)
421 return err; 699 return err;
422 700
701 /* Now we set up the pagetable implementation for the Guests. */
423 err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES); 702 err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES);
424 if (err) { 703 if (err) {
425 unmap_switcher(); 704 unmap_switcher();
426 return err; 705 return err;
427 } 706 }
707
708 /* The I/O subsystem needs some things initialized. */
428 lguest_io_init(); 709 lguest_io_init();
429 710
711 /* /dev/lguest needs to be registered. */
430 err = lguest_device_init(); 712 err = lguest_device_init();
431 if (err) { 713 if (err) {
432 free_pagetables(); 714 free_pagetables();
433 unmap_switcher(); 715 unmap_switcher();
434 return err; 716 return err;
435 } 717 }
718
719 /* Finally, we need to turn off "Page Global Enable". PGE is an
720 * optimization where page table entries are specially marked to show
721 * they never change. The Host kernel marks all the kernel pages this
722 * way because it's always present, even when userspace is running.
723 *
724 * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
725 * switch to the Guest kernel. If you don't disable this on all CPUs,
726 * you'll get really weird bugs that you'll chase for two days.
727 *
728 * I used to turn PGE off every time we switched to the Guest and back
729 * on when we return, but that slowed the Switcher down noticeably. */
730
731 /* We don't need the complexity of CPUs coming and going while we're
732 * doing this. */
436 lock_cpu_hotplug(); 733 lock_cpu_hotplug();
437 if (cpu_has_pge) { /* We have a broader idea of "global". */ 734 if (cpu_has_pge) { /* We have a broader idea of "global". */
735 /* Remember that this was originally set (for cleanup). */
438 cpu_had_pge = 1; 736 cpu_had_pge = 1;
737 /* adjust_pge is a helper function which sets or unsets the PGE
738 * bit on its CPU, depending on the argument (0 == unset). */
439 on_each_cpu(adjust_pge, (void *)0, 0, 1); 739 on_each_cpu(adjust_pge, (void *)0, 0, 1);
740 /* Turn off the feature in the global feature set. */
440 clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); 741 clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
441 } 742 }
442 unlock_cpu_hotplug(); 743 unlock_cpu_hotplug();
744
745 /* All good! */
443 return 0; 746 return 0;
444} 747}
445 748
749/* Cleaning up is just the same code, backwards. With a little French. */
446static void __exit fini(void) 750static void __exit fini(void)
447{ 751{
448 lguest_device_remove(); 752 lguest_device_remove();
449 free_pagetables(); 753 free_pagetables();
450 unmap_switcher(); 754 unmap_switcher();
755
756 /* If we had PGE before we started, turn it back on now. */
451 lock_cpu_hotplug(); 757 lock_cpu_hotplug();
452 if (cpu_had_pge) { 758 if (cpu_had_pge) {
453 set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); 759 set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
760 /* adjust_pge's argument "1" means set PGE. */
454 on_each_cpu(adjust_pge, (void *)1, 0, 1); 761 on_each_cpu(adjust_pge, (void *)1, 0, 1);
455 } 762 }
456 unlock_cpu_hotplug(); 763 unlock_cpu_hotplug();
457} 764}
458 765
766/* The Host side of lguest can be a module. This is a nice way for people to
767 * play with it. */
459module_init(init); 768module_init(init);
460module_exit(fini); 769module_exit(fini);
461MODULE_LICENSE("GPL"); 770MODULE_LICENSE("GPL");
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index ea52ca451f74..7a5299f9679d 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -1,5 +1,10 @@
1/* Actual hypercalls, which allow guests to actually do something. 1/*P:500 Just as userspace programs request kernel operations through a system
2 Copyright (C) 2006 Rusty Russell IBM Corporation 2 * call, the Guest requests Host operations through a "hypercall". You might
3 * notice this nomenclature doesn't really follow any logic, but the name has
4 * been around for long enough that we're stuck with it. As you'd expect, this
5 * code is basically one big switch statement. :*/
6
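For orientation, the Guest side of a hypercall is a software interrupt with the call number in %eax and up to three arguments in %edx, %ebx and %ecx — matching the decoding in do_hcall() below. A hedged sketch of the Guest stub (not a verbatim quote of the Guest code):

/* Sketch: Guest-side hypercall stub. LGUEST_TRAP_ENTRY is the trap
 * reserved for hypercalls; the Host writes any result back into %eax.
 * (__stringify comes from <linux/stringify.h>.) */
static unsigned long hcall(unsigned long call, unsigned long arg1,
			   unsigned long arg2, unsigned long arg3)
{
	asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
		     : "=a"(call)
		     : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
		     : "memory");
	return call;
}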
7/* Copyright (C) 2006 Rusty Russell IBM Corporation
3 8
4 This program is free software; you can redistribute it and/or modify 9 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by 10 it under the terms of the GNU General Public License as published by
@@ -23,37 +28,63 @@
23#include <irq_vectors.h> 28#include <irq_vectors.h>
24#include "lg.h" 29#include "lg.h"
25 30
31/*H:120 This is the core hypercall routine: where the Guest gets what it
32 * wants. Or gets killed. Or, in the case of LHCALL_CRASH, both.
33 *
34 * Remember from the Guest: %eax == which call to make, and the arguments are
35 * packed into %edx, %ebx and %ecx if needed. */
26static void do_hcall(struct lguest *lg, struct lguest_regs *regs) 36static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
27{ 37{
28 switch (regs->eax) { 38 switch (regs->eax) {
29 case LHCALL_FLUSH_ASYNC: 39 case LHCALL_FLUSH_ASYNC:
40 /* This call does nothing, except by breaking out of the Guest
41 * it makes us process all the asynchronous hypercalls. */
30 break; 42 break;
31 case LHCALL_LGUEST_INIT: 43 case LHCALL_LGUEST_INIT:
44 /* You can't get here unless you're already initialized. Don't
45 * do that. */
32 kill_guest(lg, "already have lguest_data"); 46 kill_guest(lg, "already have lguest_data");
33 break; 47 break;
34 case LHCALL_CRASH: { 48 case LHCALL_CRASH: {
49 /* Crash is such a trivial hypercall that we do it in four
50 * lines right here. */
35 char msg[128]; 51 char msg[128];
52 /* If the lgread fails, it will call kill_guest() itself; the
53 * kill_guest() with the message will be ignored. */
36 lgread(lg, msg, regs->edx, sizeof(msg)); 54 lgread(lg, msg, regs->edx, sizeof(msg));
37 msg[sizeof(msg)-1] = '\0'; 55 msg[sizeof(msg)-1] = '\0';
38 kill_guest(lg, "CRASH: %s", msg); 56 kill_guest(lg, "CRASH: %s", msg);
39 break; 57 break;
40 } 58 }
41 case LHCALL_FLUSH_TLB: 59 case LHCALL_FLUSH_TLB:
60 /* FLUSH_TLB comes in two flavors, depending on the
61 * argument: */
42 if (regs->edx) 62 if (regs->edx)
43 guest_pagetable_clear_all(lg); 63 guest_pagetable_clear_all(lg);
44 else 64 else
45 guest_pagetable_flush_user(lg); 65 guest_pagetable_flush_user(lg);
46 break; 66 break;
47 case LHCALL_GET_WALLCLOCK: { 67 case LHCALL_GET_WALLCLOCK: {
68 /* The Guest wants to know the real time in seconds since 1970,
69 * in good Unix tradition. */
48 struct timespec ts; 70 struct timespec ts;
49 ktime_get_real_ts(&ts); 71 ktime_get_real_ts(&ts);
50 regs->eax = ts.tv_sec; 72 regs->eax = ts.tv_sec;
51 break; 73 break;
52 } 74 }
53 case LHCALL_BIND_DMA: 75 case LHCALL_BIND_DMA:
76 /* BIND_DMA really wants four arguments, but it's the only call
77 * which does. So the Guest packs the number of buffers and
78 * the interrupt number into the final argument, and we decode
79 * it here. This can legitimately fail, since we currently
80 * place a limit on the number of DMA pools a Guest can have.
81 * So we return true or false from this call. */
54 regs->eax = bind_dma(lg, regs->edx, regs->ebx, 82 regs->eax = bind_dma(lg, regs->edx, regs->ebx,
55 regs->ecx >> 8, regs->ecx & 0xFF); 83 regs->ecx >> 8, regs->ecx & 0xFF);
56 break; 84 break;
85
86 /* All these calls simply pass the arguments through to the right
87 * routines. */
57 case LHCALL_SEND_DMA: 88 case LHCALL_SEND_DMA:
58 send_dma(lg, regs->edx, regs->ebx); 89 send_dma(lg, regs->edx, regs->ebx);
59 break; 90 break;
@@ -81,10 +112,13 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
81 case LHCALL_SET_CLOCKEVENT: 112 case LHCALL_SET_CLOCKEVENT:
82 guest_set_clockevent(lg, regs->edx); 113 guest_set_clockevent(lg, regs->edx);
83 break; 114 break;
115
84 case LHCALL_TS: 116 case LHCALL_TS:
117 /* This sets the TS flag, as we saw used in run_guest(). */
85 lg->ts = regs->edx; 118 lg->ts = regs->edx;
86 break; 119 break;
87 case LHCALL_HALT: 120 case LHCALL_HALT:
121 /* Similarly, this sets the halted flag for run_guest(). */
88 lg->halted = 1; 122 lg->halted = 1;
89 break; 123 break;
90 default: 124 default:
@@ -92,25 +126,42 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
92 } 126 }
93} 127}
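Seen from the Guest, the BIND_DMA argument packing described above would look something like this (a sketch built on the hcall() stub; the wrapper name is illustrative):

/* Sketch: pack the buffer count and interrupt number into one
 * argument, exactly as do_hcall() unpacks them with >> 8 and & 0xFF. */
static int lguest_bind_dma(unsigned long key, unsigned long dmas,
			   u16 numdmas, u8 interrupt)
{
	return hcall(LHCALL_BIND_DMA, key, dmas,
		     ((unsigned long)numdmas << 8) | interrupt);
}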
94 128
95/* We always do queued calls before actual hypercall. */ 129/* Asynchronous hypercalls are easy: we just look in the array in the Guest's
130 * "struct lguest_data" and see if there are any new ones marked "ready".
131 *
132 * We are careful to do these in order: obviously we respect the order the
133 * Guest put them in the ring, but we also promise the Guest that they will
134 * happen before any normal hypercall (which is why we check this before
135 * checking for a normal hcall). */
96static void do_async_hcalls(struct lguest *lg) 136static void do_async_hcalls(struct lguest *lg)
97{ 137{
98 unsigned int i; 138 unsigned int i;
99 u8 st[LHCALL_RING_SIZE]; 139 u8 st[LHCALL_RING_SIZE];
100 140
141 /* For simplicity, we copy the entire call status array in at once. */
101 if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st))) 142 if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
102 return; 143 return;
103 144
145
146 /* We process the "struct lguest_data"'s hcalls[] ring once. */
104 for (i = 0; i < ARRAY_SIZE(st); i++) { 147 for (i = 0; i < ARRAY_SIZE(st); i++) {
105 struct lguest_regs regs; 148 struct lguest_regs regs;
149 /* We remember where we were up to from last time. This makes
150 * sure that the hypercalls are done in the order the Guest
151 * places them in the ring. */
106 unsigned int n = lg->next_hcall; 152 unsigned int n = lg->next_hcall;
107 153
154 /* 0xFF means there's no call here (yet). */
108 if (st[n] == 0xFF) 155 if (st[n] == 0xFF)
109 break; 156 break;
110 157
158 /* OK, we have a hypercall. Increment the "next_hcall" cursor,
159 * and wrap back to 0 if we reach the end. */
111 if (++lg->next_hcall == LHCALL_RING_SIZE) 160 if (++lg->next_hcall == LHCALL_RING_SIZE)
112 lg->next_hcall = 0; 161 lg->next_hcall = 0;
113 162
163 /* We copy the hypercall arguments into a fake register
164 * structure. This makes life simple for do_hcall(). */
114 if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax) 165 if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax)
115 || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx) 166 || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx)
116 || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx) 167 || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx)
@@ -119,74 +170,126 @@ static void do_async_hcalls(struct lguest *lg)
119 break; 170 break;
120 } 171 }
121 172
173 /* Do the hypercall, same as a normal one. */
122 do_hcall(lg, &regs); 174 do_hcall(lg, &regs);
175
176 /* Mark the hypercall done. */
123 if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) { 177 if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
124 kill_guest(lg, "Writing result for async hypercall"); 178 kill_guest(lg, "Writing result for async hypercall");
125 break; 179 break;
126 } 180 }
127 181
182 /* Stop doing hypercalls if we've just done a DMA to the
183 * Launcher: it needs to service this first. */
128 if (lg->dma_is_pending) 184 if (lg->dma_is_pending)
129 break; 185 break;
130 } 186 }
131} 187}
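The Guest half of this ring protocol is worth sketching: it fills the first free slot (status 0xFF), writes the registers, then flips the status to 0 so the loop above will pick it up. Simplified and hedged — the real Guest code also disables interrupts around this:

/* Sketch: Guest-side async hypercall enqueue. */
static void async_hcall(unsigned long call, unsigned long arg1,
			unsigned long arg2, unsigned long arg3)
{
	static unsigned int next_call;

	if (lguest_data.hcall_status[next_call] != 0xFF) {
		/* Ring full: a synchronous call flushes the ring first. */
		hcall(call, arg1, arg2, arg3);
		return;
	}
	lguest_data.hcalls[next_call].eax = call;
	lguest_data.hcalls[next_call].edx = arg1;
	lguest_data.hcalls[next_call].ebx = arg2;
	lguest_data.hcalls[next_call].ecx = arg3;
	/* The arguments must be visible before the slot is marked ready. */
	wmb();
	lguest_data.hcall_status[next_call] = 0;
	if (++next_call == LHCALL_RING_SIZE)
		next_call = 0;
}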
132 188
189/* Last of all, we look at what happens first of all. The very first time the
190 * Guest makes a hypercall, we end up here to set things up: */
133static void initialize(struct lguest *lg) 191static void initialize(struct lguest *lg)
134{ 192{
135 u32 tsc_speed; 193 u32 tsc_speed;
136 194
195 /* You can't do anything until you're initialized. The Guest knows the
196 * rules, so we're unforgiving here. */
137 if (lg->regs->eax != LHCALL_LGUEST_INIT) { 197 if (lg->regs->eax != LHCALL_LGUEST_INIT) {
138 kill_guest(lg, "hypercall %li before LGUEST_INIT", 198 kill_guest(lg, "hypercall %li before LGUEST_INIT",
139 lg->regs->eax); 199 lg->regs->eax);
140 return; 200 return;
141 } 201 }
142 202
143 /* We only tell the guest to use the TSC if it's reliable. */ 203 /* We insist that the Time Stamp Counter exist and doesn't change with
204 * cpu frequency. Some devious chip manufacturers decided that TSC
205 * changes could be handled in software. I decided that time going
206 * backwards might be good for benchmarks, but it's bad for users.
207 *
208 * We also insist that the TSC be stable: the kernel detects unreliable
209 * TSCs for its own purposes, and we use that here. */
144 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) 210 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
145 tsc_speed = tsc_khz; 211 tsc_speed = tsc_khz;
146 else 212 else
147 tsc_speed = 0; 213 tsc_speed = 0;
148 214
215 /* The pointer to the Guest's "struct lguest_data" is the only
216 * argument. */
149 lg->lguest_data = (struct lguest_data __user *)lg->regs->edx; 217 lg->lguest_data = (struct lguest_data __user *)lg->regs->edx;
150 /* We check here so we can simply copy_to_user/from_user */ 218 /* If we check the address they gave is OK now, we can simply
219 * copy_to_user/from_user from now on rather than using lgread/lgwrite.
220 * I put this in to show that I'm not immune to writing stupid
221 * optimizations. */
151 if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) { 222 if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) {
152 kill_guest(lg, "bad guest page %p", lg->lguest_data); 223 kill_guest(lg, "bad guest page %p", lg->lguest_data);
153 return; 224 return;
154 } 225 }
226 /* The Guest tells us where we're not to deliver interrupts by putting
227 * the range of addresses into "struct lguest_data". */
155 if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) 228 if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
156 || get_user(lg->noirq_end, &lg->lguest_data->noirq_end) 229 || get_user(lg->noirq_end, &lg->lguest_data->noirq_end)
157 /* We reserve the top pgd entry. */ 230 /* We tell the Guest that it can't use the top 4MB of virtual
231 * addresses used by the Switcher. */
158 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) 232 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
159 || put_user(tsc_speed, &lg->lguest_data->tsc_khz) 233 || put_user(tsc_speed, &lg->lguest_data->tsc_khz)
234 /* We also give the Guest a unique id, as used in lguest_net.c. */
160 || put_user(lg->guestid, &lg->lguest_data->guestid)) 235 || put_user(lg->guestid, &lg->lguest_data->guestid))
161 kill_guest(lg, "bad guest page %p", lg->lguest_data); 236 kill_guest(lg, "bad guest page %p", lg->lguest_data);
162 237
163 /* This is the one case where the above accesses might have 238 /* This is the one case where the above accesses might have been the
164 * been the first write to a Guest page. This may have caused 239 * first write to a Guest page. This may have caused a copy-on-write
165 * a copy-on-write fault, but the Guest might be referring to 240 * fault, but the Guest might be referring to the old (read-only)
166 * the old (read-only) page. */ 241 * page. */
167 guest_pagetable_clear_all(lg); 242 guest_pagetable_clear_all(lg);
168} 243}
244/* Now we've examined the hypercall code; our Guest can make requests. There
245 * is one other way we can do things for the Guest, as we see in
246 * emulate_insn(). */
169 247
170/* Even if we go out to userspace and come back, we don't want to do 248/*H:110 Tricky point: we mark the hypercall as "done" once we've done it.
171 * the hypercall again. */ 249 * Normally we don't need to do this: the Guest will run again and update the
250 * trap number before we come back around the run_guest() loop to
251 * do_hypercalls().
252 *
253 * However, if we are signalled or the Guest sends DMA to the Launcher, that
254 * loop will exit without running the Guest. When it comes back it would try
255 * to re-run the hypercall. */
172static void clear_hcall(struct lguest *lg) 256static void clear_hcall(struct lguest *lg)
173{ 257{
174 lg->regs->trapnum = 255; 258 lg->regs->trapnum = 255;
175} 259}
176 260
261/*H:100
262 * Hypercalls
263 *
264 * Remember from the Guest, hypercalls come in two flavors: normal and
265 * asynchronous. This file handles both types.
266 */
177void do_hypercalls(struct lguest *lg) 267void do_hypercalls(struct lguest *lg)
178{ 268{
269 /* Not initialized yet? */
179 if (unlikely(!lg->lguest_data)) { 270 if (unlikely(!lg->lguest_data)) {
271 /* Did the Guest make a hypercall? We might have come back for
272 * some other reason (an interrupt, a different trap). */
180 if (lg->regs->trapnum == LGUEST_TRAP_ENTRY) { 273 if (lg->regs->trapnum == LGUEST_TRAP_ENTRY) {
274 /* Set up the "struct lguest_data" */
181 initialize(lg); 275 initialize(lg);
276 /* The hypercall is done. */
182 clear_hcall(lg); 277 clear_hcall(lg);
183 } 278 }
184 return; 279 return;
185 } 280 }
186 281
282 /* The Guest has initialized.
283 *
284 * Look in the hypercall ring for the async hypercalls: */
187 do_async_hcalls(lg); 285 do_async_hcalls(lg);
286
287 /* If we stopped reading the hypercall ring because the Guest did a
288 * SEND_DMA to the Launcher, we want to return now. Otherwise if the
289 * Guest asked us to do a hypercall, we do it. */
188 if (!lg->dma_is_pending && lg->regs->trapnum == LGUEST_TRAP_ENTRY) { 290 if (!lg->dma_is_pending && lg->regs->trapnum == LGUEST_TRAP_ENTRY) {
189 do_hcall(lg, lg->regs); 291 do_hcall(lg, lg->regs);
292 /* The hypercall is done. */
190 clear_hcall(lg); 293 clear_hcall(lg);
191 } 294 }
192} 295}
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index bee029bb2c7b..bd0091bf79ec 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -1,100 +1,160 @@
1/*P:800 Interrupts (traps) are complicated enough to earn their own file.
2 * There are three classes of interrupts:
3 *
4 * 1) Real hardware interrupts which occur while we're running the Guest,
5 * 2) Interrupts for virtual devices attached to the Guest, and
6 * 3) Traps and faults from the Guest.
7 *
8 * Real hardware interrupts must be delivered to the Host, not the Guest.
9 * Virtual interrupts must be delivered to the Guest, but we make them look
10 * just like real hardware would deliver them. Traps from the Guest can be set
11 * up to go directly back into the Guest, but sometimes the Host wants to see
12 * them first, so we also have a way of "reflecting" them into the Guest as if
13 * they had been delivered to it directly. :*/
1#include <linux/uaccess.h> 14#include <linux/uaccess.h>
2#include "lg.h" 15#include "lg.h"
3 16
17/* The address of the interrupt handler is split into two bits: */
4static unsigned long idt_address(u32 lo, u32 hi) 18static unsigned long idt_address(u32 lo, u32 hi)
5{ 19{
6 return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); 20 return (lo & 0x0000FFFF) | (hi & 0xFFFF0000);
7} 21}
8 22
23/* The "type" of the interrupt handler is a 4 bit field: we only support a
24 * couple of types. */
9static int idt_type(u32 lo, u32 hi) 25static int idt_type(u32 lo, u32 hi)
10{ 26{
11 return (hi >> 8) & 0xF; 27 return (hi >> 8) & 0xF;
12} 28}
13 29
30/* An IDT entry can't be used unless the "present" bit is set. */
14static int idt_present(u32 lo, u32 hi) 31static int idt_present(u32 lo, u32 hi)
15{ 32{
16 return (hi & 0x8000); 33 return (hi & 0x8000);
17} 34}
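These three helpers just pick apart the standard i386 IDT gate format; nothing here is lguest-specific. For reference, a sketch of the layout they assume:

/*
 * lo: bits 31..16 = code segment selector, bits 15..0 = handler bits 15..0
 * hi: bits 31..16 = handler bits 31..16, bit 15 = present,
 *     bits 14..13 = DPL, bit 12 = 0, bits 11..8 = type
 *     (0xE interrupt gate, 0xF trap gate), bits 7..0 reserved.
 */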
18 35
36/* We need a helper to "push" a value onto the Guest's stack, since that's a
37 * big part of what delivering an interrupt does. */
19static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) 38static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
20{ 39{
40 /* The stack grows downwards: move the pointer, then write the value. */
21 *gstack -= 4; 41 *gstack -= 4;
22 lgwrite_u32(lg, *gstack, val); 42 lgwrite_u32(lg, *gstack, val);
23} 43}
24 44
45/*H:210 The set_guest_interrupt() routine actually delivers the interrupt or
46 * trap. The mechanics of delivering traps and interrupts to the Guest are the
47 * same, except some traps have an "error code" which gets pushed onto the
48 * stack as well: the caller tells us if this is one.
49 *
50 * "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
51 * interrupt or trap. It's split into two parts for traditional reasons: gcc
52 * on i386 used to be frightened by 64 bit numbers.
53 *
54 * We set up the stack just like the CPU does for a real interrupt, so it's
55 * identical for the Guest (and the standard "iret" instruction will undo
56 * it). */
25static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) 57static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
26{ 58{
27 unsigned long gstack; 59 unsigned long gstack;
28 u32 eflags, ss, irq_enable; 60 u32 eflags, ss, irq_enable;
29 61
30 /* If they want a ring change, we use new stack and push old ss/esp */ 62 /* There are two cases for interrupts: one where the Guest is already
63 * in the kernel, and a more complex one where the Guest is in
64 * userspace. We check the privilege level to find out. */
31 if ((lg->regs->ss&0x3) != GUEST_PL) { 65 if ((lg->regs->ss&0x3) != GUEST_PL) {
66 /* The Guest told us their kernel stack with the SET_STACK
67 * hypercall: both the virtual address and the segment */
32 gstack = guest_pa(lg, lg->esp1); 68 gstack = guest_pa(lg, lg->esp1);
33 ss = lg->ss1; 69 ss = lg->ss1;
70 /* We push the old stack segment and pointer onto the new
71 * stack: when the Guest does an "iret" back from the interrupt
72 * handler the CPU will notice they're dropping privilege
73 * levels and expect these here. */
34 push_guest_stack(lg, &gstack, lg->regs->ss); 74 push_guest_stack(lg, &gstack, lg->regs->ss);
35 push_guest_stack(lg, &gstack, lg->regs->esp); 75 push_guest_stack(lg, &gstack, lg->regs->esp);
36 } else { 76 } else {
77 /* We're staying on the same Guest (kernel) stack. */
37 gstack = guest_pa(lg, lg->regs->esp); 78 gstack = guest_pa(lg, lg->regs->esp);
38 ss = lg->regs->ss; 79 ss = lg->regs->ss;
39 } 80 }
40 81
41 /* We use IF bit in eflags to indicate whether irqs were enabled 82 /* Remember that we never let the Guest actually disable interrupts, so
42 (it's always 1, since irqs are enabled when guest is running). */ 83 * the "Interrupt Flag" bit is always set. We copy that bit from the
84 * Guest's "irq_enabled" field into the eflags word: the Guest copies
85 * it back in "lguest_iret". */
43 eflags = lg->regs->eflags; 86 eflags = lg->regs->eflags;
44 if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0 87 if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0
45 && !(irq_enable & X86_EFLAGS_IF)) 88 && !(irq_enable & X86_EFLAGS_IF))
46 eflags &= ~X86_EFLAGS_IF; 89 eflags &= ~X86_EFLAGS_IF;
47 90
91 /* An interrupt is expected to push three things on the stack: the old
92 * "eflags" word, the old code segment, and the old instruction
93 * pointer. */
48 push_guest_stack(lg, &gstack, eflags); 94 push_guest_stack(lg, &gstack, eflags);
49 push_guest_stack(lg, &gstack, lg->regs->cs); 95 push_guest_stack(lg, &gstack, lg->regs->cs);
50 push_guest_stack(lg, &gstack, lg->regs->eip); 96 push_guest_stack(lg, &gstack, lg->regs->eip);
51 97
98 /* For the six traps which supply an error code, we push that, too. */
52 if (has_err) 99 if (has_err)
53 push_guest_stack(lg, &gstack, lg->regs->errcode); 100 push_guest_stack(lg, &gstack, lg->regs->errcode);
54 101
55 /* Change the real stack so switcher returns to trap handler */ 102 /* Now we've pushed all the old state, we change the stack, the code
103 * segment and the address to execute. */
56 lg->regs->ss = ss; 104 lg->regs->ss = ss;
57 lg->regs->esp = gstack + lg->page_offset; 105 lg->regs->esp = gstack + lg->page_offset;
58 lg->regs->cs = (__KERNEL_CS|GUEST_PL); 106 lg->regs->cs = (__KERNEL_CS|GUEST_PL);
59 lg->regs->eip = idt_address(lo, hi); 107 lg->regs->eip = idt_address(lo, hi);
60 108
61 /* Disable interrupts for an interrupt gate. */ 109 /* There are two kinds of interrupt handlers: 0xE is an "interrupt
110 * gate" which expects interrupts to be disabled on entry. */
62 if (idt_type(lo, hi) == 0xE) 111 if (idt_type(lo, hi) == 0xE)
63 if (put_user(0, &lg->lguest_data->irq_enabled)) 112 if (put_user(0, &lg->lguest_data->irq_enabled))
64 kill_guest(lg, "Disabling interrupts"); 113 kill_guest(lg, "Disabling interrupts");
65} 114}
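It may help to picture the frame this leaves on the Guest's stack — identical to what real hardware would push, which is why the Guest's plain "iret" undoes it (highest address first; a sketch):

/*
 *	[ old ss  ]   \ pushed only on a ring change
 *	[ old esp ]   / (Guest was in userspace)
 *	[ eflags  ]
 *	[ old cs  ]
 *	[ old eip ]
 *	[ errcode ]   - only for the six traps which supply one
 *		      <- new esp points here
 */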
66 115
116/*H:200
117 * Virtual Interrupts.
118 *
119 * maybe_do_interrupt() gets called before every entry to the Guest, to see if
120 * we should divert the Guest to running an interrupt handler. */
67void maybe_do_interrupt(struct lguest *lg) 121void maybe_do_interrupt(struct lguest *lg)
68{ 122{
69 unsigned int irq; 123 unsigned int irq;
70 DECLARE_BITMAP(blk, LGUEST_IRQS); 124 DECLARE_BITMAP(blk, LGUEST_IRQS);
71 struct desc_struct *idt; 125 struct desc_struct *idt;
72 126
127 /* If the Guest hasn't even initialized yet, we can do nothing. */
73 if (!lg->lguest_data) 128 if (!lg->lguest_data)
74 return; 129 return;
75 130
76 /* Mask out any interrupts they have blocked. */ 131 /* Take our "irqs_pending" array and remove any interrupts the Guest
132 * wants blocked: the result ends up in "blk". */
77 if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts, 133 if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts,
78 sizeof(blk))) 134 sizeof(blk)))
79 return; 135 return;
80 136
81 bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS); 137 bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS);
82 138
139 /* Find the first interrupt. */
83 irq = find_first_bit(blk, LGUEST_IRQS); 140 irq = find_first_bit(blk, LGUEST_IRQS);
141 /* None? Nothing to do */
84 if (irq >= LGUEST_IRQS) 142 if (irq >= LGUEST_IRQS)
85 return; 143 return;
86 144
145 /* They may be in the middle of an iret, where they asked us never to
146 * deliver interrupts. */
87 if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end) 147 if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end)
88 return; 148 return;
89 149
90 /* If they're halted, we re-enable interrupts. */ 150 /* If they're halted, interrupts restart them. */
91 if (lg->halted) { 151 if (lg->halted) {
92 /* Re-enable interrupts. */ 152 /* Re-enable interrupts. */
93 if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled)) 153 if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled))
94 kill_guest(lg, "Re-enabling interrupts"); 154 kill_guest(lg, "Re-enabling interrupts");
95 lg->halted = 0; 155 lg->halted = 0;
96 } else { 156 } else {
97 /* Maybe they have interrupts disabled? */ 157 /* Otherwise we check if they have interrupts disabled. */
98 u32 irq_enabled; 158 u32 irq_enabled;
99 if (get_user(irq_enabled, &lg->lguest_data->irq_enabled)) 159 if (get_user(irq_enabled, &lg->lguest_data->irq_enabled))
100 irq_enabled = 0; 160 irq_enabled = 0;
@@ -102,112 +162,211 @@ void maybe_do_interrupt(struct lguest *lg)
102 return; 162 return;
103 } 163 }
104 164
165 /* Look at the IDT entry the Guest gave us for this interrupt. The
166 * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
167 * over them. */
105 idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq]; 168 idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq];
169 /* If they don't have a handler (yet?), we just ignore it */
106 if (idt_present(idt->a, idt->b)) { 170 if (idt_present(idt->a, idt->b)) {
171 /* OK, mark it no longer pending and deliver it. */
107 clear_bit(irq, lg->irqs_pending); 172 clear_bit(irq, lg->irqs_pending);
173 /* set_guest_interrupt() takes the interrupt descriptor and a
174 * flag to say whether this interrupt pushes an error code onto
175 * the stack as well: virtual interrupts never do. */
108 set_guest_interrupt(lg, idt->a, idt->b, 0); 176 set_guest_interrupt(lg, idt->a, idt->b, 0);
109 } 177 }
110} 178}
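Nothing in this hunk sets those pending bits; the sending side does, presumably with the matching set_bit(). A sketch (hedged; the helper name is illustrative):

/* Sketch: raise a virtual interrupt; maybe_do_interrupt() will see it
 * on the next Guest entry. */
static void raise_virtual_irq(struct lguest *lg, unsigned int irq)
{
	set_bit(irq, lg->irqs_pending);
}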
111 179
180/*H:220 Now we've got the routines to deliver interrupts, delivering traps
181 * like page fault is easy. The only trick is that Intel decided that some
182 * traps should have error codes: */
112static int has_err(unsigned int trap) 183static int has_err(unsigned int trap)
113{ 184{
114 return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); 185 return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17);
115} 186}
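For reference, those magic numbers are the Intel-defined exceptions which push an error code (a summary, not part of the patch):

/*  8 #DF double fault		12 #SS stack-segment fault
 * 10 #TS invalid TSS		13 #GP general protection
 * 11 #NP segment not present	14 #PF page fault
 *				17 #AC alignment check */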
116 187
188/* deliver_trap() returns true if it could deliver the trap. */
117int deliver_trap(struct lguest *lg, unsigned int num) 189int deliver_trap(struct lguest *lg, unsigned int num)
118{ 190{
119 u32 lo = lg->idt[num].a, hi = lg->idt[num].b; 191 u32 lo = lg->idt[num].a, hi = lg->idt[num].b;
120 192
193 /* Early on the Guest hasn't set the IDT entries (or maybe it put a
194 * bogus one in): if we fail here, the Guest will be killed. */
121 if (!idt_present(lo, hi)) 195 if (!idt_present(lo, hi))
122 return 0; 196 return 0;
123 set_guest_interrupt(lg, lo, hi, has_err(num)); 197 set_guest_interrupt(lg, lo, hi, has_err(num));
124 return 1; 198 return 1;
125} 199}
126 200
201/*H:250 Here's the hard part: returning to the Host every time a trap happens
202 * and then calling deliver_trap() and re-entering the Guest is slow.
203 * Particularly because Guest userspace system calls are traps (trap 128).
204 *
205 * So we'd like to set up the IDT to tell the CPU to deliver traps directly
206 * into the Guest. This is possible, but the complexities cause the size of
207 * this file to double! However, 150 lines of code is worth writing for taking
208 * system calls down from 1750ns to 270ns. Plus, if lguest didn't do it, all
209 * the other hypervisors would tease it.
210 *
211 * This routine determines if a trap can be delivered directly. */
127static int direct_trap(const struct lguest *lg, 212static int direct_trap(const struct lguest *lg,
128 const struct desc_struct *trap, 213 const struct desc_struct *trap,
129 unsigned int num) 214 unsigned int num)
130{ 215{
131 /* Hardware interrupts don't go to guest (except syscall). */ 216 /* Hardware interrupts don't go to the Guest at all (except system
217 * call). */
132 if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR) 218 if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR)
133 return 0; 219 return 0;
134 220
135 /* We intercept page fault (demand shadow paging & cr2 saving) 221 /* The Host needs to see page faults (for shadow paging and to save the
136 protection fault (in/out emulation) and device not 222 * fault address), general protection faults (in/out emulation) and
137 available (TS handling), and hypercall */ 223 * device not available (TS handling), and of course, the hypercall
224 * trap. */
138 if (num == 14 || num == 13 || num == 7 || num == LGUEST_TRAP_ENTRY) 225 if (num == 14 || num == 13 || num == 7 || num == LGUEST_TRAP_ENTRY)
139 return 0; 226 return 0;
140 227
141 /* Interrupt gates (0xE) or not present (0x0) can't go direct. */ 228 /* Only trap gates (type 15) can go direct to the Guest. Interrupt
229 * gates (type 14) disable interrupts as they are entered, which we
230 * never let the Guest do. Not present entries (type 0x0) also can't
231 * go direct, of course 8) */
142 return idt_type(trap->a, trap->b) == 0xF; 232 return idt_type(trap->a, trap->b) == 0xF;
143} 233}
144 234/*:*/
235
236/*M:005 The Guest has the ability to turn its interrupt gates into trap gates,
237 * if it is careful. The Host will let trap gates go directly to the
238 * Guest, but the Guest needs the interrupts atomically disabled for an
239 * interrupt gate. It can do this by pointing the trap gate at instructions
240 * within noirq_start and noirq_end, where it can safely disable interrupts. */
241
242/*M:006 The Guests do not use the sysenter (fast system call) instruction,
243 * because it's hardcoded to enter privilege level 0 and so can't go direct.
244 * It's about twice as fast as the older "int 0x80" system call, so it might
245 * still be worthwhile to handle it in the Switcher and lcall down to the
246 * Guest. The sysenter semantics are hairy though: search for that keyword in
247 * entry.S :*/
248
249/*H:260 When we make traps go directly into the Guest, we need to make sure
250 * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the
251 * CPU trying to deliver the trap will fault while trying to push the interrupt
252 * words on the stack: this is called a double fault, and it forces us to kill
253 * the Guest.
254 *
255 * Which is deeply unfair, because (literally!) it wasn't the Guest's fault. */
145void pin_stack_pages(struct lguest *lg) 256void pin_stack_pages(struct lguest *lg)
146{ 257{
147 unsigned int i; 258 unsigned int i;
148 259
260 /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or
261 * two pages of stack space. */
149 for (i = 0; i < lg->stack_pages; i++) 262 for (i = 0; i < lg->stack_pages; i++)
263 /* The stack grows *downwards*, hence the subtraction */
150 pin_page(lg, lg->esp1 - i * PAGE_SIZE); 264 pin_page(lg, lg->esp1 - i * PAGE_SIZE);
151} 265}
152 266
267/* Direct traps also mean that we need to know whenever the Guest wants to use
268 * a different kernel stack, so we can change the IDT entries to use that
269 * stack. The IDT entries expect a virtual address, so unlike most addresses
270 * the Guest gives us, the "esp" (stack pointer) value here is virtual, not
271 * physical.
272 *
273 * In Linux each process has its own kernel stack, so this happens a lot: we
274 * change stacks on each context switch. */
153void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages) 275void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages)
154{ 276{
155 /* You cannot have a stack segment with priv level 0. */ 277 /* You are not allowed to have a stack segment with privilege level 0: bad
278 * Guest! */
156 if ((seg & 0x3) != GUEST_PL) 279 if ((seg & 0x3) != GUEST_PL)
157 kill_guest(lg, "bad stack segment %i", seg); 280 kill_guest(lg, "bad stack segment %i", seg);
281 /* We only expect one or two stack pages. */
158 if (pages > 2) 282 if (pages > 2)
159 kill_guest(lg, "bad stack pages %u", pages); 283 kill_guest(lg, "bad stack pages %u", pages);
284 /* Save where the stack is, and how many pages */
160 lg->ss1 = seg; 285 lg->ss1 = seg;
161 lg->esp1 = esp; 286 lg->esp1 = esp;
162 lg->stack_pages = pages; 287 lg->stack_pages = pages;
288 /* Make sure the new stack pages are mapped */
163 pin_stack_pages(lg); 289 pin_stack_pages(lg);
164} 290}
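On the Guest side this pairs with the context-switch path: every task switch reports the new kernel stack. A hedged sketch of what that call looks like (field and constant names follow the i386 conventions, but treat them as illustrative):

/* Sketch: the Guest tells the Host about the new kernel stack. */
static void lguest_load_esp0(struct tss_struct *tss,
			     struct thread_struct *thread)
{
	hcall(LHCALL_SET_STACK, __KERNEL_DS | GUEST_PL, thread->esp0,
	      THREAD_SIZE / PAGE_SIZE);
}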
165 291
166/* Set up trap in IDT. */ 292/* All this reference to mapping stacks leads us neatly into the other complex
293 * part of the Host: page table handling. */
294
295/*H:235 This is the routine which actually checks the Guest's IDT entry and
296 * transfers it into our entry in "struct lguest": */
167static void set_trap(struct lguest *lg, struct desc_struct *trap, 297static void set_trap(struct lguest *lg, struct desc_struct *trap,
168 unsigned int num, u32 lo, u32 hi) 298 unsigned int num, u32 lo, u32 hi)
169{ 299{
170 u8 type = idt_type(lo, hi); 300 u8 type = idt_type(lo, hi);
171 301
302 /* We zero-out a not-present entry */
172 if (!idt_present(lo, hi)) { 303 if (!idt_present(lo, hi)) {
173 trap->a = trap->b = 0; 304 trap->a = trap->b = 0;
174 return; 305 return;
175 } 306 }
176 307
308 /* We only support interrupt and trap gates. */
177 if (type != 0xE && type != 0xF) 309 if (type != 0xE && type != 0xF)
178 kill_guest(lg, "bad IDT type %i", type); 310 kill_guest(lg, "bad IDT type %i", type);
179 311
312 /* We only copy the handler address, present bit, privilege level and
313 * type. The privilege level controls where the trap can be triggered
314 * manually with an "int" instruction. This is usually GUEST_PL,
315 * except for system calls which userspace can use. */
180 trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); 316 trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF);
181 trap->b = (hi&0xFFFFEF00); 317 trap->b = (hi&0xFFFFEF00);
182} 318}
183 319
320/*H:230 While we're here, dealing with delivering traps and interrupts to the
321 * Guest, we might as well complete the picture: how the Guest tells us where
322 * it wants them to go. This would be simple, except making traps fast
323 * requires some tricks.
324 *
325 * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the
326 * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */
184void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) 327void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi)
185{ 328{
186 /* Guest never handles: NMI, doublefault, hypercall, spurious irq. */ 329 /* Guest never handles: NMI, doublefault, spurious interrupt or
330 * hypercall. We ignore its attempts to set them. */
187 if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) 331 if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY)
188 return; 332 return;
189 333
334 /* Mark the IDT as changed: next time the Guest runs we'll know we have
335 * to copy this again. */
190 lg->changed |= CHANGED_IDT; 336 lg->changed |= CHANGED_IDT;
337
338 /* The IDT which we keep in "struct lguest" only contains 32 entries
339 * for the traps and LGUEST_IRQS (32) entries for interrupts. We
340 * ignore attempts to set handlers for higher interrupt numbers, except
341 * for the system call "interrupt" at 128: we have a special IDT entry
342 * for that. */
191 if (num < ARRAY_SIZE(lg->idt)) 343 if (num < ARRAY_SIZE(lg->idt))
192 set_trap(lg, &lg->idt[num], num, lo, hi); 344 set_trap(lg, &lg->idt[num], num, lo, hi);
193 else if (num == SYSCALL_VECTOR) 345 else if (num == SYSCALL_VECTOR)
194 set_trap(lg, &lg->syscall_idt, num, lo, hi); 346 set_trap(lg, &lg->syscall_idt, num, lo, hi);
195} 347}
196 348
349/* The default entry for each interrupt points into the Switcher routines which
350 * simply return to the Host. The run_guest() loop will then call
351 * deliver_trap() to bounce it back into the Guest. */
197static void default_idt_entry(struct desc_struct *idt, 352static void default_idt_entry(struct desc_struct *idt,
198 int trap, 353 int trap,
199 const unsigned long handler) 354 const unsigned long handler)
200{ 355{
356 /* A present interrupt gate. */
201 u32 flags = 0x8e00; 357 u32 flags = 0x8e00;
202 358
203 /* They can't "int" into any of them except hypercall. */ 359 /* Set the privilege level on the entry for the hypercall: this allows
360 * the Guest to use the "int" instruction to trigger it. */
204 if (trap == LGUEST_TRAP_ENTRY) 361 if (trap == LGUEST_TRAP_ENTRY)
205 flags |= (GUEST_PL << 13); 362 flags |= (GUEST_PL << 13);
206 363
364 /* Now pack it into the IDT entry in its weird format. */
207 idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF); 365 idt->a = (LGUEST_CS<<16) | (handler&0x0000FFFF);
208 idt->b = (handler&0xFFFF0000) | flags; 366 idt->b = (handler&0xFFFF0000) | flags;
209} 367}
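Decoding that 0x8e00 against the gate layout sketched earlier:

/* 0x8e00 = bit 15 (present) | DPL 0 (bits 14..13) | type 0xE, a 32-bit
 * interrupt gate. OR-ing in GUEST_PL << 13 raises the DPL so the Guest
 * may "int" into the hypercall entry. */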
210 368
369/* When the Guest first starts, we put default entries into the IDT. */
211void setup_default_idt_entries(struct lguest_ro_state *state, 370void setup_default_idt_entries(struct lguest_ro_state *state,
212 const unsigned long *def) 371 const unsigned long *def)
213{ 372{
@@ -217,19 +376,25 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
217 default_idt_entry(&state->guest_idt[i], i, def[i]); 376 default_idt_entry(&state->guest_idt[i], i, def[i]);
218} 377}
219 378
379/*H:240 We don't use the IDT entries in the "struct lguest" directly, instead
380 * we copy them into the IDT which we've set up for Guests on this CPU, just
381 * before we run the Guest. This routine does that copy. */
220void copy_traps(const struct lguest *lg, struct desc_struct *idt, 382void copy_traps(const struct lguest *lg, struct desc_struct *idt,
221 const unsigned long *def) 383 const unsigned long *def)
222{ 384{
223 unsigned int i; 385 unsigned int i;
224 386
225 /* All hardware interrupts are same whatever the guest: only the 387 /* We can simply copy the direct traps, otherwise we use the default
226 * traps might be different. */ 388 * ones in the Switcher: they will return to the Host. */
227 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) { 389 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) {
228 if (direct_trap(lg, &lg->idt[i], i)) 390 if (direct_trap(lg, &lg->idt[i], i))
229 idt[i] = lg->idt[i]; 391 idt[i] = lg->idt[i];
230 else 392 else
231 default_idt_entry(&idt[i], i, def[i]); 393 default_idt_entry(&idt[i], i, def[i]);
232 } 394 }
395
396 /* Don't forget the system call trap! The IDT entries for other
397 * interrupts never change, so no need to copy them. */
233 i = SYSCALL_VECTOR; 398 i = SYSCALL_VECTOR;
234 if (direct_trap(lg, &lg->syscall_idt, i)) 399 if (direct_trap(lg, &lg->syscall_idt, i))
235 idt[i] = lg->syscall_idt; 400 idt[i] = lg->syscall_idt;
diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c
index c8eb79266991..ea68613b43f6 100644
--- a/drivers/lguest/io.c
+++ b/drivers/lguest/io.c
@@ -1,5 +1,9 @@
1/* Simple I/O model for guests, based on shared memory. 1/*P:300 The I/O mechanism in lguest is simple yet flexible, allowing the Guest
2 * Copyright (C) 2006 Rusty Russell IBM Corporation 2 * to talk to the Launcher or directly to another Guest. It uses familiar
3 * concepts of DMA and interrupts, plus some neat code stolen from
4 * futexes... :*/
5
6/* Copyright (C) 2006 Rusty Russell IBM Corporation
3 * 7 *
4 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 9 * it under the terms of the GNU General Public License as published by
@@ -23,8 +27,36 @@
23#include <linux/uaccess.h> 27#include <linux/uaccess.h>
24#include "lg.h" 28#include "lg.h"
25 29
30/*L:300
31 * I/O
32 *
33 * Getting data in and out of the Guest is quite an art. There are numerous
34 * ways to do it, and they all suck differently. We try to keep things fairly
35 * close to "real" hardware so our Guest's drivers don't look like an alien
36 * visitation in the middle of the Linux code, and yet make sure that Guests
37 * can talk directly to other Guests, not just the Launcher.
38 *
39 * To do this, the Guest gives us a key when it binds or sends DMA buffers.
40 * The key corresponds to a "physical" address inside the Guest (ie. a virtual
41 * address inside the Launcher process). We don't, however, use this key
42 * directly.
43 *
44 * We want Guests which share memory to be able to DMA to each other: two
45 * Launchers can mmap the same file, and then the Guests can communicate.
46 * Fortunately, the futex code provides us with a way to get a "union
47 * futex_key" corresponding to the memory lying at a virtual address: if the
48 * two processes share memory, the "union futex_key" for that memory will match
49 * even if the memory is mapped at different addresses in each. So we always
50 * convert the keys to "union futex_key"s to compare them.
51 *
52 * Before we dive into this though, we need to look at another set of helper
53 * routines used throughout the Host kernel code to access Guest memory.
54 :*/
26static struct list_head dma_hash[61]; 55static struct list_head dma_hash[61];
27 56
57/* An unfortunate side effect of the Linux doubly-linked list implementation is
58 * that there's no good way to statically initialize an array of linked
59 * lists. */
28void lguest_io_init(void) 60void lguest_io_init(void)
29{ 61{
30 unsigned int i; 62 unsigned int i;
@@ -56,6 +88,19 @@ kill:
56 return 0; 88 return 0;
57} 89}
58 90
91/*L:330 This is our hash function, using the wonderful Jenkins hash.
92 *
93 * The futex key is a union with three parts: an unsigned long word, a pointer,
94 * and an int "offset". We could use jhash_2words() which takes three u32s.
95 * (Ok, the hash functions are great: the naming sucks though).
96 *
97 * It's nice to be portable to 64-bit platforms, so we use the more generic
98 * jhash2(), which takes an array of u32, the number of u32s, and an initial
99 * u32 to roll in. This is uglier, but breaks down to almost the same code on
100 * 32-bit platforms like this one.
101 *
102 * We want a position in the array, so we modulo ARRAY_SIZE(dma_hash) (ie. 61).
103 */
59static unsigned int hash(const union futex_key *key) 104static unsigned int hash(const union futex_key *key)
60{ 105{
61 return jhash2((u32*)&key->both.word, 106 return jhash2((u32*)&key->both.word,
@@ -64,6 +109,9 @@ static unsigned int hash(const union futex_key *key)
64 % ARRAY_SIZE(dma_hash); 109 % ARRAY_SIZE(dma_hash);
65} 110}
66 111
112/* This is a convenience routine to compare two keys. It's a much bemoaned C
113 * weakness that it doesn't allow '==' on structures or unions, so we have to
114 * open-code it like this. */
67static inline int key_eq(const union futex_key *a, const union futex_key *b) 115static inline int key_eq(const union futex_key *a, const union futex_key *b)
68{ 116{
69 return (a->both.word == b->both.word 117 return (a->both.word == b->both.word
@@ -71,22 +119,36 @@ static inline int key_eq(const union futex_key *a, const union futex_key *b)
71 && a->both.offset == b->both.offset); 119 && a->both.offset == b->both.offset);
72} 120}
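Together, hash() and key_eq() give the classic chained-hash lookup used when a SEND_DMA has to find a receiver. The pattern is roughly this (a sketch; the function name is illustrative):

/* Sketch: find a live registration for "key" (interrupt == 0 marks a
 * free entry, as unlink_dma() below shows). */
static struct lguest_dma_info *find_dma_info(const union futex_key *key)
{
	struct lguest_dma_info *i;

	list_for_each_entry(i, &dma_hash[hash(key)], list)
		if (i->interrupt && key_eq(key, &i->key))
			return i;
	return NULL;
}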
73 121
74/* Must hold read lock on dmainfo owner's current->mm->mmap_sem */ 122/*L:360 OK, when we need to actually free up a Guest's DMA array we do several
123 * things, so we have a convenient function to do it.
124 *
125 * The caller must hold a read lock on dmainfo owner's current->mm->mmap_sem
126 * for the drop_futex_key_refs(). */
75static void unlink_dma(struct lguest_dma_info *dmainfo) 127static void unlink_dma(struct lguest_dma_info *dmainfo)
76{ 128{
129 /* You locked this too, right? */
77 BUG_ON(!mutex_is_locked(&lguest_lock)); 130 BUG_ON(!mutex_is_locked(&lguest_lock));
131 /* This is how we know that the entry is free. */
78 dmainfo->interrupt = 0; 132 dmainfo->interrupt = 0;
133 /* Remove it from the hash table. */
79 list_del(&dmainfo->list); 134 list_del(&dmainfo->list);
135 /* Drop the references we were holding (to the inode or mm). */
80 drop_futex_key_refs(&dmainfo->key); 136 drop_futex_key_refs(&dmainfo->key);
81} 137}
82 138
139/*L:350 This is the routine which we call when the Guest asks to unregister a
140 * DMA array attached to a given key. Returns true if the array was found. */
83static int unbind_dma(struct lguest *lg, 141static int unbind_dma(struct lguest *lg,
84 const union futex_key *key, 142 const union futex_key *key,
85 unsigned long dmas) 143 unsigned long dmas)
86{ 144{
87 int i, ret = 0; 145 int i, ret = 0;
88 146
147 /* We don't bother with the hash table, just look through all this
148 * Guest's DMA arrays. */
89 for (i = 0; i < LGUEST_MAX_DMA; i++) { 149 for (i = 0; i < LGUEST_MAX_DMA; i++) {
150 /* In theory it could have more than one array on the same key,
151 * or one array on multiple keys, so we check both */
90 if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) { 152 if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) {
91 unlink_dma(&lg->dma[i]); 153 unlink_dma(&lg->dma[i]);
92 ret = 1; 154 ret = 1;
@@ -96,51 +158,91 @@ static int unbind_dma(struct lguest *lg,
96 return ret; 158 return ret;
97} 159}
98 160
161/*L:340 BIND_DMA: this is the hypercall which sets up an array of "struct
162 * lguest_dma" for receiving I/O.
163 *
164 * The Guest wants to bind an array of "struct lguest_dma"s to a particular key
165 * to receive input. This only happens when the Guest is setting up a new
166 * device, so it doesn't have to be very fast.
167 *
168 * It returns 1 on a successful registration (it can fail if we hit the limit
169 * of registrations for this Guest).
170 */
99int bind_dma(struct lguest *lg, 171int bind_dma(struct lguest *lg,
100 unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt) 172 unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt)
101{ 173{
102 unsigned int i; 174 unsigned int i;
103 int ret = 0; 175 int ret = 0;
104 union futex_key key; 176 union futex_key key;
177 /* Futex code needs the mmap_sem. */
105 struct rw_semaphore *fshared = &current->mm->mmap_sem; 178 struct rw_semaphore *fshared = &current->mm->mmap_sem;
106 179
180 /* Invalid interrupt? (We could kill the guest here). */
107 if (interrupt >= LGUEST_IRQS) 181 if (interrupt >= LGUEST_IRQS)
108 return 0; 182 return 0;
109 183
184 /* We need to grab the Big Lguest Lock, because other Guests may be
185 * trying to look through this Guest's DMAs to send something while
186 * we're doing this. */
110 mutex_lock(&lguest_lock); 187 mutex_lock(&lguest_lock);
111 down_read(fshared); 188 down_read(fshared);
112 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { 189 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
113 kill_guest(lg, "bad dma key %#lx", ukey); 190 kill_guest(lg, "bad dma key %#lx", ukey);
114 goto unlock; 191 goto unlock;
115 } 192 }
193
194 /* We want to keep this key valid once we drop mmap_sem, so we have to
195 * hold a reference. */
116 get_futex_key_refs(&key); 196 get_futex_key_refs(&key);
117 197
198 /* If the Guest specified an interrupt of 0, that means they want to
199 * unregister this array of "struct lguest_dma"s. */
118 if (interrupt == 0) 200 if (interrupt == 0)
119 ret = unbind_dma(lg, &key, dmas); 201 ret = unbind_dma(lg, &key, dmas);
120 else { 202 else {
203 /* Look through this Guest's dma array for an unused entry. */
121 for (i = 0; i < LGUEST_MAX_DMA; i++) { 204 for (i = 0; i < LGUEST_MAX_DMA; i++) {
205 /* If the interrupt is non-zero, the entry is already
206 * used. */
122 if (lg->dma[i].interrupt) 207 if (lg->dma[i].interrupt)
123 continue; 208 continue;
124 209
210 /* OK, a free one! Fill in our details. */
125 lg->dma[i].dmas = dmas; 211 lg->dma[i].dmas = dmas;
126 lg->dma[i].num_dmas = numdmas; 212 lg->dma[i].num_dmas = numdmas;
127 lg->dma[i].next_dma = 0; 213 lg->dma[i].next_dma = 0;
128 lg->dma[i].key = key; 214 lg->dma[i].key = key;
129 lg->dma[i].guestid = lg->guestid; 215 lg->dma[i].guestid = lg->guestid;
130 lg->dma[i].interrupt = interrupt; 216 lg->dma[i].interrupt = interrupt;
217
218 /* Now we add it to the hash table: the position
219 * depends on the futex key that we got. */
131 list_add(&lg->dma[i].list, &dma_hash[hash(&key)]); 220 list_add(&lg->dma[i].list, &dma_hash[hash(&key)]);
221 /* Success! */
132 ret = 1; 222 ret = 1;
133 goto unlock; 223 goto unlock;
134 } 224 }
135 } 225 }
226 /* If we didn't find a slot to put the key in, drop the reference
227 * again. */
136 drop_futex_key_refs(&key); 228 drop_futex_key_refs(&key);
137unlock: 229unlock:
230 /* Unlock and out. */
138 up_read(fshared); 231 up_read(fshared);
139 mutex_unlock(&lguest_lock); 232 mutex_unlock(&lguest_lock);
140 return ret; 233 return ret;
141} 234}
142 235
143/* lgread from another guest */ 236/*L:385 Note that our routines to access a different Guest's memory are called
237 * lgread_other() and lgwrite_other(): these names emphasize that they are only
238 * used when the Guest is *not* the current Guest.
239 *
240 * The interface for copying from another process's memory is called
241 * access_process_vm(), with a final argument of 0 for a read, and 1 for a
242 * write.
243 *
244 * We need lgread_other() to read the destination Guest's "struct lguest_dma"
245 * array. */
144static int lgread_other(struct lguest *lg, 246static int lgread_other(struct lguest *lg,
145 void *buf, u32 addr, unsigned bytes) 247 void *buf, u32 addr, unsigned bytes)
146{ 248{
@@ -153,7 +255,8 @@ static int lgread_other(struct lguest *lg,
153 return 1; 255 return 1;
154} 256}
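The elided body of lgread_other() boils down to one access_process_vm() call, per the description above. A sketch, assuming "struct lguest" carries a pointer to the owning task (called tsk here; that field name is an assumption):

/* Sketch: read "bytes" of another Guest's memory into buf.
 * access_process_vm()'s final argument of 0 means "read". */
static int lgread_other_sketch(struct lguest *lg,
			       void *buf, u32 addr, unsigned bytes)
{
	if (!lguest_address_ok(lg, addr, bytes)
	    || access_process_vm(lg->tsk, addr, buf, bytes, 0) != bytes) {
		kill_guest(lg, "bad address in registered DMA struct");
		return 0;
	}
	return 1;
}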
155 257
156/* lgwrite to another guest */ 258/* "lgwrite()" to another Guest: used to update the destination "used_len" once
259 * we've transferred data into the buffer. */
157static int lgwrite_other(struct lguest *lg, u32 addr, 260static int lgwrite_other(struct lguest *lg, u32 addr,
158 const void *buf, unsigned bytes) 261 const void *buf, unsigned bytes)
159{ 262{
@@ -166,6 +269,15 @@ static int lgwrite_other(struct lguest *lg, u32 addr,
166 return 1; 269 return 1;
167} 270}
168 271
272/*L:400 This is the generic engine which copies from a source "struct
273 * lguest_dma" from this Guest into another Guest's "struct lguest_dma". The
274 * destination Guest's pages have already been mapped, as contained in the
275 * pages array.
276 *
277 * If you're wondering if there's a nice "copy from one process to another"
278 * routine, so was I. But Linux isn't really set up to copy between two
279 * unrelated processes, so we have to write it ourselves.
280 */
169static u32 copy_data(struct lguest *srclg, 281static u32 copy_data(struct lguest *srclg,
170 const struct lguest_dma *src, 282 const struct lguest_dma *src,
171 const struct lguest_dma *dst, 283 const struct lguest_dma *dst,
@@ -174,33 +286,59 @@ static u32 copy_data(struct lguest *srclg,
174 unsigned int totlen, si, di, srcoff, dstoff; 286 unsigned int totlen, si, di, srcoff, dstoff;
175 void *maddr = NULL; 287 void *maddr = NULL;
176 288
289 /* We return the total length transferred. */
177 totlen = 0; 290 totlen = 0;
291
292 /* We keep indexes into the source and destination "struct lguest_dma",
293 * and an offset within each region. */
178 si = di = 0; 294 si = di = 0;
179 srcoff = dstoff = 0; 295 srcoff = dstoff = 0;
296
297 /* We loop until the source or destination is exhausted. */
180 while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si] 298 while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si]
181 && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) { 299 && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) {
300 /* We can only transfer the rest of the src buffer, or as much
301 * as will fit into the destination buffer. */
182 u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff); 302 u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff);
183 303
304 /* For systems using "highmem" we need to use kmap() to access
305 * the page we want. We often use the same page over and over,
306 * so rather than kmap() it on every loop, we set the maddr
307 * pointer to NULL when we need to move to the next
308 * destination page. */
184 if (!maddr) 309 if (!maddr)
185 maddr = kmap(pages[di]); 310 maddr = kmap(pages[di]);
186 311
187 /* FIXME: This is not completely portable, since 312 /* Copy directly from (this Guest's) source address to the
188 archs do different things for copy_to_user_page. */ 313 * destination Guest's kmap()ed buffer. Note that maddr points
314 * to the start of the page: we need to add the offset of the
315 * destination address and offset within the buffer. */
316
317 /* FIXME: This is not completely portable. I looked at
 318 * copy_to_user_page(), and some archs seem to need special
319 * flushes. x86 is fine. */
189 if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, 320 if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE,
190 (void __user *)src->addr[si], len) != 0) { 321 (void __user *)src->addr[si], len) != 0) {
322 /* If a copy failed, it's the source's fault. */
191 kill_guest(srclg, "bad address in sending DMA"); 323 kill_guest(srclg, "bad address in sending DMA");
192 totlen = 0; 324 totlen = 0;
193 break; 325 break;
194 } 326 }
195 327
328 /* Increment the total and src & dst offsets */
196 totlen += len; 329 totlen += len;
197 srcoff += len; 330 srcoff += len;
198 dstoff += len; 331 dstoff += len;
332
333 /* Presumably we reached the end of the src or dest buffers: */
199 if (srcoff == src->len[si]) { 334 if (srcoff == src->len[si]) {
335 /* Move to the next buffer at offset 0 */
200 si++; 336 si++;
201 srcoff = 0; 337 srcoff = 0;
202 } 338 }
203 if (dstoff == dst->len[di]) { 339 if (dstoff == dst->len[di]) {
340 /* We need to unmap that destination page and reset
341 * maddr ready for the next one. */
204 kunmap(pages[di]); 342 kunmap(pages[di]);
205 maddr = NULL; 343 maddr = NULL;
206 di++; 344 di++;
@@ -208,13 +346,15 @@ static u32 copy_data(struct lguest *srclg,
208 } 346 }
209 } 347 }
210 348
349 /* If we still had a page mapped at the end, unmap now. */
211 if (maddr) 350 if (maddr)
212 kunmap(pages[di]); 351 kunmap(pages[di]);
213 352
214 return totlen; 353 return totlen;
215} 354}
216 355
217/* Src is us, ie. current. */ 356/*L:390 This is how we transfer a "struct lguest_dma" from the source Guest
357 * (the current Guest which called SEND_DMA) to another Guest. */
218static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, 358static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src,
219 struct lguest *dstlg, const struct lguest_dma *dst) 359 struct lguest *dstlg, const struct lguest_dma *dst)
220{ 360{
@@ -222,23 +362,31 @@ static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src,
222 u32 ret; 362 u32 ret;
223 struct page *pages[LGUEST_MAX_DMA_SECTIONS]; 363 struct page *pages[LGUEST_MAX_DMA_SECTIONS];
224 364
365 /* We check that both source and destination "struct lguest_dma"s are
366 * within the bounds of the source and destination Guests */
225 if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src)) 367 if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src))
226 return 0; 368 return 0;
227 369
 228 /* First get the destination pages */ 370 /* We need to map the pages which correspond to each part of
 371 * the destination buffer. */
229 for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { 372 for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
230 if (dst->len[i] == 0) 373 if (dst->len[i] == 0)
231 break; 374 break;
375 /* get_user_pages() is a complicated function, especially since
376 * we only want a single page. But it works, and returns the
377 * number of pages. Note that we're holding the destination's
378 * mmap_sem, as get_user_pages() requires. */
232 if (get_user_pages(dstlg->tsk, dstlg->mm, 379 if (get_user_pages(dstlg->tsk, dstlg->mm,
233 dst->addr[i], 1, 1, 1, pages+i, NULL) 380 dst->addr[i], 1, 1, 1, pages+i, NULL)
234 != 1) { 381 != 1) {
382 /* This means the destination gave us a bogus buffer */
235 kill_guest(dstlg, "Error mapping DMA pages"); 383 kill_guest(dstlg, "Error mapping DMA pages");
236 ret = 0; 384 ret = 0;
237 goto drop_pages; 385 goto drop_pages;
238 } 386 }
239 } 387 }
240 388
241 /* Now copy until we run out of src or dst. */ 389 /* Now copy the data until we run out of src or dst. */
242 ret = copy_data(srclg, src, dst, pages); 390 ret = copy_data(srclg, src, dst, pages);
243 391
244drop_pages: 392drop_pages:
@@ -247,6 +395,11 @@ drop_pages:
247 return ret; 395 return ret;
248} 396}
249 397
398/*L:380 Transferring data from one Guest to another is not as simple as I'd
 399 * like. Once we've found the "struct lguest_dma_info" bound to the same
 400 * address as the send, we need to copy into it.
401 *
402 * This function returns true if the destination array was empty. */
250static int dma_transfer(struct lguest *srclg, 403static int dma_transfer(struct lguest *srclg,
251 unsigned long udma, 404 unsigned long udma,
252 struct lguest_dma_info *dst) 405 struct lguest_dma_info *dst)
@@ -255,15 +408,23 @@ static int dma_transfer(struct lguest *srclg,
255 struct lguest *dstlg; 408 struct lguest *dstlg;
256 u32 i, dma = 0; 409 u32 i, dma = 0;
257 410
411 /* From the "struct lguest_dma_info" we found in the hash, grab the
412 * Guest. */
258 dstlg = &lguests[dst->guestid]; 413 dstlg = &lguests[dst->guestid];
259 /* Get our dma list. */ 414 /* Read in the source "struct lguest_dma" handed to SEND_DMA. */
260 lgread(srclg, &src_dma, udma, sizeof(src_dma)); 415 lgread(srclg, &src_dma, udma, sizeof(src_dma));
261 416
262 /* We can't deadlock against them dmaing to us, because this 417 /* We need the destination's mmap_sem, and we already hold the source's
263 * is all under the lguest_lock. */ 418 * mmap_sem for the futex key lookup. Normally this would suggest that
419 * we could deadlock if the destination Guest was trying to send to
420 * this source Guest at the same time, which is another reason that all
421 * I/O is done under the big lguest_lock. */
264 down_read(&dstlg->mm->mmap_sem); 422 down_read(&dstlg->mm->mmap_sem);
265 423
424 /* Look through the destination DMA array for an available buffer. */
266 for (i = 0; i < dst->num_dmas; i++) { 425 for (i = 0; i < dst->num_dmas; i++) {
426 /* We keep a "next_dma" pointer which often helps us avoid
427 * looking at lots of previously-filled entries. */
267 dma = (dst->next_dma + i) % dst->num_dmas; 428 dma = (dst->next_dma + i) % dst->num_dmas;
268 if (!lgread_other(dstlg, &dst_dma, 429 if (!lgread_other(dstlg, &dst_dma,
269 dst->dmas + dma * sizeof(struct lguest_dma), 430 dst->dmas + dma * sizeof(struct lguest_dma),
@@ -273,30 +434,46 @@ static int dma_transfer(struct lguest *srclg,
273 if (!dst_dma.used_len) 434 if (!dst_dma.used_len)
274 break; 435 break;
275 } 436 }
437
438 /* If we found a buffer, we do the actual data copy. */
276 if (i != dst->num_dmas) { 439 if (i != dst->num_dmas) {
277 unsigned long used_lenp; 440 unsigned long used_lenp;
278 unsigned int ret; 441 unsigned int ret;
279 442
280 ret = do_dma(srclg, &src_dma, dstlg, &dst_dma); 443 ret = do_dma(srclg, &src_dma, dstlg, &dst_dma);
281 /* Put used length in src. */ 444 /* Put used length in the source "struct lguest_dma"'s used_len
445 * field. It's a little tricky to figure out where that is,
446 * though. */
282 lgwrite_u32(srclg, 447 lgwrite_u32(srclg,
283 udma+offsetof(struct lguest_dma, used_len), ret); 448 udma+offsetof(struct lguest_dma, used_len), ret);
 449 /* Transferring 0 bytes is OK if the source buffer was empty. */
284 if (ret == 0 && src_dma.len[0] != 0) 450 if (ret == 0 && src_dma.len[0] != 0)
285 goto fail; 451 goto fail;
286 452
287 /* Make sure destination sees contents before length. */ 453 /* The destination Guest might be running on a different CPU:
454 * we have to make sure that it will see the "used_len" field
455 * change to non-zero *after* it sees the data we copied into
456 * the buffer. Hence a write memory barrier. */
288 wmb(); 457 wmb();
 458 /* Figuring out where the destination's used_len field for this
 459 * "struct lguest_dma" lives in the array is also a little ugly. */
289 used_lenp = dst->dmas 460 used_lenp = dst->dmas
290 + dma * sizeof(struct lguest_dma) 461 + dma * sizeof(struct lguest_dma)
291 + offsetof(struct lguest_dma, used_len); 462 + offsetof(struct lguest_dma, used_len);
292 lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret)); 463 lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret));
464 /* Move the cursor for next time. */
293 dst->next_dma++; 465 dst->next_dma++;
294 } 466 }
295 up_read(&dstlg->mm->mmap_sem); 467 up_read(&dstlg->mm->mmap_sem);
296 468
297 /* Do this last so dst doesn't simply sleep on lock. */ 469 /* We trigger the destination interrupt, even if the destination was
470 * empty and we didn't transfer anything: this gives them a chance to
471 * wake up and refill. */
298 set_bit(dst->interrupt, dstlg->irqs_pending); 472 set_bit(dst->interrupt, dstlg->irqs_pending);
473 /* Wake up the destination process. */
299 wake_up_process(dstlg->tsk); 474 wake_up_process(dstlg->tsk);
 475 /* If we passed the last "struct lguest_dma", the receiver had no
476 * buffers left. */
300 return i == dst->num_dmas; 477 return i == dst->num_dmas;
301 478
302fail: 479fail:
@@ -304,6 +481,8 @@ fail:
304 return 0; 481 return 0;
305} 482}
306 483
 484/*L:370 This is the counterpart to the BIND_DMA hypercall: the SEND_DMA
485 * hypercall. We find out who's listening, and send to them. */
307void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) 486void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma)
308{ 487{
309 union futex_key key; 488 union futex_key key;
@@ -313,31 +492,43 @@ void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma)
313again: 492again:
314 mutex_lock(&lguest_lock); 493 mutex_lock(&lguest_lock);
315 down_read(fshared); 494 down_read(fshared);
495 /* Get the futex key for the key the Guest gave us */
316 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { 496 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
317 kill_guest(lg, "bad sending DMA key"); 497 kill_guest(lg, "bad sending DMA key");
318 goto unlock; 498 goto unlock;
319 } 499 }
320 /* Shared mapping? Look for other guests... */ 500 /* Since the key must be a multiple of 4, the futex key uses the lower
501 * bit of the "offset" field (which would always be 0) to indicate a
502 * mapping which is shared with other processes (ie. Guests). */
321 if (key.shared.offset & 1) { 503 if (key.shared.offset & 1) {
322 struct lguest_dma_info *i; 504 struct lguest_dma_info *i;
505 /* Look through the hash for other Guests. */
323 list_for_each_entry(i, &dma_hash[hash(&key)], list) { 506 list_for_each_entry(i, &dma_hash[hash(&key)], list) {
507 /* Don't send to ourselves. */
324 if (i->guestid == lg->guestid) 508 if (i->guestid == lg->guestid)
325 continue; 509 continue;
326 if (!key_eq(&key, &i->key)) 510 if (!key_eq(&key, &i->key))
327 continue; 511 continue;
328 512
513 /* If dma_transfer() tells us the destination has no
514 * available buffers, we increment "empty". */
329 empty += dma_transfer(lg, udma, i); 515 empty += dma_transfer(lg, udma, i);
330 break; 516 break;
331 } 517 }
518 /* If the destination is empty, we release our locks and
519 * give the destination Guest a brief chance to restock. */
332 if (empty == 1) { 520 if (empty == 1) {
333 /* Give any recipients one chance to restock. */ 521 /* Give any recipients one chance to restock. */
334 up_read(&current->mm->mmap_sem); 522 up_read(&current->mm->mmap_sem);
335 mutex_unlock(&lguest_lock); 523 mutex_unlock(&lguest_lock);
524 /* Next time, we won't try again. */
336 empty++; 525 empty++;
337 goto again; 526 goto again;
338 } 527 }
339 } else { 528 } else {
340 /* Private mapping: tell our userspace. */ 529 /* Private mapping: Guest is sending to its Launcher. We set
530 * the "dma_is_pending" flag so that the main loop will exit
531 * and the Launcher's read() from /dev/lguest will return. */
341 lg->dma_is_pending = 1; 532 lg->dma_is_pending = 1;
342 lg->pending_dma = udma; 533 lg->pending_dma = udma;
343 lg->pending_key = ukey; 534 lg->pending_key = ukey;
@@ -346,6 +537,7 @@ unlock:
346 up_read(fshared); 537 up_read(fshared);
347 mutex_unlock(&lguest_lock); 538 mutex_unlock(&lguest_lock);
348} 539}
540/*:*/
349 541
350void release_all_dma(struct lguest *lg) 542void release_all_dma(struct lguest *lg)
351{ 543{
@@ -361,7 +553,18 @@ void release_all_dma(struct lguest *lg)
361 up_read(&lg->mm->mmap_sem); 553 up_read(&lg->mm->mmap_sem);
362} 554}
363 555
364/* Userspace wants a dma buffer from this guest. */ 556/*M:007 We only return a single DMA buffer to the Launcher, but it would be
557 * more efficient to return a pointer to the entire array of DMA buffers, which
 558 * it could cache and choose from whenever it wants.
559 *
560 * Currently the Launcher uses a write to /dev/lguest, and the return value is
561 * the address of the DMA structure with the interrupt number placed in
 562 * dma->used_len. If we wanted to return the entire array, we would need to return
563 * the address, array size and interrupt number: this seems to require an
564 * ioctl(). :*/
565
566/*L:320 This routine looks for a DMA buffer registered by the Guest on the
567 * given key (using the BIND_DMA hypercall). */
365unsigned long get_dma_buffer(struct lguest *lg, 568unsigned long get_dma_buffer(struct lguest *lg,
366 unsigned long ukey, unsigned long *interrupt) 569 unsigned long ukey, unsigned long *interrupt)
367{ 570{
@@ -370,15 +573,29 @@ unsigned long get_dma_buffer(struct lguest *lg,
370 struct lguest_dma_info *i; 573 struct lguest_dma_info *i;
371 struct rw_semaphore *fshared = &current->mm->mmap_sem; 574 struct rw_semaphore *fshared = &current->mm->mmap_sem;
372 575
576 /* Take the Big Lguest Lock to stop other Guests sending this Guest DMA
577 * at the same time. */
373 mutex_lock(&lguest_lock); 578 mutex_lock(&lguest_lock);
 579 /* To match keys between Guests sharing the same underlying memory we steal
580 * code from the futex infrastructure. This requires that we hold the
581 * "mmap_sem" for our process (the Launcher), and pass it to the futex
582 * code. */
374 down_read(fshared); 583 down_read(fshared);
584
585 /* This can fail if it's not a valid address, or if the address is not
 586 * divisible by 4 (the futex code needs that; we don't really). */
375 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { 587 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
376 kill_guest(lg, "bad registered DMA buffer"); 588 kill_guest(lg, "bad registered DMA buffer");
377 goto unlock; 589 goto unlock;
378 } 590 }
591 /* Search the hash table for matching entries (the Launcher can only
592 * send to its own Guest for the moment, so the entry must be for this
593 * Guest) */
379 list_for_each_entry(i, &dma_hash[hash(&key)], list) { 594 list_for_each_entry(i, &dma_hash[hash(&key)], list) {
380 if (key_eq(&key, &i->key) && i->guestid == lg->guestid) { 595 if (key_eq(&key, &i->key) && i->guestid == lg->guestid) {
381 unsigned int j; 596 unsigned int j;
597 /* Look through the registered DMA array for an
598 * available buffer. */
382 for (j = 0; j < i->num_dmas; j++) { 599 for (j = 0; j < i->num_dmas; j++) {
383 struct lguest_dma dma; 600 struct lguest_dma dma;
384 601
@@ -387,6 +604,8 @@ unsigned long get_dma_buffer(struct lguest *lg,
387 if (dma.used_len == 0) 604 if (dma.used_len == 0)
388 break; 605 break;
389 } 606 }
607 /* Store the interrupt the Guest wants when the buffer
608 * is used. */
390 *interrupt = i->interrupt; 609 *interrupt = i->interrupt;
391 break; 610 break;
392 } 611 }
@@ -396,4 +615,12 @@ unlock:
396 mutex_unlock(&lguest_lock); 615 mutex_unlock(&lguest_lock);
397 return ret; 616 return ret;
398} 617}
618/*:*/
399 619
 620/*L:410 That completes the Launcher. Not only have we now finished
621 * the longest chapter in our journey, but this also means we are over halfway
622 * through!
623 *
624 * Enough prevaricating around the bush: it is time for us to dive into the
625 * core of the Host, in "make Host".
626 */
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 3e2ddfbc816e..269116eee85f 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -58,9 +58,18 @@ struct lguest_dma_info
58 u8 interrupt; /* 0 when not registered */ 58 u8 interrupt; /* 0 when not registered */
59}; 59};
60 60
61/* We have separate types for the guest's ptes & pgds and the shadow ptes & 61/*H:310 The page-table code owes a great debt of gratitude to Andi Kleen. He
62 * pgds. Since this host might use three-level pagetables and the guest and 62 * reviewed the original code which used "u32" for all page table entries, and
63 * shadow pagetables don't, we can't use the normal pte_t/pgd_t. */ 63 * insisted that it would be far clearer with explicit typing. I thought it
64 * was overkill, but he was right: it is much clearer than it was before.
65 *
66 * We have separate types for the Guest's ptes & pgds and the shadow ptes &
67 * pgds. There's already a Linux type for these (pte_t and pgd_t) but they
68 * change depending on kernel config options (PAE). */
69
70/* Each entry is identical: lower 12 bits of flags and upper 20 bits for the
71 * "page frame number" (0 == first physical page, etc). They are different
72 * types so the compiler will warn us if we mix them improperly. */
64typedef union { 73typedef union {
65 struct { unsigned flags:12, pfn:20; }; 74 struct { unsigned flags:12, pfn:20; };
66 struct { unsigned long val; } raw; 75 struct { unsigned long val; } raw;
@@ -77,8 +86,12 @@ typedef union {
77 struct { unsigned flags:12, pfn:20; }; 86 struct { unsigned flags:12, pfn:20; };
78 struct { unsigned long val; } raw; 87 struct { unsigned long val; } raw;
79} gpte_t; 88} gpte_t;
89
90/* We have two convenient macros to convert a "raw" value as handed to us by
91 * the Guest into the correct Guest PGD or PTE type. */
80#define mkgpte(_val) ((gpte_t){.raw.val = _val}) 92#define mkgpte(_val) ((gpte_t){.raw.val = _val})
81#define mkgpgd(_val) ((gpgd_t){.raw.val = _val}) 93#define mkgpgd(_val) ((gpgd_t){.raw.val = _val})
94/*:*/
82 95
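For instance, a raw word handed to us by the Guest can be wrapped and picked apart like so (a sketch; "raw" and "pfn" are hypothetical locals, and _PAGE_PRESENT is the standard present-bit flag):

	gpte_t gpte = mkgpte(raw);	/* wrap the raw word from the Guest */
	if (gpte.flags & _PAGE_PRESENT)	/* low 12 bits: the flags */
		pfn = gpte.pfn;		/* high 20 bits: the page frame number */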
83struct pgdir 96struct pgdir
84{ 97{
@@ -244,6 +257,30 @@ unsigned long get_dma_buffer(struct lguest *lg, unsigned long key,
244/* hypercalls.c: */ 257/* hypercalls.c: */
245void do_hypercalls(struct lguest *lg); 258void do_hypercalls(struct lguest *lg);
246 259
260/*L:035
261 * Let's step aside for the moment, to study one important routine that's used
262 * widely in the Host code.
263 *
264 * There are many cases where the Guest does something invalid, like pass crap
265 * to a hypercall. Since only the Guest kernel can make hypercalls, it's quite
266 * acceptable to simply terminate the Guest and give the Launcher a nicely
267 * formatted reason. It's also simpler for the Guest itself, which doesn't
268 * need to check most hypercalls for "success"; if you're still running, it
269 * succeeded.
270 *
271 * Once this is called, the Guest will never run again, so most Host code can
272 * call this then continue as if nothing had happened. This means many
273 * functions don't have to explicitly return an error code, which keeps the
274 * code simple.
275 *
276 * It also means that this can be called more than once: only the first one is
277 * remembered. The only trick is that we still need to kill the Guest even if
278 * we can't allocate memory to store the reason. Linux has a neat way of
279 * packing error codes into invalid pointers, so we use that here.
280 *
281 * Like any macro which uses an "if", it is safely wrapped in a run-once "do {
282 * } while(0)".
283 */
247#define kill_guest(lg, fmt...) \ 284#define kill_guest(lg, fmt...) \
248do { \ 285do { \
249 if (!(lg)->dead) { \ 286 if (!(lg)->dead) { \
@@ -252,6 +289,7 @@ do { \
252 (lg)->dead = ERR_PTR(-ENOMEM); \ 289 (lg)->dead = ERR_PTR(-ENOMEM); \
253 } \ 290 } \
254} while(0) 291} while(0)
292/* (End of aside) :*/
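That "neat way of packing error codes into invalid pointers" is the ERR_PTR() family from <linux/err.h>, which the macro above uses for its -ENOMEM fallback. A quick sketch of how such a packed pointer behaves:

	void *reason = ERR_PTR(-ENOMEM); /* an invalid pointer encoding -ENOMEM */

	if (IS_ERR(reason))		 /* true for values in the errno range */
		printk("Guest died: error %ld\n", PTR_ERR(reason)); /* -> -ENOMEM */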
255 293
256static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) 294static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr)
257{ 295{
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
index 18dade06d4a9..6dfe568523a2 100644
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -1,6 +1,32 @@
1/* 1/*P:010
2 * Lguest specific paravirt-ops implementation 2 * A hypervisor allows multiple Operating Systems to run on a single machine.
3 * To quote David Wheeler: "Any problem in computer science can be solved with
4 * another layer of indirection."
5 *
6 * We keep things simple in two ways. First, we start with a normal Linux
7 * kernel and insert a module (lg.ko) which allows us to run other Linux
8 * kernels the same way we'd run processes. We call the first kernel the Host,
9 * and the others the Guests. The program which sets up and configures Guests
10 * (such as the example in Documentation/lguest/lguest.c) is called the
11 * Launcher.
12 *
13 * Secondly, we only run specially modified Guests, not normal kernels. When
14 * you set CONFIG_LGUEST to 'y' or 'm', this automatically sets
15 * CONFIG_LGUEST_GUEST=y, which compiles this file into the kernel so it knows
16 * how to be a Guest. This means that you can use the same kernel you boot
17 * normally (ie. as a Host) as a Guest.
3 * 18 *
19 * These Guests know that they cannot do privileged operations, such as disable
20 * interrupts, and that they have to ask the Host to do such things explicitly.
21 * This file consists of all the replacements for such low-level native
22 * hardware operations: these special Guest versions call the Host.
23 *
24 * So how does the kernel know it's a Guest? The Guest starts at a special
25 * entry point marked with a magic string, which sets up a few things then
26 * calls here. We replace the native functions in "struct paravirt_ops"
27 * with our Guest versions, then boot like normal. :*/
28
29/*
4 * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. 30 * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
5 * 31 *
6 * This program is free software; you can redistribute it and/or modify 32 * This program is free software; you can redistribute it and/or modify
@@ -40,6 +66,12 @@
40#include <asm/mce.h> 66#include <asm/mce.h>
41#include <asm/io.h> 67#include <asm/io.h>
42 68
69/*G:010 Welcome to the Guest!
70 *
71 * The Guest in our tale is a simple creature: identical to the Host but
72 * behaving in simplified but equivalent ways. In particular, the Guest is the
73 * same kernel as the Host (or at least, built from the same source code). :*/
74
43/* Declarations for definitions in lguest_guest.S */ 75/* Declarations for definitions in lguest_guest.S */
44extern char lguest_noirq_start[], lguest_noirq_end[]; 76extern char lguest_noirq_start[], lguest_noirq_end[];
45extern const char lgstart_cli[], lgend_cli[]; 77extern const char lgstart_cli[], lgend_cli[];
@@ -58,7 +90,26 @@ struct lguest_data lguest_data = {
58struct lguest_device_desc *lguest_devices; 90struct lguest_device_desc *lguest_devices;
59static cycle_t clock_base; 91static cycle_t clock_base;
60 92
61static enum paravirt_lazy_mode lazy_mode; 93/*G:035 Notice the lazy_hcall() above, rather than hcall(). This is our first
94 * real optimization trick!
95 *
96 * When lazy_mode is set, it means we're allowed to defer all hypercalls and do
97 * them as a batch when lazy_mode is eventually turned off. Because hypercalls
98 * are reasonably expensive, batching them up makes sense. For example, a
99 * large mmap might update dozens of page table entries: that code calls
100 * lguest_lazy_mode(PARAVIRT_LAZY_MMU), does the dozen updates, then calls
101 * lguest_lazy_mode(PARAVIRT_LAZY_NONE).
102 *
103 * So, when we're in lazy mode, we call async_hypercall() to store the call for
104 * future processing. When lazy mode is turned off we issue a hypercall to
105 * flush the stored calls.
106 *
107 * There's also a hack where "mode" is set to "PARAVIRT_LAZY_FLUSH" which
108 * indicates we're to flush any outstanding calls immediately. This is used
109 * when an interrupt handler does a kmap_atomic(): the page table changes must
110 * happen immediately even if we're in the middle of a batch. Usually we're
111 * not, though, so there's nothing to do. */
112static enum paravirt_lazy_mode lazy_mode; /* Note: not SMP-safe! */
62static void lguest_lazy_mode(enum paravirt_lazy_mode mode) 113static void lguest_lazy_mode(enum paravirt_lazy_mode mode)
63{ 114{
64 if (mode == PARAVIRT_LAZY_FLUSH) { 115 if (mode == PARAVIRT_LAZY_FLUSH) {
@@ -82,6 +133,16 @@ static void lazy_hcall(unsigned long call,
82 async_hcall(call, arg1, arg2, arg3); 133 async_hcall(call, arg1, arg2, arg3);
83} 134}
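To make the batching concrete, here is the pattern the comment above describes, as a caller would write it (a sketch only; the pte-update loop is invented for illustration):

	lguest_lazy_mode(PARAVIRT_LAZY_MMU);	/* start deferring hypercalls */
	for (i = 0; i < n; i++)			/* each queues one lazy_hcall() */
		set_pte_at(mm, addr + i*PAGE_SIZE, ptep + i, ptes[i]);
	lguest_lazy_mode(PARAVIRT_LAZY_NONE);	/* one hypercall flushes the lot */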
84 135
136/* async_hcall() is pretty simple: I'm quite proud of it really. We have a
 137 * ring buffer of stored hypercalls which the Host will run through next time we
138 * do a normal hypercall. Each entry in the ring has 4 slots for the hypercall
139 * arguments, and a "hcall_status" word which is 0 if the call is ready to go,
140 * and 255 once the Host has finished with it.
141 *
142 * If we come around to a slot which hasn't been finished, then the table is
143 * full and we just make the hypercall directly. This has the nice side
144 * effect of causing the Host to run all the stored calls in the ring buffer
145 * which empties it for next time! */
85void async_hcall(unsigned long call, 146void async_hcall(unsigned long call,
86 unsigned long arg1, unsigned long arg2, unsigned long arg3) 147 unsigned long arg1, unsigned long arg2, unsigned long arg3)
87{ 148{
@@ -89,6 +150,9 @@ void async_hcall(unsigned long call,
89 static unsigned int next_call; 150 static unsigned int next_call;
90 unsigned long flags; 151 unsigned long flags;
91 152
153 /* Disable interrupts if not already disabled: we don't want an
154 * interrupt handler making a hypercall while we're already doing
155 * one! */
92 local_irq_save(flags); 156 local_irq_save(flags);
93 if (lguest_data.hcall_status[next_call] != 0xFF) { 157 if (lguest_data.hcall_status[next_call] != 0xFF) {
94 /* Table full, so do normal hcall which will flush table. */ 158 /* Table full, so do normal hcall which will flush table. */
@@ -98,7 +162,7 @@ void async_hcall(unsigned long call,
98 lguest_data.hcalls[next_call].edx = arg1; 162 lguest_data.hcalls[next_call].edx = arg1;
99 lguest_data.hcalls[next_call].ebx = arg2; 163 lguest_data.hcalls[next_call].ebx = arg2;
100 lguest_data.hcalls[next_call].ecx = arg3; 164 lguest_data.hcalls[next_call].ecx = arg3;
101 /* Make sure host sees arguments before "valid" flag. */ 165 /* Arguments must all be written before we mark it to go */
102 wmb(); 166 wmb();
103 lguest_data.hcall_status[next_call] = 0; 167 lguest_data.hcall_status[next_call] = 0;
104 if (++next_call == LHCALL_RING_SIZE) 168 if (++next_call == LHCALL_RING_SIZE)
@@ -106,9 +170,14 @@ void async_hcall(unsigned long call,
106 } 170 }
107 local_irq_restore(flags); 171 local_irq_restore(flags);
108} 172}
173/*:*/
109 174
175/* Wrappers for the SEND_DMA and BIND_DMA hypercalls. This is mainly because
176 * Jeff Garzik complained that __pa() should never appear in drivers, and this
177 * helps remove most of them. But also, it wraps some ugliness. */
110void lguest_send_dma(unsigned long key, struct lguest_dma *dma) 178void lguest_send_dma(unsigned long key, struct lguest_dma *dma)
111{ 179{
180 /* The hcall might not write this if something goes wrong */
112 dma->used_len = 0; 181 dma->used_len = 0;
113 hcall(LHCALL_SEND_DMA, key, __pa(dma), 0); 182 hcall(LHCALL_SEND_DMA, key, __pa(dma), 0);
114} 183}
@@ -116,11 +185,16 @@ void lguest_send_dma(unsigned long key, struct lguest_dma *dma)
116int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas, 185int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas,
117 unsigned int num, u8 irq) 186 unsigned int num, u8 irq)
118{ 187{
188 /* This is the only hypercall which actually wants 5 arguments, and we
189 * only support 4. Fortunately the interrupt number is always less
190 * than 256, so we can pack it with the number of dmas in the final
191 * argument. */
119 if (!hcall(LHCALL_BIND_DMA, key, __pa(dmas), (num << 8) | irq)) 192 if (!hcall(LHCALL_BIND_DMA, key, __pa(dmas), (num << 8) | irq))
120 return -ENOMEM; 193 return -ENOMEM;
121 return 0; 194 return 0;
122} 195}
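On the Host side the two halves are simply split apart again; a sketch of the unpacking, with "arg3" standing in for the third hypercall argument:

	unsigned int num = arg3 >> 8;	/* how many "struct lguest_dma"s */
	u8 irq = arg3 & 0xFF;		/* interrupt to fire as buffers are used */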
123 196
197/* Unbinding is the same hypercall as binding, but with 0 num & irq. */
124void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas) 198void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas)
125{ 199{
126 hcall(LHCALL_BIND_DMA, key, __pa(dmas), 0); 200 hcall(LHCALL_BIND_DMA, key, __pa(dmas), 0);
@@ -138,35 +212,73 @@ void lguest_unmap(void *addr)
138 iounmap((__force void __iomem *)addr); 212 iounmap((__force void __iomem *)addr);
139} 213}
140 214
215/*G:033
216 * Here are our first native-instruction replacements: four functions for
217 * interrupt control.
218 *
219 * The simplest way of implementing these would be to have "turn interrupts
220 * off" and "turn interrupts on" hypercalls. Unfortunately, this is too slow:
221 * these are by far the most commonly called functions of those we override.
222 *
223 * So instead we keep an "irq_enabled" field inside our "struct lguest_data",
224 * which the Guest can update with a single instruction. The Host knows to
225 * check there when it wants to deliver an interrupt.
226 */
227
228/* save_flags() is expected to return the processor state (ie. "eflags"). The
 229 * eflags word contains all kinds of stuff, but in practice Linux only cares
230 * about the interrupt flag. Our "save_flags()" just returns that. */
141static unsigned long save_fl(void) 231static unsigned long save_fl(void)
142{ 232{
143 return lguest_data.irq_enabled; 233 return lguest_data.irq_enabled;
144} 234}
145 235
236/* "restore_flags" just sets the flags back to the value given. */
146static void restore_fl(unsigned long flags) 237static void restore_fl(unsigned long flags)
147{ 238{
148 /* FIXME: Check if interrupt pending... */
149 lguest_data.irq_enabled = flags; 239 lguest_data.irq_enabled = flags;
150} 240}
151 241
242/* Interrupts go off... */
152static void irq_disable(void) 243static void irq_disable(void)
153{ 244{
154 lguest_data.irq_enabled = 0; 245 lguest_data.irq_enabled = 0;
155} 246}
156 247
248/* Interrupts go on... */
157static void irq_enable(void) 249static void irq_enable(void)
158{ 250{
159 /* FIXME: Check if interrupt pending... */
160 lguest_data.irq_enabled = X86_EFLAGS_IF; 251 lguest_data.irq_enabled = X86_EFLAGS_IF;
161} 252}
162 253/*:*/
254/*M:003 Note that we don't check for outstanding interrupts when we re-enable
255 * them (or when we unmask an interrupt). This seems to work for the moment,
256 * since interrupts are rare and we'll just get the interrupt on the next timer
257 * tick, but when we turn on CONFIG_NO_HZ, we should revisit this. One way
258 * would be to put the "irq_enabled" field in a page by itself, and have the
259 * Host write-protect it when an interrupt comes in when irqs are disabled.
260 * There will then be a page fault as soon as interrupts are re-enabled. :*/
261
262/*G:034
263 * The Interrupt Descriptor Table (IDT).
264 *
265 * The IDT tells the processor what to do when an interrupt comes in. Each
266 * entry in the table is a 64-bit descriptor: this holds the privilege level,
267 * address of the handler, and... well, who cares? The Guest just asks the
268 * Host to make the change anyway, because the Host controls the real IDT.
269 */
163static void lguest_write_idt_entry(struct desc_struct *dt, 270static void lguest_write_idt_entry(struct desc_struct *dt,
164 int entrynum, u32 low, u32 high) 271 int entrynum, u32 low, u32 high)
165{ 272{
273 /* Keep the local copy up to date. */
166 write_dt_entry(dt, entrynum, low, high); 274 write_dt_entry(dt, entrynum, low, high);
275 /* Tell Host about this new entry. */
167 hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high); 276 hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high);
168} 277}
169 278
279/* Changing to a different IDT is very rare: we keep the IDT up-to-date every
280 * time it is written, so we can simply loop through all entries and tell the
281 * Host about them. */
170static void lguest_load_idt(const struct Xgt_desc_struct *desc) 282static void lguest_load_idt(const struct Xgt_desc_struct *desc)
171{ 283{
172 unsigned int i; 284 unsigned int i;
@@ -176,12 +288,29 @@ static void lguest_load_idt(const struct Xgt_desc_struct *desc)
176 hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); 288 hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
177} 289}
178 290
291/*
292 * The Global Descriptor Table.
293 *
294 * The Intel architecture defines another table, called the Global Descriptor
295 * Table (GDT). You tell the CPU where it is (and its size) using the "lgdt"
296 * instruction, and then several other instructions refer to entries in the
297 * table. There are three entries which the Switcher needs, so the Host simply
298 * controls the entire thing and the Guest asks it to make changes using the
299 * LOAD_GDT hypercall.
300 *
301 * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY
302 * hypercall and use that repeatedly to load a new IDT. I don't think it
303 * really matters, but wouldn't it be nice if they were the same?
304 */
179static void lguest_load_gdt(const struct Xgt_desc_struct *desc) 305static void lguest_load_gdt(const struct Xgt_desc_struct *desc)
180{ 306{
181 BUG_ON((desc->size+1)/8 != GDT_ENTRIES); 307 BUG_ON((desc->size+1)/8 != GDT_ENTRIES);
182 hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0); 308 hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0);
183} 309}
184 310
311/* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
312 * then tell the Host to reload the entire thing. This operation is so rare
313 * that this naive implementation is reasonable. */
185static void lguest_write_gdt_entry(struct desc_struct *dt, 314static void lguest_write_gdt_entry(struct desc_struct *dt,
186 int entrynum, u32 low, u32 high) 315 int entrynum, u32 low, u32 high)
187{ 316{
@@ -189,19 +318,58 @@ static void lguest_write_gdt_entry(struct desc_struct *dt,
189 hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0); 318 hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0);
190} 319}
191 320
321/* OK, I lied. There are three "thread local storage" GDT entries which change
322 * on every context switch (these three entries are how glibc implements
323 * __thread variables). So we have a hypercall specifically for this case. */
192static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) 324static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
193{ 325{
194 lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); 326 lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
195} 327}
328/*:*/
196 329
330/*G:038 That's enough excitement for now, back to ploughing through each of
331 * the paravirt_ops (we're about 1/3 of the way through).
332 *
333 * This is the Local Descriptor Table, another weird Intel thingy. Linux only
334 * uses this for some strange applications like Wine. We don't do anything
335 * here, so they'll get an informative and friendly Segmentation Fault. */
197static void lguest_set_ldt(const void *addr, unsigned entries) 336static void lguest_set_ldt(const void *addr, unsigned entries)
198{ 337{
199} 338}
200 339
340/* This loads a GDT entry into the "Task Register": that entry points to a
341 * structure called the Task State Segment. Some comments scattered though the
 342 * structure called the Task State Segment. Some comments scattered through the
 343 * kernel code indicate that this was used for task switching in ages past, along
344 *
345 * Now there's nothing interesting in here that we don't get told elsewhere.
346 * But the native version uses the "ltr" instruction, which makes the Host
347 * complain to the Guest about a Segmentation Fault and it'll oops. So we
348 * override the native version with a do-nothing version. */
201static void lguest_load_tr_desc(void) 349static void lguest_load_tr_desc(void)
202{ 350{
203} 351}
204 352
353/* The "cpuid" instruction is a way of querying both the CPU identity
354 * (manufacturer, model, etc) and its features. It was introduced before the
355 * Pentium in 1993 and keeps getting extended by both Intel and AMD. As you
 356 * might imagine, after a decade and a half of this treatment, it is now a giant
357 * ball of hair. Its entry in the current Intel manual runs to 28 pages.
358 *
 359 * This instruction even has its own Wikipedia entry. The Wikipedia entry
360 * has been translated into 4 languages. I am not making this up!
361 *
362 * We could get funky here and identify ourselves as "GenuineLguest", but
363 * instead we just use the real "cpuid" instruction. Then I pretty much turned
364 * off feature bits until the Guest booted. (Don't say that: you'll damage
365 * lguest sales!) Shut up, inner voice! (Hey, just pointing out that this is
 366 * hardly future-proof.) No one's listening! They don't like you anyway,
367 * parenthetic weirdo!
368 *
369 * Replacing the cpuid so we can turn features off is great for the kernel, but
370 * anyone (including userspace) can just use the raw "cpuid" instruction and
371 * the Host won't even notice since it isn't privileged. So we try not to get
372 * too worked up about it. */
205static void lguest_cpuid(unsigned int *eax, unsigned int *ebx, 373static void lguest_cpuid(unsigned int *eax, unsigned int *ebx,
206 unsigned int *ecx, unsigned int *edx) 374 unsigned int *ecx, unsigned int *edx)
207{ 375{
@@ -214,21 +382,43 @@ static void lguest_cpuid(unsigned int *eax, unsigned int *ebx,
214 *ecx &= 0x00002201; 382 *ecx &= 0x00002201;
215 /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ 383 /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
216 *edx &= 0x07808101; 384 *edx &= 0x07808101;
217 /* Host wants to know when we flush kernel pages: set PGE. */ 385 /* The Host can do a nice optimization if it knows that the
386 * kernel mappings (addresses above 0xC0000000 or whatever
387 * PAGE_OFFSET is set to) haven't changed. But Linux calls
388 * flush_tlb_user() for both user and kernel mappings unless
389 * the Page Global Enable (PGE) feature bit is set. */
218 *edx |= 0x00002000; 390 *edx |= 0x00002000;
219 break; 391 break;
220 case 0x80000000: 392 case 0x80000000:
221 /* Futureproof this a little: if they ask how much extended 393 /* Futureproof this a little: if they ask how much extended
222 * processor information, limit it to known fields. */ 394 * processor information there is, limit it to known fields. */
223 if (*eax > 0x80000008) 395 if (*eax > 0x80000008)
224 *eax = 0x80000008; 396 *eax = 0x80000008;
225 break; 397 break;
226 } 398 }
227} 399}
228 400
401/* Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4.
402 * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother
403 * it. The Host needs to know when the Guest wants to change them, so we have
404 * a whole series of functions like read_cr0() and write_cr0().
405 *
406 * We start with CR0. CR0 allows you to turn on and off all kinds of basic
407 * features, but Linux only really cares about one: the horrifically-named Task
 408 * Switched (TS) bit at bit 3 (ie. 8).
409 *
410 * What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if
411 * the floating point unit is used. Which allows us to restore FPU state
412 * lazily after a task switch, and Linux uses that gratefully, but wouldn't a
413 * name like "FPUTRAP bit" be a little less cryptic?
414 *
415 * We store cr0 (and cr3) locally, because the Host never changes it. The
416 * Guest sometimes wants to read it and we'd prefer not to bother the Host
417 * unnecessarily. */
229static unsigned long current_cr0, current_cr3; 418static unsigned long current_cr0, current_cr3;
230static void lguest_write_cr0(unsigned long val) 419static void lguest_write_cr0(unsigned long val)
231{ 420{
421 /* 8 == TS bit. */
232 lazy_hcall(LHCALL_TS, val & 8, 0, 0); 422 lazy_hcall(LHCALL_TS, val & 8, 0, 0);
233 current_cr0 = val; 423 current_cr0 = val;
234} 424}
@@ -238,17 +428,25 @@ static unsigned long lguest_read_cr0(void)
238 return current_cr0; 428 return current_cr0;
239} 429}
240 430
431/* Intel provided a special instruction to clear the TS bit for people too cool
432 * to use write_cr0() to do it. This "clts" instruction is faster, because all
433 * the vowels have been optimized out. */
241static void lguest_clts(void) 434static void lguest_clts(void)
242{ 435{
243 lazy_hcall(LHCALL_TS, 0, 0, 0); 436 lazy_hcall(LHCALL_TS, 0, 0, 0);
244 current_cr0 &= ~8U; 437 current_cr0 &= ~8U;
245} 438}
246 439
440/* CR2 is the virtual address of the last page fault, which the Guest only ever
441 * reads. The Host kindly writes this into our "struct lguest_data", so we
442 * just read it out of there. */
247static unsigned long lguest_read_cr2(void) 443static unsigned long lguest_read_cr2(void)
248{ 444{
249 return lguest_data.cr2; 445 return lguest_data.cr2;
250} 446}
251 447
448/* CR3 is the current toplevel pagetable page: the principle is the same as
449 * cr0. Keep a local copy, and tell the Host when it changes. */
252static void lguest_write_cr3(unsigned long cr3) 450static void lguest_write_cr3(unsigned long cr3)
253{ 451{
254 lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0); 452 lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
@@ -260,7 +458,7 @@ static unsigned long lguest_read_cr3(void)
260 return current_cr3; 458 return current_cr3;
261} 459}
262 460
263/* Used to enable/disable PGE, but we don't care. */ 461/* CR4 is used to enable and disable PGE, but we don't care. */
264static unsigned long lguest_read_cr4(void) 462static unsigned long lguest_read_cr4(void)
265{ 463{
266 return 0; 464 return 0;
@@ -270,6 +468,59 @@ static void lguest_write_cr4(unsigned long val)
270{ 468{
271} 469}
272 470
471/*
472 * Page Table Handling.
473 *
474 * Now would be a good time to take a rest and grab a coffee or similarly
475 * relaxing stimulant. The easy parts are behind us, and the trek gradually
476 * winds uphill from here.
477 *
478 * Quick refresher: memory is divided into "pages" of 4096 bytes each. The CPU
479 * maps virtual addresses to physical addresses using "page tables". We could
480 * use one huge index of 1 million entries: each address is 4 bytes, so that's
481 * 1024 pages just to hold the page tables. But since most virtual addresses
482 * are unused, we use a two level index which saves space. The CR3 register
483 * contains the physical address of the top level "page directory" page, which
484 * contains physical addresses of up to 1024 second-level pages. Each of these
485 * second level pages contains up to 1024 physical addresses of actual pages,
486 * or Page Table Entries (PTEs).
487 *
488 * Here's a diagram, where arrows indicate physical addresses:
489 *
490 * CR3 ---> +---------+
491 * | --------->+---------+
492 * | | | PADDR1 |
493 * Top-level | | PADDR2 |
494 * (PMD) page | | |
495 * | | Lower-level |
496 * | | (PTE) page |
497 * | | | |
498 * .... ....
499 *
500 * So to convert a virtual address to a physical address, we look up the top
501 * level, which points us to the second level, which gives us the physical
502 * address of that page. If the top level entry was not present, or the second
503 * level entry was not present, then the virtual address is invalid (we
504 * say "the page was not mapped").
505 *
506 * Put another way, a 32-bit virtual address is divided up like so:
507 *
508 * 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
509 * |<---- 10 bits ---->|<---- 10 bits ---->|<------ 12 bits ------>|
510 * Index into top Index into second Offset within page
511 * page directory page pagetable page
512 *
513 * The kernel spends a lot of time changing both the top-level page directory
514 * and lower-level pagetable pages. The Guest doesn't know physical addresses,
515 * so while it maintains these page tables exactly like normal, it also needs
516 * to keep the Host informed whenever it makes a change: the Host will create
517 * the real page tables based on the Guests'.
518 */
519
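Spelling the bit diagram out as code, the 10/10/12 split of a virtual address looks like this (a sketch of the arithmetic only):

	unsigned int pgd_index = vaddr >> 22;		/* top 10 bits */
	unsigned int pte_index = (vaddr >> 12) & 0x3FF;	/* next 10 bits */
	unsigned int offset    = vaddr & 0xFFF;		/* bottom 12 bits */
	/* paddr = (pte_page[pte_index].pfn << 12) + offset, if both present. */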
520/* The Guest calls this to set a second-level entry (pte), ie. to map a page
521 * into a process' address space. We set the entry then tell the Host the
522 * toplevel and address this corresponds to. The Guest uses one pagetable per
523 * process, so we need to tell the Host which one we're changing (mm->pgd). */
273static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, 524static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
274 pte_t *ptep, pte_t pteval) 525 pte_t *ptep, pte_t pteval)
275{ 526{
@@ -277,7 +528,9 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
277 lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low); 528 lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low);
278} 529}
279 530
280/* We only support two-level pagetables at the moment. */ 531/* The Guest calls this to set a top-level entry. Again, we set the entry then
532 * tell the Host which top-level page we changed, and the index of the entry we
533 * changed. */
281static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) 534static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
282{ 535{
283 *pmdp = pmdval; 536 *pmdp = pmdval;
@@ -285,7 +538,15 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
285 (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); 538 (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
286} 539}
287 540
288/* FIXME: Eliminate all callers of this. */ 541/* There are a couple of legacy places where the kernel sets a PTE, but we
542 * don't know the top level any more. This is useless for us, since we don't
543 * know which pagetable is changing or what address, so we just tell the Host
544 * to forget all of them. Fortunately, this is very rare.
545 *
546 * ... except in early boot when the kernel sets up the initial pagetables,
547 * which makes booting astonishingly slow. So we don't even tell the Host
548 * anything changed until we've done the first page table switch.
549 */
289static void lguest_set_pte(pte_t *ptep, pte_t pteval) 550static void lguest_set_pte(pte_t *ptep, pte_t pteval)
290{ 551{
291 *ptep = pteval; 552 *ptep = pteval;
@@ -294,22 +555,51 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
294 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); 555 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
295} 556}
296 557
558/* Unfortunately for Lguest, the paravirt_ops for page tables were based on
559 * native page table operations. On native hardware you can set a new page
560 * table entry whenever you want, but if you want to remove one you have to do
561 * a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
562 *
563 * So the lguest_set_pte_at() and lguest_set_pmd() functions above are only
564 * called when a valid entry is written, not when it's removed (ie. marked not
565 * present). Instead, this is where we come when the Guest wants to remove a
566 * page table entry: we tell the Host to set that entry to 0 (ie. the present
567 * bit is zero). */
297static void lguest_flush_tlb_single(unsigned long addr) 568static void lguest_flush_tlb_single(unsigned long addr)
298{ 569{
 299 /* Simply set it to zero, and it will fault back in. */ 570 /* Simply set it to zero: if it was not zero, it will fault back in. */
300 lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0); 571 lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
301} 572}
302 573
574/* This is what happens after the Guest has removed a large number of entries.
575 * This tells the Host that any of the page table entries for userspace might
576 * have changed, ie. virtual addresses below PAGE_OFFSET. */
303static void lguest_flush_tlb_user(void) 577static void lguest_flush_tlb_user(void)
304{ 578{
305 lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0); 579 lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0);
306} 580}
307 581
582/* This is called when the kernel page tables have changed. That's not very
583 * common (unless the Guest is using highmem, which makes the Guest extremely
584 * slow), so it's worth separating this from the user flushing above. */
308static void lguest_flush_tlb_kernel(void) 585static void lguest_flush_tlb_kernel(void)
309{ 586{
310 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); 587 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
311} 588}
312 589
590/*
591 * The Unadvanced Programmable Interrupt Controller.
592 *
593 * This is an attempt to implement the simplest possible interrupt controller.
 594 * I spent some time looking through routines like set_irq_chip_and_handler,
595 * set_irq_chip_and_handler_name, set_irq_chip_data and set_phasers_to_stun and
596 * I *think* this is as simple as it gets.
597 *
 598 * We can tell the Host which interrupts we want blocked, using the
 599 * lguest_data.blocked_interrupts bitmap, so disabling (aka "masking") them is
 600 * as simple as setting a bit. We don't actually "ack" interrupts as such; we
601 * just mask and unmask them. I wonder if we should be cleverer?
602 */
313static void disable_lguest_irq(unsigned int irq) 603static void disable_lguest_irq(unsigned int irq)
314{ 604{
315 set_bit(irq, lguest_data.blocked_interrupts); 605 set_bit(irq, lguest_data.blocked_interrupts);
@@ -318,9 +608,9 @@ static void disable_lguest_irq(unsigned int irq)
318static void enable_lguest_irq(unsigned int irq) 608static void enable_lguest_irq(unsigned int irq)
319{ 609{
320 clear_bit(irq, lguest_data.blocked_interrupts); 610 clear_bit(irq, lguest_data.blocked_interrupts);
321 /* FIXME: If it's pending? */
322} 611}
323 612
613/* This structure describes the lguest IRQ controller. */
324static struct irq_chip lguest_irq_controller = { 614static struct irq_chip lguest_irq_controller = {
325 .name = "lguest", 615 .name = "lguest",
326 .mask = disable_lguest_irq, 616 .mask = disable_lguest_irq,
@@ -328,6 +618,10 @@ static struct irq_chip lguest_irq_controller = {
328 .unmask = enable_lguest_irq, 618 .unmask = enable_lguest_irq,
329}; 619};
330 620
621/* This sets up the Interrupt Descriptor Table (IDT) entry for each hardware
622 * interrupt (except 128, which is used for system calls), and then tells the
623 * Linux infrastructure that each interrupt is controlled by our level-based
624 * lguest interrupt controller. */
331static void __init lguest_init_IRQ(void) 625static void __init lguest_init_IRQ(void)
332{ 626{
333 unsigned int i; 627 unsigned int i;
@@ -340,14 +634,24 @@ static void __init lguest_init_IRQ(void)
340 handle_level_irq); 634 handle_level_irq);
341 } 635 }
342 } 636 }
637 /* This call is required to set up for 4k stacks, where we have
638 * separate stacks for hard and soft interrupts. */
343 irq_ctx_init(smp_processor_id()); 639 irq_ctx_init(smp_processor_id());
344} 640}
345 641
642/*
643 * Time.
644 *
645 * It would be far better for everyone if the Guest had its own clock, but
646 * until then it must ask the Host for the time.
647 */
346static unsigned long lguest_get_wallclock(void) 648static unsigned long lguest_get_wallclock(void)
347{ 649{
348 return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0); 650 return hcall(LHCALL_GET_WALLCLOCK, 0, 0, 0);
349} 651}
350 652
653/* If the Host tells us we can trust the TSC, we use that, otherwise we simply
654 * use the imprecise but reliable "jiffies" counter. */
351static cycle_t lguest_clock_read(void) 655static cycle_t lguest_clock_read(void)
352{ 656{
353 if (lguest_data.tsc_khz) 657 if (lguest_data.tsc_khz)
@@ -428,12 +732,19 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
428 local_irq_restore(flags); 732 local_irq_restore(flags);
429} 733}
430 734
735/* At some point in the boot process, we get asked to set up our timing
736 * infrastructure. The kernel doesn't expect timer interrupts before this, but
737 * we cleverly initialized the "blocked_interrupts" field of "struct
738 * lguest_data" so that timer interrupts were blocked until now. */
431static void lguest_time_init(void) 739static void lguest_time_init(void)
432{ 740{
741 /* Set up the timer interrupt (0) to go to our simple timer routine */
433 set_irq_handler(0, lguest_time_irq); 742 set_irq_handler(0, lguest_time_irq);
434 743
 435 /* We use the TSC if the Host tells us we can, otherwise a dumb 744 /* Our clock structure looks like arch/i386/kernel/tsc.c if we can use
436 * jiffies-based clock. */ 745 * the TSC, otherwise it looks like kernel/time/jiffies.c. Either way,
746 * the "rating" is initialized so high that it's always chosen over any
747 * other clocksource. */
437 if (lguest_data.tsc_khz) { 748 if (lguest_data.tsc_khz) {
438 lguest_clock.shift = 22; 749 lguest_clock.shift = 22;
439 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, 750 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
@@ -449,13 +760,30 @@ static void lguest_time_init(void)
449 clock_base = lguest_clock_read(); 760 clock_base = lguest_clock_read();
450 clocksource_register(&lguest_clock); 761 clocksource_register(&lguest_clock);
451 762
452 /* We can't set cpumask in the initializer: damn C limitations! */ 763 /* We can't set cpumask in the initializer: damn C limitations! Set it
764 * here and register our timer device. */
453 lguest_clockevent.cpumask = cpumask_of_cpu(0); 765 lguest_clockevent.cpumask = cpumask_of_cpu(0);
454 clockevents_register_device(&lguest_clockevent); 766 clockevents_register_device(&lguest_clockevent);
455 767
768 /* Finally, we unblock the timer interrupt. */
456 enable_lguest_irq(0); 769 enable_lguest_irq(0);
457} 770}
458 771
772/*
773 * Miscellaneous bits and pieces.
774 *
775 * Here is an oddball collection of functions which the Guest needs for things
776 * to work. They're pretty simple.
777 */
778
 779/* The Guest needs to tell the Host what stack it expects traps to use. For
780 * native hardware, this is part of the Task State Segment mentioned above in
781 * lguest_load_tr_desc(), but to help hypervisors there's this special call.
782 *
783 * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data
784 * segment), the privilege level (we're privilege level 1, the Host is 0 and
785 * will not tolerate us trying to use that), the stack pointer, and the number
786 * of pages in the stack. */
459static void lguest_load_esp0(struct tss_struct *tss, 787static void lguest_load_esp0(struct tss_struct *tss,
460 struct thread_struct *thread) 788 struct thread_struct *thread)
461{ 789{
@@ -463,15 +791,31 @@ static void lguest_load_esp0(struct tss_struct *tss,
463 THREAD_SIZE/PAGE_SIZE); 791 THREAD_SIZE/PAGE_SIZE);
464} 792}
465 793
794/* Let's just say, I wouldn't do debugging under a Guest. */
466static void lguest_set_debugreg(int regno, unsigned long value) 795static void lguest_set_debugreg(int regno, unsigned long value)
467{ 796{
468 /* FIXME: Implement */ 797 /* FIXME: Implement */
469} 798}
470 799
800/* There are times when the kernel wants to make sure that no memory writes are
801 * caught in the cache (that they've all reached real hardware devices). This
802 * doesn't matter for the Guest which has virtual hardware.
803 *
804 * On the Pentium 4 and above, cpuid() indicates that the Cache Line Flush
805 * (clflush) instruction is available and the kernel uses that. Otherwise, it
806 * uses the older "Write Back and Invalidate Cache" (wbinvd) instruction.
807 * Unlike clflush, wbinvd can only be run at privilege level 0. So we can
808 * ignore clflush, but replace wbinvd.
809 */
471static void lguest_wbinvd(void) 810static void lguest_wbinvd(void)
472{ 811{
473} 812}
474 813
814/* If the Guest expects to have an Advanced Programmable Interrupt Controller,
815 * we play dumb by ignoring writes and returning 0 for reads. So it's no
816 * longer Programmable nor Controlling anything, and I don't think 8 lines of
817 * code qualifies for Advanced. It will also never interrupt anything. It
818 * does, however, allow us to get through the Linux boot code. */
475#ifdef CONFIG_X86_LOCAL_APIC 819#ifdef CONFIG_X86_LOCAL_APIC
476static void lguest_apic_write(unsigned long reg, unsigned long v) 820static void lguest_apic_write(unsigned long reg, unsigned long v)
477{ 821{
@@ -483,19 +827,32 @@ static unsigned long lguest_apic_read(unsigned long reg)
483} 827}
484#endif 828#endif
485 829
830/* STOP! Until an interrupt comes in. */
486static void lguest_safe_halt(void) 831static void lguest_safe_halt(void)
487{ 832{
488 hcall(LHCALL_HALT, 0, 0, 0); 833 hcall(LHCALL_HALT, 0, 0, 0);
489} 834}
490 835
836/* Perhaps CRASH isn't the best name for this hypercall, but we use it to get a
837 * message out when we're crashing as well as elegant termination like powering
838 * off.
839 *
840 * Note that the Host always prefers that the Guest speak in physical addresses
841 * rather than virtual addresses, so we use __pa() here. */
491static void lguest_power_off(void) 842static void lguest_power_off(void)
492{ 843{
493 hcall(LHCALL_CRASH, __pa("Power down"), 0, 0); 844 hcall(LHCALL_CRASH, __pa("Power down"), 0, 0);
494} 845}
495 846
847/*
 848 * Panicking.
849 *
850 * Don't. But if you did, this is what happens.
851 */
496static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) 852static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
497{ 853{
498 hcall(LHCALL_CRASH, __pa(p), 0, 0); 854 hcall(LHCALL_CRASH, __pa(p), 0, 0);
855 /* The hcall won't return, but to keep gcc happy, we're "done". */
499 return NOTIFY_DONE; 856 return NOTIFY_DONE;
500} 857}
501 858
@@ -503,15 +860,45 @@ static struct notifier_block paniced = {
503 .notifier_call = lguest_panic 860 .notifier_call = lguest_panic
504}; 861};
505 862
863/* Setting up memory is fairly easy. */
506static __init char *lguest_memory_setup(void) 864static __init char *lguest_memory_setup(void)
507{ 865{
508 /* We do this here because lockcheck barfs if before start_kernel */ 866 /* We do this here and not earlier because lockcheck barfs if we do it
867 * before start_kernel() */
509 atomic_notifier_chain_register(&panic_notifier_list, &paniced); 868 atomic_notifier_chain_register(&panic_notifier_list, &paniced);
510 869
870 /* The Linux bootloader header contains an "e820" memory map: the
871 * Launcher populated the first entry with our memory limit. */
511 add_memory_region(E820_MAP->addr, E820_MAP->size, E820_MAP->type); 872 add_memory_region(E820_MAP->addr, E820_MAP->size, E820_MAP->type);
873
874 /* This string is for the boot messages. */
512 return "LGUEST"; 875 return "LGUEST";
513} 876}
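
The e820 map mentioned here is just an array of (address, size, type) records in the boot header, with a layout fixed by the i386 boot protocol; roughly:

    #include <stdint.h>

    struct e820entry {
            uint64_t addr;  /* start of the memory region */
            uint64_t size;  /* length in bytes */
            uint32_t type;  /* E820_RAM is 1; the Launcher fills in one entry */
    } __attribute__((packed));
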
514 877
878/*G:050
879 * Patching (Powerfully Placating Performance Pedants)
880 *
881 * We have already seen that "struct paravirt_ops" lets us replace simple
882 * native instructions with calls to the appropriate back end all throughout
883 * the kernel. This allows the same kernel to run as a Guest and as a native
884 * kernel, but it's slow because of all the indirect branches.
885 *
886 * Remember that David Wheeler quote about "Any problem in computer science can
887 * be solved with another layer of indirection"? The rest of that quote is
888 * "... But that usually will create another problem." This is the first of
889 * those problems.
890 *
891 * Our current solution is to allow the paravirt back end to optionally patch
892 * over the indirect calls to replace them with something more efficient. We
893 * patch the four most commonly called functions: disable interrupts, enable
894 * interrupts, restore interrupts and save interrupts. We usually have 10
895 * bytes to patch into: the Guest versions of these operations are small enough
896 * that we can fit comfortably.
897 *
898 * First we need assembly templates of each of the patchable Guest operations,
899 * and these are in lguest_asm.S. */
900
901/*G:060 We construct a table from the assembler templates: */
515static const struct lguest_insns 902static const struct lguest_insns
516{ 903{
517 const char *start, *end; 904 const char *start, *end;
@@ -521,35 +908,52 @@ static const struct lguest_insns
521 [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf }, 908 [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf },
522 [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf }, 909 [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf },
523}; 910};
911
912/* Now our patch routine is fairly simple (based on the native one in
913 * paravirt.c). If we have a replacement, we copy it in and return how much of
914 * the available space we used. */
524static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len) 915static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len)
525{ 916{
526 unsigned int insn_len; 917 unsigned int insn_len;
527 918
528 /* Don't touch it if we don't have a replacement */ 919 /* Don't do anything special if we don't have a replacement */
529 if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start) 920 if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)
530 return paravirt_patch_default(type, clobber, insns, len); 921 return paravirt_patch_default(type, clobber, insns, len);
531 922
532 insn_len = lguest_insns[type].end - lguest_insns[type].start; 923 insn_len = lguest_insns[type].end - lguest_insns[type].start;
533 924
534 /* Similarly if we can't fit replacement. */ 925 /* Similarly if we can't fit replacement (shouldn't happen, but let's
926 * be thorough). */
535 if (len < insn_len) 927 if (len < insn_len)
536 return paravirt_patch_default(type, clobber, insns, len); 928 return paravirt_patch_default(type, clobber, insns, len);
537 929
930 /* Copy in our instructions. */
538 memcpy(insns, lguest_insns[type].start, insn_len); 931 memcpy(insns, lguest_insns[type].start, insn_len);
539 return insn_len; 932 return insn_len;
540} 933}
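
Stripped of the paravirt plumbing, the pattern above is a table of byte ranges plus a memcpy. A miniature with hypothetical names, just to show the shape:

    #include <string.h>

    struct insn_template {
            const char *start, *end;  /* delimiting labels from the .S file */
    };

    /* Copy a template over a call site if it fits; 0 means "fall back". */
    static unsigned patch_site(const struct insn_template *t,
                               void *site, unsigned len)
    {
            unsigned insn_len = t->end - t->start;

            if (insn_len > len)
                    return 0;
            memcpy(site, t->start, insn_len);
            return insn_len;
    }
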
541 934
935/*G:030 Once we get to lguest_init(), we know we're a Guest. The paravirt_ops
936 * structure in the kernel provides a single point for (almost) every routine
937 * we have to override to avoid privileged instructions. */
542__init void lguest_init(void *boot) 938__init void lguest_init(void *boot)
543{ 939{
544 /* Copy boot parameters first. */ 940 /* Copy boot parameters first: the Launcher put the physical location
941 * in %esi, and head.S converted that to a virtual address and handed
942 * it to us. */
545 memcpy(&boot_params, boot, PARAM_SIZE); 943 memcpy(&boot_params, boot, PARAM_SIZE);
944 /* The boot parameters also tell us where the command-line is: save
945 * that, too. */
546 memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr), 946 memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr),
547 COMMAND_LINE_SIZE); 947 COMMAND_LINE_SIZE);
548 948
949 /* We're under lguest, paravirt is enabled, and we're running at
950 * privilege level 1, not 0 as normal. */
549 paravirt_ops.name = "lguest"; 951 paravirt_ops.name = "lguest";
550 paravirt_ops.paravirt_enabled = 1; 952 paravirt_ops.paravirt_enabled = 1;
551 paravirt_ops.kernel_rpl = 1; 953 paravirt_ops.kernel_rpl = 1;
552 954
955 /* We set up all the lguest overrides for sensitive operations. These
956 * are detailed with the operations themselves. */
553 paravirt_ops.save_fl = save_fl; 957 paravirt_ops.save_fl = save_fl;
554 paravirt_ops.restore_fl = restore_fl; 958 paravirt_ops.restore_fl = restore_fl;
555 paravirt_ops.irq_disable = irq_disable; 959 paravirt_ops.irq_disable = irq_disable;
@@ -593,20 +997,45 @@ __init void lguest_init(void *boot)
593 paravirt_ops.set_lazy_mode = lguest_lazy_mode; 997 paravirt_ops.set_lazy_mode = lguest_lazy_mode;
594 paravirt_ops.wbinvd = lguest_wbinvd; 998 paravirt_ops.wbinvd = lguest_wbinvd;
595 paravirt_ops.sched_clock = lguest_sched_clock; 999 paravirt_ops.sched_clock = lguest_sched_clock;
596 1000 /* Now is a good time to look at the implementations of these functions
1001 * before returning to the rest of lguest_init(). */
1002
1003 /*G:070 Now we've seen all the paravirt_ops, we return to
1004 * lguest_init() where the rest of the fairly chaotic boot setup
1005 * occurs.
1006 *
1007 * The Host expects our first hypercall to tell it where our "struct
1008 * lguest_data" is, so we do that first. */
597 hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0); 1009 hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0);
598 1010
599 /* We use top of mem for initial pagetables. */ 1011 /* The native boot code sets up initial page tables immediately after
1012 * the kernel itself, and sets init_pg_tables_end so they're not
1013 * clobbered. The Launcher places our initial pagetables somewhere at
1014 * the top of our physical memory, so we don't need extra space: set
1015 * init_pg_tables_end to the end of the kernel. */
600 init_pg_tables_end = __pa(pg0); 1016 init_pg_tables_end = __pa(pg0);
601 1017
1018 /* Load the %fs segment register (the per-cpu segment register) with
1019 * the normal data segment to get through booting. */
602 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); 1020 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
603 1021
1022 /* The Host uses the top of the Guest's virtual address space for the
1023 * Host<->Guest Switcher, and it tells us how much it needs in
1024 * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */
604 reserve_top_address(lguest_data.reserve_mem); 1025 reserve_top_address(lguest_data.reserve_mem);
605 1026
1027 /* If we don't initialize the lock dependency checker now, it crashes
1028 * paravirt_disable_iospace. */
606 lockdep_init(); 1029 lockdep_init();
607 1030
1031 /* The IDE code spends about 3 seconds probing for disks: if we reserve
1032 * all the I/O ports up front it can't get them and so doesn't probe.
1033 * Other device drivers are similar (but less severe). This cuts the
1034 * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. */
608 paravirt_disable_iospace(); 1035 paravirt_disable_iospace();
609 1036
1037 /* This is messy CPU setup stuff which the native boot code does before
 1038 * start_kernel, so we have to do it, too: */
610 cpu_detect(&new_cpu_data); 1039 cpu_detect(&new_cpu_data);
611 /* head.S usually sets up the first capability word, so do it here. */ 1040 /* head.S usually sets up the first capability word, so do it here. */
612 new_cpu_data.x86_capability[0] = cpuid_edx(1); 1041 new_cpu_data.x86_capability[0] = cpuid_edx(1);
@@ -617,14 +1046,27 @@ __init void lguest_init(void *boot)
617#ifdef CONFIG_X86_MCE 1046#ifdef CONFIG_X86_MCE
618 mce_disabled = 1; 1047 mce_disabled = 1;
619#endif 1048#endif
620
621#ifdef CONFIG_ACPI 1049#ifdef CONFIG_ACPI
622 acpi_disabled = 1; 1050 acpi_disabled = 1;
623 acpi_ht = 0; 1051 acpi_ht = 0;
624#endif 1052#endif
625 1053
 1054 /* We set the preferred console to "hvc". This is the "hypervisor
1055 * virtual console" driver written by the PowerPC people, which we also
1056 * adapted for lguest's use. */
626 add_preferred_console("hvc", 0, NULL); 1057 add_preferred_console("hvc", 0, NULL);
627 1058
1059 /* Last of all, we set the power management poweroff hook to point to
1060 * the Guest routine to power off. */
628 pm_power_off = lguest_power_off; 1061 pm_power_off = lguest_power_off;
1062
1063 /* Now we're set up, call start_kernel() in init/main.c and we proceed
1064 * to boot as normal. It never returns. */
629 start_kernel(); 1065 start_kernel();
630} 1066}
1067/*
1068 * This marks the end of stage II of our journey, The Guest.
1069 *
1070 * It is now time for us to explore the nooks and crannies of the three Guest
1071 * devices and complete our understanding of the Guest in "make Drivers".
1072 */
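
hcall() itself isn't in any of these hunks, but it's used everywhere above: a lguest hypercall is a software interrupt with the call number and arguments in registers. A sketch from memory (the trap number and the register bindings are assumptions to verify against lguest.c):

    /* %eax carries the call and the return value; %edx/%ebx/%ecx the args. */
    static unsigned long hcall(unsigned long call, unsigned long arg1,
                               unsigned long arg2, unsigned long arg3)
    {
            asm volatile("int $0x1F"  /* assumed: LGUEST_TRAP_ENTRY */
                         : "=a"(call)
                         : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
                         : "memory");
            return call;
    }
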
diff --git a/drivers/lguest/lguest_asm.S b/drivers/lguest/lguest_asm.S
index a3dbf22ee365..f182c6a36209 100644
--- a/drivers/lguest/lguest_asm.S
+++ b/drivers/lguest/lguest_asm.S
@@ -4,15 +4,15 @@
4#include <asm/thread_info.h> 4#include <asm/thread_info.h>
5#include <asm/processor-flags.h> 5#include <asm/processor-flags.h>
6 6
7/* 7/*G:020 This is where we begin: we have a magic signature which the launcher
8 * This is where we begin: we have a magic signature which the launcher looks 8 * looks for. The plan is that the Linux boot protocol will be extended with a
9 * for. The plan is that the Linux boot protocol will be extended with a
10 * "platform type" field which will guide us here from the normal entry point, 9 * "platform type" field which will guide us here from the normal entry point,
11 * but for the moment this suffices. We pass the virtual address of the boot 10 * but for the moment this suffices. The normal boot code uses %esi for the
12 * info to lguest_init(). 11 * boot header, so we do too. We convert it to a virtual address by adding
12 * PAGE_OFFSET, and hand it to lguest_init() as its argument (ie. %eax).
13 * 13 *
14 * We put it in .init.text will be discarded after boot. 14 * The .section line puts this code in .init.text so it will be discarded after
15 */ 15 * boot. */
16.section .init.text, "ax", @progbits 16.section .init.text, "ax", @progbits
17.ascii "GenuineLguest" 17.ascii "GenuineLguest"
18 /* Set up initial stack. */ 18 /* Set up initial stack. */
@@ -21,7 +21,9 @@
21 addl $__PAGE_OFFSET, %eax 21 addl $__PAGE_OFFSET, %eax
22 jmp lguest_init 22 jmp lguest_init
23 23
24/* The templates for inline patching. */ 24/*G:055 We create a macro which puts the assembler code between lgstart_ and
25 * lgend_ markers. These templates end up in the .init.text section, so they
26 * are discarded after boot. */
25#define LGUEST_PATCH(name, insns...) \ 27#define LGUEST_PATCH(name, insns...) \
26 lgstart_##name: insns; lgend_##name:; \ 28 lgstart_##name: insns; lgend_##name:; \
27 .globl lgstart_##name; .globl lgend_##name 29 .globl lgstart_##name; .globl lgend_##name
@@ -30,24 +32,61 @@ LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
30LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) 32LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
31LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) 33LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
32LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) 34LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
35/*:*/
33 36
34.text 37.text
35/* These demark the EIP range where host should never deliver interrupts. */ 38/* These demark the EIP range where host should never deliver interrupts. */
36.global lguest_noirq_start 39.global lguest_noirq_start
37.global lguest_noirq_end 40.global lguest_noirq_end
38 41
39/* 42/*M:004 When the Host reflects a trap or injects an interrupt into the Guest,
40 * We move eflags word to lguest_data.irq_enabled to restore interrupt state. 43 * it sets the eflags interrupt bit on the stack based on
41 * For page faults, gpfs and virtual interrupts, the hypervisor has saved 44 * lguest_data.irq_enabled, so the Guest iret logic does the right thing when
42 * eflags manually, otherwise it was delivered directly and so eflags reflects 45 * restoring it. However, when the Host sets the Guest up for direct traps,
43 * the real machine IF state, ie. interrupts on. Since the kernel always dies 46 * such as system calls, the processor is the one to push eflags onto the
44 * if it takes such a trap with interrupts disabled anyway, turning interrupts 47 * stack, and the interrupt bit will be 1 (in reality, interrupts are always
45 * back on unconditionally here is OK. 48 * enabled in the Guest).
46 */ 49 *
50 * This turns out to be harmless: the only trap which should happen under Linux
51 * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc
52 * regions), which has to be reflected through the Host anyway. If another
53 * trap *does* go off when interrupts are disabled, the Guest will panic, and
54 * we'll never get to this iret! :*/
55
56/*G:045 There is one final paravirt_op that the Guest implements, and glancing
57 * at it you can see why I left it to last. It's *cool*! It's in *assembler*!
58 *
59 * The "iret" instruction is used to return from an interrupt or trap. The
60 * stack looks like this:
61 * old address
62 * old code segment & privilege level
63 * old processor flags ("eflags")
64 *
65 * The "iret" instruction pops those values off the stack and restores them all
66 * at once. The only problem is that eflags includes the Interrupt Flag which
67 * the Guest can't change: the CPU will simply ignore it when we do an "iret".
68 * So we have to copy eflags from the stack to lguest_data.irq_enabled before
69 * we do the "iret".
70 *
71 * There are two problems with this: firstly, we need to use a register to do
72 * the copy and secondly, the whole thing needs to be atomic. The first
73 * problem is easy to solve: push %eax on the stack so we can use it, and then
74 * restore it at the end just before the real "iret".
75 *
76 * The second is harder: copying eflags to lguest_data.irq_enabled will turn
77 * interrupts on before we're finished, so we could be interrupted before we
78 * return to userspace or wherever. Our solution to this is to surround the
79 * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the
80 * Host that it is *never* to interrupt us there, even if interrupts seem to be
81 * enabled. */
47ENTRY(lguest_iret) 82ENTRY(lguest_iret)
48 pushl %eax 83 pushl %eax
49 movl 12(%esp), %eax 84 movl 12(%esp), %eax
50lguest_noirq_start: 85lguest_noirq_start:
86 /* Note the %ss: segment prefix here. Normal data accesses use the
87 * "ds" segment, but that will have already been restored for whatever
88 * we're returning to (such as userspace): we can't trust it. The %ss:
89 * prefix makes sure we use the stack segment, which is still valid. */
51 movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled 90 movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
52 popl %eax 91 popl %eax
53 iret 92 iret
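
For reference, the three-word stack layout described in the comment, written out as a C struct (lowest address first, which is the order "iret" pops them):

    struct iret_frame {
            unsigned long eip;     /* old address */
            unsigned long cs;      /* old code segment & privilege level */
            unsigned long eflags;  /* old processor flags */
    };
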
diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c
index 18d6ab21a43b..55a7940ca732 100644
--- a/drivers/lguest/lguest_bus.c
+++ b/drivers/lguest/lguest_bus.c
@@ -1,3 +1,6 @@
1/*P:050 Lguest guests use a very simple bus for devices. It's a simple array
2 * of device descriptors contained just above the top of normal memory. The
3 * lguest bus is 80% tedious boilerplate code. :*/
1#include <linux/init.h> 4#include <linux/init.h>
2#include <linux/bootmem.h> 5#include <linux/bootmem.h>
3#include <linux/lguest_bus.h> 6#include <linux/lguest_bus.h>
@@ -43,6 +46,10 @@ static struct device_attribute lguest_dev_attrs[] = {
43 __ATTR_NULL 46 __ATTR_NULL
44}; 47};
45 48
49/*D:130 The generic bus infrastructure requires a function which says whether a
50 * device matches a driver. For us, it is simple: "struct lguest_driver"
51 * contains a "device_type" field which indicates what type of device it can
52 * handle, so we just cast the args and compare: */
46static int lguest_dev_match(struct device *_dev, struct device_driver *_drv) 53static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
47{ 54{
48 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); 55 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
@@ -50,6 +57,7 @@ static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
50 57
51 return (drv->device_type == lguest_devices[dev->index].type); 58 return (drv->device_type == lguest_devices[dev->index].type);
52} 59}
60/*:*/
53 61
54struct lguest_bus { 62struct lguest_bus {
55 struct bus_type bus; 63 struct bus_type bus;
@@ -68,11 +76,24 @@ static struct lguest_bus lguest_bus = {
68 } 76 }
69}; 77};
70 78
79/*D:140 This is the callback which occurs once the bus infrastructure matches
80 * up a device and driver, ie. in response to add_lguest_device() calling
81 * device_register(), or register_lguest_driver() calling driver_register().
82 *
83 * At the moment it's always the latter: the devices are added first, since
84 * scan_devices() is called from a "core_initcall", and the drivers themselves
85 * called later as a normal "initcall". But it would work the other way too.
86 *
87 * So now we have the happy couple, we add the status bit to indicate that we
88 * found a driver. If the driver truly loves the device, it will return
89 * happiness from its probe function (ok, perhaps this wasn't my greatest
90 * analogy), and we set the final "driver ok" bit so the Host sees it's all
91 * green. */
71static int lguest_dev_probe(struct device *_dev) 92static int lguest_dev_probe(struct device *_dev)
72{ 93{
73 int ret; 94 int ret;
74 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev); 95 struct lguest_device*dev = container_of(_dev,struct lguest_device,dev);
75 struct lguest_driver *drv = container_of(dev->dev.driver, 96 struct lguest_driver*drv = container_of(dev->dev.driver,
76 struct lguest_driver, drv); 97 struct lguest_driver, drv);
77 98
78 lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER; 99 lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER;
@@ -82,6 +103,10 @@ static int lguest_dev_probe(struct device *_dev)
82 return ret; 103 return ret;
83} 104}
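
The status bits driving this handshake live in the lguest headers; the values below are how I remember them, so verify against lguest_launcher.h before relying on them:

    enum lguest_device_status {
            LGUEST_DEVICE_S_ACKNOWLEDGE = 1,    /* the Guest saw the device */
            LGUEST_DEVICE_S_DRIVER      = 2,    /* a driver matched it */
            LGUEST_DEVICE_S_DRIVER_OK   = 4,    /* probe() returned happiness */
            LGUEST_DEVICE_S_FAILED      = 0x80, /* something went wrong */
    };
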
84 105
106/* The last part of the bus infrastructure is the function lguest drivers use
107 * to register themselves. Firstly, we do nothing if there's no lguest bus
108 * (ie. this is not a Guest), otherwise we fill in the embedded generic "struct
109 * driver" fields and call the generic driver_register(). */
85int register_lguest_driver(struct lguest_driver *drv) 110int register_lguest_driver(struct lguest_driver *drv)
86{ 111{
87 if (!lguest_devices) 112 if (!lguest_devices)
@@ -94,12 +119,36 @@ int register_lguest_driver(struct lguest_driver *drv)
94 119
95 return driver_register(&drv->drv); 120 return driver_register(&drv->drv);
96} 121}
122
123/* At the moment we build all the drivers into the kernel because they're so
124 * simple: 8144 bytes for all three of them as I type this. And as the console
125 * really needs to be built in, it's actually only 3527 bytes for the network
126 * and block drivers.
127 *
128 * If they get complex it will make sense for them to be modularized, so we
129 * need to explicitly export the symbol.
130 *
131 * I don't think non-GPL modules make sense, so it's a GPL-only export.
132 */
97EXPORT_SYMBOL_GPL(register_lguest_driver); 133EXPORT_SYMBOL_GPL(register_lguest_driver);
98 134
135/*D:120 This is the core of the lguest bus: actually adding a new device.
136 * It's a separate function because it's neater that way, and because an
137 * earlier version of the code supported hotplug and unplug. They were removed
138 * early on because they were never used.
139 *
140 * As Andrew Tridgell says, "Untested code is buggy code".
141 *
142 * It's worth reading this carefully: we start with an index into the array of
143 * "struct lguest_device_desc"s indicating the device which is new: */
99static void add_lguest_device(unsigned int index) 144static void add_lguest_device(unsigned int index)
100{ 145{
101 struct lguest_device *new; 146 struct lguest_device *new;
102 147
148 /* Each "struct lguest_device_desc" has a "status" field, which the
149 * Guest updates as the device is probed. In the worst case, the Host
150 * can look at these bits to tell what part of device setup failed,
151 * even if the console isn't available. */
103 lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE; 152 lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE;
104 new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL); 153 new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL);
105 if (!new) { 154 if (!new) {
@@ -108,12 +157,17 @@ static void add_lguest_device(unsigned int index)
108 return; 157 return;
109 } 158 }
110 159
160 /* The "struct lguest_device" setup is pretty straight-forward example
161 * code. */
111 new->index = index; 162 new->index = index;
112 new->private = NULL; 163 new->private = NULL;
113 memset(&new->dev, 0, sizeof(new->dev)); 164 memset(&new->dev, 0, sizeof(new->dev));
114 new->dev.parent = &lguest_bus.dev; 165 new->dev.parent = &lguest_bus.dev;
115 new->dev.bus = &lguest_bus.bus; 166 new->dev.bus = &lguest_bus.bus;
116 sprintf(new->dev.bus_id, "%u", index); 167 sprintf(new->dev.bus_id, "%u", index);
168
169 /* device_register() causes the bus infrastructure to look for a
170 * matching driver. */
117 if (device_register(&new->dev) != 0) { 171 if (device_register(&new->dev) != 0) {
118 printk(KERN_EMERG "Cannot register lguest device %u\n", index); 172 printk(KERN_EMERG "Cannot register lguest device %u\n", index);
119 lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED; 173 lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
@@ -121,6 +175,9 @@ static void add_lguest_device(unsigned int index)
121 } 175 }
122} 176}
123 177
178/*D:110 scan_devices() simply iterates through the device array. The type 0
179 * is reserved to mean "no device", and anything else means we have found a
180 * device: add it. */
124static void scan_devices(void) 181static void scan_devices(void)
125{ 182{
126 unsigned int i; 183 unsigned int i;
@@ -130,12 +187,23 @@ static void scan_devices(void)
130 add_lguest_device(i); 187 add_lguest_device(i);
131} 188}
132 189
190/*D:100 Fairly early in boot, lguest_bus_init() is called to set up the lguest
191 * bus. We check that we are a Guest by checking paravirt_ops.name: there are
192 * other ways of checking, but this seems most obvious to me.
193 *
194 * So we can access the array of "struct lguest_device_desc"s easily, we map
195 * that memory and store the pointer in the global "lguest_devices". Then we
196 * register the bus with the core. Doing two registrations seems clunky to me,
197 * but it seems to be the correct sysfs incantation.
198 *
199 * Finally we call scan_devices() which adds all the devices found in the
200 * "struct lguest_device_desc" array. */
133static int __init lguest_bus_init(void) 201static int __init lguest_bus_init(void)
134{ 202{
135 if (strcmp(paravirt_ops.name, "lguest") != 0) 203 if (strcmp(paravirt_ops.name, "lguest") != 0)
136 return 0; 204 return 0;
137 205
138 /* Devices are in page above top of "normal" mem. */ 206 /* Devices are in a single page above top of "normal" mem */
139 lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1); 207 lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1);
140 208
141 if (bus_register(&lguest_bus.bus) != 0 209 if (bus_register(&lguest_bus.bus) != 0
@@ -145,4 +213,5 @@ static int __init lguest_bus_init(void)
145 scan_devices(); 213 scan_devices();
146 return 0; 214 return 0;
147} 215}
216/* Do this after core stuff, before devices. */
148postcore_initcall(lguest_bus_init); 217postcore_initcall(lguest_bus_init);
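
Each slot in that single page is a small fixed-size descriptor. Its layout, as I recall it (again, check lguest_launcher.h; treat the field order as an assumption):

    #include <stdint.h>

    struct lguest_device_desc {
            uint16_t type;       /* 0 means "no device"; else the device class */
            uint16_t features;   /* device-specific feature bits */
            uint16_t status;     /* the handshake bits set during probe */
            uint16_t num_pages;  /* how many pages the device's data uses */
            uint32_t pfn;        /* the page where that data starts */
    };
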
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index e90d7a783daf..80d1b58c7698 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -1,36 +1,70 @@
1/* Userspace control of the guest, via /dev/lguest. */ 1/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher
2 * controls and communicates with the Guest. For example, the first write will
3 * tell us the memory size, pagetable, entry point and kernel address offset.
4 * A read will run the Guest until a signal is pending (-EINTR), or the Guest
5 * does a DMA out to the Launcher. Writes are also used to get a DMA buffer
6 * registered by the Guest and to send the Guest an interrupt. :*/
2#include <linux/uaccess.h> 7#include <linux/uaccess.h>
3#include <linux/miscdevice.h> 8#include <linux/miscdevice.h>
4#include <linux/fs.h> 9#include <linux/fs.h>
5#include "lg.h" 10#include "lg.h"
6 11
12/*L:030 setup_regs() doesn't really belong in this file, but it gives us an
13 * early glimpse deeper into the Host so it's worth having here.
14 *
15 * Most of the Guest's registers are left alone: we used get_zeroed_page() to
16 * allocate the structure, so they will be 0. */
7static void setup_regs(struct lguest_regs *regs, unsigned long start) 17static void setup_regs(struct lguest_regs *regs, unsigned long start)
8{ 18{
9 /* Write out stack in format lguest expects, so we can switch to it. */ 19 /* There are four "segment" registers which the Guest needs to boot:
20 * The "code segment" register (cs) refers to the kernel code segment
21 * __KERNEL_CS, and the "data", "extra" and "stack" segment registers
22 * refer to the kernel data segment __KERNEL_DS.
23 *
24 * The privilege level is packed into the lower bits. The Guest runs
25 * at privilege level 1 (GUEST_PL).*/
10 regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL; 26 regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
11 regs->cs = __KERNEL_CS|GUEST_PL; 27 regs->cs = __KERNEL_CS|GUEST_PL;
12 regs->eflags = 0x202; /* Interrupts enabled. */ 28
29 /* The "eflags" register contains miscellaneous flags. Bit 1 (0x002)
30 * is supposed to always be "1". Bit 9 (0x200) controls whether
31 * interrupts are enabled. We always leave interrupts enabled while
32 * running the Guest. */
33 regs->eflags = 0x202;
34
35 /* The "Extended Instruction Pointer" register says where the Guest is
36 * running. */
13 regs->eip = start; 37 regs->eip = start;
14 /* esi points to our boot information (physical address 0) */ 38
39 /* %esi points to our boot information, at physical address 0, so don't
40 * touch it. */
15} 41}
16 42
17/* + addr */ 43/*L:310 To send DMA into the Guest, the Launcher needs to be able to ask for a
44 * DMA buffer. This is done by writing LHREQ_GETDMA and the key to
45 * /dev/lguest. */
18static long user_get_dma(struct lguest *lg, const u32 __user *input) 46static long user_get_dma(struct lguest *lg, const u32 __user *input)
19{ 47{
20 unsigned long key, udma, irq; 48 unsigned long key, udma, irq;
21 49
50 /* Fetch the key they wrote to us. */
22 if (get_user(key, input) != 0) 51 if (get_user(key, input) != 0)
23 return -EFAULT; 52 return -EFAULT;
53 /* Look for a free Guest DMA buffer bound to that key. */
24 udma = get_dma_buffer(lg, key, &irq); 54 udma = get_dma_buffer(lg, key, &irq);
25 if (!udma) 55 if (!udma)
26 return -ENOENT; 56 return -ENOENT;
27 57
28 /* We put irq number in udma->used_len. */ 58 /* We need to tell the Launcher what interrupt the Guest expects after
59 * the buffer is filled. We stash it in udma->used_len. */
29 lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq); 60 lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq);
61
62 /* The (guest-physical) address of the DMA buffer is returned from
63 * the write(). */
30 return udma; 64 return udma;
31} 65}
32 66
33/* To force the Guest to stop running and return to the Launcher, the 67/*L:315 To force the Guest to stop running and return to the Launcher, the
34 * Waker writes LHREQ_BREAK and the value "1" to /dev/lguest. The 68 * Waker writes LHREQ_BREAK and the value "1" to /dev/lguest. The
35 * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */ 69 * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */
36static int break_guest_out(struct lguest *lg, const u32 __user *input) 70static int break_guest_out(struct lguest *lg, const u32 __user *input)
@@ -54,7 +88,8 @@ static int break_guest_out(struct lguest *lg, const u32 __user *input)
54 } 88 }
55} 89}
56 90
57/* + irq */ 91/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
92 * number to /dev/lguest. */
58static int user_send_irq(struct lguest *lg, const u32 __user *input) 93static int user_send_irq(struct lguest *lg, const u32 __user *input)
59{ 94{
60 u32 irq; 95 u32 irq;
@@ -63,14 +98,19 @@ static int user_send_irq(struct lguest *lg, const u32 __user *input)
63 return -EFAULT; 98 return -EFAULT;
64 if (irq >= LGUEST_IRQS) 99 if (irq >= LGUEST_IRQS)
65 return -EINVAL; 100 return -EINVAL;
101 /* Next time the Guest runs, the core code will see if it can deliver
102 * this interrupt. */
66 set_bit(irq, lg->irqs_pending); 103 set_bit(irq, lg->irqs_pending);
67 return 0; 104 return 0;
68} 105}
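
From the Launcher's side, both of these control writes have the same shape: a 32-bit request code followed by a 32-bit argument. A userspace sketch, assuming LHREQ_IRQ is 2 and LHREQ_BREAK is 3 in linux/lguest_launcher.h:

    #include <stdint.h>
    #include <unistd.h>

    static int lguest_cmd(int fd, uint32_t req, uint32_t arg)
    {
            uint32_t buf[2] = { req, arg };

            return write(fd, buf, sizeof(buf)) < 0 ? -1 : 0;
    }

    /* lguest_cmd(fd, 2, irq);  LHREQ_IRQ: queue interrupt "irq"      */
    /* lguest_cmd(fd, 3, 1);    LHREQ_BREAK: kick the Guest out       */
    /* lguest_cmd(fd, 3, 0);    LHREQ_BREAK: release the Waker again  */
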
69 106
107/*L:040 Once our Guest is initialized, the Launcher makes it run by reading
108 * from /dev/lguest. */
70static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) 109static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
71{ 110{
72 struct lguest *lg = file->private_data; 111 struct lguest *lg = file->private_data;
73 112
113 /* You must write LHREQ_INITIALIZE first! */
74 if (!lg) 114 if (!lg)
75 return -EINVAL; 115 return -EINVAL;
76 116
@@ -78,27 +118,52 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
78 if (current != lg->tsk) 118 if (current != lg->tsk)
79 return -EPERM; 119 return -EPERM;
80 120
121 /* If the guest is already dead, we indicate why */
81 if (lg->dead) { 122 if (lg->dead) {
82 size_t len; 123 size_t len;
83 124
125 /* lg->dead either contains an error code, or a string. */
84 if (IS_ERR(lg->dead)) 126 if (IS_ERR(lg->dead))
85 return PTR_ERR(lg->dead); 127 return PTR_ERR(lg->dead);
86 128
129 /* We can only return as much as the buffer they read with. */
87 len = min(size, strlen(lg->dead)+1); 130 len = min(size, strlen(lg->dead)+1);
88 if (copy_to_user(user, lg->dead, len) != 0) 131 if (copy_to_user(user, lg->dead, len) != 0)
89 return -EFAULT; 132 return -EFAULT;
90 return len; 133 return len;
91 } 134 }
92 135
136 /* If we returned from read() last time because the Guest sent DMA,
137 * clear the flag. */
93 if (lg->dma_is_pending) 138 if (lg->dma_is_pending)
94 lg->dma_is_pending = 0; 139 lg->dma_is_pending = 0;
95 140
141 /* Run the Guest until something interesting happens. */
96 return run_guest(lg, (unsigned long __user *)user); 142 return run_guest(lg, (unsigned long __user *)user);
97} 143}
98 144
99/* Take: pfnlimit, pgdir, start, pageoffset. */ 145/*L:020 The initialization write supplies 4 32-bit values (in addition to the
146 * 32-bit LHREQ_INITIALIZE value). These are:
147 *
148 * pfnlimit: The highest (Guest-physical) page number the Guest should be
149 * allowed to access. The Launcher has to live in Guest memory, so it sets
150 * this to ensure the Guest can't reach it.
151 *
152 * pgdir: The (Guest-physical) address of the top of the initial Guest
153 * pagetables (which are set up by the Launcher).
154 *
155 * start: The first instruction to execute ("eip" in x86-speak).
156 *
157 * page_offset: The PAGE_OFFSET constant in the Guest kernel. We should
158 * probably wean the code off this, but it's a very useful constant! Any
159 * address above this is within the Guest kernel, and any kernel address can
 160 * quickly be converted from physical to virtual by adding PAGE_OFFSET. It's
161 * 0xC0000000 (3G) by default, but it's configurable at kernel build time.
162 */
100static int initialize(struct file *file, const u32 __user *input) 163static int initialize(struct file *file, const u32 __user *input)
101{ 164{
165 /* "struct lguest" contains everything we (the Host) know about a
166 * Guest. */
102 struct lguest *lg; 167 struct lguest *lg;
103 int err, i; 168 int err, i;
104 u32 args[4]; 169 u32 args[4];
@@ -106,7 +171,7 @@ static int initialize(struct file *file, const u32 __user *input)
106 /* We grab the Big Lguest lock, which protects the global array 171 /* We grab the Big Lguest lock, which protects the global array
107 * "lguests" and multiple simultaneous initializations. */ 172 * "lguests" and multiple simultaneous initializations. */
108 mutex_lock(&lguest_lock); 173 mutex_lock(&lguest_lock);
109 174 /* You can't initialize twice! Close the device and start again... */
110 if (file->private_data) { 175 if (file->private_data) {
111 err = -EBUSY; 176 err = -EBUSY;
112 goto unlock; 177 goto unlock;
@@ -117,37 +182,70 @@ static int initialize(struct file *file, const u32 __user *input)
117 goto unlock; 182 goto unlock;
118 } 183 }
119 184
185 /* Find an unused guest. */
120 i = find_free_guest(); 186 i = find_free_guest();
121 if (i < 0) { 187 if (i < 0) {
122 err = -ENOSPC; 188 err = -ENOSPC;
123 goto unlock; 189 goto unlock;
124 } 190 }
191 /* OK, we have an index into the "lguest" array: "lg" is a convenient
192 * pointer. */
125 lg = &lguests[i]; 193 lg = &lguests[i];
194
195 /* Populate the easy fields of our "struct lguest" */
126 lg->guestid = i; 196 lg->guestid = i;
127 lg->pfn_limit = args[0]; 197 lg->pfn_limit = args[0];
128 lg->page_offset = args[3]; 198 lg->page_offset = args[3];
199
200 /* We need a complete page for the Guest registers: they are accessible
201 * to the Guest and we can only grant it access to whole pages. */
129 lg->regs_page = get_zeroed_page(GFP_KERNEL); 202 lg->regs_page = get_zeroed_page(GFP_KERNEL);
130 if (!lg->regs_page) { 203 if (!lg->regs_page) {
131 err = -ENOMEM; 204 err = -ENOMEM;
132 goto release_guest; 205 goto release_guest;
133 } 206 }
207 /* We actually put the registers at the bottom of the page. */
134 lg->regs = (void *)lg->regs_page + PAGE_SIZE - sizeof(*lg->regs); 208 lg->regs = (void *)lg->regs_page + PAGE_SIZE - sizeof(*lg->regs);
135 209
210 /* Initialize the Guest's shadow page tables, using the toplevel
 211 * address the Launcher gave us. This allocates memory, so it can
212 * fail. */
136 err = init_guest_pagetable(lg, args[1]); 213 err = init_guest_pagetable(lg, args[1]);
137 if (err) 214 if (err)
138 goto free_regs; 215 goto free_regs;
139 216
217 /* Now we initialize the Guest's registers, handing it the start
218 * address. */
140 setup_regs(lg->regs, args[2]); 219 setup_regs(lg->regs, args[2]);
220
221 /* There are a couple of GDT entries the Guest expects when first
222 * booting. */
141 setup_guest_gdt(lg); 223 setup_guest_gdt(lg);
224
225 /* The timer for lguest's clock needs initialization. */
142 init_clockdev(lg); 226 init_clockdev(lg);
227
228 /* We keep a pointer to the Launcher task (ie. current task) for when
229 * other Guests want to wake this one (inter-Guest I/O). */
143 lg->tsk = current; 230 lg->tsk = current;
231 /* We need to keep a pointer to the Launcher's memory map, because if
232 * the Launcher dies we need to clean it up. If we don't keep a
233 * reference, it is destroyed before close() is called. */
144 lg->mm = get_task_mm(lg->tsk); 234 lg->mm = get_task_mm(lg->tsk);
235
236 /* Initialize the queue for the waker to wait on */
145 init_waitqueue_head(&lg->break_wq); 237 init_waitqueue_head(&lg->break_wq);
238
239 /* We remember which CPU's pages this Guest used last, for optimization
240 * when the same Guest runs on the same CPU twice. */
146 lg->last_pages = NULL; 241 lg->last_pages = NULL;
242
243 /* We keep our "struct lguest" in the file's private_data. */
147 file->private_data = lg; 244 file->private_data = lg;
148 245
149 mutex_unlock(&lguest_lock); 246 mutex_unlock(&lguest_lock);
150 247
248 /* And because this is a write() call, we return the length used. */
151 return sizeof(args); 249 return sizeof(args);
152 250
153free_regs: 251free_regs:
@@ -159,9 +257,15 @@ unlock:
159 return err; 257 return err;
160} 258}
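
Putting the L:020 description together, the Launcher's very first write looks something like this (taking LHREQ_INITIALIZE to be 0; the argument order follows the comment above):

    #include <fcntl.h>
    #include <stdint.h>
    #include <unistd.h>

    static int start_guest(uint32_t pfnlimit, uint32_t pgdir,
                           uint32_t start, uint32_t page_offset)
    {
            uint32_t req[5] = { 0 /* LHREQ_INITIALIZE */,
                                pfnlimit, pgdir, start, page_offset };
            int fd = open("/dev/lguest", O_RDWR);

            if (fd < 0)
                    return -1;
            /* On success, the write returns the length of the args used. */
            if (write(fd, req, sizeof(req)) < 0) {
                    close(fd);
                    return -1;
            }
            return fd;  /* read(fd, ...) will now run the Guest */
    }
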
161 259
260/*L:010 The first operation the Launcher does must be a write. All writes
261 * start with a 32 bit number: for the first write this must be
262 * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use
263 * writes of other values to get DMA buffers and send interrupts. */
162static ssize_t write(struct file *file, const char __user *input, 264static ssize_t write(struct file *file, const char __user *input,
163 size_t size, loff_t *off) 265 size_t size, loff_t *off)
164{ 266{
267 /* Once the guest is initialized, we hold the "struct lguest" in the
268 * file private data. */
165 struct lguest *lg = file->private_data; 269 struct lguest *lg = file->private_data;
166 u32 req; 270 u32 req;
167 271
@@ -169,8 +273,11 @@ static ssize_t write(struct file *file, const char __user *input,
169 return -EFAULT; 273 return -EFAULT;
170 input += sizeof(req); 274 input += sizeof(req);
171 275
276 /* If you haven't initialized, you must do that first. */
172 if (req != LHREQ_INITIALIZE && !lg) 277 if (req != LHREQ_INITIALIZE && !lg)
173 return -EINVAL; 278 return -EINVAL;
279
280 /* Once the Guest is dead, all you can do is read() why it died. */
174 if (lg && lg->dead) 281 if (lg && lg->dead)
175 return -ENOENT; 282 return -ENOENT;
176 283
@@ -192,33 +299,72 @@ static ssize_t write(struct file *file, const char __user *input,
192 } 299 }
193} 300}
194 301
302/*L:060 The final piece of interface code is the close() routine. It reverses
303 * everything done in initialize(). This is usually called because the
304 * Launcher exited.
305 *
306 * Note that the close routine returns 0 or a negative error number: it can't
307 * really fail, but it can whine. I blame Sun for this wart, and K&R C for
308 * letting them do it. :*/
195static int close(struct inode *inode, struct file *file) 309static int close(struct inode *inode, struct file *file)
196{ 310{
197 struct lguest *lg = file->private_data; 311 struct lguest *lg = file->private_data;
198 312
313 /* If we never successfully initialized, there's nothing to clean up */
199 if (!lg) 314 if (!lg)
200 return 0; 315 return 0;
201 316
317 /* We need the big lock, to protect from inter-guest I/O and other
318 * Launchers initializing guests. */
202 mutex_lock(&lguest_lock); 319 mutex_lock(&lguest_lock);
203 /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ 320 /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */
204 hrtimer_cancel(&lg->hrt); 321 hrtimer_cancel(&lg->hrt);
322 /* Free any DMA buffers the Guest had bound. */
205 release_all_dma(lg); 323 release_all_dma(lg);
324 /* Free up the shadow page tables for the Guest. */
206 free_guest_pagetable(lg); 325 free_guest_pagetable(lg);
326 /* Now all the memory cleanups are done, it's safe to release the
327 * Launcher's memory management structure. */
207 mmput(lg->mm); 328 mmput(lg->mm);
329 /* If lg->dead doesn't contain an error code it will be NULL or a
330 * kmalloc()ed string, either of which is ok to hand to kfree(). */
208 if (!IS_ERR(lg->dead)) 331 if (!IS_ERR(lg->dead))
209 kfree(lg->dead); 332 kfree(lg->dead);
333 /* We can free up the register page we allocated. */
210 free_page(lg->regs_page); 334 free_page(lg->regs_page);
335 /* We clear the entire structure, which also marks it as free for the
336 * next user. */
211 memset(lg, 0, sizeof(*lg)); 337 memset(lg, 0, sizeof(*lg));
338 /* Release lock and exit. */
212 mutex_unlock(&lguest_lock); 339 mutex_unlock(&lguest_lock);
340
213 return 0; 341 return 0;
214} 342}
215 343
344/*L:000
345 * Welcome to our journey through the Launcher!
346 *
347 * The Launcher is the Host userspace program which sets up, runs and services
348 * the Guest. In fact, many comments in the Drivers which refer to "the Host"
349 * doing things are inaccurate: the Launcher does all the device handling for
 350 * the Guest. The Guest can't tell what's done by the Launcher and what by
351 * the Host.
352 *
353 * Just to confuse you: to the Host kernel, the Launcher *is* the Guest and we
354 * shall see more of that later.
355 *
356 * We begin our understanding with the Host kernel interface which the Launcher
357 * uses: reading and writing a character device called /dev/lguest. All the
358 * work happens in the read(), write() and close() routines: */
216static struct file_operations lguest_fops = { 359static struct file_operations lguest_fops = {
217 .owner = THIS_MODULE, 360 .owner = THIS_MODULE,
218 .release = close, 361 .release = close,
219 .write = write, 362 .write = write,
220 .read = read, 363 .read = read,
221}; 364};
365
366/* This is a textbook example of a "misc" character device. Populate a "struct
367 * miscdevice" and register it with misc_register(). */
222static struct miscdevice lguest_dev = { 368static struct miscdevice lguest_dev = {
223 .minor = MISC_DYNAMIC_MINOR, 369 .minor = MISC_DYNAMIC_MINOR,
224 .name = "lguest", 370 .name = "lguest",
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 1b0ba09b1269..b7a924ace684 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -1,5 +1,11 @@
1/* Shadow page table operations. 1/*P:700 The pagetable code, on the other hand, still shows the scars of
2 * Copyright (C) Rusty Russell IBM Corporation 2006. 2 * previous encounters. It's functional, and as neat as it can be in the
3 * circumstances, but be wary, for these things are subtle and break easily.
4 * The Guest provides a virtual to physical mapping, but we can neither trust
5 * it nor use it: we verify and convert it here to point the hardware to the
6 * actual Guest pages when running the Guest. :*/
7
8/* Copyright (C) Rusty Russell IBM Corporation 2006.
3 * GPL v2 and any later version */ 9 * GPL v2 and any later version */
4#include <linux/mm.h> 10#include <linux/mm.h>
5#include <linux/types.h> 11#include <linux/types.h>
@@ -9,38 +15,96 @@
9#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
10#include "lg.h" 16#include "lg.h"
11 17
18/*M:008 We hold references to pages, which prevents them from being swapped.
19 * It'd be nice to have a callback in the "struct mm_struct" when Linux wants
20 * to swap out. If we had this, and a shrinker callback to trim PTE pages, we
21 * could probably consider launching Guests as non-root. :*/
22
23/*H:300
24 * The Page Table Code
25 *
26 * We use two-level page tables for the Guest. If you're not entirely
27 * comfortable with virtual addresses, physical addresses and page tables then
28 * I recommend you review lguest.c's "Page Table Handling" (with diagrams!).
29 *
30 * The Guest keeps page tables, but we maintain the actual ones here: these are
31 * called "shadow" page tables. Which is a very Guest-centric name: these are
32 * the real page tables the CPU uses, although we keep them up to date to
33 * reflect the Guest's. (See what I mean about weird naming? Since when do
34 * shadows reflect anything?)
35 *
36 * Anyway, this is the most complicated part of the Host code. There are seven
37 * parts to this:
38 * (i) Setting up a page table entry for the Guest when it faults,
39 * (ii) Setting up the page table entry for the Guest stack,
40 * (iii) Setting up a page table entry when the Guest tells us it has changed,
41 * (iv) Switching page tables,
 42 * (v) Flushing (throwing away) page tables,
43 * (vi) Mapping the Switcher when the Guest is about to run,
44 * (vii) Setting up the page tables initially.
45 :*/
46
47/* Pages are 4k long, and each page table entry is 4 bytes long, giving us 1024
48 * (or 2^10) entries per page. */
12#define PTES_PER_PAGE_SHIFT 10 49#define PTES_PER_PAGE_SHIFT 10
13#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT) 50#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT)
51
52/* 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is
53 * conveniently placed at the top 4MB, so it uses a separate, complete PTE
54 * page. */
14#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1) 55#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1)
15 56
57/* We actually need a separate PTE page for each CPU. Remember that after the
58 * Switcher code itself comes two pages for each CPU, and we don't want this
59 * CPU's guest to see the pages of any other CPU. */
16static DEFINE_PER_CPU(spte_t *, switcher_pte_pages); 60static DEFINE_PER_CPU(spte_t *, switcher_pte_pages);
17#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) 61#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)
18 62
63/*H:320 With our shadow and Guest types established, we need to deal with
64 * them: the page table code is curly enough to need helper functions to keep
65 * it clear and clean.
66 *
67 * The first helper takes a virtual address, and says which entry in the top
68 * level page table deals with that address. Since each top level entry deals
69 * with 4M, this effectively divides by 4M. */
19static unsigned vaddr_to_pgd_index(unsigned long vaddr) 70static unsigned vaddr_to_pgd_index(unsigned long vaddr)
20{ 71{
21 return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); 72 return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
22} 73}
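
A quick sanity check of that arithmetic: with 4k pages (PAGE_SHIFT of 12) and 1024 entries per page, the shift is 22, so every top-level entry covers 4MB:

    #include <assert.h>

    int main(void)
    {
            /* 0xC0000000, the usual PAGE_OFFSET, lands at pgd index 768
             * of 1024: the kernel owns the top quarter of the pgd. */
            assert((0xC0000000UL >> (12 + 10)) == 768);
            return 0;
    }
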
23 74
24/* These access the shadow versions (ie. the ones used by the CPU). */ 75/* There are two functions which return pointers to the shadow (aka "real")
76 * page tables.
77 *
78 * spgd_addr() takes the virtual address and returns a pointer to the top-level
79 * page directory entry for that address. Since we keep track of several page
80 * tables, the "i" argument tells us which one we're interested in (it's
81 * usually the current one). */
25static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) 82static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr)
26{ 83{
27 unsigned int index = vaddr_to_pgd_index(vaddr); 84 unsigned int index = vaddr_to_pgd_index(vaddr);
28 85
86 /* We kill any Guest trying to touch the Switcher addresses. */
29 if (index >= SWITCHER_PGD_INDEX) { 87 if (index >= SWITCHER_PGD_INDEX) {
30 kill_guest(lg, "attempt to access switcher pages"); 88 kill_guest(lg, "attempt to access switcher pages");
31 index = 0; 89 index = 0;
32 } 90 }
 91 /* Return a pointer to the index'th pgd entry for the i'th page table. */
33 return &lg->pgdirs[i].pgdir[index]; 92 return &lg->pgdirs[i].pgdir[index];
34} 93}
35 94
95/* This routine then takes the PGD entry given above, which contains the
96 * address of the PTE page. It then returns a pointer to the PTE entry for the
97 * given address. */
36static spte_t *spte_addr(struct lguest *lg, spgd_t spgd, unsigned long vaddr) 98static spte_t *spte_addr(struct lguest *lg, spgd_t spgd, unsigned long vaddr)
37{ 99{
38 spte_t *page = __va(spgd.pfn << PAGE_SHIFT); 100 spte_t *page = __va(spgd.pfn << PAGE_SHIFT);
101 /* You should never call this if the PGD entry wasn't valid */
39 BUG_ON(!(spgd.flags & _PAGE_PRESENT)); 102 BUG_ON(!(spgd.flags & _PAGE_PRESENT));
40 return &page[(vaddr >> PAGE_SHIFT) % PTES_PER_PAGE]; 103 return &page[(vaddr >> PAGE_SHIFT) % PTES_PER_PAGE];
41} 104}
42 105
43/* These access the guest versions. */ 106/* These two functions just like the above two, except they access the Guest
107 * page tables. Hence they return a Guest address. */
44static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) 108static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr)
45{ 109{
46 unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); 110 unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
@@ -55,12 +119,24 @@ static unsigned long gpte_addr(struct lguest *lg,
55 return gpage + ((vaddr>>PAGE_SHIFT) % PTES_PER_PAGE) * sizeof(gpte_t); 119 return gpage + ((vaddr>>PAGE_SHIFT) % PTES_PER_PAGE) * sizeof(gpte_t);
56} 120}
57 121
58/* Do a virtual -> physical mapping on a user page. */ 122/*H:350 This routine takes a page number given by the Guest and converts it to
123 * an actual, physical page number. It can fail for several reasons: the
124 * virtual address might not be mapped by the Launcher, the write flag is set
125 * and the page is read-only, or the write flag was set and the page was
126 * shared so had to be copied, but we ran out of memory.
127 *
128 * This holds a reference to the page, so release_pte() is careful to
129 * put that back. */
59static unsigned long get_pfn(unsigned long virtpfn, int write) 130static unsigned long get_pfn(unsigned long virtpfn, int write)
60{ 131{
61 struct page *page; 132 struct page *page;
133 /* This value indicates failure. */
62 unsigned long ret = -1UL; 134 unsigned long ret = -1UL;
63 135
136 /* get_user_pages() is a complex interface: it gets the "struct
 137 * vm_area_struct" and "struct page" associated with a range of pages.
138 * It also needs the task's mmap_sem held, and is not very quick.
139 * It returns the number of pages it got. */
64 down_read(&current->mm->mmap_sem); 140 down_read(&current->mm->mmap_sem);
65 if (get_user_pages(current, current->mm, virtpfn << PAGE_SHIFT, 141 if (get_user_pages(current, current->mm, virtpfn << PAGE_SHIFT,
66 1, write, 1, &page, NULL) == 1) 142 1, write, 1, &page, NULL) == 1)
@@ -69,28 +145,47 @@ static unsigned long get_pfn(unsigned long virtpfn, int write)
69 return ret; 145 return ret;
70} 146}
71 147
148/*H:340 Converting a Guest page table entry to a shadow (ie. real) page table
149 * entry can be a little tricky. The flags are (almost) the same, but the
150 * Guest PTE contains a virtual page number: the CPU needs the real page
151 * number. */
72static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write) 152static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write)
73{ 153{
74 spte_t spte; 154 spte_t spte;
75 unsigned long pfn; 155 unsigned long pfn;
76 156
77 /* We ignore the global flag. */ 157 /* The Guest sets the global flag, because it thinks that it is using
158 * PGE. We only told it to use PGE so it would tell us whether it was
159 * flushing a kernel mapping or a userspace mapping. We don't actually
160 * use the global bit, so throw it away. */
78 spte.flags = (gpte.flags & ~_PAGE_GLOBAL); 161 spte.flags = (gpte.flags & ~_PAGE_GLOBAL);
162
163 /* We need a temporary "unsigned long" variable to hold the answer from
164 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't
165 * fit in spte.pfn. get_pfn() finds the real physical number of the
166 * page, given the virtual number. */
79 pfn = get_pfn(gpte.pfn, write); 167 pfn = get_pfn(gpte.pfn, write);
80 if (pfn == -1UL) { 168 if (pfn == -1UL) {
81 kill_guest(lg, "failed to get page %u", gpte.pfn); 169 kill_guest(lg, "failed to get page %u", gpte.pfn);
82 /* Must not put_page() bogus page on cleanup. */ 170 /* When we destroy the Guest, we'll go through the shadow page
171 * tables and release_pte() them. Make sure we don't think
172 * this one is valid! */
83 spte.flags = 0; 173 spte.flags = 0;
84 } 174 }
175 /* Now we assign the page number, and our shadow PTE is complete. */
85 spte.pfn = pfn; 176 spte.pfn = pfn;
86 return spte; 177 return spte;
87} 178}
88 179
180/*H:460 And to complete the chain, release_pte() looks like this: */
89static void release_pte(spte_t pte) 181static void release_pte(spte_t pte)
90{ 182{
183 /* Remember that get_user_pages() took a reference to the page, in
184 * get_pfn()? We have to put it back now. */
91 if (pte.flags & _PAGE_PRESENT) 185 if (pte.flags & _PAGE_PRESENT)
92 put_page(pfn_to_page(pte.pfn)); 186 put_page(pfn_to_page(pte.pfn));
93} 187}
188/*:*/
94 189
95static void check_gpte(struct lguest *lg, gpte_t gpte) 190static void check_gpte(struct lguest *lg, gpte_t gpte)
96{ 191{
@@ -104,11 +199,16 @@ static void check_gpgd(struct lguest *lg, gpgd_t gpgd)
104 kill_guest(lg, "bad page directory entry"); 199 kill_guest(lg, "bad page directory entry");
105} 200}
106 201
107/* FIXME: We hold reference to pages, which prevents them from being 202/*H:330
108 swapped. It'd be nice to have a callback when Linux wants to swap out. */ 203 * (i) Setting up a page table entry for the Guest when it faults
109 204 *
110/* We fault pages in, which allows us to update accessed/dirty bits. 205 * We saw this call in run_guest(): when we see a page fault in the Guest, we
111 * Return true if we got page. */ 206 * come here. That's because we only set up the shadow page tables lazily as
207 * they're needed, so we get page faults all the time and quietly fix them up
208 * and return to the Guest without it knowing.
209 *
210 * If we fixed up the fault (ie. we mapped the address), this routine returns
211 * true. */
112int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) 212int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
113{ 213{
114 gpgd_t gpgd; 214 gpgd_t gpgd;
@@ -117,106 +217,161 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
117 gpte_t gpte; 217 gpte_t gpte;
118 spte_t *spte; 218 spte_t *spte;
119 219
220 /* First step: get the top-level Guest page table entry. */
120 gpgd = mkgpgd(lgread_u32(lg, gpgd_addr(lg, vaddr))); 221 gpgd = mkgpgd(lgread_u32(lg, gpgd_addr(lg, vaddr)));
222 /* Toplevel not present? We can't map it in. */
121 if (!(gpgd.flags & _PAGE_PRESENT)) 223 if (!(gpgd.flags & _PAGE_PRESENT))
122 return 0; 224 return 0;
123 225
226 /* Now look at the matching shadow entry. */
124 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 227 spgd = spgd_addr(lg, lg->pgdidx, vaddr);
125 if (!(spgd->flags & _PAGE_PRESENT)) { 228 if (!(spgd->flags & _PAGE_PRESENT)) {
126 /* Get a page of PTEs for them. */ 229 /* No shadow entry: allocate a new shadow PTE page. */
127 unsigned long ptepage = get_zeroed_page(GFP_KERNEL); 230 unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
128 /* FIXME: Steal from self in this case? */ 231 /* This is not really the Guest's fault, but killing it is
232 * simple for this corner case. */
129 if (!ptepage) { 233 if (!ptepage) {
130 kill_guest(lg, "out of memory allocating pte page"); 234 kill_guest(lg, "out of memory allocating pte page");
131 return 0; 235 return 0;
132 } 236 }
237 /* We check that the Guest pgd is OK. */
133 check_gpgd(lg, gpgd); 238 check_gpgd(lg, gpgd);
239 /* And we copy the flags to the shadow PGD entry. The page
240 * number in the shadow PGD is the page we just allocated. */
134 spgd->raw.val = (__pa(ptepage) | gpgd.flags); 241 spgd->raw.val = (__pa(ptepage) | gpgd.flags);
135 } 242 }
136 243
244 /* OK, now we look at the lower level in the Guest page table: keep its
245 * address, because we might update it later. */
137 gpte_ptr = gpte_addr(lg, gpgd, vaddr); 246 gpte_ptr = gpte_addr(lg, gpgd, vaddr);
138 gpte = mkgpte(lgread_u32(lg, gpte_ptr)); 247 gpte = mkgpte(lgread_u32(lg, gpte_ptr));
139 248
140 /* No page? */ 249 /* If this page isn't in the Guest page tables, we can't page it in. */
141 if (!(gpte.flags & _PAGE_PRESENT)) 250 if (!(gpte.flags & _PAGE_PRESENT))
142 return 0; 251 return 0;
143 252
144 /* Write to read-only page? */ 253 /* Check they're not trying to write to a page the Guest wants
254 * read-only (bit 2 of errcode == write). */
145 if ((errcode & 2) && !(gpte.flags & _PAGE_RW)) 255 if ((errcode & 2) && !(gpte.flags & _PAGE_RW))
146 return 0; 256 return 0;
147 257
148 /* User access to a non-user page? */ 258 /* User access to a kernel page? (bit 3 == user access) */
149 if ((errcode & 4) && !(gpte.flags & _PAGE_USER)) 259 if ((errcode & 4) && !(gpte.flags & _PAGE_USER))
150 return 0; 260 return 0;
151 261
262 /* Check that the Guest PTE flags are OK, and the page number is below
263 * the pfn_limit (ie. not mapping the Launcher binary). */
152 check_gpte(lg, gpte); 264 check_gpte(lg, gpte);
265 /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
153 gpte.flags |= _PAGE_ACCESSED; 266 gpte.flags |= _PAGE_ACCESSED;
154 if (errcode & 2) 267 if (errcode & 2)
155 gpte.flags |= _PAGE_DIRTY; 268 gpte.flags |= _PAGE_DIRTY;
156 269
157 /* We're done with the old pte. */ 270 /* Get the pointer to the shadow PTE entry we're going to set. */
158 spte = spte_addr(lg, *spgd, vaddr); 271 spte = spte_addr(lg, *spgd, vaddr);
272 /* If there was a valid shadow PTE entry here before, we release it.
273 * This can happen with a write to a previously read-only entry. */
159 release_pte(*spte); 274 release_pte(*spte);
160 275
161 /* We don't make it writable if this isn't a write: later 276 /* If this is a write, we insist that the Guest page is writable (the
162 * write will fault so we can set dirty bit in guest. */ 277 * final arg to gpte_to_spte()). */
163 if (gpte.flags & _PAGE_DIRTY) 278 if (gpte.flags & _PAGE_DIRTY)
164 *spte = gpte_to_spte(lg, gpte, 1); 279 *spte = gpte_to_spte(lg, gpte, 1);
165 else { 280 else {
281 /* If this is a read, don't set the "writable" bit in the page
282 * table entry, even if the Guest says it's writable. That way
283	 * we come back here when a write does actually occur, so we can
284 * update the Guest's _PAGE_DIRTY flag. */
166 gpte_t ro_gpte = gpte; 285 gpte_t ro_gpte = gpte;
167 ro_gpte.flags &= ~_PAGE_RW; 286 ro_gpte.flags &= ~_PAGE_RW;
168 *spte = gpte_to_spte(lg, ro_gpte, 0); 287 *spte = gpte_to_spte(lg, ro_gpte, 0);
169 } 288 }
170 289
171 /* Now we update dirty/accessed on guest. */ 290 /* Finally, we write the Guest PTE entry back: we've set the
291 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
172 lgwrite_u32(lg, gpte_ptr, gpte.raw.val); 292 lgwrite_u32(lg, gpte_ptr, gpte.raw.val);
293
294 /* We succeeded in mapping the page! */
173 return 1; 295 return 1;
174} 296}
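
(For illustration only, not part of the patch: the error-code tests and index arithmetic demand_page() relies on, redone as a stand-alone C sketch. The bit meanings match the comments above -- the value-2 bit means a write fault, the value-4 bit a user-mode fault -- and the sample address is invented.)

    #include <stdio.h>

    int main(void)
    {
            unsigned long vaddr = 0xC0123456;   /* invented fault address */
            unsigned long errcode = 2 | 4;      /* a user-mode write fault */

            /* On 32-bit x86, the top 10 bits of an address pick the PGD
             * entry and the next 10 pick the PTE within that page. */
            printf("pgd index %lu, pte index %lu\n",
                   vaddr >> 22, (vaddr >> 12) & 1023);
            printf("write fault: %s, from user: %s\n",
                   (errcode & 2) ? "yes" : "no",
                   (errcode & 4) ? "yes" : "no");
            return 0;
    }
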
175 297
176/* This is much faster than the full demand_page logic. */ 298/*H:360 (ii) Setting up the page table entry for the Guest stack.
299 *
300 * Remember pin_stack_pages() which makes sure the stack is mapped? It could
301 * simply call demand_page(), but as we've seen that logic is quite long, and
302 * usually the stack pages are already mapped anyway, so it's not required.
303 *
304 * This is a quick version which answers the question: is this virtual address
305 * mapped by the shadow page tables, and is it writable? */
177static int page_writable(struct lguest *lg, unsigned long vaddr) 306static int page_writable(struct lguest *lg, unsigned long vaddr)
178{ 307{
179 spgd_t *spgd; 308 spgd_t *spgd;
180 unsigned long flags; 309 unsigned long flags;
181 310
311 /* Look at the top level entry: is it present? */
182 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 312 spgd = spgd_addr(lg, lg->pgdidx, vaddr);
183 if (!(spgd->flags & _PAGE_PRESENT)) 313 if (!(spgd->flags & _PAGE_PRESENT))
184 return 0; 314 return 0;
185 315
316 /* Check the flags on the pte entry itself: it must be present and
317 * writable. */
186 flags = spte_addr(lg, *spgd, vaddr)->flags; 318 flags = spte_addr(lg, *spgd, vaddr)->flags;
187 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); 319 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
188} 320}
189 321
322/* So, when pin_stack_pages() asks us to pin a page, we check if it's already
323 * in the page tables, and if not, we call demand_page() with error code 2
324 * (meaning "write"). */
190void pin_page(struct lguest *lg, unsigned long vaddr) 325void pin_page(struct lguest *lg, unsigned long vaddr)
191{ 326{
192 if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) 327 if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2))
193 kill_guest(lg, "bad stack page %#lx", vaddr); 328 kill_guest(lg, "bad stack page %#lx", vaddr);
194} 329}
195 330
331/*H:450 If we chase down the release_pgd() code, it looks like this: */
196static void release_pgd(struct lguest *lg, spgd_t *spgd) 332static void release_pgd(struct lguest *lg, spgd_t *spgd)
197{ 333{
334 /* If the entry's not present, there's nothing to release. */
198 if (spgd->flags & _PAGE_PRESENT) { 335 if (spgd->flags & _PAGE_PRESENT) {
199 unsigned int i; 336 unsigned int i;
337 /* Converting the pfn to find the actual PTE page is easy: turn
338 * the page number into a physical address, then convert to a
339 * virtual address (easy for kernel pages like this one). */
200 spte_t *ptepage = __va(spgd->pfn << PAGE_SHIFT); 340 spte_t *ptepage = __va(spgd->pfn << PAGE_SHIFT);
341 /* For each entry in the page, we might need to release it. */
201 for (i = 0; i < PTES_PER_PAGE; i++) 342 for (i = 0; i < PTES_PER_PAGE; i++)
202 release_pte(ptepage[i]); 343 release_pte(ptepage[i]);
344 /* Now we can free the page of PTEs */
203 free_page((long)ptepage); 345 free_page((long)ptepage);
346	 /* And zero out the PGD entry so we never release it twice. */
204 spgd->raw.val = 0; 347 spgd->raw.val = 0;
205 } 348 }
206} 349}
207 350
351/*H:440 (v) Flushing (throwing away) page tables,
352 *
353 * We saw flush_user_mappings() called when we re-used a top-level pgdir page.
354 * It simply releases every PTE page from 0 up to the kernel address. */
208static void flush_user_mappings(struct lguest *lg, int idx) 355static void flush_user_mappings(struct lguest *lg, int idx)
209{ 356{
210 unsigned int i; 357 unsigned int i;
358 /* Release every pgd entry up to the kernel's address. */
211 for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++) 359 for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++)
212 release_pgd(lg, lg->pgdirs[idx].pgdir + i); 360 release_pgd(lg, lg->pgdirs[idx].pgdir + i);
213} 361}
214 362
363/* The Guest also has a hypercall to do this manually: it's used when a large
364 * number of mappings have been changed. */
215void guest_pagetable_flush_user(struct lguest *lg) 365void guest_pagetable_flush_user(struct lguest *lg)
216{ 366{
367 /* Drop the userspace part of the current page table. */
217 flush_user_mappings(lg, lg->pgdidx); 368 flush_user_mappings(lg, lg->pgdidx);
218} 369}
370/*:*/
219 371
372/* We keep several page tables. This is a simple routine to find the page
373 * table (if any) corresponding to this top-level address the Guest has given
374 * us. */
220static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) 375static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
221{ 376{
222 unsigned int i; 377 unsigned int i;
@@ -226,21 +381,30 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
226 return i; 381 return i;
227} 382}
228 383
384/*H:435 And this is us, creating the new page directory. If we really do
385 * allocate a new one (and so the kernel parts are not there), we set
386 * blank_pgdir. */
229static unsigned int new_pgdir(struct lguest *lg, 387static unsigned int new_pgdir(struct lguest *lg,
230 unsigned long cr3, 388 unsigned long cr3,
231 int *blank_pgdir) 389 int *blank_pgdir)
232{ 390{
233 unsigned int next; 391 unsigned int next;
234 392
393 /* We pick one entry at random to throw out. Choosing the Least
394 * Recently Used might be better, but this is easy. */
235 next = random32() % ARRAY_SIZE(lg->pgdirs); 395 next = random32() % ARRAY_SIZE(lg->pgdirs);
396 /* If it's never been allocated at all before, try now. */
236 if (!lg->pgdirs[next].pgdir) { 397 if (!lg->pgdirs[next].pgdir) {
237 lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL); 398 lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL);
399 /* If the allocation fails, just keep using the one we have */
238 if (!lg->pgdirs[next].pgdir) 400 if (!lg->pgdirs[next].pgdir)
239 next = lg->pgdidx; 401 next = lg->pgdidx;
240 else 402 else
241 /* There are no mappings: you'll need to re-pin */ 403 /* This is a blank page, so there are no kernel
404 * mappings: caller must map the stack! */
242 *blank_pgdir = 1; 405 *blank_pgdir = 1;
243 } 406 }
407 /* Record which Guest toplevel this shadows. */
244 lg->pgdirs[next].cr3 = cr3; 408 lg->pgdirs[next].cr3 = cr3;
245 /* Release all the non-kernel mappings. */ 409 /* Release all the non-kernel mappings. */
246 flush_user_mappings(lg, next); 410 flush_user_mappings(lg, next);
@@ -248,82 +412,161 @@ static unsigned int new_pgdir(struct lguest *lg,
248 return next; 412 return next;
249} 413}
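
(For illustration only, not part of the patch: the replacement policy above boiled down to a stand-alone sketch. Slot count, types and names are invented; the point is the random victim choice plus lazy allocation, falling back to the current slot when allocation fails.)

    #include <stdlib.h>

    #define NSLOTS 4                        /* stands in for the pgdir array */

    struct slot { unsigned long tag; void *page; };

    static unsigned int pick_slot(struct slot *s, unsigned int current)
    {
            unsigned int next = rand() % NSLOTS;    /* random victim, as above */

            if (!s[next].page) {
                    s[next].page = calloc(1, 4096); /* allocate only on demand */
                    if (!s[next].page)
                            next = current;         /* keep using what we have */
            }
            return next;
    }

    int main(void)
    {
            struct slot slots[NSLOTS] = { { 0, 0 } };
            return pick_slot(slots, 0) >= NSLOTS;   /* exercise it once */
    }
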
250 414
415/*H:430 (iv) Switching page tables
416 *
417 * This is what happens when the Guest changes page tables (ie. changes the
418 * top-level pgdir). This happens on almost every context switch. */
251void guest_new_pagetable(struct lguest *lg, unsigned long pgtable) 419void guest_new_pagetable(struct lguest *lg, unsigned long pgtable)
252{ 420{
253 int newpgdir, repin = 0; 421 int newpgdir, repin = 0;
254 422
423 /* Look to see if we have this one already. */
255 newpgdir = find_pgdir(lg, pgtable); 424 newpgdir = find_pgdir(lg, pgtable);
425 /* If not, we allocate or mug an existing one: if it's a fresh one,
426 * repin gets set to 1. */
256 if (newpgdir == ARRAY_SIZE(lg->pgdirs)) 427 if (newpgdir == ARRAY_SIZE(lg->pgdirs))
257 newpgdir = new_pgdir(lg, pgtable, &repin); 428 newpgdir = new_pgdir(lg, pgtable, &repin);
429 /* Change the current pgd index to the new one. */
258 lg->pgdidx = newpgdir; 430 lg->pgdidx = newpgdir;
431 /* If it was completely blank, we map in the Guest kernel stack */
259 if (repin) 432 if (repin)
260 pin_stack_pages(lg); 433 pin_stack_pages(lg);
261} 434}
262 435
436/*H:470 Finally, a routine which throws away everything: all PGD entries in all
437 * the shadow page tables. This is used when we destroy the Guest. */
263static void release_all_pagetables(struct lguest *lg) 438static void release_all_pagetables(struct lguest *lg)
264{ 439{
265 unsigned int i, j; 440 unsigned int i, j;
266 441
442 /* Every shadow pagetable this Guest has */
267 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 443 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
268 if (lg->pgdirs[i].pgdir) 444 if (lg->pgdirs[i].pgdir)
445 /* Every PGD entry except the Switcher at the top */
269 for (j = 0; j < SWITCHER_PGD_INDEX; j++) 446 for (j = 0; j < SWITCHER_PGD_INDEX; j++)
270 release_pgd(lg, lg->pgdirs[i].pgdir + j); 447 release_pgd(lg, lg->pgdirs[i].pgdir + j);
271} 448}
272 449
450/* We also throw away everything when a Guest tells us it's changed a kernel
451 * mapping. Since kernel mappings are in every page table, it's easiest to
452 * throw them all away. This is amazingly slow, but thankfully rare. */
273void guest_pagetable_clear_all(struct lguest *lg) 453void guest_pagetable_clear_all(struct lguest *lg)
274{ 454{
275 release_all_pagetables(lg); 455 release_all_pagetables(lg);
456 /* We need the Guest kernel stack mapped again. */
276 pin_stack_pages(lg); 457 pin_stack_pages(lg);
277} 458}
278 459
460/*H:420 This is the routine which actually sets the page table entry for the
461 * "idx"'th shadow page table.
462 *
463 * Normally, we can just throw out the old entry and replace it with 0: if they
464 * use it demand_page() will put the new entry in. We need to do this anyway:
465 * The Guest expects _PAGE_ACCESSED to be set on its PTE the first time a page
466 * is read from, and _PAGE_DIRTY when it's written to.
467 *
468 * But Avi Kivity pointed out that most Operating Systems (Linux included) set
469 * these bits on PTEs immediately anyway. This is done to save the CPU from
470 * having to update them, but it helps us the same way: if they set
471 * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if
472 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately.
473 */
279static void do_set_pte(struct lguest *lg, int idx, 474static void do_set_pte(struct lguest *lg, int idx,
280 unsigned long vaddr, gpte_t gpte) 475 unsigned long vaddr, gpte_t gpte)
281{ 476{
477	 /* Look up the matching shadow page directory entry. */
282 spgd_t *spgd = spgd_addr(lg, idx, vaddr); 478 spgd_t *spgd = spgd_addr(lg, idx, vaddr);
479
480 /* If the top level isn't present, there's no entry to update. */
283 if (spgd->flags & _PAGE_PRESENT) { 481 if (spgd->flags & _PAGE_PRESENT) {
482 /* Otherwise, we start by releasing the existing entry. */
284 spte_t *spte = spte_addr(lg, *spgd, vaddr); 483 spte_t *spte = spte_addr(lg, *spgd, vaddr);
285 release_pte(*spte); 484 release_pte(*spte);
485
486 /* If they're setting this entry as dirty or accessed, we might
487 * as well put that entry they've given us in now. This shaves
488 * 10% off a copy-on-write micro-benchmark. */
286 if (gpte.flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) { 489 if (gpte.flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
287 check_gpte(lg, gpte); 490 check_gpte(lg, gpte);
288 *spte = gpte_to_spte(lg, gpte, gpte.flags&_PAGE_DIRTY); 491 *spte = gpte_to_spte(lg, gpte, gpte.flags&_PAGE_DIRTY);
289 } else 492 } else
493 /* Otherwise we can demand_page() it in later. */
290 spte->raw.val = 0; 494 spte->raw.val = 0;
291 } 495 }
292} 496}
293 497
498/*H:410 Updating a PTE entry is a little trickier.
499 *
500 * We keep track of several different page tables (the Guest uses one for each
501 * process, so it makes sense to cache at least a few). Each of these have
502 * identical kernel parts: ie. every mapping above PAGE_OFFSET is the same for
503 * all processes. So when the page table above that address changes, we update
504 * all the page tables, not just the current one. This is rare.
505 *
506 * The benefit is that when we have to track a new page table, we can keep
507 * all the kernel mappings. This speeds up context switch immensely. */
294void guest_set_pte(struct lguest *lg, 508void guest_set_pte(struct lguest *lg,
295 unsigned long cr3, unsigned long vaddr, gpte_t gpte) 509 unsigned long cr3, unsigned long vaddr, gpte_t gpte)
296{ 510{
297 /* Kernel mappings must be changed on all top levels. */ 511 /* Kernel mappings must be changed on all top levels. Slow, but
512 * doesn't happen often. */
298 if (vaddr >= lg->page_offset) { 513 if (vaddr >= lg->page_offset) {
299 unsigned int i; 514 unsigned int i;
300 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 515 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
301 if (lg->pgdirs[i].pgdir) 516 if (lg->pgdirs[i].pgdir)
302 do_set_pte(lg, i, vaddr, gpte); 517 do_set_pte(lg, i, vaddr, gpte);
303 } else { 518 } else {
519 /* Is this page table one we have a shadow for? */
304 int pgdir = find_pgdir(lg, cr3); 520 int pgdir = find_pgdir(lg, cr3);
305 if (pgdir != ARRAY_SIZE(lg->pgdirs)) 521 if (pgdir != ARRAY_SIZE(lg->pgdirs))
522 /* If so, do the update. */
306 do_set_pte(lg, pgdir, vaddr, gpte); 523 do_set_pte(lg, pgdir, vaddr, gpte);
307 } 524 }
308} 525}
309 526
527/*H:400
528 * (iii) Setting up a page table entry when the Guest tells us it has changed.
529 *
530 * Just like we did in interrupts_and_traps.c, it makes sense for us to deal
531 * with the other side of page tables while we're here: what happens when the
532 * Guest asks for a page table to be updated?
533 *
534 * We already saw that demand_page() will fill in the shadow page tables when
535 * needed, so we can simply remove shadow page table entries whenever the Guest
536 * tells us they've changed. When the Guest tries to use the new entry it will
537 * fault and demand_page() will fix it up.
538 *
539 * So with that in mind here's our code to update a (top-level) PGD entry:
540 */
310void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx) 541void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
311{ 542{
312 int pgdir; 543 int pgdir;
313 544
545 /* The kernel seems to try to initialize this early on: we ignore its
546 * attempts to map over the Switcher. */
314 if (idx >= SWITCHER_PGD_INDEX) 547 if (idx >= SWITCHER_PGD_INDEX)
315 return; 548 return;
316 549
550 /* If they're talking about a page table we have a shadow for... */
317 pgdir = find_pgdir(lg, cr3); 551 pgdir = find_pgdir(lg, cr3);
318 if (pgdir < ARRAY_SIZE(lg->pgdirs)) 552 if (pgdir < ARRAY_SIZE(lg->pgdirs))
553 /* ... throw it away. */
319 release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx); 554 release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
320} 555}
321 556
557/*H:500 (vii) Setting up the page tables initially.
558 *
559 * When a Guest is first created, the Launcher tells us where the toplevel of
560 * its first page table is. We set some things up here: */
322int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) 561int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
323{ 562{
324 /* We assume this in flush_user_mappings, so check now */ 563 /* In flush_user_mappings() we loop from 0 to
564 * "vaddr_to_pgd_index(lg->page_offset)". This assumes it won't hit
565 * the Switcher mappings, so check that now. */
325 if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX) 566 if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX)
326 return -EINVAL; 567 return -EINVAL;
568 /* We start on the first shadow page table, and give it a blank PGD
569 * page. */
327 lg->pgdidx = 0; 570 lg->pgdidx = 0;
328 lg->pgdirs[lg->pgdidx].cr3 = pgtable; 571 lg->pgdirs[lg->pgdidx].cr3 = pgtable;
329 lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL); 572 lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL);
@@ -332,33 +575,48 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
332 return 0; 575 return 0;
333} 576}
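
(To make the sanity check above concrete: a PGD entry covers 4MB, so an address's PGD index is just its top 10 bits. For the usual 3G/1G split the kernel boundary lands at index 768, well below the Switcher's slot at the very top. A stand-alone worked sketch; the addresses come from this patch, the rest is invented.)

    #include <stdio.h>

    int main(void)
    {
            unsigned long page_offset = 0xC0000000; /* a common 3G/1G split */
            unsigned long switcher    = 0xFFC00000; /* where the Switcher sits */

            printf("page_offset pgd index: %lu\n", page_offset >> 22); /* 768  */
            printf("switcher pgd index:    %lu\n", switcher >> 22);    /* 1023 */
            return 0;
    }
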
334 577
578/* When a Guest dies, our cleanup is fairly simple. */
335void free_guest_pagetable(struct lguest *lg) 579void free_guest_pagetable(struct lguest *lg)
336{ 580{
337 unsigned int i; 581 unsigned int i;
338 582
583 /* Throw away all page table pages. */
339 release_all_pagetables(lg); 584 release_all_pagetables(lg);
585 /* Now free the top levels: free_page() can handle 0 just fine. */
340 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 586 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
341 free_page((long)lg->pgdirs[i].pgdir); 587 free_page((long)lg->pgdirs[i].pgdir);
342} 588}
343 589
344/* Caller must be preempt-safe */ 590/*H:480 (vi) Mapping the Switcher when the Guest is about to run.
591 *
592 * The Switcher and the two pages for this CPU need to be available to the
593 * Guest (and not the pages for other CPUs). We have the appropriate PTE pages
594 * for each CPU already set up, we just need to hook them in. */
345void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) 595void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages)
346{ 596{
347 spte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); 597 spte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
348 spgd_t switcher_pgd; 598 spgd_t switcher_pgd;
349 spte_t regs_pte; 599 spte_t regs_pte;
350 600
351 /* Since switcher less that 4MB, we simply mug top pte page. */ 601 /* Make the last PGD entry for this Guest point to the Switcher's PTE
602 * page for this CPU (with appropriate flags). */
352 switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT; 603 switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT;
353 switcher_pgd.flags = _PAGE_KERNEL; 604 switcher_pgd.flags = _PAGE_KERNEL;
354 lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; 605 lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
355 606
356 /* Map our regs page over stack page. */ 607 /* We also change the Switcher PTE page. When we're running the Guest,
608 * we want the Guest's "regs" page to appear where the first Switcher
609 * page for this CPU is. This is an optimization: when the Switcher
610 * saves the Guest registers, it saves them into the first page of this
611 * CPU's "struct lguest_pages": if we make sure the Guest's register
612 * page is already mapped there, we don't have to copy them out
613 * again. */
357 regs_pte.pfn = __pa(lg->regs_page) >> PAGE_SHIFT; 614 regs_pte.pfn = __pa(lg->regs_page) >> PAGE_SHIFT;
358 regs_pte.flags = _PAGE_KERNEL; 615 regs_pte.flags = _PAGE_KERNEL;
359 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTES_PER_PAGE] 616 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTES_PER_PAGE]
360 = regs_pte; 617 = regs_pte;
361} 618}
619/*:*/
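
(The index expression in map_switcher_in_guest() is terse; here is the same arithmetic as a stand-alone sketch. The address is invented, the constants are the usual 4K pages and 1024 PTEs per PTE page: the page number of this CPU's "struct lguest_pages", modulo the entries in one PTE page, picks the slot to overwrite.)

    #include <stdio.h>

    #define PAGE_SIZE     4096UL
    #define PTES_PER_PAGE 1024UL

    int main(void)
    {
            unsigned long pages = 0xFFC02000UL;     /* invented example address */

            /* Which slot of the Switcher PTE page covers this address? */
            printf("PTE slot: %lu\n", pages / PAGE_SIZE % PTES_PER_PAGE); /* 2 */
            return 0;
    }
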
362 620
363static void free_switcher_pte_pages(void) 621static void free_switcher_pte_pages(void)
364{ 622{
@@ -368,6 +626,10 @@ static void free_switcher_pte_pages(void)
368 free_page((long)switcher_pte_page(i)); 626 free_page((long)switcher_pte_page(i));
369} 627}
370 628
629/*H:520 Setting up the Switcher PTE page for a given CPU is fairly easy, given
630 * the CPU number and the "struct page"s for the Switcher code itself.
631 *
632 * Currently the Switcher is less than a page long, so "pages" is always 1. */
371static __init void populate_switcher_pte_page(unsigned int cpu, 633static __init void populate_switcher_pte_page(unsigned int cpu,
372 struct page *switcher_page[], 634 struct page *switcher_page[],
373 unsigned int pages) 635 unsigned int pages)
@@ -375,21 +637,26 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
375 unsigned int i; 637 unsigned int i;
376 spte_t *pte = switcher_pte_page(cpu); 638 spte_t *pte = switcher_pte_page(cpu);
377 639
640 /* The first entries are easy: they map the Switcher code. */
378 for (i = 0; i < pages; i++) { 641 for (i = 0; i < pages; i++) {
379 pte[i].pfn = page_to_pfn(switcher_page[i]); 642 pte[i].pfn = page_to_pfn(switcher_page[i]);
380 pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED; 643 pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED;
381 } 644 }
382 645
383 /* We only map this CPU's pages, so guest can't see others. */ 646 /* The only other thing we map is this CPU's pair of pages. */
384 i = pages + cpu*2; 647 i = pages + cpu*2;
385 648
386 /* First page (regs) is rw, second (state) is ro. */ 649 /* First page (Guest registers) is writable from the Guest */
387 pte[i].pfn = page_to_pfn(switcher_page[i]); 650 pte[i].pfn = page_to_pfn(switcher_page[i]);
388 pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW; 651 pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW;
652 /* The second page contains the "struct lguest_ro_state", and is
653 * read-only. */
389 pte[i+1].pfn = page_to_pfn(switcher_page[i+1]); 654 pte[i+1].pfn = page_to_pfn(switcher_page[i+1]);
390 pte[i+1].flags = _PAGE_PRESENT|_PAGE_ACCESSED; 655 pte[i+1].flags = _PAGE_PRESENT|_PAGE_ACCESSED;
391} 656}
392 657
658/*H:510 At boot or module load time, init_pagetables() allocates and populates
659 * the Switcher PTE page for each CPU. */
393__init int init_pagetables(struct page **switcher_page, unsigned int pages) 660__init int init_pagetables(struct page **switcher_page, unsigned int pages)
394{ 661{
395 unsigned int i; 662 unsigned int i;
@@ -404,7 +671,9 @@ __init int init_pagetables(struct page **switcher_page, unsigned int pages)
404 } 671 }
405 return 0; 672 return 0;
406} 673}
674/*:*/
407 675
676/* Cleaning up simply involves freeing the PTE page for each CPU. */
408void free_pagetables(void) 677void free_pagetables(void)
409{ 678{
410 free_switcher_pte_pages(); 679 free_switcher_pte_pages();
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index 1b2cfe89dcd5..f675a41a80da 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -1,16 +1,68 @@
1/*P:600 The x86 architecture has segments, which involve a table of descriptors
2 * which can be used to do funky things with virtual address interpretation.
3 * We originally used to use segments so the Guest couldn't alter the
4 * Guest<->Host Switcher, and then we had to trim Guest segments, and restore
5 * for userspace per-thread segments, but trim again on userspace->kernel
6 * transitions... This nightmarish creation was contained within this file,
7 * where we knew not to tread without heavy armament and a change of underwear.
8 *
9 * In these modern times, the segment handling code consists of simple sanity
10 * checks, and the worst you'll experience reading this code is butterfly-rash
11 * from frolicking through its parklike serenity. :*/
1#include "lg.h" 12#include "lg.h"
2 13
14/*H:600
15 * We've almost completed the Host; there's just one file to go!
16 *
17 * Segments & The Global Descriptor Table
18 *
19 * (That title sounds like a bad Nerdcore group. Not to suggest that there are
20 * any good Nerdcore groups, but in high school a friend of mine had a band
21 * called Joe Fish and the Chips, so there are definitely worse band names).
22 *
23 * To refresh: the GDT is a table of 8-byte values describing segments. Once
24 * set up, these segments can be loaded into one of the 6 "segment registers".
25 *
26 * GDT entries are passed around as "struct desc_struct"s, which like IDT
27 * entries are split into two 32-bit members, "a" and "b". One day, someone
28 * will clean that up, and be declared a Hero. (No pressure, I'm just saying).
29 *
30 * Anyway, the GDT entry contains a base (the start address of the segment), a
31 * limit (the size of the segment - 1), and some flags. Sounds simple, and it
32 * would be, except those zany Intel engineers decided that it was too boring
33 * to put the base at one end, the limit at the other, and the flags in
34 * between. They decided to shotgun the bits at random throughout the 8 bytes,
35 * like so:
36 *
37 * 0 16 40 48 52 56 63
38 * [ limit part 1 ][ base part 1 ][ flags ][li][fl][base ]
39 * mit ags part 2
40 * part 2
41 *
42 * As a result, this file contains a certain amount of magic numeracy. Let's
43 * begin.
44 */
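
(If that layout makes your eyes water, this stand-alone sketch -- types and names invented -- reassembles base, limit and a couple of the flags from the two 32-bit words "a" and "b", the same way the checks below slice them:)

    #include <stdio.h>

    struct desc { unsigned int a, b; };

    int main(void)
    {
            struct desc d = { 0x0000ffff, 0x00cf9b00 }; /* a flat 4G code segment */

            unsigned long base  = (d.a >> 16) | ((d.b & 0xff) << 16)
                                  | (d.b & 0xff000000);
            unsigned long limit = (d.a & 0xffff) | (d.b & 0x000f0000);
            unsigned int  dpl      = (d.b >> 13) & 3; /* the 0x6000 bits below  */
            unsigned int  present  = (d.b >> 15) & 1; /* the 0x8000 bit below   */
            unsigned int  accessed = (d.b >> 8) & 1;  /* forced on by the fixup */

            printf("base %#lx limit %#lx dpl %u present %u accessed %u\n",
                   base, limit, dpl, present, accessed);
            return 0;
    }
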
45
46/* Is the descriptor the Guest wants us to put in OK?
47 *
48 * The flag which Intel says must be zero: must be zero. The descriptor must
49 * be present (this is actually checked earlier, but is here for thoroughness),
50 * and the descriptor type must be 1 (a memory segment). */
3static int desc_ok(const struct desc_struct *gdt) 51static int desc_ok(const struct desc_struct *gdt)
4{ 52{
5 /* MBZ=0, P=1, DT=1 */
6 return ((gdt->b & 0x00209000) == 0x00009000); 53 return ((gdt->b & 0x00209000) == 0x00009000);
7} 54}
8 55
56/* Is the segment present? (Otherwise it can't be used by the Guest). */
9static int segment_present(const struct desc_struct *gdt) 57static int segment_present(const struct desc_struct *gdt)
10{ 58{
11 return gdt->b & 0x8000; 59 return gdt->b & 0x8000;
12} 60}
13 61
62/* There are several entries we don't let the Guest set. The TSS entry is the
63 * "Task State Segment" which controls all kinds of delicate things. The
64 * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and
65 * the Guest can't be trusted to deal with double faults. */
14static int ignored_gdt(unsigned int num) 66static int ignored_gdt(unsigned int num)
15{ 67{
16 return (num == GDT_ENTRY_TSS 68 return (num == GDT_ENTRY_TSS
@@ -19,9 +71,18 @@ static int ignored_gdt(unsigned int num)
19 || num == GDT_ENTRY_DOUBLEFAULT_TSS); 71 || num == GDT_ENTRY_DOUBLEFAULT_TSS);
20} 72}
21 73
22/* We don't allow removal of CS, DS or SS; it doesn't make sense. */ 74/* If the Guest asks us to remove an entry from the GDT, we have to be careful.
75 * If one of the segment registers is pointing at that entry the Switcher will
76 * crash when it tries to reload the segment registers for the Guest.
77 *
78 * It doesn't make much sense for the Guest to try to remove its own code, data
79 * or stack segments while they're in use: assume that's a Guest bug. If it's
80 * one of the lesser segment registers using the removed entry, we simply set
81 * that register to 0 (unusable). */
23static void check_segment_use(struct lguest *lg, unsigned int desc) 82static void check_segment_use(struct lguest *lg, unsigned int desc)
24{ 83{
84 /* GDT entries are 8 bytes long, so we divide to get the index and
85 * ignore the bottom bits. */
25 if (lg->regs->gs / 8 == desc) 86 if (lg->regs->gs / 8 == desc)
26 lg->regs->gs = 0; 87 lg->regs->gs = 0;
27 if (lg->regs->fs / 8 == desc) 88 if (lg->regs->fs / 8 == desc)
@@ -33,13 +94,21 @@ static void check_segment_use(struct lguest *lg, unsigned int desc)
33 || lg->regs->ss / 8 == desc) 94 || lg->regs->ss / 8 == desc)
34 kill_guest(lg, "Removed live GDT entry %u", desc); 95 kill_guest(lg, "Removed live GDT entry %u", desc);
35} 96}
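
(A segment register actually holds a selector, not a bare index: the bottom two bits are the requested privilege level and the next bit chooses local vs. global table, which is why the code above divides by 8, shifting out three bits. A stand-alone sketch with a plausible value:)

    #include <stdio.h>

    int main(void)
    {
            unsigned int selector = 0x73;   /* e.g. a userspace %cs on i386 */

            printf("GDT index %u, table %s, RPL %u\n",
                   selector / 8,                    /* same as selector >> 3 */
                   (selector & 4) ? "LDT" : "GDT",
                   selector & 3);                   /* prints: 14, GDT, 3 */
            return 0;
    }
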
36 97/*:*/
98/*M:009 We wouldn't need to check for removal of in-use segments if we handled
99 * faults in the Switcher. However, it's probably not a worthwhile
100 * optimization. :*/
101
102/*H:610 Once the GDT has been changed, we look through the changed entries and
103 * see if they're OK. If not, we'll call kill_guest() and the Guest will never
104 * get to use the invalid entries. */
37static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end) 105static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
38{ 106{
39 unsigned int i; 107 unsigned int i;
40 108
41 for (i = start; i < end; i++) { 109 for (i = start; i < end; i++) {
42 /* We never copy these ones to real gdt */ 110 /* We never copy these ones to the real GDT, so we don't care what
111 * they say */
43 if (ignored_gdt(i)) 112 if (ignored_gdt(i))
44 continue; 113 continue;
45 114
@@ -53,41 +122,57 @@ static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
53 if (!desc_ok(&lg->gdt[i])) 122 if (!desc_ok(&lg->gdt[i]))
54 kill_guest(lg, "Bad GDT descriptor %i", i); 123 kill_guest(lg, "Bad GDT descriptor %i", i);
55 124
56 /* DPL 0 presumably means "for use by guest". */ 125 /* Segment descriptors contain a privilege level: the Guest is
126 * sometimes careless and leaves this as 0, even though it's
127 * running at privilege level 1. If so, we fix it here. */
57 if ((lg->gdt[i].b & 0x00006000) == 0) 128 if ((lg->gdt[i].b & 0x00006000) == 0)
58 lg->gdt[i].b |= (GUEST_PL << 13); 129 lg->gdt[i].b |= (GUEST_PL << 13);
59 130
60 /* Set accessed bit, since gdt isn't writable. */ 131 /* Each descriptor has an "accessed" bit. If we don't set it
132 * now, the CPU will try to set it when the Guest first loads
133 * that entry into a segment register. But the GDT isn't
134 * writable by the Guest, so bad things can happen. */
61 lg->gdt[i].b |= 0x00000100; 135 lg->gdt[i].b |= 0x00000100;
62 } 136 }
63} 137}
64 138
139/* This routine is called at boot or modprobe time for each CPU to set up the
140 * "constant" GDT entries for Guests running on that CPU. */
65void setup_default_gdt_entries(struct lguest_ro_state *state) 141void setup_default_gdt_entries(struct lguest_ro_state *state)
66{ 142{
67 struct desc_struct *gdt = state->guest_gdt; 143 struct desc_struct *gdt = state->guest_gdt;
68 unsigned long tss = (unsigned long)&state->guest_tss; 144 unsigned long tss = (unsigned long)&state->guest_tss;
69 145
70 /* Hypervisor segments. */ 146 /* The hypervisor segments are full 0-4G segments, privilege level 0 */
71 gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; 147 gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
72 gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; 148 gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
73 149
74 /* This is the one which we *cannot* copy from guest, since tss 150 /* The TSS segment refers to the TSS entry for this CPU, so we cannot
75 is depended on this lguest_ro_state, ie. this cpu. */ 151 * copy it from the Guest. Forgive the magic flags */
76 gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); 152 gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16);
77 gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) 153 gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000)
78 | ((tss >> 16) & 0x000000FF); 154 | ((tss >> 16) & 0x000000FF);
79} 155}
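
(The "magic flags" above unpack like any other descriptor: 0x67 is the TSS limit -- the hardware TSS is 0x68 bytes, minus one -- and the 0x89 puts "present" plus type 9, an available 32-bit TSS, into "b". A stand-alone decode showing the round trip; the base address is invented.)

    #include <stdio.h>

    int main(void)
    {
            unsigned long tss = 0xC1234560;         /* invented base address */
            unsigned int a = 0x00000067u | (unsigned int)(tss << 16);
            unsigned int b = 0x00008900u | (tss & 0xFF000000)
                             | ((tss >> 16) & 0x000000FF);

            unsigned long base = (a >> 16) | ((b & 0xff) << 16)
                                 | (b & 0xff000000);
            printf("limit %#x type %#x base %#lx\n",
                   a & 0xffff, (b >> 8) & 0xf, base); /* 0x67, 0x9, 0xc1234560 */
            return 0;
    }
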
80 156
157/* This routine is called before the Guest is run for the first time. */
81void setup_guest_gdt(struct lguest *lg) 158void setup_guest_gdt(struct lguest *lg)
82{ 159{
160 /* Start with full 0-4G segments... */
83 lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; 161 lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
84 lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; 162 lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
163 /* ...except the Guest is allowed to use them, so set the privilege
164 * level appropriately in the flags. */
85 lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); 165 lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
86 lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); 166 lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
87} 167}
88 168
89/* This is a fast version for the common case where only the three TLS entries 169/* Like the IDT, we never simply use the GDT the Guest gives us. We set up the
90 * have changed. */ 170 * GDTs for each CPU, then we copy across the entries each time we want to run
171 * a different Guest on that CPU. */
172
173/* A partial GDT load, for the three "thread-local storage" entries. Otherwise
174 * it's just like load_guest_gdt(). So much, in fact, it would probably be
175 * neater to have a single hypercall to cover both. */
91void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt) 176void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt)
92{ 177{
93 unsigned int i; 178 unsigned int i;
@@ -96,22 +181,31 @@ void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt)
96 gdt[i] = lg->gdt[i]; 181 gdt[i] = lg->gdt[i];
97} 182}
98 183
184/* This is the full version */
99void copy_gdt(const struct lguest *lg, struct desc_struct *gdt) 185void copy_gdt(const struct lguest *lg, struct desc_struct *gdt)
100{ 186{
101 unsigned int i; 187 unsigned int i;
102 188
189 /* The default entries from setup_default_gdt_entries() are not
190 * replaced. See ignored_gdt() above. */
103 for (i = 0; i < GDT_ENTRIES; i++) 191 for (i = 0; i < GDT_ENTRIES; i++)
104 if (!ignored_gdt(i)) 192 if (!ignored_gdt(i))
105 gdt[i] = lg->gdt[i]; 193 gdt[i] = lg->gdt[i];
106} 194}
107 195
196/* This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT). */
108void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num) 197void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num)
109{ 198{
199 /* We assume the Guest has the same number of GDT entries as the
200 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
110 if (num > ARRAY_SIZE(lg->gdt)) 201 if (num > ARRAY_SIZE(lg->gdt))
111 kill_guest(lg, "too many gdt entries %i", num); 202 kill_guest(lg, "too many gdt entries %i", num);
112 203
204 /* We read the whole thing in, then fix it up. */
113 lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0])); 205 lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0]));
114 fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt)); 206 fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt));
207 /* Mark that the GDT changed so the core knows it has to copy it again,
208 * even if the Guest is run on the same CPU. */
115 lg->changed |= CHANGED_GDT; 209 lg->changed |= CHANGED_GDT;
116} 210}
117 211
@@ -123,3 +217,13 @@ void guest_load_tls(struct lguest *lg, unsigned long gtls)
123 fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1); 217 fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1);
124 lg->changed |= CHANGED_GDT_TLS; 218 lg->changed |= CHANGED_GDT_TLS;
125} 219}
220
221/*
222 * With this, we have finished the Host.
223 *
224 * Five of the seven parts of our task are complete. You have made it through
225 * the Bit of Despair (I think that's somewhere in the page table code,
226 * myself).
227 *
228 * Next, we examine "make Switcher". It's short, but intense.
229 */
diff --git a/drivers/lguest/switcher.S b/drivers/lguest/switcher.S
index eadd4cc299d2..d418179ea6b5 100644
--- a/drivers/lguest/switcher.S
+++ b/drivers/lguest/switcher.S
@@ -1,45 +1,136 @@
1/* This code sits at 0xFFC00000 to do the low-level guest<->host switch. 1/*P:900 This is the Switcher: code which sits at 0xFFC00000 to do the low-level
2 * Guest<->Host switch. It is as simple as it can be made, but it's naturally
3 * very specific to x86.
4 *
5 * You have now completed Preparation. If this has whetted your appetite; if you
6 * are feeling invigorated and refreshed then the next, more challenging stage
7 * can be found in "make Guest". :*/
2 8
3 There is are two pages above us for this CPU (struct lguest_pages). 9/*S:100
4 The second page (struct lguest_ro_state) becomes read-only after the 10 * Welcome to the Switcher itself!
5 context switch. The first page (the stack for traps) remains writable, 11 *
6 but while we're in here, the guest cannot be running. 12 * This file contains the low-level code which changes the CPU to run the Guest
7*/ 13 * code, and returns to the Host when something happens. Understand this, and
14 * you understand the heart of our journey.
15 *
16 * Because this is in assembler rather than C, our tale switches from prose to
17 * verse. First I tried limericks:
18 *
19 * There once was an eax reg,
20 * To which our pointer was fed,
21 * It needed an add,
22 * Which asm-offsets.h had
23 * But this limerick is hurting my head.
24 *
25 * Next I tried haikus, but fitting the required reference to the seasons in
26 * every stanza was quickly becoming tiresome:
27 *
28 * The %eax reg
29 * Holds "struct lguest_pages" now:
30 * Cherry blossoms fall.
31 *
32 * Then I started with Heroic Verse, but the rhyming requirement leeched away
33 * the content density and led to some uniquely awful oblique rhymes:
34 *
35 * These constants are coming from struct offsets
36 * For use within the asm switcher text.
37 *
38 * Finally, I settled for something between heroic hexameter, and normal prose
39 * with inappropriate linebreaks. Anyway, it ain't no Shakespeare.
40 */
41
42// Not all kernel headers work from assembler
43// But these ones are needed: the ENTRY() define
44// And constants extracted from struct offsets
45// To avoid magic numbers and breakage:
46// Should they change the compiler can't save us
47// Down here in the depths of assembler code.
8#include <linux/linkage.h> 48#include <linux/linkage.h>
9#include <asm/asm-offsets.h> 49#include <asm/asm-offsets.h>
10#include "lg.h" 50#include "lg.h"
11 51
52// We mark the start of the code to copy
53// It's placed in .text tho it's never run here
54// You'll see the trick macro at the end
55// Which interleaves data and text to effect.
12.text 56.text
13ENTRY(start_switcher_text) 57ENTRY(start_switcher_text)
14 58
15/* %eax points to lguest pages for this CPU. %ebx contains cr3 value. 59// When we reach switch_to_guest we have just left
16 All normal registers can be clobbered! */ 60// The safe and comforting shores of C code
61// %eax has the "struct lguest_pages" to use
62// Where we save state and still see it from the Guest
63// And %ebx holds the Guest shadow pagetable:
64// Once set we have truly left Host behind.
17ENTRY(switch_to_guest) 65ENTRY(switch_to_guest)
18 /* Save host segments on host stack. */ 66 // We told gcc all its regs could fade,
67 // Clobbered by our journey into the Guest
68 // We could have saved them, if we tried
69 // But time is our master and cycles count.
70
71 // Segment registers must be saved for the Host
72 // We push them on the Host stack for later
19 pushl %es 73 pushl %es
20 pushl %ds 74 pushl %ds
21 pushl %gs 75 pushl %gs
22 pushl %fs 76 pushl %fs
23 /* With CONFIG_FRAME_POINTER, gcc doesn't let us clobber this! */ 77 // But the compiler is fickle, and heeds
78 // No warning of %ebp clobbers
79 // When frame pointers are used. That register
80 // Must be saved and restored or chaos strikes.
24 pushl %ebp 81 pushl %ebp
25 /* Save host stack. */ 82 // The Host's stack is done, now save it away
83 // In our "struct lguest_pages" at offset
84 // Distilled into asm-offsets.h
26 movl %esp, LGUEST_PAGES_host_sp(%eax) 85 movl %esp, LGUEST_PAGES_host_sp(%eax)
27 /* Switch to guest stack: if we get NMI we expect to be there. */ 86
87 // All saved and there's now five steps before us:
88 // Stack, GDT, IDT, TSS
89 // And last of all the page tables are flipped.
90
91 // Yet beware that our stack pointer must be
92 // Always valid lest an NMI hits
93 // %edx does the duty here as we juggle
94 // %eax is lguest_pages: our stack lies within.
28 movl %eax, %edx 95 movl %eax, %edx
29 addl $LGUEST_PAGES_regs, %edx 96 addl $LGUEST_PAGES_regs, %edx
30 movl %edx, %esp 97 movl %edx, %esp
31 /* Switch to guest's GDT, IDT. */ 98
99 // The Guest's GDT we so carefully
100 // Placed in the "struct lguest_pages" before
32 lgdt LGUEST_PAGES_guest_gdt_desc(%eax) 101 lgdt LGUEST_PAGES_guest_gdt_desc(%eax)
102
103 // The Guest's IDT we did partially
104 // Move to the "struct lguest_pages" as well.
33 lidt LGUEST_PAGES_guest_idt_desc(%eax) 105 lidt LGUEST_PAGES_guest_idt_desc(%eax)
34 /* Switch to guest's TSS while GDT still writable. */ 106
107 // The TSS entry which controls traps
108 // Must be loaded up with "ltr" now:
109 // For after we switch over our page tables
110 // It (as the rest) will be writable no more.
111 // (The GDT entry TSS needs
112 // Changes type when we load it: damn Intel!)
35 movl $(GDT_ENTRY_TSS*8), %edx 113 movl $(GDT_ENTRY_TSS*8), %edx
36 ltr %dx 114 ltr %dx
37 /* Set host's TSS GDT entry to available (clear byte 5 bit 2). */ 115
116 // Look back now, before we take this last step!
117 // The Host's TSS entry was also marked used;
118 // Let's clear it again, ere we return.
119 // The GDT descriptor of the Host
120 // Points to the table after two "size" bytes
38 movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx 121 movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx
122 // Clear the type field of "used" (byte 5, bit 2)
39 andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx) 123 andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx)
40 /* Switch to guest page tables: lguest_pages->state now read-only. */ 124
125 // Once our page table's switched, the Guest is live!
126 // The Host fades as we run this final step.
127 // Our "struct lguest_pages" is now read-only.
41 movl %ebx, %cr3 128 movl %ebx, %cr3
42 /* Restore guest regs */ 129
130 // The page table change did one tricky thing:
131 // The Guest's register page has been mapped
132 // Writable onto our %esp (stack) --
133 // We can simply pop off all Guest regs.
43 popl %ebx 134 popl %ebx
44 popl %ecx 135 popl %ecx
45 popl %edx 136 popl %edx
@@ -51,12 +142,27 @@ ENTRY(switch_to_guest)
51 popl %fs 142 popl %fs
52 popl %ds 143 popl %ds
53 popl %es 144 popl %es
54 /* Skip error code and trap number */ 145
146 // Near the base of the stack lurk two strange fields
147 // Which we fill as we exit the Guest
148 // These are the trap number and its error
149 // We can simply step past them on our way.
55 addl $8, %esp 150 addl $8, %esp
151
152 // The last five stack slots hold return address
153 // And everything needed to change privilege
154 // Into the Guest privilege level of 1,
155 // And the stack where the Guest had last left it.
156 // Interrupts are turned back on: we are Guest.
56 iret 157 iret
57 158
159// There are two paths where we switch to the Host
160// So we put the routine in a macro.
161// We are on our way home, back to the Host
162// Interrupted out of the Guest, we come here.
58#define SWITCH_TO_HOST \ 163#define SWITCH_TO_HOST \
59 /* Save guest state */ \ 164 /* We save the Guest state: all registers first \
165 * Laid out just as "struct lguest_regs" defines */ \
60 pushl %es; \ 166 pushl %es; \
61 pushl %ds; \ 167 pushl %ds; \
62 pushl %fs; \ 168 pushl %fs; \
@@ -68,58 +174,119 @@ ENTRY(switch_to_guest)
68 pushl %edx; \ 174 pushl %edx; \
69 pushl %ecx; \ 175 pushl %ecx; \
70 pushl %ebx; \ 176 pushl %ebx; \
71 /* Load lguest ds segment for convenience. */ \ 177 /* Our stack and our code are using segments \
178 * Set in the TSS and IDT \
179 * Yet if we were to touch data we'd use \
180 * Whatever data segment the Guest had. \
181 * Load the lguest ds segment for now. */ \
72 movl $(LGUEST_DS), %eax; \ 182 movl $(LGUEST_DS), %eax; \
73 movl %eax, %ds; \ 183 movl %eax, %ds; \
74 /* Figure out where we are, based on stack (at top of regs). */ \ 184 /* So where are we? Which CPU, which struct? \
185 * The stack is our clue: our TSS sets \
186 * It at the end of "struct lguest_pages" \
187 * And we then pushed and pushed and pushed Guest regs: \
188 * Now stack points atop the "struct lguest_regs". \
189 * Subtract that offset, and we find our struct. */ \
75 movl %esp, %eax; \ 190 movl %esp, %eax; \
76 subl $LGUEST_PAGES_regs, %eax; \ 191 subl $LGUEST_PAGES_regs, %eax; \
77 /* Put trap number in %ebx before we switch cr3 and lose it. */ \ 192 /* Save our trap number: the switch will obscure it \
193 * (The Guest regs are not mapped here in the Host) \
194 * %ebx holds it safe for deliver_to_host */ \
78 movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \ 195 movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \
79 /* Switch to host page tables (host GDT, IDT and stack are in host \ 196 /* The Host GDT, IDT and stack! \
80 mem, so need this first) */ \ 197 * All these lie safely hidden from the Guest: \
198 * We must return to the Host page tables \
199 * (Hence that was saved in struct lguest_pages) */ \
81 movl LGUEST_PAGES_host_cr3(%eax), %edx; \ 200 movl LGUEST_PAGES_host_cr3(%eax), %edx; \
82 movl %edx, %cr3; \ 201 movl %edx, %cr3; \
83 /* Set guest's TSS to available (clear byte 5 bit 2). */ \ 202 /* As before, when we looked back at the Host \
203 * As we left and marked TSS unused \
204 * So must we now for the Guest left behind. */ \
84 andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \ 205 andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \
85 /* Switch to host's GDT & IDT. */ \ 206 /* Switch to Host's GDT, IDT. */ \
86 lgdt LGUEST_PAGES_host_gdt_desc(%eax); \ 207 lgdt LGUEST_PAGES_host_gdt_desc(%eax); \
87 lidt LGUEST_PAGES_host_idt_desc(%eax); \ 208 lidt LGUEST_PAGES_host_idt_desc(%eax); \
88 /* Switch to host's stack. */ \ 209 /* Restore the Host's stack where its saved regs lie */ \
89 movl LGUEST_PAGES_host_sp(%eax), %esp; \ 210 movl LGUEST_PAGES_host_sp(%eax), %esp; \
90 /* Switch to host's TSS */ \ 211 /* Last the TSS: our Host is complete */ \
91 movl $(GDT_ENTRY_TSS*8), %edx; \ 212 movl $(GDT_ENTRY_TSS*8), %edx; \
92 ltr %dx; \ 213 ltr %dx; \
214 /* Restore now the regs saved right at the first. */ \
93 popl %ebp; \ 215 popl %ebp; \
94 popl %fs; \ 216 popl %fs; \
95 popl %gs; \ 217 popl %gs; \
96 popl %ds; \ 218 popl %ds; \
97 popl %es 219 popl %es
98 220
99/* Return to run_guest_once. */ 221// Here's where we come when the Guest has just trapped:
222// (Which trap we'll see has been pushed on the stack).
223// We need only switch back, and the Host will decode
224// Why we came home, and what needs to be done.
100return_to_host: 225return_to_host:
101 SWITCH_TO_HOST 226 SWITCH_TO_HOST
102 iret 227 iret
103 228
229// An interrupt, with some cause external
230// Has ajerked us rudely from the Guest's code
231// Again we must return home to the Host
104deliver_to_host: 232deliver_to_host:
105 SWITCH_TO_HOST 233 SWITCH_TO_HOST
106 /* Decode IDT and jump to hosts' irq handler. When that does iret, it 234 // But now we must go home via that place
107 * will return to run_guest_once. This is a feature. */ 235 // Where that interrupt was supposed to go
236 // Had we not been ensconced, running the Guest.
237 // Here we see the cleverness of our stack:
238 // The Host stack is formed like an interrupt
239 // With EIP, CS and EFLAGS layered.
240 // Interrupt handlers end with "iret"
241 // And that will take us home at long long last.
242
243 // But first we must find the handler to call!
244 // The IDT descriptor for the Host
245 // Has two bytes for size, and four for address:
246 // %edx will hold it for us for now.
108 movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx 247 movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx
248 // We now know the table address we need,
249 // And saved the trap's number inside %ebx.
250 // Yet the pointer to the handler is smeared
251 // Across the bits of the table entry.
252 // What oracle can tell us how to extract
253 // From such a convoluted encoding?
254 // I consulted gcc, and it gave
255 // These instructions, which I gladly credit:
109 leal (%edx,%ebx,8), %eax 256 leal (%edx,%ebx,8), %eax
110 movzwl (%eax),%edx 257 movzwl (%eax),%edx
111 movl 4(%eax), %eax 258 movl 4(%eax), %eax
112 xorw %ax, %ax 259 xorw %ax, %ax
113 orl %eax, %edx 260 orl %eax, %edx
261 // Now the address of the handler's in %edx
262 // We call it now: its "iret" takes us home.
114 jmp *%edx 263 jmp *%edx
115 264
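
(The gcc-donated instructions above are doing an ordinary struct decode: an x86 IDT gate keeps the handler offset's low 16 bits in its first word and the high 16 bits in its second. The same extraction in C, as a stand-alone sketch; the gate value is invented.)

    #include <stdio.h>

    struct idt_gate { unsigned int a, b; }; /* the two 32-bit halves of a gate */

    static unsigned long gate_offset(struct idt_gate g)
    {
            /* Low 16 bits from "a", high 16 from "b": the same job as the
             * movzwl / xorw / orl dance above. */
            return (g.a & 0xffff) | (g.b & 0xffff0000);
    }

    int main(void)
    {
            /* selector 0x0060, offset 0xc01f1234, flags 0x8e00 (present,
             * DPL 0, 32-bit interrupt gate) -- all invented for the example. */
            struct idt_gate g = { 0x00601234, 0xc01f8e00 };

            printf("handler at %#lx\n", gate_offset(g)); /* 0xc01f1234 */
            return 0;
    }
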
116/* Real hardware interrupts are delivered straight to the host. Others 265// Every interrupt can come to us here
117 cause us to return to run_guest_once so it can decide what to do. Note 266// But we must truly tell each apart.
118 that some of these are overridden by the guest to deliver directly, and 267// They number two hundred and fifty six
119 never enter here (see load_guest_idt_entry). */ 268// And each must land in a different spot,
269// Push its number on stack, and join the stream.
270
271// And worse, a mere six of the traps stand apart
272// And push on their stack an addition:
273// An error number, thirty two bits long
274// So we punish the other two fifty
275// And make them push a zero so they match.
276
277// Yet two fifty six entries is long
278// And all will look most the same as the last
279// So we create a macro which can make
280// As many entries as we need to fill.
281
282// Note the change to .data then .text:
283// We plant the address of each entry
284// Into a (data) table for the Host
285// To know where each Guest interrupt should go.
120.macro IRQ_STUB N TARGET 286.macro IRQ_STUB N TARGET
121 .data; .long 1f; .text; 1: 287 .data; .long 1f; .text; 1:
122 /* Make an error number for most traps, which don't have one. */ 288 // Trap eight, ten through fourteen and seventeen
289 // Supply an error number. Else zero.
123 .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17) 290 .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17)
124 pushl $0 291 pushl $0
125 .endif 292 .endif
@@ -128,6 +295,8 @@ deliver_to_host:
128 ALIGN 295 ALIGN
129.endm 296.endm
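
(The .if condition above encodes which traps push their own error code: 8 (double fault), 10 through 14 (TSS, segment, stack, general protection and page faults) and 17 (alignment check); every other entry pushes a zero so the stack layout matches. The same test as a stand-alone C sketch:)

    #include <stdio.h>

    static int trap_pushes_error_code(unsigned int n)
    {
            return n == 8 || (n >= 10 && n <= 14) || n == 17;
    }

    int main(void)
    {
            unsigned int n;

            for (n = 0; n < 32; n++)
                    if (trap_pushes_error_code(n))
                            printf("trap %u supplies its own error code\n", n);
            return 0;
    }
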
130 297
298// This macro creates numerous entries
299// Using GAS macros which out-power C's.
131.macro IRQ_STUBS FIRST LAST TARGET 300.macro IRQ_STUBS FIRST LAST TARGET
132 irq=\FIRST 301 irq=\FIRST
133 .rept \LAST-\FIRST+1 302 .rept \LAST-\FIRST+1
@@ -136,24 +305,43 @@ deliver_to_host:
136 .endr 305 .endr
137.endm 306.endm
138 307
139/* We intercept every interrupt, because we may need to switch back to 308// Here's the marker for our pointer table
140 * host. Unfortunately we can't tell them apart except by entry 309// Laid in the data section just before
141 * point, so we need 256 entry points. 310// Each macro places the address of code
142 */ 311// Forming an array: each one points to text
312// Which handles interrupt in its turn.
143.data 313.data
144.global default_idt_entries 314.global default_idt_entries
145default_idt_entries: 315default_idt_entries:
146.text 316.text
147 IRQ_STUBS 0 1 return_to_host /* First two traps */ 317 // The first two traps go straight back to the Host
148 IRQ_STUB 2 handle_nmi /* NMI */ 318 IRQ_STUBS 0 1 return_to_host
149 IRQ_STUBS 3 31 return_to_host /* Rest of traps */ 319 // We'll say nothing, yet, about NMI
150 IRQ_STUBS 32 127 deliver_to_host /* Real interrupts */ 320 IRQ_STUB 2 handle_nmi
151 IRQ_STUB 128 return_to_host /* System call (overridden) */ 321 // Other traps also return to the Host
152 IRQ_STUBS 129 255 deliver_to_host /* Other real interrupts */ 322 IRQ_STUBS 3 31 return_to_host
153 323 // All interrupts go via their handlers
154/* We ignore NMI and return. */ 324 IRQ_STUBS 32 127 deliver_to_host
325 // 'Cept system calls coming from userspace
326 // Are to go to the Guest, never the Host.
327 IRQ_STUB 128 return_to_host
328 IRQ_STUBS 129 255 deliver_to_host
329
330// The NMI, what a fabulous beast
331// Which swoops in and stops us no matter that
332// We're suspended between heaven and hell,
333// (Or more likely between the Host and Guest)
334// When in it comes! We are dazed and confused
335// So we do the simplest thing which one can.
336// Though we've pushed the trap number and zero
337// We discard them, return, and hope we live.
155handle_nmi: 338handle_nmi:
156 addl $8, %esp 339 addl $8, %esp
157 iret 340 iret
158 341
342// We are done; all that's left is Mastery
343// And "make Mastery" is a journey long
344// Designed to make your fingers itch to code.
345
346// Here ends the text, the file and poem.
159ENTRY(end_switcher_text) 347ENTRY(end_switcher_text)
diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 9dcbffd0aa15..e204e7b4028a 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -509,7 +509,7 @@ config VIDEO_VINO
509 509
510config VIDEO_STRADIS 510config VIDEO_STRADIS
511 tristate "Stradis 4:2:2 MPEG-2 video driver (EXPERIMENTAL)" 511 tristate "Stradis 4:2:2 MPEG-2 video driver (EXPERIMENTAL)"
512 depends on EXPERIMENTAL && PCI && VIDEO_V4L1 && !PPC64 512 depends on EXPERIMENTAL && PCI && VIDEO_V4L1 && VIRT_TO_BUS
513 help 513 help
514 Say Y here to enable support for the Stradis 4:2:2 MPEG-2 video 514 Say Y here to enable support for the Stradis 4:2:2 MPEG-2 video
515 driver for PCI. There is a product page at 515 driver for PCI. There is a product page at
@@ -520,7 +520,7 @@ config VIDEO_ZORAN_ZR36060
520 520
521config VIDEO_ZORAN 521config VIDEO_ZORAN
522 tristate "Zoran ZR36057/36067 Video For Linux" 522 tristate "Zoran ZR36057/36067 Video For Linux"
523 depends on PCI && I2C_ALGOBIT && VIDEO_V4L1 && !PPC64 523 depends on PCI && I2C_ALGOBIT && VIDEO_V4L1 && VIRT_TO_BUS
524 help 524 help
525 Say Y for support for MJPEG capture cards based on the Zoran 525 Say Y for support for MJPEG capture cards based on the Zoran
526 36057/36067 PCI controller chipset. This includes the Iomega 526 36057/36067 PCI controller chipset. This includes the Iomega
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index f88ebc5b685e..cc6c73442435 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -103,7 +103,7 @@ config MTD_PMC_MSP_RAMROOT
103 103
104config MTD_SUN_UFLASH 104config MTD_SUN_UFLASH
105 tristate "Sun Microsystems userflash support" 105 tristate "Sun Microsystems userflash support"
106 depends on SPARC && MTD_CFI 106 depends on SPARC && MTD_CFI && PCI
107 help 107 help
108 This provides a 'mapping' driver which supports the way in 108 This provides a 'mapping' driver which supports the way in
109 which user-programmable flash chips are connected on various 109 which user-programmable flash chips are connected on various
diff --git a/drivers/net/ax88796.c b/drivers/net/ax88796.c
index 1d882360b34d..e43e8047b90e 100644
--- a/drivers/net/ax88796.c
+++ b/drivers/net/ax88796.c
@@ -819,7 +819,7 @@ static int ax_probe(struct platform_device *pdev)
819 } 819 }
820 820
821 ei_status.mem = ioremap(res->start, size); 821 ei_status.mem = ioremap(res->start, size);
822 dev->base_addr = (long)ei_status.mem; 822 dev->base_addr = (unsigned long)ei_status.mem;
823 823
824 if (ei_status.mem == NULL) { 824 if (ei_status.mem == NULL) {
825 dev_err(&pdev->dev, "Cannot ioremap area (%08zx,%08zx)\n", 825 dev_err(&pdev->dev, "Cannot ioremap area (%08zx,%08zx)\n",
diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
index ebcf35e4cf5b..e620ed4c3ff0 100644
--- a/drivers/net/cxgb3/cxgb3_offload.c
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -699,7 +699,7 @@ static int do_cr(struct t3cdev *dev, struct sk_buff *skb)
699 * the buffer. 699 * the buffer.
700 */ 700 */
701static struct sk_buff *cxgb3_get_cpl_reply_skb(struct sk_buff *skb, size_t len, 701static struct sk_buff *cxgb3_get_cpl_reply_skb(struct sk_buff *skb, size_t len,
702 int gfp) 702 gfp_t gfp)
703{ 703{
704 if (likely(!skb_cloned(skb))) { 704 if (likely(!skb_cloned(skb))) {
705 BUG_ON(skb->len < len); 705 BUG_ON(skb->len < len);
diff --git a/drivers/net/lguest_net.c b/drivers/net/lguest_net.c
index 112778652f7d..cab57911a80e 100644
--- a/drivers/net/lguest_net.c
+++ b/drivers/net/lguest_net.c
@@ -1,6 +1,13 @@
1/* A simple network driver for lguest. 1/*D:500
2 * The Guest network driver.
2 * 3 *
3 * Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation 4 * This is a very simple virtual network driver, and our last Guest driver.
5 * The only trick is that it can talk directly to multiple other recipients
6 * (ie. other Guests on the same network). It can also be used with only the
7 * Host on the network.
8 :*/
9
10/* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4 * 11 *
5 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 13 * it under the terms of the GNU General Public License as published by
@@ -28,23 +35,47 @@
28#define MAX_LANS 4 35#define MAX_LANS 4
29#define NUM_SKBS 8 36#define NUM_SKBS 8
30 37
38/*M:011 Network code master Jeff Garzik points out numerous shortcomings in
39 * this driver if it aspires to greatness.
40 *
41 * Firstly, it doesn't use "NAPI": the networking's New API, and is poorer for
42 * it. As he says "NAPI means system-wide load leveling, across multiple
43 * network interfaces. Lack of NAPI can mean competition at higher loads."
44 *
45 * He also points out that we don't implement set_mac_address, so users cannot
46 * change the device's hardware address. When I asked why one would want to:
47 * "Bonding, and situations where you /do/ want the MAC address to "leak" out
48 * of the host onto the wider net."
49 *
50 * Finally, he would like module unloading: "It is not unrealistic to think of
51 * [un|re|]loading the net support module in an lguest guest. And, adding
52 * module support makes the programmer more responsible, because they now have
53 * to learn to clean up after themselves. Any driver that cannot clean up
54 * after itself is an incomplete driver in my book."
55 :*/
56
57/*D:530 The "struct lguestnet_info" contains all the information we need to
58 * know about the network device. */
31struct lguestnet_info 59struct lguestnet_info
32{ 60{
33 /* The shared page(s). */ 61 /* The mapped device page(s) (an array of "struct lguest_net"). */
34 struct lguest_net *peer; 62 struct lguest_net *peer;
63 /* The physical address of the device page(s) */
35 unsigned long peer_phys; 64 unsigned long peer_phys;
65 /* The size of the device page(s). */
36 unsigned long mapsize; 66 unsigned long mapsize;
37 67
38 /* The lguest_device I come from */ 68 /* The lguest_device I come from */
39 struct lguest_device *lgdev; 69 struct lguest_device *lgdev;
40 70
41 /* My peerid. */ 71 /* My peerid (ie. my slot in the array). */
42 unsigned int me; 72 unsigned int me;
43 73
44 /* Receive queue. */ 74 /* Receive queue: the network packets waiting to be filled. */
45 struct sk_buff *skb[NUM_SKBS]; 75 struct sk_buff *skb[NUM_SKBS];
46 struct lguest_dma dma[NUM_SKBS]; 76 struct lguest_dma dma[NUM_SKBS];
47}; 77};
78/*:*/
48 79
49/* How many bytes left in this page. */ 80/* How many bytes left in this page. */
50static unsigned int rest_of_page(void *data) 81static unsigned int rest_of_page(void *data)
@@ -52,39 +83,82 @@ static unsigned int rest_of_page(void *data)
52 return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); 83 return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE);
53} 84}
54 85
55/* Simple convention: offset 4 * peernum. */ 86/*D:570 Each peer (ie. Guest or Host) on the network binds their receive
87 * buffers to a different key: we simply use the physical address of the
88 * device's memory page plus the peer number. The Host insists that all keys
89 * be a multiple of 4, so we multiply the peer number by 4. */
56static unsigned long peer_key(struct lguestnet_info *info, unsigned peernum) 90static unsigned long peer_key(struct lguestnet_info *info, unsigned peernum)
57{ 91{
58 return info->peer_phys + 4 * peernum; 92 return info->peer_phys + 4 * peernum;
59} 93}
60 94
95/* This is the routine which sets up a "struct lguest_dma" to point to a
96 * network packet, similar to req_to_dma() in lguest_blk.c. The structure of a
97 * "struct sk_buff" has grown complex over the years: it consists of a "head"
98 * linear section pointed to by "skb->data", and possibly an array of
99 * "fragments" in the case of a non-linear packet.
100 *
101 * Our receive buffers don't use fragments at all but outgoing skbs might, so
102 * we handle it. */
61static void skb_to_dma(const struct sk_buff *skb, unsigned int headlen, 103static void skb_to_dma(const struct sk_buff *skb, unsigned int headlen,
62 struct lguest_dma *dma) 104 struct lguest_dma *dma)
63{ 105{
64 unsigned int i, seg; 106 unsigned int i, seg;
65 107
108 /* First, we put the linear region into the "struct lguest_dma". Each
109 * entry can't go over a page boundary, so even though all our packets
110 * are 1514 bytes or less, we might need to use two entries here: */
66 for (i = seg = 0; i < headlen; seg++, i += rest_of_page(skb->data+i)) { 111 for (i = seg = 0; i < headlen; seg++, i += rest_of_page(skb->data+i)) {
67 dma->addr[seg] = virt_to_phys(skb->data + i); 112 dma->addr[seg] = virt_to_phys(skb->data + i);
68 dma->len[seg] = min((unsigned)(headlen - i), 113 dma->len[seg] = min((unsigned)(headlen - i),
69 rest_of_page(skb->data + i)); 114 rest_of_page(skb->data + i));
70 } 115 }
116
117 /* Now we handle the fragments: at least they're guaranteed not to go
118 * over a page. skb_shinfo(skb) returns a pointer to the structure
119 * which tells us about the number of fragments and the fragment
120 * array. */
71 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, seg++) { 121 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, seg++) {
72 const skb_frag_t *f = &skb_shinfo(skb)->frags[i]; 122 const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
73 /* Should not happen with MTU less than 64k - 2 * PAGE_SIZE. */ 123 /* Should not happen with MTU less than 64k - 2 * PAGE_SIZE. */
74 if (seg == LGUEST_MAX_DMA_SECTIONS) { 124 if (seg == LGUEST_MAX_DMA_SECTIONS) {
125 /* We will end up sending a truncated packet should
126 * this ever happen. Plus, a cool log message! */
75 printk("Woah dude! Megapacket!\n"); 127 printk("Woah dude! Megapacket!\n");
76 break; 128 break;
77 } 129 }
78 dma->addr[seg] = page_to_phys(f->page) + f->page_offset; 130 dma->addr[seg] = page_to_phys(f->page) + f->page_offset;
79 dma->len[seg] = f->size; 131 dma->len[seg] = f->size;
80 } 132 }
133
134 /* If after all that we didn't use the entire "struct lguest_dma"
135 * array, we terminate it with a 0 length. */
81 if (seg < LGUEST_MAX_DMA_SECTIONS) 136 if (seg < LGUEST_MAX_DMA_SECTIONS)
82 dma->len[seg] = 0; 137 dma->len[seg] = 0;
83} 138}
84 139
85/* We overload multicast bit to show promiscuous mode. */ 140/*
141 * Packet transmission.
142 *
143 * Our packet transmission is a little unusual. A real network card would just
144 * send out the packet and leave the receivers to decide if they're interested.
145 * Instead, we look through the network device memory page and see if any of
146 * the ethernet addresses match the packet destination, and if so we send it to
147 * that Guest.
148 *
149 * This is made a little more complicated in two cases. The first case is
150 * broadcast packets: for that we send the packet to all Guests on the network,
151 * one at a time. The second case is "promiscuous" mode, where a Guest wants
152 * to see all the packets on the network. We need a way for the Guest to tell
153 * us it wants to see all packets, so it sets the "multicast" bit on its
154 * published MAC address, which is never valid in a real ethernet address.
155 */
86#define PROMISC_BIT 0x01 156#define PROMISC_BIT 0x01
87 157
158/* This is the callback which is summoned whenever the network device's
159 * multicast or promiscuous state changes. If the card is in promiscuous mode,
160 * we advertise that in our ethernet address in the device's memory. We do the
161 * same if Linux wants any or all multicast traffic. */
88static void lguestnet_set_multicast(struct net_device *dev) 162static void lguestnet_set_multicast(struct net_device *dev)
89{ 163{
90 struct lguestnet_info *info = netdev_priv(dev); 164 struct lguestnet_info *info = netdev_priv(dev);
@@ -95,11 +169,14 @@ static void lguestnet_set_multicast(struct net_device *dev)
95 info->peer[info->me].mac[0] &= ~PROMISC_BIT; 169 info->peer[info->me].mac[0] &= ~PROMISC_BIT;
96} 170}
97 171
172/* A simple test function to see if a peer wants to see all packets.*/
98static int promisc(struct lguestnet_info *info, unsigned int peer) 173static int promisc(struct lguestnet_info *info, unsigned int peer)
99{ 174{
100 return info->peer[peer].mac[0] & PROMISC_BIT; 175 return info->peer[peer].mac[0] & PROMISC_BIT;
101} 176}
102 177
178/* Another simple function to see if a peer's advertised ethernet address
179 * matches a packet's destination ethernet address. */
103static int mac_eq(const unsigned char mac[ETH_ALEN], 180static int mac_eq(const unsigned char mac[ETH_ALEN],
104 struct lguestnet_info *info, unsigned int peer) 181 struct lguestnet_info *info, unsigned int peer)
105{ 182{
@@ -109,6 +186,8 @@ static int mac_eq(const unsigned char mac[ETH_ALEN],
109 return memcmp(mac+1, info->peer[peer].mac+1, ETH_ALEN-1) == 0; 186 return memcmp(mac+1, info->peer[peer].mac+1, ETH_ALEN-1) == 0;
110} 187}
111 188
189/* This is the function which actually sends a packet once we've decided a
190 * peer wants it: */
112static void transfer_packet(struct net_device *dev, 191static void transfer_packet(struct net_device *dev,
113 struct sk_buff *skb, 192 struct sk_buff *skb,
114 unsigned int peernum) 193 unsigned int peernum)
@@ -116,76 +195,134 @@ static void transfer_packet(struct net_device *dev,
116 struct lguestnet_info *info = netdev_priv(dev); 195 struct lguestnet_info *info = netdev_priv(dev);
117 struct lguest_dma dma; 196 struct lguest_dma dma;
118 197
198 /* We use our handy "struct lguest_dma" packing function to prepare
199 * the skb for sending. */
119 skb_to_dma(skb, skb_headlen(skb), &dma); 200 skb_to_dma(skb, skb_headlen(skb), &dma);
120 pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len); 201 pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len);
121 202
203 /* This is the actual send call which copies the packet. */
122 lguest_send_dma(peer_key(info, peernum), &dma); 204 lguest_send_dma(peer_key(info, peernum), &dma);
205
206 /* Check that the entire packet was transmitted. If not, it could mean
207 * that the other Guest registered a short receive buffer, but this
208 * driver should never do that. More likely, the peer is dead. */
123 if (dma.used_len != skb->len) { 209 if (dma.used_len != skb->len) {
124 dev->stats.tx_carrier_errors++; 210 dev->stats.tx_carrier_errors++;
125 pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n", 211 pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n",
126 peernum, dma.used_len, skb->len, 212 peernum, dma.used_len, skb->len,
127 (void *)dma.addr[0], dma.len[0]); 213 (void *)dma.addr[0], dma.len[0]);
128 } else { 214 } else {
215 /* On success we update the stats. */
129 dev->stats.tx_bytes += skb->len; 216 dev->stats.tx_bytes += skb->len;
130 dev->stats.tx_packets++; 217 dev->stats.tx_packets++;
131 } 218 }
132} 219}
133 220
221/* Another helper function to tell us if a slot in the device memory is unused.
222 * Since we always set the Local Assignment bit in the ethernet address, the
223 * first byte can never be 0. */
134static int unused_peer(const struct lguest_net peer[], unsigned int num) 224static int unused_peer(const struct lguest_net peer[], unsigned int num)
135{ 225{
136 return peer[num].mac[0] == 0; 226 return peer[num].mac[0] == 0;
137} 227}
138 228
229/* Finally, here is the routine which handles an outgoing packet. It's called
230 * "start_xmit" for traditional reasons. */
139static int lguestnet_start_xmit(struct sk_buff *skb, struct net_device *dev) 231static int lguestnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
140{ 232{
141 unsigned int i; 233 unsigned int i;
142 int broadcast; 234 int broadcast;
143 struct lguestnet_info *info = netdev_priv(dev); 235 struct lguestnet_info *info = netdev_priv(dev);
236 /* Extract the destination ethernet address from the packet. */
144 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; 237 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
145 238
146 pr_debug("%s: xmit %02x:%02x:%02x:%02x:%02x:%02x\n", 239 pr_debug("%s: xmit %02x:%02x:%02x:%02x:%02x:%02x\n",
147 dev->name, dest[0],dest[1],dest[2],dest[3],dest[4],dest[5]); 240 dev->name, dest[0],dest[1],dest[2],dest[3],dest[4],dest[5]);
148 241
242 /* If it's a multicast packet, we broadcast to everyone. That's not
243 * very efficient, but there are very few applications which actually
244 * use multicast, which is a shame really.
245 *
246 * As etherdevice.h points out: "By definition the broadcast address is
247 * also a multicast address." So we don't have to test for broadcast
248 * packets separately. */
149 broadcast = is_multicast_ether_addr(dest); 249 broadcast = is_multicast_ether_addr(dest);
250
251 /* Look through all the published ethernet addresses to see if we
252 * should send this packet. */
150 for (i = 0; i < info->mapsize/sizeof(struct lguest_net); i++) { 253 for (i = 0; i < info->mapsize/sizeof(struct lguest_net); i++) {
254 /* We don't send to ourselves (we actually can't SEND_DMA to
255 * ourselves anyway), and don't send to unused slots.*/
151 if (i == info->me || unused_peer(info->peer, i)) 256 if (i == info->me || unused_peer(info->peer, i))
152 continue; 257 continue;
153 258
259 /* If it's broadcast we send it. If they want every packet we
260 * send it. If the destination matches their address we send
261 * it. Otherwise we go to the next peer. */
154 if (!broadcast && !promisc(info, i) && !mac_eq(dest, info, i)) 262 if (!broadcast && !promisc(info, i) && !mac_eq(dest, info, i))
155 continue; 263 continue;
156 264
157 pr_debug("lguestnet %s: sending from %i to %i\n", 265 pr_debug("lguestnet %s: sending from %i to %i\n",
158 dev->name, info->me, i); 266 dev->name, info->me, i);
267 /* Our routine which actually does the transfer. */
159 transfer_packet(dev, skb, i); 268 transfer_packet(dev, skb, i);
160 } 269 }
270
271 /* An xmit routine is expected to dispose of the packet, so we do. */
161 dev_kfree_skb(skb); 272 dev_kfree_skb(skb);
273
274 /* As per kernel convention, 0 means success. This is why I love
275 * networking: even if we never sent to anyone, that's still
276 * success! */
162 return 0; 277 return 0;
163} 278}
164 279
165/* Find a new skb to put in this slot in shared mem. */ 280/*D:560
281 * Packet receiving.
282 *
283 * First, here's a helper routine which fills one slot in our array of receive
284 * buffers: */
166static int fill_slot(struct net_device *dev, unsigned int slot) 285static int fill_slot(struct net_device *dev, unsigned int slot)
167{ 286{
168 struct lguestnet_info *info = netdev_priv(dev); 287 struct lguestnet_info *info = netdev_priv(dev);
169 /* Try to create and register a new one. */ 288
289 /* We can receive ETH_DATA_LEN (1500) byte packets, plus a standard
290 * ethernet header of ETH_HLEN (14) bytes. */
170 info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN); 291 info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN);
171 if (!info->skb[slot]) { 292 if (!info->skb[slot]) {
172 printk("%s: could not fill slot %i\n", dev->name, slot); 293 printk("%s: could not fill slot %i\n", dev->name, slot);
173 return -ENOMEM; 294 return -ENOMEM;
174 } 295 }
175 296
297 /* skb_to_dma() is a helper which sets up the "struct lguest_dma" to
298 * point to the data in the skb: we also use it for sending out a
299 * packet. */
176 skb_to_dma(info->skb[slot], ETH_HLEN + ETH_DATA_LEN, &info->dma[slot]); 300 skb_to_dma(info->skb[slot], ETH_HLEN + ETH_DATA_LEN, &info->dma[slot]);
301
302 /* This is a Write Memory Barrier: it ensures that the entry in the
303 * receive buffer array is written *before* we set the "used_len" entry
304 * to 0. If the Host were looking at the receive buffer array from a
305 * different CPU, it could potentially see "used_len = 0" and not see
306 * the updated receive buffer information. This would be a horribly
307 * nasty bug, so make sure the compiler and CPU know this has to happen
308 * first. */
177 wmb(); 309 wmb();
178 /* Now we tell hypervisor it can use the slot. */ 310 /* Writing 0 to "used_len" tells the Host it can use this receive
311 * buffer now. */
179 info->dma[slot].used_len = 0; 312 info->dma[slot].used_len = 0;
180 return 0; 313 return 0;
181} 314}
182 315
316/* This is the actual receive routine. When we receive an interrupt from the
317 * Host to tell us a packet has been delivered, we arrive here: */
183static irqreturn_t lguestnet_rcv(int irq, void *dev_id) 318static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
184{ 319{
185 struct net_device *dev = dev_id; 320 struct net_device *dev = dev_id;
186 struct lguestnet_info *info = netdev_priv(dev); 321 struct lguestnet_info *info = netdev_priv(dev);
187 unsigned int i, done = 0; 322 unsigned int i, done = 0;
188 323
324 /* Look through our entire receive array for an entry which has data
325 * in it. */
189 for (i = 0; i < ARRAY_SIZE(info->dma); i++) { 326 for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
190 unsigned int length; 327 unsigned int length;
191 struct sk_buff *skb; 328 struct sk_buff *skb;
@@ -194,10 +331,16 @@ static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
194 if (length == 0) 331 if (length == 0)
195 continue; 332 continue;
196 333
334 /* We've found one! Remember the skb (we grabbed the length
335 * above), and immediately refill the slot we've taken it
336 * from. */
197 done++; 337 done++;
198 skb = info->skb[i]; 338 skb = info->skb[i];
199 fill_slot(dev, i); 339 fill_slot(dev, i);
200 340
341 /* This shouldn't happen: micropackets could be sent by a
342 * badly-behaved Guest on the network, but the Host will never
343 * stuff more data in the buffer than the buffer length. */
201 if (length < ETH_HLEN || length > ETH_HLEN + ETH_DATA_LEN) { 344 if (length < ETH_HLEN || length > ETH_HLEN + ETH_DATA_LEN) {
202 pr_debug(KERN_WARNING "%s: unbelievable skb len: %i\n", 345 pr_debug(KERN_WARNING "%s: unbelievable skb len: %i\n",
203 dev->name, length); 346 dev->name, length);
@@ -205,36 +348,72 @@ static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
205 continue; 348 continue;
206 } 349 }
207 350
351 /* skb_put(), what a great function! I've ranted about this
352 * function before (http://lkml.org/lkml/1999/9/26/24). You
353 * call it after you've added data to the end of an skb (in
354 * this case, it was the Host which wrote the data). */
208 skb_put(skb, length); 355 skb_put(skb, length);
356
357 /* The ethernet header contains a protocol field: we use the
358 * standard helper to extract it, and place the result in
359 * skb->protocol. The helper also sets up skb->pkt_type and
360 * eats up the ethernet header from the front of the packet. */
209 skb->protocol = eth_type_trans(skb, dev); 361 skb->protocol = eth_type_trans(skb, dev);
210 /* This is a reliable transport. */ 362
363 /* If this device doesn't need checksums for sending, we also
364 * don't need to check the packets when they come in. */
211 if (dev->features & NETIF_F_NO_CSUM) 365 if (dev->features & NETIF_F_NO_CSUM)
212 skb->ip_summed = CHECKSUM_UNNECESSARY; 366 skb->ip_summed = CHECKSUM_UNNECESSARY;
367
368 /* As a last resort for debugging the driver or the lguest I/O
369 * subsystem, you can uncomment the "#define DEBUG" at the top
370 * of this file, which turns all the pr_debug() into printk()
371 * and floods the logs. */
213 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 372 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
214 ntohs(skb->protocol), skb->len, skb->pkt_type); 373 ntohs(skb->protocol), skb->len, skb->pkt_type);
215 374
375 /* Update the packet and byte counts (visible from ifconfig,
376 * and good for debugging). */
216 dev->stats.rx_bytes += skb->len; 377 dev->stats.rx_bytes += skb->len;
217 dev->stats.rx_packets++; 378 dev->stats.rx_packets++;
379
380 /* Hand our fresh network packet into the stack's "network
381 * interface receive" routine. That will free the packet
382 * itself when it's finished. */
218 netif_rx(skb); 383 netif_rx(skb);
219 } 384 }
385
386 /* If we found any packets, we assume the interrupt was for us. */
220 return done ? IRQ_HANDLED : IRQ_NONE; 387 return done ? IRQ_HANDLED : IRQ_NONE;
221} 388}
222 389
390/*D:550 This is where we start: when the device is brought up by dhcpd or
391 * ifconfig. At this point we advertise our MAC address to the rest of the
392 * network, and register receive buffers ready for incoming packets. */
223static int lguestnet_open(struct net_device *dev) 393static int lguestnet_open(struct net_device *dev)
224{ 394{
225 int i; 395 int i;
226 struct lguestnet_info *info = netdev_priv(dev); 396 struct lguestnet_info *info = netdev_priv(dev);
227 397
228 /* Set up our MAC address */ 398 /* Copy our MAC address into the device page, so others on the network
399 * can find us. */
229 memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN); 400 memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN);
230 401
231 /* Turn on promisc mode if needed */ 402 /* We might already be in promisc mode (dev->flags & IFF_PROMISC). Our
403 * set_multicast callback handles this already, so we call it now. */
232 lguestnet_set_multicast(dev); 404 lguestnet_set_multicast(dev);
233 405
406 /* Allocate packets and put them into our "struct lguest_dma" array.
407 * If we fail to allocate all the packets we could still limp along,
408 * but it's a sign of real stress so we should probably give up now. */
234 for (i = 0; i < ARRAY_SIZE(info->dma); i++) { 409 for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
235 if (fill_slot(dev, i) != 0) 410 if (fill_slot(dev, i) != 0)
236 goto cleanup; 411 goto cleanup;
237 } 412 }
413
414 /* Finally we tell the Host where our array of "struct lguest_dma"
415 * receive buffers is, binding it to the key corresponding to the
416 * device's physical memory plus our peerid. */
238 if (lguest_bind_dma(peer_key(info,info->me), info->dma, 417 if (lguest_bind_dma(peer_key(info,info->me), info->dma,
239 NUM_SKBS, lgdev_irq(info->lgdev)) != 0) 418 NUM_SKBS, lgdev_irq(info->lgdev)) != 0)
240 goto cleanup; 419 goto cleanup;
@@ -245,22 +424,29 @@ cleanup:
245 dev_kfree_skb(info->skb[i]); 424 dev_kfree_skb(info->skb[i]);
246 return -ENOMEM; 425 return -ENOMEM;
247} 426}
427/*:*/
248 428
429/* The close routine is called when the device is no longer in use: we clean up
430 * elegantly. */
249static int lguestnet_close(struct net_device *dev) 431static int lguestnet_close(struct net_device *dev)
250{ 432{
251 unsigned int i; 433 unsigned int i;
252 struct lguestnet_info *info = netdev_priv(dev); 434 struct lguestnet_info *info = netdev_priv(dev);
253 435
254 /* Clear all trace: others might deliver packets, we'll ignore it. */ 436 /* Clear all trace of our existence out of the device memory by setting
437 * the slot which held our MAC address to 0 (unused). */
255 memset(&info->peer[info->me], 0, sizeof(info->peer[info->me])); 438 memset(&info->peer[info->me], 0, sizeof(info->peer[info->me]));
256 439
257 /* Deregister sg lists. */ 440 /* Unregister our array of receive buffers */
258 lguest_unbind_dma(peer_key(info, info->me), info->dma); 441 lguest_unbind_dma(peer_key(info, info->me), info->dma);
259 for (i = 0; i < ARRAY_SIZE(info->dma); i++) 442 for (i = 0; i < ARRAY_SIZE(info->dma); i++)
260 dev_kfree_skb(info->skb[i]); 443 dev_kfree_skb(info->skb[i]);
261 return 0; 444 return 0;
262} 445}
263 446
447/*D:510 The network device probe function is basically a standard ethernet
448 * device setup. It reads the "struct lguest_device_desc" and sets the "struct
449 * net_device". Oh, the line-by-line excitement! Let's skip over it. :*/
264static int lguestnet_probe(struct lguest_device *lgdev) 450static int lguestnet_probe(struct lguest_device *lgdev)
265{ 451{
266 int err, irqf = IRQF_SHARED; 452 int err, irqf = IRQF_SHARED;
@@ -290,10 +476,16 @@ static int lguestnet_probe(struct lguest_device *lgdev)
290 dev->stop = lguestnet_close; 476 dev->stop = lguestnet_close;
291 dev->hard_start_xmit = lguestnet_start_xmit; 477 dev->hard_start_xmit = lguestnet_start_xmit;
292 478
293 /* Turning on/off promisc will call dev->set_multicast_list. 479 /* We don't actually support multicast yet, but turning on/off
294 * We don't actually support multicast yet */ 480 * promisc also calls dev->set_multicast_list. */
295 dev->set_multicast_list = lguestnet_set_multicast; 481 dev->set_multicast_list = lguestnet_set_multicast;
296 SET_NETDEV_DEV(dev, &lgdev->dev); 482 SET_NETDEV_DEV(dev, &lgdev->dev);
483
484 /* The network code complains if you have "scatter-gather" capability
485 * if you don't also handle checksums (it seems that would be
486 * "illogical"). So we use a lie of omission and don't tell it that we
487 * can handle scattered packets unless we also don't want checksums,
488 * even though to us they're completely independent. */
297 if (desc->features & LGUEST_NET_F_NOCSUM) 489 if (desc->features & LGUEST_NET_F_NOCSUM)
298 dev->features = NETIF_F_SG|NETIF_F_NO_CSUM; 490 dev->features = NETIF_F_SG|NETIF_F_NO_CSUM;
299 491
@@ -325,6 +517,9 @@ static int lguestnet_probe(struct lguest_device *lgdev)
325 } 517 }
326 518
327 pr_debug("lguestnet: registered device %s\n", dev->name); 519 pr_debug("lguestnet: registered device %s\n", dev->name);
520 /* Finally, we put the "struct net_device" in the generic "struct
521 * lguest_device"s private pointer. Again, it's not necessary, but
522 * makes sure the cool kernel kids don't tease us. */
328 lgdev->private = dev; 523 lgdev->private = dev;
329 return 0; 524 return 0;
330 525
@@ -352,3 +547,11 @@ module_init(lguestnet_init);
352 547
353MODULE_DESCRIPTION("Lguest network driver"); 548MODULE_DESCRIPTION("Lguest network driver");
354MODULE_LICENSE("GPL"); 549MODULE_LICENSE("GPL");
550
551/*D:580
552 * This is the last of the Drivers, and with this we have covered the many and
553 * wonderous and fine (and boring) details of the Guest.
554 *
555 * "make Launcher" beckons, where we answer questions like "Where do Guests
556 * come from?", and "What do you do when someone asks for optimization?"
557 */
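
The fill_slot() comments above describe a classic publish pattern: write the descriptor, then a write barrier, then the flag the consumer polls. A minimal sketch of that ordering with invented structure and field names (only wmb() is the real kernel primitive):

	/* Publish a receive buffer to a consumer on another CPU: the
	 * descriptor fields must be visible before used_len = 0 marks
	 * the slot as ready, hence the barrier in between. */
	struct slot {
		unsigned long addr;	/* where the packet data lives */
		unsigned int len;	/* capacity of the buffer */
		unsigned int used_len;	/* nonzero: busy, 0: ready */
	};

	static void publish_slot(struct slot *s, unsigned long addr,
				 unsigned int len)
	{
		s->addr = addr;
		s->len = len;
		wmb();			/* descriptor first... */
		s->used_len = 0;	/* ...then the "ready" flag */
	}
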
diff --git a/drivers/net/lib8390.c b/drivers/net/lib8390.c
index 5c86e737f954..721ee38d2241 100644
--- a/drivers/net/lib8390.c
+++ b/drivers/net/lib8390.c
@@ -219,15 +219,6 @@ static void ei_tx_timeout(struct net_device *dev)
219 int txsr, isr, tickssofar = jiffies - dev->trans_start; 219 int txsr, isr, tickssofar = jiffies - dev->trans_start;
220 unsigned long flags; 220 unsigned long flags;
221 221
222#if defined(CONFIG_M32R) && defined(CONFIG_SMP)
223 unsigned long icucr;
224
225 local_irq_save(flags);
226 icucr = inl(M32R_ICU_CR1_PORTL);
227 icucr |= M32R_ICUCR_ISMOD11;
228 outl(icucr, M32R_ICU_CR1_PORTL);
229 local_irq_restore(flags);
230#endif
231 ei_local->stat.tx_errors++; 222 ei_local->stat.tx_errors++;
232 223
233 spin_lock_irqsave(&ei_local->page_lock, flags); 224 spin_lock_irqsave(&ei_local->page_lock, flags);
diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index f87176055d0e..266e8b38fe10 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -2054,7 +2054,7 @@ end:
2054 */ 2054 */
2055static int pppol2tp_tunnel_getsockopt(struct sock *sk, 2055static int pppol2tp_tunnel_getsockopt(struct sock *sk,
2056 struct pppol2tp_tunnel *tunnel, 2056 struct pppol2tp_tunnel *tunnel,
2057 int optname, int __user *val) 2057 int optname, int *val)
2058{ 2058{
2059 int err = 0; 2059 int err = 0;
2060 2060
@@ -2077,7 +2077,7 @@ static int pppol2tp_tunnel_getsockopt(struct sock *sk,
2077 */ 2077 */
2078static int pppol2tp_session_getsockopt(struct sock *sk, 2078static int pppol2tp_session_getsockopt(struct sock *sk,
2079 struct pppol2tp_session *session, 2079 struct pppol2tp_session *session,
2080 int optname, int __user *val) 2080 int optname, int *val)
2081{ 2081{
2082 int err = 0; 2082 int err = 0;
2083 2083
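
The pppol2tp hunks drop the __user annotation because these helpers are now handed kernel pointers rather than userspace ones. The annotation decides which access primitive is legal; a hedged sketch with illustrative function names:

	#include <linux/uaccess.h>

	/* A __user pointer must go through put_user()/copy_to_user();
	 * sparse ("make C=1") complains if it is dereferenced directly. */
	static int give_value_to_user(int __user *uval, int v)
	{
		return put_user(v, uval);	/* 0 or -EFAULT */
	}

	/* A plain kernel pointer lives in the same address space and can
	 * simply be stored through. */
	static void give_value_in_kernel(int *kval, int v)
	{
		*kval = v;
	}
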
diff --git a/drivers/pnp/card.c b/drivers/pnp/card.c
index dd6384b1efce..b6a4f02b01d1 100644
--- a/drivers/pnp/card.c
+++ b/drivers/pnp/card.c
@@ -2,7 +2,6 @@
2 * card.c - contains functions for managing groups of PnP devices 2 * card.c - contains functions for managing groups of PnP devices
3 * 3 *
4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com> 4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
5 *
6 */ 5 */
7 6
8#include <linux/module.h> 7#include <linux/module.h>
@@ -13,26 +12,31 @@
13LIST_HEAD(pnp_cards); 12LIST_HEAD(pnp_cards);
14static LIST_HEAD(pnp_card_drivers); 13static LIST_HEAD(pnp_card_drivers);
15 14
16 15static const struct pnp_card_device_id *match_card(struct pnp_card_driver *drv,
17static const struct pnp_card_device_id * match_card(struct pnp_card_driver * drv, struct pnp_card * card) 16 struct pnp_card *card)
18{ 17{
19 const struct pnp_card_device_id * drv_id = drv->id_table; 18 const struct pnp_card_device_id *drv_id = drv->id_table;
20 while (*drv_id->id){ 19
21 if (compare_pnp_id(card->id,drv_id->id)) { 20 while (*drv_id->id) {
21 if (compare_pnp_id(card->id, drv_id->id)) {
22 int i = 0; 22 int i = 0;
23
23 for (;;) { 24 for (;;) {
24 int found; 25 int found;
25 struct pnp_dev *dev; 26 struct pnp_dev *dev;
26 if (i == PNP_MAX_DEVICES || ! *drv_id->devs[i].id) 27
28 if (i == PNP_MAX_DEVICES
29 || !*drv_id->devs[i].id)
27 return drv_id; 30 return drv_id;
28 found = 0; 31 found = 0;
29 card_for_each_dev(card, dev) { 32 card_for_each_dev(card, dev) {
30 if (compare_pnp_id(dev->id, drv_id->devs[i].id)) { 33 if (compare_pnp_id
34 (dev->id, drv_id->devs[i].id)) {
31 found = 1; 35 found = 1;
32 break; 36 break;
33 } 37 }
34 } 38 }
35 if (! found) 39 if (!found)
36 break; 40 break;
37 i++; 41 i++;
38 } 42 }
@@ -42,14 +46,15 @@ static const struct pnp_card_device_id * match_card(struct pnp_card_driver * drv
42 return NULL; 46 return NULL;
43} 47}
44 48
45static void card_remove(struct pnp_dev * dev) 49static void card_remove(struct pnp_dev *dev)
46{ 50{
47 dev->card_link = NULL; 51 dev->card_link = NULL;
48} 52}
49 53
50static void card_remove_first(struct pnp_dev * dev) 54static void card_remove_first(struct pnp_dev *dev)
51{ 55{
52 struct pnp_card_driver * drv = to_pnp_card_driver(dev->driver); 56 struct pnp_card_driver *drv = to_pnp_card_driver(dev->driver);
57
53 if (!dev->card || !drv) 58 if (!dev->card || !drv)
54 return; 59 return;
55 if (drv->remove) 60 if (drv->remove)
@@ -67,7 +72,7 @@ static int card_probe(struct pnp_card *card, struct pnp_card_driver *drv)
67 72
68 if (!drv->probe) 73 if (!drv->probe)
69 return 0; 74 return 0;
70 id = match_card(drv,card); 75 id = match_card(drv, card);
71 if (!id) 76 if (!id)
72 return 0; 77 return 0;
73 78
@@ -94,12 +99,11 @@ static int card_probe(struct pnp_card *card, struct pnp_card_driver *drv)
94 * pnp_add_card_id - adds an EISA id to the specified card 99 * pnp_add_card_id - adds an EISA id to the specified card
95 * @id: pointer to a pnp_id structure 100 * @id: pointer to a pnp_id structure
96 * @card: pointer to the desired card 101 * @card: pointer to the desired card
97 *
98 */ 102 */
99 103int pnp_add_card_id(struct pnp_id *id, struct pnp_card *card)
100int pnp_add_card_id(struct pnp_id *id, struct pnp_card * card)
101{ 104{
102 struct pnp_id * ptr; 105 struct pnp_id *ptr;
106
103 if (!id) 107 if (!id)
104 return -EINVAL; 108 return -EINVAL;
105 if (!card) 109 if (!card)
@@ -115,10 +119,11 @@ int pnp_add_card_id(struct pnp_id *id, struct pnp_card * card)
115 return 0; 119 return 0;
116} 120}
117 121
118static void pnp_free_card_ids(struct pnp_card * card) 122static void pnp_free_card_ids(struct pnp_card *card)
119{ 123{
120 struct pnp_id * id; 124 struct pnp_id *id;
121 struct pnp_id *next; 125 struct pnp_id *next;
126
122 if (!card) 127 if (!card)
123 return; 128 return;
124 id = card->id; 129 id = card->id;
@@ -131,49 +136,55 @@ static void pnp_free_card_ids(struct pnp_card * card)
131 136
132static void pnp_release_card(struct device *dmdev) 137static void pnp_release_card(struct device *dmdev)
133{ 138{
134 struct pnp_card * card = to_pnp_card(dmdev); 139 struct pnp_card *card = to_pnp_card(dmdev);
140
135 pnp_free_card_ids(card); 141 pnp_free_card_ids(card);
136 kfree(card); 142 kfree(card);
137} 143}
138 144
139 145static ssize_t pnp_show_card_name(struct device *dmdev,
140static ssize_t pnp_show_card_name(struct device *dmdev, struct device_attribute *attr, char *buf) 146 struct device_attribute *attr, char *buf)
141{ 147{
142 char *str = buf; 148 char *str = buf;
143 struct pnp_card *card = to_pnp_card(dmdev); 149 struct pnp_card *card = to_pnp_card(dmdev);
144 str += sprintf(str,"%s\n", card->name); 150
151 str += sprintf(str, "%s\n", card->name);
145 return (str - buf); 152 return (str - buf);
146} 153}
147 154
148static DEVICE_ATTR(name,S_IRUGO,pnp_show_card_name,NULL); 155static DEVICE_ATTR(name, S_IRUGO, pnp_show_card_name, NULL);
149 156
150static ssize_t pnp_show_card_ids(struct device *dmdev, struct device_attribute *attr, char *buf) 157static ssize_t pnp_show_card_ids(struct device *dmdev,
158 struct device_attribute *attr, char *buf)
151{ 159{
152 char *str = buf; 160 char *str = buf;
153 struct pnp_card *card = to_pnp_card(dmdev); 161 struct pnp_card *card = to_pnp_card(dmdev);
154 struct pnp_id * pos = card->id; 162 struct pnp_id *pos = card->id;
155 163
156 while (pos) { 164 while (pos) {
157 str += sprintf(str,"%s\n", pos->id); 165 str += sprintf(str, "%s\n", pos->id);
158 pos = pos->next; 166 pos = pos->next;
159 } 167 }
160 return (str - buf); 168 return (str - buf);
161} 169}
162 170
163static DEVICE_ATTR(card_id,S_IRUGO,pnp_show_card_ids,NULL); 171static DEVICE_ATTR(card_id, S_IRUGO, pnp_show_card_ids, NULL);
164 172
165static int pnp_interface_attach_card(struct pnp_card *card) 173static int pnp_interface_attach_card(struct pnp_card *card)
166{ 174{
167 int rc = device_create_file(&card->dev,&dev_attr_name); 175 int rc = device_create_file(&card->dev, &dev_attr_name);
168 if (rc) return rc;
169 176
170 rc = device_create_file(&card->dev,&dev_attr_card_id); 177 if (rc)
171 if (rc) goto err_name; 178 return rc;
179
180 rc = device_create_file(&card->dev, &dev_attr_card_id);
181 if (rc)
182 goto err_name;
172 183
173 return 0; 184 return 0;
174 185
175err_name: 186 err_name:
176 device_remove_file(&card->dev,&dev_attr_name); 187 device_remove_file(&card->dev, &dev_attr_name);
177 return rc; 188 return rc;
178} 189}
179 190
@@ -181,15 +192,16 @@ err_name:
181 * pnp_add_card - adds a PnP card to the PnP Layer 192 * pnp_add_card - adds a PnP card to the PnP Layer
182 * @card: pointer to the card to add 193 * @card: pointer to the card to add
183 */ 194 */
184 195int pnp_add_card(struct pnp_card *card)
185int pnp_add_card(struct pnp_card * card)
186{ 196{
187 int error; 197 int error;
188 struct list_head * pos, * temp; 198 struct list_head *pos, *temp;
199
189 if (!card || !card->protocol) 200 if (!card || !card->protocol)
190 return -EINVAL; 201 return -EINVAL;
191 202
192 sprintf(card->dev.bus_id, "%02x:%02x", card->protocol->number, card->number); 203 sprintf(card->dev.bus_id, "%02x:%02x", card->protocol->number,
204 card->number);
193 card->dev.parent = &card->protocol->dev; 205 card->dev.parent = &card->protocol->dev;
194 card->dev.bus = NULL; 206 card->dev.bus = NULL;
195 card->dev.release = &pnp_release_card; 207 card->dev.release = &pnp_release_card;
@@ -205,18 +217,21 @@ int pnp_add_card(struct pnp_card * card)
205 /* we wait until now to add devices in order to ensure the drivers 217 /* we wait until now to add devices in order to ensure the drivers
206 * will be able to use all of the related devices on the card 218 * will be able to use all of the related devices on the card
207 * without waiting any unreasonable length of time */ 219 * without waiting any unreasonable length of time */
208 list_for_each(pos,&card->devices){ 220 list_for_each(pos, &card->devices) {
209 struct pnp_dev *dev = card_to_pnp_dev(pos); 221 struct pnp_dev *dev = card_to_pnp_dev(pos);
210 __pnp_add_device(dev); 222 __pnp_add_device(dev);
211 } 223 }
212 224
213 /* match with card drivers */ 225 /* match with card drivers */
214 list_for_each_safe(pos,temp,&pnp_card_drivers){ 226 list_for_each_safe(pos, temp, &pnp_card_drivers) {
215 struct pnp_card_driver * drv = list_entry(pos, struct pnp_card_driver, global_list); 227 struct pnp_card_driver *drv =
216 card_probe(card,drv); 228 list_entry(pos, struct pnp_card_driver,
229 global_list);
230 card_probe(card, drv);
217 } 231 }
218 } else 232 } else
219 pnp_err("sysfs failure, card '%s' will be unavailable", card->dev.bus_id); 233 pnp_err("sysfs failure, card '%s' will be unavailable",
234 card->dev.bus_id);
220 return error; 235 return error;
221} 236}
222 237
@@ -224,10 +239,10 @@ int pnp_add_card(struct pnp_card * card)
224 * pnp_remove_card - removes a PnP card from the PnP Layer 239 * pnp_remove_card - removes a PnP card from the PnP Layer
225 * @card: pointer to the card to remove 240 * @card: pointer to the card to remove
226 */ 241 */
227 242void pnp_remove_card(struct pnp_card *card)
228void pnp_remove_card(struct pnp_card * card)
229{ 243{
230 struct list_head *pos, *temp; 244 struct list_head *pos, *temp;
245
231 if (!card) 246 if (!card)
232 return; 247 return;
233 device_unregister(&card->dev); 248 device_unregister(&card->dev);
@@ -235,7 +250,7 @@ void pnp_remove_card(struct pnp_card * card)
235 list_del(&card->global_list); 250 list_del(&card->global_list);
236 list_del(&card->protocol_list); 251 list_del(&card->protocol_list);
237 spin_unlock(&pnp_lock); 252 spin_unlock(&pnp_lock);
238 list_for_each_safe(pos,temp,&card->devices){ 253 list_for_each_safe(pos, temp, &card->devices) {
239 struct pnp_dev *dev = card_to_pnp_dev(pos); 254 struct pnp_dev *dev = card_to_pnp_dev(pos);
240 pnp_remove_card_device(dev); 255 pnp_remove_card_device(dev);
241 } 256 }
@@ -246,15 +261,14 @@ void pnp_remove_card(struct pnp_card * card)
246 * @card: pointer to the card to add to 261 * @card: pointer to the card to add to
247 * @dev: pointer to the device to add 262 * @dev: pointer to the device to add
248 */ 263 */
249 264int pnp_add_card_device(struct pnp_card *card, struct pnp_dev *dev)
250int pnp_add_card_device(struct pnp_card * card, struct pnp_dev * dev)
251{ 265{
252 if (!card || !dev || !dev->protocol) 266 if (!card || !dev || !dev->protocol)
253 return -EINVAL; 267 return -EINVAL;
254 dev->dev.parent = &card->dev; 268 dev->dev.parent = &card->dev;
255 dev->card_link = NULL; 269 dev->card_link = NULL;
256 snprintf(dev->dev.bus_id, BUS_ID_SIZE, "%02x:%02x.%02x", dev->protocol->number, 270 snprintf(dev->dev.bus_id, BUS_ID_SIZE, "%02x:%02x.%02x",
257 card->number,dev->number); 271 dev->protocol->number, card->number, dev->number);
258 spin_lock(&pnp_lock); 272 spin_lock(&pnp_lock);
259 dev->card = card; 273 dev->card = card;
260 list_add_tail(&dev->card_list, &card->devices); 274 list_add_tail(&dev->card_list, &card->devices);
@@ -266,8 +280,7 @@ int pnp_add_card_device(struct pnp_card * card, struct pnp_dev * dev)
266 * pnp_remove_card_device- removes a device from the specified card 280 * pnp_remove_card_device- removes a device from the specified card
267 * @dev: pointer to the device to remove 281 * @dev: pointer to the device to remove
268 */ 282 */
269 283void pnp_remove_card_device(struct pnp_dev *dev)
270void pnp_remove_card_device(struct pnp_dev * dev)
271{ 284{
272 spin_lock(&pnp_lock); 285 spin_lock(&pnp_lock);
273 dev->card = NULL; 286 dev->card = NULL;
@@ -282,13 +295,14 @@ void pnp_remove_card_device(struct pnp_dev * dev)
282 * @id: pointer to a PnP ID structure that explains the rules for finding the device 295 * @id: pointer to a PnP ID structure that explains the rules for finding the device
283 * @from: Starting place to search from. If NULL it will start from the beginning. 296 * @from: Starting place to search from. If NULL it will start from the beginning.
284 */ 297 */
285 298struct pnp_dev *pnp_request_card_device(struct pnp_card_link *clink,
286struct pnp_dev * pnp_request_card_device(struct pnp_card_link *clink, const char * id, struct pnp_dev * from) 299 const char *id, struct pnp_dev *from)
287{ 300{
288 struct list_head * pos; 301 struct list_head *pos;
289 struct pnp_dev * dev; 302 struct pnp_dev *dev;
290 struct pnp_card_driver * drv; 303 struct pnp_card_driver *drv;
291 struct pnp_card * card; 304 struct pnp_card *card;
305
292 if (!clink || !id) 306 if (!clink || !id)
293 goto done; 307 goto done;
294 card = clink->card; 308 card = clink->card;
@@ -302,15 +316,15 @@ struct pnp_dev * pnp_request_card_device(struct pnp_card_link *clink, const char
302 } 316 }
303 while (pos != &card->devices) { 317 while (pos != &card->devices) {
304 dev = card_to_pnp_dev(pos); 318 dev = card_to_pnp_dev(pos);
305 if ((!dev->card_link) && compare_pnp_id(dev->id,id)) 319 if ((!dev->card_link) && compare_pnp_id(dev->id, id))
306 goto found; 320 goto found;
307 pos = pos->next; 321 pos = pos->next;
308 } 322 }
309 323
310done: 324 done:
311 return NULL; 325 return NULL;
312 326
313found: 327 found:
314 dev->card_link = clink; 328 dev->card_link = clink;
315 dev->dev.driver = &drv->link.driver; 329 dev->dev.driver = &drv->link.driver;
316 if (pnp_bus_type.probe(&dev->dev)) 330 if (pnp_bus_type.probe(&dev->dev))
@@ -320,7 +334,7 @@ found:
320 334
321 return dev; 335 return dev;
322 336
323err_out: 337 err_out:
324 dev->dev.driver = NULL; 338 dev->dev.driver = NULL;
325 dev->card_link = NULL; 339 dev->card_link = NULL;
326 return NULL; 340 return NULL;
@@ -330,10 +344,10 @@ err_out:
330 * pnp_release_card_device - call this when the driver no longer needs the device 344 * pnp_release_card_device - call this when the driver no longer needs the device
331 * @dev: pointer to the PnP device structure 345 * @dev: pointer to the PnP device structure
332 */ 346 */
333 347void pnp_release_card_device(struct pnp_dev *dev)
334void pnp_release_card_device(struct pnp_dev * dev)
335{ 348{
336 struct pnp_card_driver * drv = dev->card_link->driver; 349 struct pnp_card_driver *drv = dev->card_link->driver;
350
337 if (!drv) 351 if (!drv)
338 return; 352 return;
339 drv->link.remove = &card_remove; 353 drv->link.remove = &card_remove;
@@ -347,6 +361,7 @@ void pnp_release_card_device(struct pnp_dev * dev)
347static int card_suspend(struct pnp_dev *dev, pm_message_t state) 361static int card_suspend(struct pnp_dev *dev, pm_message_t state)
348{ 362{
349 struct pnp_card_link *link = dev->card_link; 363 struct pnp_card_link *link = dev->card_link;
364
350 if (link->pm_state.event == state.event) 365 if (link->pm_state.event == state.event)
351 return 0; 366 return 0;
352 link->pm_state = state; 367 link->pm_state = state;
@@ -356,6 +371,7 @@ static int card_suspend(struct pnp_dev *dev, pm_message_t state)
356static int card_resume(struct pnp_dev *dev) 371static int card_resume(struct pnp_dev *dev)
357{ 372{
358 struct pnp_card_link *link = dev->card_link; 373 struct pnp_card_link *link = dev->card_link;
374
359 if (link->pm_state.event == PM_EVENT_ON) 375 if (link->pm_state.event == PM_EVENT_ON)
360 return 0; 376 return 0;
361 link->pm_state = PMSG_ON; 377 link->pm_state = PMSG_ON;
@@ -367,8 +383,7 @@ static int card_resume(struct pnp_dev *dev)
367 * pnp_register_card_driver - registers a PnP card driver with the PnP Layer 383 * pnp_register_card_driver - registers a PnP card driver with the PnP Layer
368 * @drv: pointer to the driver to register 384 * @drv: pointer to the driver to register
369 */ 385 */
370 386int pnp_register_card_driver(struct pnp_card_driver *drv)
371int pnp_register_card_driver(struct pnp_card_driver * drv)
372{ 387{
373 int error; 388 int error;
374 struct list_head *pos, *temp; 389 struct list_head *pos, *temp;
@@ -389,9 +404,10 @@ int pnp_register_card_driver(struct pnp_card_driver * drv)
389 list_add_tail(&drv->global_list, &pnp_card_drivers); 404 list_add_tail(&drv->global_list, &pnp_card_drivers);
390 spin_unlock(&pnp_lock); 405 spin_unlock(&pnp_lock);
391 406
392 list_for_each_safe(pos,temp,&pnp_cards){ 407 list_for_each_safe(pos, temp, &pnp_cards) {
393 struct pnp_card *card = list_entry(pos, struct pnp_card, global_list); 408 struct pnp_card *card =
394 card_probe(card,drv); 409 list_entry(pos, struct pnp_card, global_list);
410 card_probe(card, drv);
395 } 411 }
396 return 0; 412 return 0;
397} 413}
@@ -400,8 +416,7 @@ int pnp_register_card_driver(struct pnp_card_driver * drv)
400 * pnp_unregister_card_driver - unregisters a PnP card driver from the PnP Layer 416 * pnp_unregister_card_driver - unregisters a PnP card driver from the PnP Layer
401 * @drv: pointer to the driver to unregister 417 * @drv: pointer to the driver to unregister
402 */ 418 */
403 419void pnp_unregister_card_driver(struct pnp_card_driver *drv)
404void pnp_unregister_card_driver(struct pnp_card_driver * drv)
405{ 420{
406 spin_lock(&pnp_lock); 421 spin_lock(&pnp_lock);
407 list_del(&drv->global_list); 422 list_del(&drv->global_list);
@@ -409,13 +424,6 @@ void pnp_unregister_card_driver(struct pnp_card_driver * drv)
409 pnp_unregister_driver(&drv->link); 424 pnp_unregister_driver(&drv->link);
410} 425}
411 426
412#if 0
413EXPORT_SYMBOL(pnp_add_card);
414EXPORT_SYMBOL(pnp_remove_card);
415EXPORT_SYMBOL(pnp_add_card_device);
416EXPORT_SYMBOL(pnp_remove_card_device);
417EXPORT_SYMBOL(pnp_add_card_id);
418#endif /* 0 */
419EXPORT_SYMBOL(pnp_request_card_device); 427EXPORT_SYMBOL(pnp_request_card_device);
420EXPORT_SYMBOL(pnp_release_card_device); 428EXPORT_SYMBOL(pnp_release_card_device);
421EXPORT_SYMBOL(pnp_register_card_driver); 429EXPORT_SYMBOL(pnp_register_card_driver);
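
match_card() above accepts a driver entry only when the card id matches and every id in the entry's devs[] list is present somewhere on the card. A hypothetical id table showing the shape of the data it walks (vendor and device ids invented; the 'X' positions are wildcards handled by compare_pnp_id()):

	/* Hypothetical: match any card whose id fits CTL00XX and which
	 * carries a device with id CTL0001.  The entry with an empty .id
	 * terminates the table, mirroring the while (*drv_id->id) loop. */
	static const struct pnp_card_device_id hypothetical_card_ids[] = {
		{
			.id = "CTL00XX",
			.devs = { { .id = "CTL0001" }, },
		},
		{ .id = "", }
	};
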
diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c
index 8e7b2dd38810..61066fdb9e6d 100644
--- a/drivers/pnp/core.c
+++ b/drivers/pnp/core.c
@@ -2,7 +2,6 @@
2 * core.c - contains all core device and protocol registration functions 2 * core.c - contains all core device and protocol registration functions
3 * 3 *
4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com> 4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
5 *
6 */ 5 */
7 6
8#include <linux/pnp.h> 7#include <linux/pnp.h>
@@ -18,7 +17,6 @@
18 17
19#include "base.h" 18#include "base.h"
20 19
21
22static LIST_HEAD(pnp_protocols); 20static LIST_HEAD(pnp_protocols);
23LIST_HEAD(pnp_global); 21LIST_HEAD(pnp_global);
24DEFINE_SPINLOCK(pnp_lock); 22DEFINE_SPINLOCK(pnp_lock);
@@ -36,7 +34,7 @@ void *pnp_alloc(long size)
36 void *result; 34 void *result;
37 35
38 result = kzalloc(size, GFP_KERNEL); 36 result = kzalloc(size, GFP_KERNEL);
39 if (!result){ 37 if (!result) {
40 printk(KERN_ERR "pnp: Out of Memory\n"); 38 printk(KERN_ERR "pnp: Out of Memory\n");
41 return NULL; 39 return NULL;
42 } 40 }
@@ -49,11 +47,10 @@ void *pnp_alloc(long size)
49 * 47 *
50 * Ex protocols: ISAPNP, PNPBIOS, etc 48 * Ex protocols: ISAPNP, PNPBIOS, etc
51 */ 49 */
52
53int pnp_register_protocol(struct pnp_protocol *protocol) 50int pnp_register_protocol(struct pnp_protocol *protocol)
54{ 51{
55 int nodenum; 52 int nodenum;
56 struct list_head * pos; 53 struct list_head *pos;
57 54
58 if (!protocol) 55 if (!protocol)
59 return -EINVAL; 56 return -EINVAL;
@@ -64,9 +61,9 @@ int pnp_register_protocol(struct pnp_protocol *protocol)
64 spin_lock(&pnp_lock); 61 spin_lock(&pnp_lock);
65 62
66 /* assign the lowest unused number */ 63 /* assign the lowest unused number */
67 list_for_each(pos,&pnp_protocols) { 64 list_for_each(pos, &pnp_protocols) {
68 struct pnp_protocol * cur = to_pnp_protocol(pos); 65 struct pnp_protocol *cur = to_pnp_protocol(pos);
69 if (cur->number == nodenum){ 66 if (cur->number == nodenum) {
70 pos = &pnp_protocols; 67 pos = &pnp_protocols;
71 nodenum++; 68 nodenum++;
72 } 69 }
@@ -83,7 +80,6 @@ int pnp_register_protocol(struct pnp_protocol *protocol)
83/** 80/**
84 * pnp_protocol_unregister - removes a pnp protocol from the pnp layer 81 * pnp_protocol_unregister - removes a pnp protocol from the pnp layer
85 * @protocol: pointer to the corresponding pnp_protocol structure 82 * @protocol: pointer to the corresponding pnp_protocol structure
86 *
87 */ 83 */
88void pnp_unregister_protocol(struct pnp_protocol *protocol) 84void pnp_unregister_protocol(struct pnp_protocol *protocol)
89{ 85{
@@ -93,11 +89,11 @@ void pnp_unregister_protocol(struct pnp_protocol *protocol)
93 device_unregister(&protocol->dev); 89 device_unregister(&protocol->dev);
94} 90}
95 91
96
97static void pnp_free_ids(struct pnp_dev *dev) 92static void pnp_free_ids(struct pnp_dev *dev)
98{ 93{
99 struct pnp_id * id; 94 struct pnp_id *id;
100 struct pnp_id * next; 95 struct pnp_id *next;
96
101 if (!dev) 97 if (!dev)
102 return; 98 return;
103 id = dev->id; 99 id = dev->id;
@@ -110,7 +106,8 @@ static void pnp_free_ids(struct pnp_dev *dev)
110 106
111static void pnp_release_device(struct device *dmdev) 107static void pnp_release_device(struct device *dmdev)
112{ 108{
113 struct pnp_dev * dev = to_pnp_dev(dmdev); 109 struct pnp_dev *dev = to_pnp_dev(dmdev);
110
114 pnp_free_option(dev->independent); 111 pnp_free_option(dev->independent);
115 pnp_free_option(dev->dependent); 112 pnp_free_option(dev->dependent);
116 pnp_free_ids(dev); 113 pnp_free_ids(dev);
@@ -120,6 +117,7 @@ static void pnp_release_device(struct device *dmdev)
120int __pnp_add_device(struct pnp_dev *dev) 117int __pnp_add_device(struct pnp_dev *dev)
121{ 118{
122 int ret; 119 int ret;
120
123 pnp_fixup_device(dev); 121 pnp_fixup_device(dev);
124 dev->dev.bus = &pnp_bus_type; 122 dev->dev.bus = &pnp_bus_type;
125 dev->dev.dma_mask = &dev->dma_mask; 123 dev->dev.dma_mask = &dev->dma_mask;
@@ -143,13 +141,13 @@ int __pnp_add_device(struct pnp_dev *dev)
143 * 141 *
144 * adds to driver model, name database, fixups, interface, etc. 142 * adds to driver model, name database, fixups, interface, etc.
145 */ 143 */
146
147int pnp_add_device(struct pnp_dev *dev) 144int pnp_add_device(struct pnp_dev *dev)
148{ 145{
149 if (!dev || !dev->protocol || dev->card) 146 if (!dev || !dev->protocol || dev->card)
150 return -EINVAL; 147 return -EINVAL;
151 dev->dev.parent = &dev->protocol->dev; 148 dev->dev.parent = &dev->protocol->dev;
152 sprintf(dev->dev.bus_id, "%02x:%02x", dev->protocol->number, dev->number); 149 sprintf(dev->dev.bus_id, "%02x:%02x", dev->protocol->number,
150 dev->number);
153 return __pnp_add_device(dev); 151 return __pnp_add_device(dev);
154} 152}
155 153
@@ -162,21 +160,6 @@ void __pnp_remove_device(struct pnp_dev *dev)
162 device_unregister(&dev->dev); 160 device_unregister(&dev->dev);
163} 161}
164 162
165/**
166 * pnp_remove_device - removes a pnp device from the pnp layer
167 * @dev: pointer to dev to add
168 *
169 * this function will free all mem used by dev
170 */
171#if 0
172void pnp_remove_device(struct pnp_dev *dev)
173{
174 if (!dev || dev->card)
175 return;
176 __pnp_remove_device(dev);
177}
178#endif /* 0 */
179
180static int __init pnp_init(void) 163static int __init pnp_init(void)
181{ 164{
182 printk(KERN_INFO "Linux Plug and Play Support v0.97 (c) Adam Belay\n"); 165 printk(KERN_INFO "Linux Plug and Play Support v0.97 (c) Adam Belay\n");
@@ -184,10 +167,3 @@ static int __init pnp_init(void)
184} 167}
185 168
186subsys_initcall(pnp_init); 169subsys_initcall(pnp_init);
187
188#if 0
189EXPORT_SYMBOL(pnp_register_protocol);
190EXPORT_SYMBOL(pnp_unregister_protocol);
191EXPORT_SYMBOL(pnp_add_device);
192EXPORT_SYMBOL(pnp_remove_device);
193#endif /* 0 */
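
pnp_register_protocol() above assigns the lowest unused protocol number by restarting its scan of the registered list whenever the current candidate collides. A standalone sketch of that allocation strategy, with a plain array standing in for the protocol list:

	#include <stdio.h>

	/* Restart-from-the-head scan: quadratic in the worst case, but
	 * the protocol list is tiny (ISAPNP, PNPBIOS, PNPACPI). */
	static int lowest_unused(const int *used, int n)
	{
		int candidate = 0, i = 0;

		while (i < n) {
			if (used[i] == candidate) {
				candidate++;	/* collision: start over */
				i = 0;
				continue;
			}
			i++;
		}
		return candidate;
	}

	int main(void)
	{
		int used[] = { 0, 2, 1, 5 };

		printf("%d\n", lowest_unused(used, 4));	/* prints 3 */
		return 0;
	}
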
diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index 1432806451cd..30b8f6f3258a 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -2,7 +2,6 @@
2 * driver.c - device id matching, driver model, etc. 2 * driver.c - device id matching, driver model, etc.
3 * 3 *
4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com> 4 * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
5 *
6 */ 5 */
7 6
8#include <linux/string.h> 7#include <linux/string.h>
@@ -16,12 +15,11 @@
16static int compare_func(const char *ida, const char *idb) 15static int compare_func(const char *ida, const char *idb)
17{ 16{
18 int i; 17 int i;
18
19 /* we only need to compare the last 4 chars */ 19 /* we only need to compare the last 4 chars */
20 for (i=3; i<7; i++) 20 for (i = 3; i < 7; i++) {
21 {
22 if (ida[i] != 'X' && 21 if (ida[i] != 'X' &&
23 idb[i] != 'X' && 22 idb[i] != 'X' && toupper(ida[i]) != toupper(idb[i]))
24 toupper(ida[i]) != toupper(idb[i]))
25 return 0; 23 return 0;
26 } 24 }
27 return 1; 25 return 1;
@@ -31,20 +29,22 @@ int compare_pnp_id(struct pnp_id *pos, const char *id)
31{ 29{
32 if (!pos || !id || (strlen(id) != 7)) 30 if (!pos || !id || (strlen(id) != 7))
33 return 0; 31 return 0;
34 if (memcmp(id,"ANYDEVS",7)==0) 32 if (memcmp(id, "ANYDEVS", 7) == 0)
35 return 1; 33 return 1;
36 while (pos){ 34 while (pos) {
37 if (memcmp(pos->id,id,3)==0) 35 if (memcmp(pos->id, id, 3) == 0)
38 if (compare_func(pos->id,id)==1) 36 if (compare_func(pos->id, id) == 1)
39 return 1; 37 return 1;
40 pos = pos->next; 38 pos = pos->next;
41 } 39 }
42 return 0; 40 return 0;
43} 41}
44 42
45static const struct pnp_device_id * match_device(struct pnp_driver *drv, struct pnp_dev *dev) 43static const struct pnp_device_id *match_device(struct pnp_driver *drv,
44 struct pnp_dev *dev)
46{ 45{
47 const struct pnp_device_id *drv_id = drv->id_table; 46 const struct pnp_device_id *drv_id = drv->id_table;
47
48 if (!drv_id) 48 if (!drv_id)
49 return NULL; 49 return NULL;
50 50
@@ -59,7 +59,7 @@ static const struct pnp_device_id * match_device(struct pnp_driver *drv, struct
59int pnp_device_attach(struct pnp_dev *pnp_dev) 59int pnp_device_attach(struct pnp_dev *pnp_dev)
60{ 60{
61 spin_lock(&pnp_lock); 61 spin_lock(&pnp_lock);
62 if(pnp_dev->status != PNP_READY){ 62 if (pnp_dev->status != PNP_READY) {
63 spin_unlock(&pnp_lock); 63 spin_unlock(&pnp_lock);
64 return -EBUSY; 64 return -EBUSY;
65 } 65 }
@@ -86,7 +86,8 @@ static int pnp_device_probe(struct device *dev)
86 pnp_dev = to_pnp_dev(dev); 86 pnp_dev = to_pnp_dev(dev);
87 pnp_drv = to_pnp_driver(dev->driver); 87 pnp_drv = to_pnp_driver(dev->driver);
88 88
89 pnp_dbg("match found with the PnP device '%s' and the driver '%s'", dev->bus_id,pnp_drv->name); 89 pnp_dbg("match found with the PnP device '%s' and the driver '%s'",
90 dev->bus_id, pnp_drv->name);
90 91
91 error = pnp_device_attach(pnp_dev); 92 error = pnp_device_attach(pnp_dev);
92 if (error < 0) 93 if (error < 0)
@@ -99,7 +100,7 @@ static int pnp_device_probe(struct device *dev)
99 return error; 100 return error;
100 } 101 }
101 } else if ((pnp_drv->flags & PNP_DRIVER_RES_DISABLE) 102 } else if ((pnp_drv->flags & PNP_DRIVER_RES_DISABLE)
102 == PNP_DRIVER_RES_DISABLE) { 103 == PNP_DRIVER_RES_DISABLE) {
103 error = pnp_disable_dev(pnp_dev); 104 error = pnp_disable_dev(pnp_dev);
104 if (error < 0) 105 if (error < 0)
105 return error; 106 return error;
@@ -110,22 +111,22 @@ static int pnp_device_probe(struct device *dev)
110 if (dev_id != NULL) 111 if (dev_id != NULL)
111 error = pnp_drv->probe(pnp_dev, dev_id); 112 error = pnp_drv->probe(pnp_dev, dev_id);
112 } 113 }
113 if (error >= 0){ 114 if (error >= 0) {
114 pnp_dev->driver = pnp_drv; 115 pnp_dev->driver = pnp_drv;
115 error = 0; 116 error = 0;
116 } else 117 } else
117 goto fail; 118 goto fail;
118 return error; 119 return error;
119 120
120fail: 121 fail:
121 pnp_device_detach(pnp_dev); 122 pnp_device_detach(pnp_dev);
122 return error; 123 return error;
123} 124}
124 125
125static int pnp_device_remove(struct device *dev) 126static int pnp_device_remove(struct device *dev)
126{ 127{
127 struct pnp_dev * pnp_dev = to_pnp_dev(dev); 128 struct pnp_dev *pnp_dev = to_pnp_dev(dev);
128 struct pnp_driver * drv = pnp_dev->driver; 129 struct pnp_driver *drv = pnp_dev->driver;
129 130
130 if (drv) { 131 if (drv) {
131 if (drv->remove) 132 if (drv->remove)
@@ -138,8 +139,9 @@ static int pnp_device_remove(struct device *dev)
138 139
139static int pnp_bus_match(struct device *dev, struct device_driver *drv) 140static int pnp_bus_match(struct device *dev, struct device_driver *drv)
140{ 141{
141 struct pnp_dev * pnp_dev = to_pnp_dev(dev); 142 struct pnp_dev *pnp_dev = to_pnp_dev(dev);
142 struct pnp_driver * pnp_drv = to_pnp_driver(drv); 143 struct pnp_driver *pnp_drv = to_pnp_driver(drv);
144
143 if (match_device(pnp_drv, pnp_dev) == NULL) 145 if (match_device(pnp_drv, pnp_dev) == NULL)
144 return 0; 146 return 0;
145 return 1; 147 return 1;
@@ -147,8 +149,8 @@ static int pnp_bus_match(struct device *dev, struct device_driver *drv)
147 149
148static int pnp_bus_suspend(struct device *dev, pm_message_t state) 150static int pnp_bus_suspend(struct device *dev, pm_message_t state)
149{ 151{
150 struct pnp_dev * pnp_dev = to_pnp_dev(dev); 152 struct pnp_dev *pnp_dev = to_pnp_dev(dev);
151 struct pnp_driver * pnp_drv = pnp_dev->driver; 153 struct pnp_driver *pnp_drv = pnp_dev->driver;
152 int error; 154 int error;
153 155
154 if (!pnp_drv) 156 if (!pnp_drv)
@@ -162,9 +164,9 @@ static int pnp_bus_suspend(struct device *dev, pm_message_t state)
162 164
163 if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE) && 165 if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE) &&
164 pnp_can_disable(pnp_dev)) { 166 pnp_can_disable(pnp_dev)) {
165 error = pnp_stop_dev(pnp_dev); 167 error = pnp_stop_dev(pnp_dev);
166 if (error) 168 if (error)
167 return error; 169 return error;
168 } 170 }
169 171
170 if (pnp_dev->protocol && pnp_dev->protocol->suspend) 172 if (pnp_dev->protocol && pnp_dev->protocol->suspend)
@@ -174,8 +176,8 @@ static int pnp_bus_suspend(struct device *dev, pm_message_t state)
174 176
175static int pnp_bus_resume(struct device *dev) 177static int pnp_bus_resume(struct device *dev)
176{ 178{
177 struct pnp_dev * pnp_dev = to_pnp_dev(dev); 179 struct pnp_dev *pnp_dev = to_pnp_dev(dev);
178 struct pnp_driver * pnp_drv = pnp_dev->driver; 180 struct pnp_driver *pnp_drv = pnp_dev->driver;
179 int error; 181 int error;
180 182
181 if (!pnp_drv) 183 if (!pnp_drv)
@@ -197,12 +199,12 @@ static int pnp_bus_resume(struct device *dev)
197} 199}
198 200
199struct bus_type pnp_bus_type = { 201struct bus_type pnp_bus_type = {
200 .name = "pnp", 202 .name = "pnp",
201 .match = pnp_bus_match, 203 .match = pnp_bus_match,
202 .probe = pnp_device_probe, 204 .probe = pnp_device_probe,
203 .remove = pnp_device_remove, 205 .remove = pnp_device_remove,
204 .suspend = pnp_bus_suspend, 206 .suspend = pnp_bus_suspend,
205 .resume = pnp_bus_resume, 207 .resume = pnp_bus_resume,
206}; 208};
207 209
208int pnp_register_driver(struct pnp_driver *drv) 210int pnp_register_driver(struct pnp_driver *drv)
@@ -225,12 +227,11 @@ void pnp_unregister_driver(struct pnp_driver *drv)
225 * pnp_add_id - adds an EISA id to the specified device 227 * pnp_add_id - adds an EISA id to the specified device
226 * @id: pointer to a pnp_id structure 228 * @id: pointer to a pnp_id structure
227 * @dev: pointer to the desired device 229 * @dev: pointer to the desired device
228 *
229 */ 230 */
230
231int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev) 231int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev)
232{ 232{
233 struct pnp_id *ptr; 233 struct pnp_id *ptr;
234
234 if (!id) 235 if (!id)
235 return -EINVAL; 236 return -EINVAL;
236 if (!dev) 237 if (!dev)
@@ -248,8 +249,5 @@ int pnp_add_id(struct pnp_id *id, struct pnp_dev *dev)
248 249
249EXPORT_SYMBOL(pnp_register_driver); 250EXPORT_SYMBOL(pnp_register_driver);
250EXPORT_SYMBOL(pnp_unregister_driver); 251EXPORT_SYMBOL(pnp_unregister_driver);
251#if 0
252EXPORT_SYMBOL(pnp_add_id);
253#endif
254EXPORT_SYMBOL(pnp_device_attach); 252EXPORT_SYMBOL(pnp_device_attach);
255EXPORT_SYMBOL(pnp_device_detach); 253EXPORT_SYMBOL(pnp_device_detach);
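
The driver.c changes above are style-only (plus dropping the dead #if 0 export); the registration API is unchanged. For reference, a client driver supplies an id_table for match_device() to walk, and pnp_bus_match()/pnp_device_probe() handle the rest. A minimal sketch, not from this patch — the "foo" names and the PnP id are made up:

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/pnp.h>

static const struct pnp_device_id foo_ids[] = {
	{ .id = "PNP0f13", .driver_data = 0 },	/* illustrative id */
	{ .id = "" }	/* empty id ends the match_device() walk */
};
MODULE_DEVICE_TABLE(pnp, foo_ids);

static int foo_probe(struct pnp_dev *dev, const struct pnp_device_id *id)
{
	/* resources are already allocated unless the driver set one of
	 * the PNP_DRIVER_RES_* flags checked in pnp_device_probe() */
	return pnp_port_valid(dev, 0) ? 0 : -ENODEV;
}

static struct pnp_driver foo_pnp_driver = {
	.name     = "foo",
	.id_table = foo_ids,
	.probe    = foo_probe,
};

static int __init foo_init(void)
{
	return pnp_register_driver(&foo_pnp_driver);
}
module_init(foo_init);

static void __exit foo_exit(void)
{
	pnp_unregister_driver(&foo_pnp_driver);
}
module_exit(foo_exit);
MODULE_LICENSE("GPL");
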
diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c
index ac9fcd499f3f..fe6684e13e82 100644
--- a/drivers/pnp/interface.c
+++ b/drivers/pnp/interface.c
@@ -3,7 +3,6 @@
3 * 3 *
4 * Some code, especially possible resource dumping is based on isapnp_proc.c (c) Jaroslav Kysela <perex@suse.cz> 4 * Some code, especially possible resource dumping is based on isapnp_proc.c (c) Jaroslav Kysela <perex@suse.cz>
5 * Copyright 2002 Adam Belay <ambx1@neo.rr.com> 5 * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
6 *
7 */ 6 */
8 7
9#include <linux/pnp.h> 8#include <linux/pnp.h>
@@ -29,7 +28,7 @@ struct pnp_info_buffer {
29 28
30typedef struct pnp_info_buffer pnp_info_buffer_t; 29typedef struct pnp_info_buffer pnp_info_buffer_t;
31 30
32static int pnp_printf(pnp_info_buffer_t * buffer, char *fmt,...) 31static int pnp_printf(pnp_info_buffer_t * buffer, char *fmt, ...)
33{ 32{
34 va_list args; 33 va_list args;
35 int res; 34 int res;
@@ -48,14 +47,18 @@ static int pnp_printf(pnp_info_buffer_t * buffer, char *fmt,...)
48 return res; 47 return res;
49} 48}
50 49
51static void pnp_print_port(pnp_info_buffer_t *buffer, char *space, struct pnp_port *port) 50static void pnp_print_port(pnp_info_buffer_t * buffer, char *space,
51 struct pnp_port *port)
52{ 52{
53 pnp_printf(buffer, "%sport 0x%x-0x%x, align 0x%x, size 0x%x, %i-bit address decoding\n", 53 pnp_printf(buffer,
54 space, port->min, port->max, port->align ? (port->align-1) : 0, port->size, 54 "%sport 0x%x-0x%x, align 0x%x, size 0x%x, %i-bit address decoding\n",
55 port->flags & PNP_PORT_FLAG_16BITADDR ? 16 : 10); 55 space, port->min, port->max,
56 port->align ? (port->align - 1) : 0, port->size,
57 port->flags & PNP_PORT_FLAG_16BITADDR ? 16 : 10);
56} 58}
57 59
58static void pnp_print_irq(pnp_info_buffer_t *buffer, char *space, struct pnp_irq *irq) 60static void pnp_print_irq(pnp_info_buffer_t * buffer, char *space,
61 struct pnp_irq *irq)
59{ 62{
60 int first = 1, i; 63 int first = 1, i;
61 64
@@ -85,14 +88,15 @@ static void pnp_print_irq(pnp_info_buffer_t *buffer, char *space, struct pnp_irq
85 pnp_printf(buffer, "\n"); 88 pnp_printf(buffer, "\n");
86} 89}
87 90
88static void pnp_print_dma(pnp_info_buffer_t *buffer, char *space, struct pnp_dma *dma) 91static void pnp_print_dma(pnp_info_buffer_t * buffer, char *space,
92 struct pnp_dma *dma)
89{ 93{
90 int first = 1, i; 94 int first = 1, i;
91 char *s; 95 char *s;
92 96
93 pnp_printf(buffer, "%sdma ", space); 97 pnp_printf(buffer, "%sdma ", space);
94 for (i = 0; i < 8; i++) 98 for (i = 0; i < 8; i++)
95 if (dma->map & (1<<i)) { 99 if (dma->map & (1 << i)) {
96 if (!first) { 100 if (!first) {
97 pnp_printf(buffer, ","); 101 pnp_printf(buffer, ",");
98 } else { 102 } else {
@@ -136,12 +140,13 @@ static void pnp_print_dma(pnp_info_buffer_t *buffer, char *space, struct pnp_dma
136 pnp_printf(buffer, " %s\n", s); 140 pnp_printf(buffer, " %s\n", s);
137} 141}
138 142
139static void pnp_print_mem(pnp_info_buffer_t *buffer, char *space, struct pnp_mem *mem) 143static void pnp_print_mem(pnp_info_buffer_t * buffer, char *space,
144 struct pnp_mem *mem)
140{ 145{
141 char *s; 146 char *s;
142 147
143 pnp_printf(buffer, "%sMemory 0x%x-0x%x, align 0x%x, size 0x%x", 148 pnp_printf(buffer, "%sMemory 0x%x-0x%x, align 0x%x, size 0x%x",
144 space, mem->min, mem->max, mem->align, mem->size); 149 space, mem->min, mem->max, mem->align, mem->size);
145 if (mem->flags & IORESOURCE_MEM_WRITEABLE) 150 if (mem->flags & IORESOURCE_MEM_WRITEABLE)
146 pnp_printf(buffer, ", writeable"); 151 pnp_printf(buffer, ", writeable");
147 if (mem->flags & IORESOURCE_MEM_CACHEABLE) 152 if (mem->flags & IORESOURCE_MEM_CACHEABLE)
@@ -168,7 +173,7 @@ static void pnp_print_mem(pnp_info_buffer_t *buffer, char *space, struct pnp_mem
168 pnp_printf(buffer, ", %s\n", s); 173 pnp_printf(buffer, ", %s\n", s);
169} 174}
170 175
171static void pnp_print_option(pnp_info_buffer_t *buffer, char *space, 176static void pnp_print_option(pnp_info_buffer_t * buffer, char *space,
172 struct pnp_option *option, int dep) 177 struct pnp_option *option, int dep)
173{ 178{
174 char *s; 179 char *s;
@@ -179,19 +184,19 @@ static void pnp_print_option(pnp_info_buffer_t *buffer, char *space,
179 184
180 if (dep) { 185 if (dep) {
181 switch (option->priority) { 186 switch (option->priority) {
182 case PNP_RES_PRIORITY_PREFERRED: 187 case PNP_RES_PRIORITY_PREFERRED:
183 s = "preferred"; 188 s = "preferred";
184 break; 189 break;
185 case PNP_RES_PRIORITY_ACCEPTABLE: 190 case PNP_RES_PRIORITY_ACCEPTABLE:
186 s = "acceptable"; 191 s = "acceptable";
187 break; 192 break;
188 case PNP_RES_PRIORITY_FUNCTIONAL: 193 case PNP_RES_PRIORITY_FUNCTIONAL:
189 s = "functional"; 194 s = "functional";
190 break; 195 break;
191 default: 196 default:
192 s = "invalid"; 197 s = "invalid";
193 } 198 }
194 pnp_printf(buffer, "Dependent: %02i - Priority %s\n",dep, s); 199 pnp_printf(buffer, "Dependent: %02i - Priority %s\n", dep, s);
195 } 200 }
196 201
197 for (port = option->port; port; port = port->next) 202 for (port = option->port; port; port = port->next)
@@ -204,16 +209,16 @@ static void pnp_print_option(pnp_info_buffer_t *buffer, char *space,
204 pnp_print_mem(buffer, space, mem); 209 pnp_print_mem(buffer, space, mem);
205} 210}
206 211
207 212static ssize_t pnp_show_options(struct device *dmdev,
208static ssize_t pnp_show_options(struct device *dmdev, struct device_attribute *attr, char *buf) 213 struct device_attribute *attr, char *buf)
209{ 214{
210 struct pnp_dev *dev = to_pnp_dev(dmdev); 215 struct pnp_dev *dev = to_pnp_dev(dmdev);
211 struct pnp_option * independent = dev->independent; 216 struct pnp_option *independent = dev->independent;
212 struct pnp_option * dependent = dev->dependent; 217 struct pnp_option *dependent = dev->dependent;
213 int ret, dep = 1; 218 int ret, dep = 1;
214 219
215 pnp_info_buffer_t *buffer = (pnp_info_buffer_t *) 220 pnp_info_buffer_t *buffer = (pnp_info_buffer_t *)
216 pnp_alloc(sizeof(pnp_info_buffer_t)); 221 pnp_alloc(sizeof(pnp_info_buffer_t));
217 if (!buffer) 222 if (!buffer)
218 return -ENOMEM; 223 return -ENOMEM;
219 224
@@ -223,7 +228,7 @@ static ssize_t pnp_show_options(struct device *dmdev, struct device_attribute *a
223 if (independent) 228 if (independent)
224 pnp_print_option(buffer, "", independent, 0); 229 pnp_print_option(buffer, "", independent, 0);
225 230
226 while (dependent){ 231 while (dependent) {
227 pnp_print_option(buffer, " ", dependent, dep); 232 pnp_print_option(buffer, " ", dependent, dep);
228 dependent = dependent->next; 233 dependent = dependent->next;
229 dep++; 234 dep++;
@@ -233,10 +238,11 @@ static ssize_t pnp_show_options(struct device *dmdev, struct device_attribute *a
233 return ret; 238 return ret;
234} 239}
235 240
236static DEVICE_ATTR(options,S_IRUGO,pnp_show_options,NULL); 241static DEVICE_ATTR(options, S_IRUGO, pnp_show_options, NULL);
237 242
238 243static ssize_t pnp_show_current_resources(struct device *dmdev,
239static ssize_t pnp_show_current_resources(struct device *dmdev, struct device_attribute *attr, char *buf) 244 struct device_attribute *attr,
245 char *buf)
240{ 246{
241 struct pnp_dev *dev = to_pnp_dev(dmdev); 247 struct pnp_dev *dev = to_pnp_dev(dmdev);
242 int i, ret; 248 int i, ret;
@@ -252,52 +258,56 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, struct device_at
252 buffer->buffer = buf; 258 buffer->buffer = buf;
253 buffer->curr = buffer->buffer; 259 buffer->curr = buffer->buffer;
254 260
255 pnp_printf(buffer,"state = "); 261 pnp_printf(buffer, "state = ");
256 if (dev->active) 262 if (dev->active)
257 pnp_printf(buffer,"active\n"); 263 pnp_printf(buffer, "active\n");
258 else 264 else
259 pnp_printf(buffer,"disabled\n"); 265 pnp_printf(buffer, "disabled\n");
260 266
261 for (i = 0; i < PNP_MAX_PORT; i++) { 267 for (i = 0; i < PNP_MAX_PORT; i++) {
262 if (pnp_port_valid(dev, i)) { 268 if (pnp_port_valid(dev, i)) {
263 pnp_printf(buffer,"io"); 269 pnp_printf(buffer, "io");
264 if (pnp_port_flags(dev, i) & IORESOURCE_DISABLED) 270 if (pnp_port_flags(dev, i) & IORESOURCE_DISABLED)
265 pnp_printf(buffer," disabled\n"); 271 pnp_printf(buffer, " disabled\n");
266 else 272 else
267 pnp_printf(buffer," 0x%llx-0x%llx\n", 273 pnp_printf(buffer, " 0x%llx-0x%llx\n",
268 (unsigned long long)pnp_port_start(dev, i), 274 (unsigned long long)
269 (unsigned long long)pnp_port_end(dev, i)); 275 pnp_port_start(dev, i),
276 (unsigned long long)pnp_port_end(dev,
277 i));
270 } 278 }
271 } 279 }
272 for (i = 0; i < PNP_MAX_MEM; i++) { 280 for (i = 0; i < PNP_MAX_MEM; i++) {
273 if (pnp_mem_valid(dev, i)) { 281 if (pnp_mem_valid(dev, i)) {
274 pnp_printf(buffer,"mem"); 282 pnp_printf(buffer, "mem");
275 if (pnp_mem_flags(dev, i) & IORESOURCE_DISABLED) 283 if (pnp_mem_flags(dev, i) & IORESOURCE_DISABLED)
276 pnp_printf(buffer," disabled\n"); 284 pnp_printf(buffer, " disabled\n");
277 else 285 else
278 pnp_printf(buffer," 0x%llx-0x%llx\n", 286 pnp_printf(buffer, " 0x%llx-0x%llx\n",
279 (unsigned long long)pnp_mem_start(dev, i), 287 (unsigned long long)
280 (unsigned long long)pnp_mem_end(dev, i)); 288 pnp_mem_start(dev, i),
289 (unsigned long long)pnp_mem_end(dev,
290 i));
281 } 291 }
282 } 292 }
283 for (i = 0; i < PNP_MAX_IRQ; i++) { 293 for (i = 0; i < PNP_MAX_IRQ; i++) {
284 if (pnp_irq_valid(dev, i)) { 294 if (pnp_irq_valid(dev, i)) {
285 pnp_printf(buffer,"irq"); 295 pnp_printf(buffer, "irq");
286 if (pnp_irq_flags(dev, i) & IORESOURCE_DISABLED) 296 if (pnp_irq_flags(dev, i) & IORESOURCE_DISABLED)
287 pnp_printf(buffer," disabled\n"); 297 pnp_printf(buffer, " disabled\n");
288 else 298 else
289 pnp_printf(buffer," %lld\n", 299 pnp_printf(buffer, " %lld\n",
290 (unsigned long long)pnp_irq(dev, i)); 300 (unsigned long long)pnp_irq(dev, i));
291 } 301 }
292 } 302 }
293 for (i = 0; i < PNP_MAX_DMA; i++) { 303 for (i = 0; i < PNP_MAX_DMA; i++) {
294 if (pnp_dma_valid(dev, i)) { 304 if (pnp_dma_valid(dev, i)) {
295 pnp_printf(buffer,"dma"); 305 pnp_printf(buffer, "dma");
296 if (pnp_dma_flags(dev, i) & IORESOURCE_DISABLED) 306 if (pnp_dma_flags(dev, i) & IORESOURCE_DISABLED)
297 pnp_printf(buffer," disabled\n"); 307 pnp_printf(buffer, " disabled\n");
298 else 308 else
299 pnp_printf(buffer," %lld\n", 309 pnp_printf(buffer, " %lld\n",
300 (unsigned long long)pnp_dma(dev, i)); 310 (unsigned long long)pnp_dma(dev, i));
301 } 311 }
302 } 312 }
303 ret = (buffer->curr - buf); 313 ret = (buffer->curr - buf);
@@ -308,55 +318,57 @@ static ssize_t pnp_show_current_resources(struct device *dmdev, struct device_at
308extern struct semaphore pnp_res_mutex; 318extern struct semaphore pnp_res_mutex;
309 319
310static ssize_t 320static ssize_t
311pnp_set_current_resources(struct device * dmdev, struct device_attribute *attr, const char * ubuf, size_t count) 321pnp_set_current_resources(struct device *dmdev, struct device_attribute *attr,
322 const char *ubuf, size_t count)
312{ 323{
313 struct pnp_dev *dev = to_pnp_dev(dmdev); 324 struct pnp_dev *dev = to_pnp_dev(dmdev);
314 char *buf = (void *)ubuf; 325 char *buf = (void *)ubuf;
315 int retval = 0; 326 int retval = 0;
316 327
317 if (dev->status & PNP_ATTACHED) { 328 if (dev->status & PNP_ATTACHED) {
318 retval = -EBUSY; 329 retval = -EBUSY;
319 pnp_info("Device %s cannot be configured because it is in use.", dev->dev.bus_id); 330 pnp_info("Device %s cannot be configured because it is in use.",
331 dev->dev.bus_id);
320 goto done; 332 goto done;
321 } 333 }
322 334
323 while (isspace(*buf)) 335 while (isspace(*buf))
324 ++buf; 336 ++buf;
325 if (!strnicmp(buf,"disable",7)) { 337 if (!strnicmp(buf, "disable", 7)) {
326 retval = pnp_disable_dev(dev); 338 retval = pnp_disable_dev(dev);
327 goto done; 339 goto done;
328 } 340 }
329 if (!strnicmp(buf,"activate",8)) { 341 if (!strnicmp(buf, "activate", 8)) {
330 retval = pnp_activate_dev(dev); 342 retval = pnp_activate_dev(dev);
331 goto done; 343 goto done;
332 } 344 }
333 if (!strnicmp(buf,"fill",4)) { 345 if (!strnicmp(buf, "fill", 4)) {
334 if (dev->active) 346 if (dev->active)
335 goto done; 347 goto done;
336 retval = pnp_auto_config_dev(dev); 348 retval = pnp_auto_config_dev(dev);
337 goto done; 349 goto done;
338 } 350 }
339 if (!strnicmp(buf,"auto",4)) { 351 if (!strnicmp(buf, "auto", 4)) {
340 if (dev->active) 352 if (dev->active)
341 goto done; 353 goto done;
342 pnp_init_resource_table(&dev->res); 354 pnp_init_resource_table(&dev->res);
343 retval = pnp_auto_config_dev(dev); 355 retval = pnp_auto_config_dev(dev);
344 goto done; 356 goto done;
345 } 357 }
346 if (!strnicmp(buf,"clear",5)) { 358 if (!strnicmp(buf, "clear", 5)) {
347 if (dev->active) 359 if (dev->active)
348 goto done; 360 goto done;
349 pnp_init_resource_table(&dev->res); 361 pnp_init_resource_table(&dev->res);
350 goto done; 362 goto done;
351 } 363 }
352 if (!strnicmp(buf,"get",3)) { 364 if (!strnicmp(buf, "get", 3)) {
353 down(&pnp_res_mutex); 365 down(&pnp_res_mutex);
354 if (pnp_can_read(dev)) 366 if (pnp_can_read(dev))
355 dev->protocol->get(dev, &dev->res); 367 dev->protocol->get(dev, &dev->res);
356 up(&pnp_res_mutex); 368 up(&pnp_res_mutex);
357 goto done; 369 goto done;
358 } 370 }
359 if (!strnicmp(buf,"set",3)) { 371 if (!strnicmp(buf, "set", 3)) {
360 int nport = 0, nmem = 0, nirq = 0, ndma = 0; 372 int nport = 0, nmem = 0, nirq = 0, ndma = 0;
361 if (dev->active) 373 if (dev->active)
362 goto done; 374 goto done;
@@ -366,65 +378,77 @@ pnp_set_current_resources(struct device * dmdev, struct device_attribute *attr,
366 while (1) { 378 while (1) {
367 while (isspace(*buf)) 379 while (isspace(*buf))
368 ++buf; 380 ++buf;
369 if (!strnicmp(buf,"io",2)) { 381 if (!strnicmp(buf, "io", 2)) {
370 buf += 2; 382 buf += 2;
371 while (isspace(*buf)) 383 while (isspace(*buf))
372 ++buf; 384 ++buf;
373 dev->res.port_resource[nport].start = simple_strtoul(buf,&buf,0); 385 dev->res.port_resource[nport].start =
386 simple_strtoul(buf, &buf, 0);
374 while (isspace(*buf)) 387 while (isspace(*buf))
375 ++buf; 388 ++buf;
376 if(*buf == '-') { 389 if (*buf == '-') {
377 buf += 1; 390 buf += 1;
378 while (isspace(*buf)) 391 while (isspace(*buf))
379 ++buf; 392 ++buf;
380 dev->res.port_resource[nport].end = simple_strtoul(buf,&buf,0); 393 dev->res.port_resource[nport].end =
394 simple_strtoul(buf, &buf, 0);
381 } else 395 } else
382 dev->res.port_resource[nport].end = dev->res.port_resource[nport].start; 396 dev->res.port_resource[nport].end =
383 dev->res.port_resource[nport].flags = IORESOURCE_IO; 397 dev->res.port_resource[nport].start;
398 dev->res.port_resource[nport].flags =
399 IORESOURCE_IO;
384 nport++; 400 nport++;
385 if (nport >= PNP_MAX_PORT) 401 if (nport >= PNP_MAX_PORT)
386 break; 402 break;
387 continue; 403 continue;
388 } 404 }
389 if (!strnicmp(buf,"mem",3)) { 405 if (!strnicmp(buf, "mem", 3)) {
390 buf += 3; 406 buf += 3;
391 while (isspace(*buf)) 407 while (isspace(*buf))
392 ++buf; 408 ++buf;
393 dev->res.mem_resource[nmem].start = simple_strtoul(buf,&buf,0); 409 dev->res.mem_resource[nmem].start =
410 simple_strtoul(buf, &buf, 0);
394 while (isspace(*buf)) 411 while (isspace(*buf))
395 ++buf; 412 ++buf;
396 if(*buf == '-') { 413 if (*buf == '-') {
397 buf += 1; 414 buf += 1;
398 while (isspace(*buf)) 415 while (isspace(*buf))
399 ++buf; 416 ++buf;
400 dev->res.mem_resource[nmem].end = simple_strtoul(buf,&buf,0); 417 dev->res.mem_resource[nmem].end =
418 simple_strtoul(buf, &buf, 0);
401 } else 419 } else
402 dev->res.mem_resource[nmem].end = dev->res.mem_resource[nmem].start; 420 dev->res.mem_resource[nmem].end =
403 dev->res.mem_resource[nmem].flags = IORESOURCE_MEM; 421 dev->res.mem_resource[nmem].start;
422 dev->res.mem_resource[nmem].flags =
423 IORESOURCE_MEM;
404 nmem++; 424 nmem++;
405 if (nmem >= PNP_MAX_MEM) 425 if (nmem >= PNP_MAX_MEM)
406 break; 426 break;
407 continue; 427 continue;
408 } 428 }
409 if (!strnicmp(buf,"irq",3)) { 429 if (!strnicmp(buf, "irq", 3)) {
410 buf += 3; 430 buf += 3;
411 while (isspace(*buf)) 431 while (isspace(*buf))
412 ++buf; 432 ++buf;
413 dev->res.irq_resource[nirq].start = 433 dev->res.irq_resource[nirq].start =
414 dev->res.irq_resource[nirq].end = simple_strtoul(buf,&buf,0); 434 dev->res.irq_resource[nirq].end =
415 dev->res.irq_resource[nirq].flags = IORESOURCE_IRQ; 435 simple_strtoul(buf, &buf, 0);
436 dev->res.irq_resource[nirq].flags =
437 IORESOURCE_IRQ;
416 nirq++; 438 nirq++;
417 if (nirq >= PNP_MAX_IRQ) 439 if (nirq >= PNP_MAX_IRQ)
418 break; 440 break;
419 continue; 441 continue;
420 } 442 }
421 if (!strnicmp(buf,"dma",3)) { 443 if (!strnicmp(buf, "dma", 3)) {
422 buf += 3; 444 buf += 3;
423 while (isspace(*buf)) 445 while (isspace(*buf))
424 ++buf; 446 ++buf;
425 dev->res.dma_resource[ndma].start = 447 dev->res.dma_resource[ndma].start =
426 dev->res.dma_resource[ndma].end = simple_strtoul(buf,&buf,0); 448 dev->res.dma_resource[ndma].end =
427 dev->res.dma_resource[ndma].flags = IORESOURCE_DMA; 449 simple_strtoul(buf, &buf, 0);
450 dev->res.dma_resource[ndma].flags =
451 IORESOURCE_DMA;
428 ndma++; 452 ndma++;
429 if (ndma >= PNP_MAX_DMA) 453 if (ndma >= PNP_MAX_DMA)
430 break; 454 break;
@@ -435,45 +459,50 @@ pnp_set_current_resources(struct device * dmdev, struct device_attribute *attr,
435 up(&pnp_res_mutex); 459 up(&pnp_res_mutex);
436 goto done; 460 goto done;
437 } 461 }
438 done: 462 done:
439 if (retval < 0) 463 if (retval < 0)
440 return retval; 464 return retval;
441 return count; 465 return count;
442} 466}
443 467
444static DEVICE_ATTR(resources,S_IRUGO | S_IWUSR, 468static DEVICE_ATTR(resources, S_IRUGO | S_IWUSR,
445 pnp_show_current_resources,pnp_set_current_resources); 469 pnp_show_current_resources, pnp_set_current_resources);
446 470
447static ssize_t pnp_show_current_ids(struct device *dmdev, struct device_attribute *attr, char *buf) 471static ssize_t pnp_show_current_ids(struct device *dmdev,
472 struct device_attribute *attr, char *buf)
448{ 473{
449 char *str = buf; 474 char *str = buf;
450 struct pnp_dev *dev = to_pnp_dev(dmdev); 475 struct pnp_dev *dev = to_pnp_dev(dmdev);
451 struct pnp_id * pos = dev->id; 476 struct pnp_id *pos = dev->id;
452 477
453 while (pos) { 478 while (pos) {
454 str += sprintf(str,"%s\n", pos->id); 479 str += sprintf(str, "%s\n", pos->id);
455 pos = pos->next; 480 pos = pos->next;
456 } 481 }
457 return (str - buf); 482 return (str - buf);
458} 483}
459 484
460static DEVICE_ATTR(id,S_IRUGO,pnp_show_current_ids,NULL); 485static DEVICE_ATTR(id, S_IRUGO, pnp_show_current_ids, NULL);
461 486
462int pnp_interface_attach_device(struct pnp_dev *dev) 487int pnp_interface_attach_device(struct pnp_dev *dev)
463{ 488{
464 int rc = device_create_file(&dev->dev,&dev_attr_options); 489 int rc = device_create_file(&dev->dev, &dev_attr_options);
465 if (rc) goto err; 490
466 rc = device_create_file(&dev->dev,&dev_attr_resources); 491 if (rc)
467 if (rc) goto err_opt; 492 goto err;
468 rc = device_create_file(&dev->dev,&dev_attr_id); 493 rc = device_create_file(&dev->dev, &dev_attr_resources);
469 if (rc) goto err_res; 494 if (rc)
495 goto err_opt;
496 rc = device_create_file(&dev->dev, &dev_attr_id);
497 if (rc)
498 goto err_res;
470 499
471 return 0; 500 return 0;
472 501
473err_res: 502 err_res:
474 device_remove_file(&dev->dev,&dev_attr_resources); 503 device_remove_file(&dev->dev, &dev_attr_resources);
475err_opt: 504 err_opt:
476 device_remove_file(&dev->dev,&dev_attr_options); 505 device_remove_file(&dev->dev, &dev_attr_options);
477err: 506 err:
478 return rc; 507 return rc;
479} 508}
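
For context on the interface.c hunks above: the "resources" attribute parses one command per write — "disable", "activate", "fill", "auto", "clear", "get", or "set" followed by any mix of "io <min>[-<max>]", "mem <min>[-<max>]", "irq <n>" and "dma <n>" — and refuses with -EBUSY while a driver is attached (the PNP_ATTACHED check). A user-space sketch, with an invented device path:

#include <stdio.h>

int main(void)
{
	const char *attr = "/sys/bus/pnp/devices/00:01/resources";
	FILE *f = fopen(attr, "w");

	if (!f) {
		perror(attr);
		return 1;
	}
	/* same grammar the kernel loop walks with strnicmp() and
	 * simple_strtoul(); the device must not be active for "set" */
	fprintf(f, "set io 0x220-0x22f irq 5 dma 1");
	return fclose(f) ? 1 : 0;
}
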
diff --git a/drivers/pnp/isapnp/compat.c b/drivers/pnp/isapnp/compat.c
index 0697ab88a9ac..10bdcc4d4f7b 100644
--- a/drivers/pnp/isapnp/compat.c
+++ b/drivers/pnp/isapnp/compat.c
@@ -3,34 +3,30 @@
3 * the old isapnp APIs. If possible use the new APIs instead. 3 * the old isapnp APIs. If possible use the new APIs instead.
4 * 4 *
5 * Copyright 2002 Adam Belay <ambx1@neo.rr.com> 5 * Copyright 2002 Adam Belay <ambx1@neo.rr.com>
6 *
7 */ 6 */
8
9/* TODO: see if more isapnp functions are needed here */
10 7
11#include <linux/module.h> 8#include <linux/module.h>
12#include <linux/isapnp.h> 9#include <linux/isapnp.h>
13#include <linux/string.h> 10#include <linux/string.h>
14 11
15static void pnp_convert_id(char *buf, unsigned short vendor, unsigned short device) 12static void pnp_convert_id(char *buf, unsigned short vendor,
13 unsigned short device)
16{ 14{
17 sprintf(buf, "%c%c%c%x%x%x%x", 15 sprintf(buf, "%c%c%c%x%x%x%x",
18 'A' + ((vendor >> 2) & 0x3f) - 1, 16 'A' + ((vendor >> 2) & 0x3f) - 1,
19 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1, 17 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1,
20 'A' + ((vendor >> 8) & 0x1f) - 1, 18 'A' + ((vendor >> 8) & 0x1f) - 1,
21 (device >> 4) & 0x0f, 19 (device >> 4) & 0x0f, device & 0x0f,
22 device & 0x0f, 20 (device >> 12) & 0x0f, (device >> 8) & 0x0f);
23 (device >> 12) & 0x0f,
24 (device >> 8) & 0x0f);
25} 21}
26 22
27struct pnp_card *pnp_find_card(unsigned short vendor, 23struct pnp_card *pnp_find_card(unsigned short vendor, unsigned short device,
28 unsigned short device,
29 struct pnp_card *from) 24 struct pnp_card *from)
30{ 25{
31 char id[8]; 26 char id[8];
32 char any[8]; 27 char any[8];
33 struct list_head *list; 28 struct list_head *list;
29
34 pnp_convert_id(id, vendor, device); 30 pnp_convert_id(id, vendor, device);
35 pnp_convert_id(any, ISAPNP_ANY_ID, ISAPNP_ANY_ID); 31 pnp_convert_id(any, ISAPNP_ANY_ID, ISAPNP_ANY_ID);
36 32
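
The pnp_convert_id() reflow above keeps the usual EISA-ID unpacking: three compressed 5/6-bit letters from the vendor word, four hex digits from the device word. A standalone restatement with sample values (0xd041 is the compressed "PNP" vendor prefix; 0x010c yields product code 0c01):

#include <stdio.h>

static void convert_id(char *buf, unsigned short vendor, unsigned short device)
{
	sprintf(buf, "%c%c%c%x%x%x%x",
		'A' + ((vendor >> 2) & 0x3f) - 1,
		'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1,
		'A' + ((vendor >> 8) & 0x1f) - 1,
		(device >> 4) & 0x0f, device & 0x0f,
		(device >> 12) & 0x0f, (device >> 8) & 0x0f);
}

int main(void)
{
	char id[8];	/* seven characters plus NUL */

	convert_id(id, 0xd041, 0x010c);
	printf("%s\n", id);	/* prints PNP0c01 */
	return 0;
}
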
@@ -38,20 +34,20 @@ struct pnp_card *pnp_find_card(unsigned short vendor,
38 34
39 while (list != &pnp_cards) { 35 while (list != &pnp_cards) {
40 struct pnp_card *card = global_to_pnp_card(list); 36 struct pnp_card *card = global_to_pnp_card(list);
41 if (compare_pnp_id(card->id,id) || (memcmp(id,any,7)==0)) 37
38 if (compare_pnp_id(card->id, id) || (memcmp(id, any, 7) == 0))
42 return card; 39 return card;
43 list = list->next; 40 list = list->next;
44 } 41 }
45 return NULL; 42 return NULL;
46} 43}
47 44
48struct pnp_dev *pnp_find_dev(struct pnp_card *card, 45struct pnp_dev *pnp_find_dev(struct pnp_card *card, unsigned short vendor,
49 unsigned short vendor, 46 unsigned short function, struct pnp_dev *from)
50 unsigned short function,
51 struct pnp_dev *from)
52{ 47{
53 char id[8]; 48 char id[8];
54 char any[8]; 49 char any[8];
50
55 pnp_convert_id(id, vendor, function); 51 pnp_convert_id(id, vendor, function);
56 pnp_convert_id(any, ISAPNP_ANY_ID, ISAPNP_ANY_ID); 52 pnp_convert_id(any, ISAPNP_ANY_ID, ISAPNP_ANY_ID);
57 if (card == NULL) { /* look for a logical device from all cards */ 53 if (card == NULL) { /* look for a logical device from all cards */
@@ -63,7 +59,9 @@ struct pnp_dev *pnp_find_dev(struct pnp_card *card,
63 59
64 while (list != &pnp_global) { 60 while (list != &pnp_global) {
65 struct pnp_dev *dev = global_to_pnp_dev(list); 61 struct pnp_dev *dev = global_to_pnp_dev(list);
66 if (compare_pnp_id(dev->id,id) || (memcmp(id,any,7)==0)) 62
63 if (compare_pnp_id(dev->id, id) ||
64 (memcmp(id, any, 7) == 0))
67 return dev; 65 return dev;
68 list = list->next; 66 list = list->next;
69 } 67 }
@@ -78,7 +76,8 @@ struct pnp_dev *pnp_find_dev(struct pnp_card *card,
78 } 76 }
79 while (list != &card->devices) { 77 while (list != &card->devices) {
80 struct pnp_dev *dev = card_to_pnp_dev(list); 78 struct pnp_dev *dev = card_to_pnp_dev(list);
81 if (compare_pnp_id(dev->id,id)) 79
80 if (compare_pnp_id(dev->id, id))
82 return dev; 81 return dev;
83 list = list->next; 82 list = list->next;
84 } 83 }
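
These compat helpers exist so pre-pnp-layer ISAPnP drivers keep working. Typical use, sketched here with invented vendor/function values (ISAPNP_VENDOR()/ISAPNP_DEVICE()/ISAPNP_FUNCTION() are the header's helpers for building the shorts):

#include <linux/errno.h>
#include <linux/isapnp.h>

static int foo_lookup(void)
{
	struct pnp_card *card;
	struct pnp_dev *dev;

	card = pnp_find_card(ISAPNP_VENDOR('F', 'O', 'O'),
			     ISAPNP_DEVICE(0x1234), NULL);
	if (!card)
		return -ENODEV;
	/* first logical device with the given function id on that card */
	dev = pnp_find_dev(card, ISAPNP_VENDOR('F', 'O', 'O'),
			   ISAPNP_FUNCTION(0x0001), NULL);
	return dev ? 0 : -ENODEV;
}
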
diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c
index 914d00c423ad..b4e2aa995b53 100644
--- a/drivers/pnp/isapnp/core.c
+++ b/drivers/pnp/isapnp/core.c
@@ -51,10 +51,10 @@
51#define ISAPNP_DEBUG 51#define ISAPNP_DEBUG
52#endif 52#endif
53 53
54int isapnp_disable; /* Disable ISA PnP */ 54int isapnp_disable; /* Disable ISA PnP */
55static int isapnp_rdp; /* Read Data Port */ 55static int isapnp_rdp; /* Read Data Port */
56static int isapnp_reset = 1; /* reset all PnP cards (deactivate) */ 56static int isapnp_reset = 1; /* reset all PnP cards (deactivate) */
57static int isapnp_verbose = 1; /* verbose mode */ 57static int isapnp_verbose = 1; /* verbose mode */
58 58
59MODULE_AUTHOR("Jaroslav Kysela <perex@suse.cz>"); 59MODULE_AUTHOR("Jaroslav Kysela <perex@suse.cz>");
60MODULE_DESCRIPTION("Generic ISA Plug & Play support"); 60MODULE_DESCRIPTION("Generic ISA Plug & Play support");
@@ -126,7 +126,7 @@ static unsigned short isapnp_read_word(unsigned char idx)
126 unsigned short val; 126 unsigned short val;
127 127
128 val = isapnp_read_byte(idx); 128 val = isapnp_read_byte(idx);
129 val = (val << 8) + isapnp_read_byte(idx+1); 129 val = (val << 8) + isapnp_read_byte(idx + 1);
130 return val; 130 return val;
131} 131}
132 132
@@ -139,7 +139,7 @@ void isapnp_write_byte(unsigned char idx, unsigned char val)
139static void isapnp_write_word(unsigned char idx, unsigned short val) 139static void isapnp_write_word(unsigned char idx, unsigned short val)
140{ 140{
141 isapnp_write_byte(idx, val >> 8); 141 isapnp_write_byte(idx, val >> 8);
142 isapnp_write_byte(idx+1, val); 142 isapnp_write_byte(idx + 1, val);
143} 143}
144 144
145static void isapnp_key(void) 145static void isapnp_key(void)
@@ -193,7 +193,7 @@ static void isapnp_deactivate(unsigned char logdev)
193static void __init isapnp_peek(unsigned char *data, int bytes) 193static void __init isapnp_peek(unsigned char *data, int bytes)
194{ 194{
195 int i, j; 195 int i, j;
196 unsigned char d=0; 196 unsigned char d = 0;
197 197
198 for (i = 1; i <= bytes; i++) { 198 for (i = 1; i <= bytes; i++) {
199 for (j = 0; j < 20; j++) { 199 for (j = 0; j < 20; j++) {
@@ -220,19 +220,18 @@ static int isapnp_next_rdp(void)
220{ 220{
221 int rdp = isapnp_rdp; 221 int rdp = isapnp_rdp;
222 static int old_rdp = 0; 222 static int old_rdp = 0;
223 223
224 if(old_rdp) 224 if (old_rdp) {
225 {
226 release_region(old_rdp, 1); 225 release_region(old_rdp, 1);
227 old_rdp = 0; 226 old_rdp = 0;
228 } 227 }
229 while (rdp <= 0x3ff) { 228 while (rdp <= 0x3ff) {
230 /* 229 /*
231 * We cannot use NE2000 probe spaces for ISAPnP or we 230 * We cannot use NE2000 probe spaces for ISAPnP or we
232 * will lock up machines. 231 * will lock up machines.
233 */ 232 */
234 if ((rdp < 0x280 || rdp > 0x380) && request_region(rdp, 1, "ISAPnP")) 233 if ((rdp < 0x280 || rdp > 0x380)
235 { 234 && request_region(rdp, 1, "ISAPnP")) {
236 isapnp_rdp = rdp; 235 isapnp_rdp = rdp;
237 old_rdp = rdp; 236 old_rdp = rdp;
238 return 0; 237 return 0;
@@ -253,7 +252,6 @@ static inline void isapnp_set_rdp(void)
253 * Perform an isolation. The port selection code now tries to avoid 252 * Perform an isolation. The port selection code now tries to avoid
254 * "dangerous to read" ports. 253 * "dangerous to read" ports.
255 */ 254 */
256
257static int __init isapnp_isolate_rdp_select(void) 255static int __init isapnp_isolate_rdp_select(void)
258{ 256{
259 isapnp_wait(); 257 isapnp_wait();
@@ -282,7 +280,6 @@ static int __init isapnp_isolate_rdp_select(void)
282/* 280/*
283 * Isolate (assign uniqued CSN) to all ISA PnP devices. 281 * Isolate (assign uniqued CSN) to all ISA PnP devices.
284 */ 282 */
285
286static int __init isapnp_isolate(void) 283static int __init isapnp_isolate(void)
287{ 284{
288 unsigned char checksum = 0x6a; 285 unsigned char checksum = 0x6a;
@@ -305,7 +302,9 @@ static int __init isapnp_isolate(void)
305 udelay(250); 302 udelay(250);
306 if (data == 0x55aa) 303 if (data == 0x55aa)
307 bit = 0x01; 304 bit = 0x01;
308 checksum = ((((checksum ^ (checksum >> 1)) & 0x01) ^ bit) << 7) | (checksum >> 1); 305 checksum =
306 ((((checksum ^ (checksum >> 1)) & 0x01) ^ bit) << 7)
307 | (checksum >> 1);
309 bit = 0x00; 308 bit = 0x00;
310 } 309 }
311 for (i = 65; i <= 72; i++) { 310 for (i = 65; i <= 72; i++) {
@@ -351,13 +350,12 @@ static int __init isapnp_isolate(void)
351/* 350/*
352 * Read one tag from stream. 351 * Read one tag from stream.
353 */ 352 */
354
355static int __init isapnp_read_tag(unsigned char *type, unsigned short *size) 353static int __init isapnp_read_tag(unsigned char *type, unsigned short *size)
356{ 354{
357 unsigned char tag, tmp[2]; 355 unsigned char tag, tmp[2];
358 356
359 isapnp_peek(&tag, 1); 357 isapnp_peek(&tag, 1);
360 if (tag == 0) /* invalid tag */ 358 if (tag == 0) /* invalid tag */
361 return -1; 359 return -1;
362 if (tag & 0x80) { /* large item */ 360 if (tag & 0x80) { /* large item */
363 *type = tag; 361 *type = tag;
@@ -368,7 +366,8 @@ static int __init isapnp_read_tag(unsigned char *type, unsigned short *size)
368 *size = tag & 0x07; 366 *size = tag & 0x07;
369 } 367 }
370#if 0 368#if 0
371 printk(KERN_DEBUG "tag = 0x%x, type = 0x%x, size = %i\n", tag, *type, *size); 369 printk(KERN_DEBUG "tag = 0x%x, type = 0x%x, size = %i\n", tag, *type,
370 *size);
372#endif 371#endif
373 if (*type == 0xff && *size == 0xffff) /* probably invalid data */ 372 if (*type == 0xff && *size == 0xffff) /* probably invalid data */
374 return -1; 373 return -1;
@@ -378,7 +377,6 @@ static int __init isapnp_read_tag(unsigned char *type, unsigned short *size)
378/* 377/*
379 * Skip specified number of bytes from stream. 378 * Skip specified number of bytes from stream.
380 */ 379 */
381
382static void __init isapnp_skip_bytes(int count) 380static void __init isapnp_skip_bytes(int count)
383{ 381{
384 isapnp_peek(NULL, count); 382 isapnp_peek(NULL, count);
@@ -387,31 +385,30 @@ static void __init isapnp_skip_bytes(int count)
387/* 385/*
388 * Parse EISA id. 386 * Parse EISA id.
389 */ 387 */
390 388static void isapnp_parse_id(struct pnp_dev *dev, unsigned short vendor,
391static void isapnp_parse_id(struct pnp_dev * dev, unsigned short vendor, unsigned short device) 389 unsigned short device)
392{ 390{
393 struct pnp_id * id; 391 struct pnp_id *id;
392
394 if (!dev) 393 if (!dev)
395 return; 394 return;
396 id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL); 395 id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
397 if (!id) 396 if (!id)
398 return; 397 return;
399 sprintf(id->id, "%c%c%c%x%x%x%x", 398 sprintf(id->id, "%c%c%c%x%x%x%x",
400 'A' + ((vendor >> 2) & 0x3f) - 1, 399 'A' + ((vendor >> 2) & 0x3f) - 1,
401 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1, 400 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1,
402 'A' + ((vendor >> 8) & 0x1f) - 1, 401 'A' + ((vendor >> 8) & 0x1f) - 1,
403 (device >> 4) & 0x0f, 402 (device >> 4) & 0x0f,
404 device & 0x0f, 403 device & 0x0f, (device >> 12) & 0x0f, (device >> 8) & 0x0f);
405 (device >> 12) & 0x0f,
406 (device >> 8) & 0x0f);
407 pnp_add_id(id, dev); 404 pnp_add_id(id, dev);
408} 405}
409 406
410/* 407/*
411 * Parse logical device tag. 408 * Parse logical device tag.
412 */ 409 */
413 410static struct pnp_dev *__init isapnp_parse_device(struct pnp_card *card,
414static struct pnp_dev * __init isapnp_parse_device(struct pnp_card *card, int size, int number) 411 int size, int number)
415{ 412{
416 unsigned char tmp[6]; 413 unsigned char tmp[6];
417 struct pnp_dev *dev; 414 struct pnp_dev *dev;
@@ -435,13 +432,11 @@ static struct pnp_dev * __init isapnp_parse_device(struct pnp_card *card, int si
435 return dev; 432 return dev;
436} 433}
437 434
438
439/* 435/*
440 * Add IRQ resource to resources list. 436 * Add IRQ resource to resources list.
441 */ 437 */
442
443static void __init isapnp_parse_irq_resource(struct pnp_option *option, 438static void __init isapnp_parse_irq_resource(struct pnp_option *option,
444 int size) 439 int size)
445{ 440{
446 unsigned char tmp[3]; 441 unsigned char tmp[3];
447 struct pnp_irq *irq; 442 struct pnp_irq *irq;
@@ -458,15 +453,13 @@ static void __init isapnp_parse_irq_resource(struct pnp_option *option,
458 else 453 else
459 irq->flags = IORESOURCE_IRQ_HIGHEDGE; 454 irq->flags = IORESOURCE_IRQ_HIGHEDGE;
460 pnp_register_irq_resource(option, irq); 455 pnp_register_irq_resource(option, irq);
461 return;
462} 456}
463 457
464/* 458/*
465 * Add DMA resource to resources list. 459 * Add DMA resource to resources list.
466 */ 460 */
467
468static void __init isapnp_parse_dma_resource(struct pnp_option *option, 461static void __init isapnp_parse_dma_resource(struct pnp_option *option,
469 int size) 462 int size)
470{ 463{
471 unsigned char tmp[2]; 464 unsigned char tmp[2];
472 struct pnp_dma *dma; 465 struct pnp_dma *dma;
@@ -478,15 +471,13 @@ static void __init isapnp_parse_dma_resource(struct pnp_option *option,
478 dma->map = tmp[0]; 471 dma->map = tmp[0];
479 dma->flags = tmp[1]; 472 dma->flags = tmp[1];
480 pnp_register_dma_resource(option, dma); 473 pnp_register_dma_resource(option, dma);
481 return;
482} 474}
483 475
484/* 476/*
485 * Add port resource to resources list. 477 * Add port resource to resources list.
486 */ 478 */
487
488static void __init isapnp_parse_port_resource(struct pnp_option *option, 479static void __init isapnp_parse_port_resource(struct pnp_option *option,
489 int size) 480 int size)
490{ 481{
491 unsigned char tmp[7]; 482 unsigned char tmp[7];
492 struct pnp_port *port; 483 struct pnp_port *port;
@@ -500,16 +491,14 @@ static void __init isapnp_parse_port_resource(struct pnp_option *option,
500 port->align = tmp[5]; 491 port->align = tmp[5];
501 port->size = tmp[6]; 492 port->size = tmp[6];
502 port->flags = tmp[0] ? PNP_PORT_FLAG_16BITADDR : 0; 493 port->flags = tmp[0] ? PNP_PORT_FLAG_16BITADDR : 0;
503 pnp_register_port_resource(option,port); 494 pnp_register_port_resource(option, port);
504 return;
505} 495}
506 496
507/* 497/*
508 * Add fixed port resource to resources list. 498 * Add fixed port resource to resources list.
509 */ 499 */
510
511static void __init isapnp_parse_fixed_port_resource(struct pnp_option *option, 500static void __init isapnp_parse_fixed_port_resource(struct pnp_option *option,
512 int size) 501 int size)
513{ 502{
514 unsigned char tmp[3]; 503 unsigned char tmp[3];
515 struct pnp_port *port; 504 struct pnp_port *port;
@@ -522,16 +511,14 @@ static void __init isapnp_parse_fixed_port_resource(struct pnp_option *option,
522 port->size = tmp[2]; 511 port->size = tmp[2];
523 port->align = 0; 512 port->align = 0;
524 port->flags = PNP_PORT_FLAG_FIXED; 513 port->flags = PNP_PORT_FLAG_FIXED;
525 pnp_register_port_resource(option,port); 514 pnp_register_port_resource(option, port);
526 return;
527} 515}
528 516
529/* 517/*
530 * Add memory resource to resources list. 518 * Add memory resource to resources list.
531 */ 519 */
532
533static void __init isapnp_parse_mem_resource(struct pnp_option *option, 520static void __init isapnp_parse_mem_resource(struct pnp_option *option,
534 int size) 521 int size)
535{ 522{
536 unsigned char tmp[9]; 523 unsigned char tmp[9];
537 struct pnp_mem *mem; 524 struct pnp_mem *mem;
@@ -545,16 +532,14 @@ static void __init isapnp_parse_mem_resource(struct pnp_option *option,
545 mem->align = (tmp[6] << 8) | tmp[5]; 532 mem->align = (tmp[6] << 8) | tmp[5];
546 mem->size = ((tmp[8] << 8) | tmp[7]) << 8; 533 mem->size = ((tmp[8] << 8) | tmp[7]) << 8;
547 mem->flags = tmp[0]; 534 mem->flags = tmp[0];
548 pnp_register_mem_resource(option,mem); 535 pnp_register_mem_resource(option, mem);
549 return;
550} 536}
551 537
552/* 538/*
553 * Add 32-bit memory resource to resources list. 539 * Add 32-bit memory resource to resources list.
554 */ 540 */
555
556static void __init isapnp_parse_mem32_resource(struct pnp_option *option, 541static void __init isapnp_parse_mem32_resource(struct pnp_option *option,
557 int size) 542 int size)
558{ 543{
559 unsigned char tmp[17]; 544 unsigned char tmp[17];
560 struct pnp_mem *mem; 545 struct pnp_mem *mem;
@@ -565,18 +550,19 @@ static void __init isapnp_parse_mem32_resource(struct pnp_option *option,
565 return; 550 return;
566 mem->min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; 551 mem->min = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1];
567 mem->max = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; 552 mem->max = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5];
568 mem->align = (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9]; 553 mem->align =
569 mem->size = (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13]; 554 (tmp[12] << 24) | (tmp[11] << 16) | (tmp[10] << 8) | tmp[9];
555 mem->size =
556 (tmp[16] << 24) | (tmp[15] << 16) | (tmp[14] << 8) | tmp[13];
570 mem->flags = tmp[0]; 557 mem->flags = tmp[0];
571 pnp_register_mem_resource(option,mem); 558 pnp_register_mem_resource(option, mem);
572} 559}
573 560
574/* 561/*
575 * Add 32-bit fixed memory resource to resources list. 562 * Add 32-bit fixed memory resource to resources list.
576 */ 563 */
577
578static void __init isapnp_parse_fixed_mem32_resource(struct pnp_option *option, 564static void __init isapnp_parse_fixed_mem32_resource(struct pnp_option *option,
579 int size) 565 int size)
580{ 566{
581 unsigned char tmp[9]; 567 unsigned char tmp[9];
582 struct pnp_mem *mem; 568 struct pnp_mem *mem;
@@ -585,28 +571,29 @@ static void __init isapnp_parse_fixed_mem32_resource(struct pnp_option *option,
585 mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); 571 mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
586 if (!mem) 572 if (!mem)
587 return; 573 return;
588 mem->min = mem->max = (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1]; 574 mem->min = mem->max =
575 (tmp[4] << 24) | (tmp[3] << 16) | (tmp[2] << 8) | tmp[1];
589 mem->size = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5]; 576 mem->size = (tmp[8] << 24) | (tmp[7] << 16) | (tmp[6] << 8) | tmp[5];
590 mem->align = 0; 577 mem->align = 0;
591 mem->flags = tmp[0]; 578 mem->flags = tmp[0];
592 pnp_register_mem_resource(option,mem); 579 pnp_register_mem_resource(option, mem);
593} 580}
594 581
595/* 582/*
596 * Parse card name for ISA PnP device. 583 * Parse card name for ISA PnP device.
597 */ 584 */
598
599static void __init 585static void __init
600isapnp_parse_name(char *name, unsigned int name_max, unsigned short *size) 586isapnp_parse_name(char *name, unsigned int name_max, unsigned short *size)
601{ 587{
602 if (name[0] == '\0') { 588 if (name[0] == '\0') {
603 unsigned short size1 = *size >= name_max ? (name_max - 1) : *size; 589 unsigned short size1 =
590 *size >= name_max ? (name_max - 1) : *size;
604 isapnp_peek(name, size1); 591 isapnp_peek(name, size1);
605 name[size1] = '\0'; 592 name[size1] = '\0';
606 *size -= size1; 593 *size -= size1;
607 594
608 /* clean whitespace from end of string */ 595 /* clean whitespace from end of string */
609 while (size1 > 0 && name[--size1] == ' ') 596 while (size1 > 0 && name[--size1] == ' ')
610 name[size1] = '\0'; 597 name[size1] = '\0';
611 } 598 }
612} 599}
@@ -614,7 +601,6 @@ isapnp_parse_name(char *name, unsigned int name_max, unsigned short *size)
614/* 601/*
615 * Parse resource map for logical device. 602 * Parse resource map for logical device.
616 */ 603 */
617
618static int __init isapnp_create_device(struct pnp_card *card, 604static int __init isapnp_create_device(struct pnp_card *card,
619 unsigned short size) 605 unsigned short size)
620{ 606{
@@ -622,6 +608,7 @@ static int __init isapnp_create_device(struct pnp_card *card,
622 unsigned char type, tmp[17]; 608 unsigned char type, tmp[17];
623 struct pnp_option *option; 609 struct pnp_option *option;
624 struct pnp_dev *dev; 610 struct pnp_dev *dev;
611
625 if ((dev = isapnp_parse_device(card, size, number++)) == NULL) 612 if ((dev = isapnp_parse_device(card, size, number++)) == NULL)
626 return 1; 613 return 1;
627 option = pnp_register_independent_option(dev); 614 option = pnp_register_independent_option(dev);
@@ -629,17 +616,19 @@ static int __init isapnp_create_device(struct pnp_card *card,
629 kfree(dev); 616 kfree(dev);
630 return 1; 617 return 1;
631 } 618 }
632 pnp_add_card_device(card,dev); 619 pnp_add_card_device(card, dev);
633 620
634 while (1) { 621 while (1) {
635 if (isapnp_read_tag(&type, &size)<0) 622 if (isapnp_read_tag(&type, &size) < 0)
636 return 1; 623 return 1;
637 if (skip && type != _STAG_LOGDEVID && type != _STAG_END) 624 if (skip && type != _STAG_LOGDEVID && type != _STAG_END)
638 goto __skip; 625 goto __skip;
639 switch (type) { 626 switch (type) {
640 case _STAG_LOGDEVID: 627 case _STAG_LOGDEVID:
641 if (size >= 5 && size <= 6) { 628 if (size >= 5 && size <= 6) {
642 if ((dev = isapnp_parse_device(card, size, number++)) == NULL) 629 if ((dev =
630 isapnp_parse_device(card, size,
631 number++)) == NULL)
643 return 1; 632 return 1;
644 size = 0; 633 size = 0;
645 skip = 0; 634 skip = 0;
@@ -648,7 +637,7 @@ static int __init isapnp_create_device(struct pnp_card *card,
648 kfree(dev); 637 kfree(dev);
649 return 1; 638 return 1;
650 } 639 }
651 pnp_add_card_device(card,dev); 640 pnp_add_card_device(card, dev);
652 } else { 641 } else {
653 skip = 1; 642 skip = 1;
654 } 643 }
@@ -658,7 +647,8 @@ static int __init isapnp_create_device(struct pnp_card *card,
658 case _STAG_COMPATDEVID: 647 case _STAG_COMPATDEVID:
659 if (size == 4 && compat < DEVICE_COUNT_COMPATIBLE) { 648 if (size == 4 && compat < DEVICE_COUNT_COMPATIBLE) {
660 isapnp_peek(tmp, 4); 649 isapnp_peek(tmp, 4);
661 isapnp_parse_id(dev,(tmp[1] << 8) | tmp[0], (tmp[3] << 8) | tmp[2]); 650 isapnp_parse_id(dev, (tmp[1] << 8) | tmp[0],
651 (tmp[3] << 8) | tmp[2]);
662 compat++; 652 compat++;
663 size = 0; 653 size = 0;
664 } 654 }
@@ -684,7 +674,7 @@ static int __init isapnp_create_device(struct pnp_card *card,
684 priority = 0x100 | tmp[0]; 674 priority = 0x100 | tmp[0];
685 size = 0; 675 size = 0;
686 } 676 }
687 option = pnp_register_dependent_option(dev,priority); 677 option = pnp_register_dependent_option(dev, priority);
688 if (!option) 678 if (!option)
689 return 1; 679 return 1;
690 break; 680 break;
@@ -739,11 +729,13 @@ static int __init isapnp_create_device(struct pnp_card *card,
739 isapnp_skip_bytes(size); 729 isapnp_skip_bytes(size);
740 return 1; 730 return 1;
741 default: 731 default:
742 printk(KERN_ERR "isapnp: unexpected or unknown tag type 0x%x for logical device %i (device %i), ignored\n", type, dev->number, card->number); 732 printk(KERN_ERR
733 "isapnp: unexpected or unknown tag type 0x%x for logical device %i (device %i), ignored\n",
734 type, dev->number, card->number);
743 } 735 }
744 __skip: 736 __skip:
745 if (size > 0) 737 if (size > 0)
746 isapnp_skip_bytes(size); 738 isapnp_skip_bytes(size);
747 } 739 }
748 return 0; 740 return 0;
749} 741}
@@ -751,14 +743,13 @@ static int __init isapnp_create_device(struct pnp_card *card,
751/* 743/*
752 * Parse resource map for ISA PnP card. 744 * Parse resource map for ISA PnP card.
753 */ 745 */
754
755static void __init isapnp_parse_resource_map(struct pnp_card *card) 746static void __init isapnp_parse_resource_map(struct pnp_card *card)
756{ 747{
757 unsigned char type, tmp[17]; 748 unsigned char type, tmp[17];
758 unsigned short size; 749 unsigned short size;
759 750
760 while (1) { 751 while (1) {
761 if (isapnp_read_tag(&type, &size)<0) 752 if (isapnp_read_tag(&type, &size) < 0)
762 return; 753 return;
763 switch (type) { 754 switch (type) {
764 case _STAG_PNPVERNO: 755 case _STAG_PNPVERNO:
@@ -771,7 +762,7 @@ static void __init isapnp_parse_resource_map(struct pnp_card *card)
771 break; 762 break;
772 case _STAG_LOGDEVID: 763 case _STAG_LOGDEVID:
773 if (size >= 5 && size <= 6) { 764 if (size >= 5 && size <= 6) {
774 if (isapnp_create_device(card, size)==1) 765 if (isapnp_create_device(card, size) == 1)
775 return; 766 return;
776 size = 0; 767 size = 0;
777 } 768 }
@@ -779,7 +770,8 @@ static void __init isapnp_parse_resource_map(struct pnp_card *card)
779 case _STAG_VENDOR: 770 case _STAG_VENDOR:
780 break; 771 break;
781 case _LTAG_ANSISTR: 772 case _LTAG_ANSISTR:
782 isapnp_parse_name(card->name, sizeof(card->name), &size); 773 isapnp_parse_name(card->name, sizeof(card->name),
774 &size);
783 break; 775 break;
784 case _LTAG_UNICODESTR: 776 case _LTAG_UNICODESTR:
785 /* silently ignore */ 777 /* silently ignore */
@@ -792,18 +784,19 @@ static void __init isapnp_parse_resource_map(struct pnp_card *card)
792 isapnp_skip_bytes(size); 784 isapnp_skip_bytes(size);
793 return; 785 return;
794 default: 786 default:
795 printk(KERN_ERR "isapnp: unexpected or unknown tag type 0x%x for device %i, ignored\n", type, card->number); 787 printk(KERN_ERR
788 "isapnp: unexpected or unknown tag type 0x%x for device %i, ignored\n",
789 type, card->number);
796 } 790 }
797 __skip: 791 __skip:
798 if (size > 0) 792 if (size > 0)
799 isapnp_skip_bytes(size); 793 isapnp_skip_bytes(size);
800 } 794 }
801} 795}
802 796
803/* 797/*
804 * Compute ISA PnP checksum for first eight bytes. 798 * Compute ISA PnP checksum for first eight bytes.
805 */ 799 */
806
807static unsigned char __init isapnp_checksum(unsigned char *data) 800static unsigned char __init isapnp_checksum(unsigned char *data)
808{ 801{
809 int i, j; 802 int i, j;
@@ -815,7 +808,9 @@ static unsigned char __init isapnp_checksum(unsigned char *data)
815 bit = 0; 808 bit = 0;
816 if (b & (1 << j)) 809 if (b & (1 << j))
817 bit = 1; 810 bit = 1;
818 checksum = ((((checksum ^ (checksum >> 1)) & 0x01) ^ bit) << 7) | (checksum >> 1); 811 checksum =
812 ((((checksum ^ (checksum >> 1)) & 0x01) ^ bit) << 7)
813 | (checksum >> 1);
819 } 814 }
820 } 815 }
821 return checksum; 816 return checksum;
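
The expression being re-wrapped in isapnp_isolate() and isapnp_checksum() is the LFSR from the ISA PnP spec: shift right, with the new top bit = (bit0 ^ bit1) ^ input bit, seeded with 0x6a. A standalone restatement over eight header bytes (the sample data is arbitrary; on real hardware the ninth serial-identifier byte should equal the result):

#include <stdio.h>

static unsigned char isapnp_lfsr(const unsigned char *data)
{
	unsigned char checksum = 0x6a;
	int i, j;

	for (i = 0; i < 8; i++)
		for (j = 0; j < 8; j++) {
			unsigned char bit = (data[i] >> j) & 1;

			checksum = ((((checksum ^ (checksum >> 1)) & 0x01) ^ bit) << 7)
				   | (checksum >> 1);
		}
	return checksum;
}

int main(void)
{
	unsigned char header[8] = { 0x04, 0x96, 0x34, 0x12, 0x01, 0x02, 0x03, 0x04 };

	printf("checksum = 0x%02x\n", isapnp_lfsr(header));
	return 0;
}
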
@@ -824,27 +819,25 @@ static unsigned char __init isapnp_checksum(unsigned char *data)
824/* 819/*
825 * Parse EISA id for ISA PnP card. 820 * Parse EISA id for ISA PnP card.
826 */ 821 */
827 822static void isapnp_parse_card_id(struct pnp_card *card, unsigned short vendor,
828static void isapnp_parse_card_id(struct pnp_card * card, unsigned short vendor, unsigned short device) 823 unsigned short device)
829{ 824{
830 struct pnp_id * id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL); 825 struct pnp_id *id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
826
831 if (!id) 827 if (!id)
832 return; 828 return;
833 sprintf(id->id, "%c%c%c%x%x%x%x", 829 sprintf(id->id, "%c%c%c%x%x%x%x",
834 'A' + ((vendor >> 2) & 0x3f) - 1, 830 'A' + ((vendor >> 2) & 0x3f) - 1,
835 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1, 831 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1,
836 'A' + ((vendor >> 8) & 0x1f) - 1, 832 'A' + ((vendor >> 8) & 0x1f) - 1,
837 (device >> 4) & 0x0f, 833 (device >> 4) & 0x0f,
838 device & 0x0f, 834 device & 0x0f, (device >> 12) & 0x0f, (device >> 8) & 0x0f);
839 (device >> 12) & 0x0f, 835 pnp_add_card_id(id, card);
840 (device >> 8) & 0x0f);
841 pnp_add_card_id(id,card);
842} 836}
843 837
844/* 838/*
845 * Build device list for all present ISA PnP devices. 839 * Build device list for all present ISA PnP devices.
846 */ 840 */
847
848static int __init isapnp_build_device_list(void) 841static int __init isapnp_build_device_list(void)
849{ 842{
850 int csn; 843 int csn;
@@ -858,22 +851,29 @@ static int __init isapnp_build_device_list(void)
858 isapnp_peek(header, 9); 851 isapnp_peek(header, 9);
859 checksum = isapnp_checksum(header); 852 checksum = isapnp_checksum(header);
860#if 0 853#if 0
861 printk(KERN_DEBUG "vendor: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", 854 printk(KERN_DEBUG
862 header[0], header[1], header[2], header[3], 855 "vendor: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
863 header[4], header[5], header[6], header[7], header[8]); 856 header[0], header[1], header[2], header[3], header[4],
857 header[5], header[6], header[7], header[8]);
864 printk(KERN_DEBUG "checksum = 0x%x\n", checksum); 858 printk(KERN_DEBUG "checksum = 0x%x\n", checksum);
865#endif 859#endif
866 if ((card = kzalloc(sizeof(struct pnp_card), GFP_KERNEL)) == NULL) 860 if ((card =
861 kzalloc(sizeof(struct pnp_card), GFP_KERNEL)) == NULL)
867 continue; 862 continue;
868 863
869 card->number = csn; 864 card->number = csn;
870 INIT_LIST_HEAD(&card->devices); 865 INIT_LIST_HEAD(&card->devices);
871 isapnp_parse_card_id(card, (header[1] << 8) | header[0], (header[3] << 8) | header[2]); 866 isapnp_parse_card_id(card, (header[1] << 8) | header[0],
872 card->serial = (header[7] << 24) | (header[6] << 16) | (header[5] << 8) | header[4]; 867 (header[3] << 8) | header[2]);
868 card->serial =
869 (header[7] << 24) | (header[6] << 16) | (header[5] << 8) |
870 header[4];
873 isapnp_checksum_value = 0x00; 871 isapnp_checksum_value = 0x00;
874 isapnp_parse_resource_map(card); 872 isapnp_parse_resource_map(card);
875 if (isapnp_checksum_value != 0x00) 873 if (isapnp_checksum_value != 0x00)
876 printk(KERN_ERR "isapnp: checksum for device %i is not valid (0x%x)\n", csn, isapnp_checksum_value); 874 printk(KERN_ERR
875 "isapnp: checksum for device %i is not valid (0x%x)\n",
876 csn, isapnp_checksum_value);
877 card->checksum = isapnp_checksum_value; 877 card->checksum = isapnp_checksum_value;
878 card->protocol = &isapnp_protocol; 878 card->protocol = &isapnp_protocol;
879 879
@@ -890,6 +890,7 @@ static int __init isapnp_build_device_list(void)
890int isapnp_present(void) 890int isapnp_present(void)
891{ 891{
892 struct pnp_card *card; 892 struct pnp_card *card;
893
893 pnp_for_each_card(card) { 894 pnp_for_each_card(card) {
894 if (card->protocol == &isapnp_protocol) 895 if (card->protocol == &isapnp_protocol)
895 return 1; 896 return 1;
@@ -911,13 +912,13 @@ int isapnp_cfg_begin(int csn, int logdev)
911 /* it is possible to set RDP only in the isolation phase */ 912 /* it is possible to set RDP only in the isolation phase */
912 /* Jens Thoms Toerring <Jens.Toerring@physik.fu-berlin.de> */ 913 /* Jens Thoms Toerring <Jens.Toerring@physik.fu-berlin.de> */
913 isapnp_write_byte(0x02, 0x04); /* clear CSN of card */ 914 isapnp_write_byte(0x02, 0x04); /* clear CSN of card */
914 mdelay(2); /* is this necessary? */ 915 mdelay(2); /* is this necessary? */
915 isapnp_wake(csn); /* bring card into sleep state */ 916 isapnp_wake(csn); /* bring card into sleep state */
916 isapnp_wake(0); /* bring card into isolation state */ 917 isapnp_wake(0); /* bring card into isolation state */
917 isapnp_set_rdp(); /* reset the RDP port */ 918 isapnp_set_rdp(); /* reset the RDP port */
918 udelay(1000); /* delay 1000us */ 919 udelay(1000); /* delay 1000us */
919 isapnp_write_byte(0x06, csn); /* reset CSN to previous value */ 920 isapnp_write_byte(0x06, csn); /* reset CSN to previous value */
920 udelay(250); /* is this necessary? */ 921 udelay(250); /* is this necessary? */
921#endif 922#endif
922 if (logdev >= 0) 923 if (logdev >= 0)
923 isapnp_device(logdev); 924 isapnp_device(logdev);
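
isapnp_cfg_begin()/isapnp_cfg_end() bracket any direct poke at a logical device's configuration registers, as the exports below suggest. A rough in-kernel sketch — the 0x60 offset (I/O base, high byte) is only an example, and isapnp_read_byte() is not exported to modules in this tree, so this is built-in-only:

static unsigned char foo_peek_config(struct pnp_dev *dev)
{
	unsigned char val;

	isapnp_cfg_begin(dev->card->number, dev->number);
	val = isapnp_read_byte(0x60);
	isapnp_cfg_end();
	return val;
}
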
@@ -931,12 +932,10 @@ int isapnp_cfg_end(void)
931 return 0; 932 return 0;
932} 933}
933 934
934
935/* 935/*
936 * Inititialization. 936 * Initialization.
937 */ 937 */
938 938
939
940EXPORT_SYMBOL(isapnp_protocol); 939EXPORT_SYMBOL(isapnp_protocol);
941EXPORT_SYMBOL(isapnp_present); 940EXPORT_SYMBOL(isapnp_present);
942EXPORT_SYMBOL(isapnp_cfg_begin); 941EXPORT_SYMBOL(isapnp_cfg_begin);
@@ -946,7 +945,8 @@ EXPORT_SYMBOL(isapnp_read_byte);
946#endif 945#endif
947EXPORT_SYMBOL(isapnp_write_byte); 946EXPORT_SYMBOL(isapnp_write_byte);
948 947
949static int isapnp_read_resources(struct pnp_dev *dev, struct pnp_resource_table *res) 948static int isapnp_read_resources(struct pnp_dev *dev,
949 struct pnp_resource_table *res)
950{ 950{
951 int tmp, ret; 951 int tmp, ret;
952 952
@@ -960,31 +960,37 @@ static int isapnp_read_resources(struct pnp_dev *dev, struct pnp_resource_table
960 res->port_resource[tmp].flags = IORESOURCE_IO; 960 res->port_resource[tmp].flags = IORESOURCE_IO;
961 } 961 }
962 for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) { 962 for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) {
963 ret = isapnp_read_word(ISAPNP_CFG_MEM + (tmp << 3)) << 8; 963 ret =
964 isapnp_read_word(ISAPNP_CFG_MEM + (tmp << 3)) << 8;
964 if (!ret) 965 if (!ret)
965 continue; 966 continue;
966 res->mem_resource[tmp].start = ret; 967 res->mem_resource[tmp].start = ret;
967 res->mem_resource[tmp].flags = IORESOURCE_MEM; 968 res->mem_resource[tmp].flags = IORESOURCE_MEM;
968 } 969 }
969 for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) { 970 for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) {
970 ret = (isapnp_read_word(ISAPNP_CFG_IRQ + (tmp << 1)) >> 8); 971 ret =
972 (isapnp_read_word(ISAPNP_CFG_IRQ + (tmp << 1)) >>
973 8);
971 if (!ret) 974 if (!ret)
972 continue; 975 continue;
973 res->irq_resource[tmp].start = res->irq_resource[tmp].end = ret; 976 res->irq_resource[tmp].start =
977 res->irq_resource[tmp].end = ret;
974 res->irq_resource[tmp].flags = IORESOURCE_IRQ; 978 res->irq_resource[tmp].flags = IORESOURCE_IRQ;
975 } 979 }
976 for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) { 980 for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) {
977 ret = isapnp_read_byte(ISAPNP_CFG_DMA + tmp); 981 ret = isapnp_read_byte(ISAPNP_CFG_DMA + tmp);
978 if (ret == 4) 982 if (ret == 4)
979 continue; 983 continue;
980 res->dma_resource[tmp].start = res->dma_resource[tmp].end = ret; 984 res->dma_resource[tmp].start =
985 res->dma_resource[tmp].end = ret;
981 res->dma_resource[tmp].flags = IORESOURCE_DMA; 986 res->dma_resource[tmp].flags = IORESOURCE_DMA;
982 } 987 }
983 } 988 }
984 return 0; 989 return 0;
985} 990}
986 991
987static int isapnp_get_resources(struct pnp_dev *dev, struct pnp_resource_table * res) 992static int isapnp_get_resources(struct pnp_dev *dev,
993 struct pnp_resource_table *res)
988{ 994{
989 int ret; 995 int ret;
990 pnp_init_resource_table(res); 996 pnp_init_resource_table(res);
@@ -994,24 +1000,44 @@ static int isapnp_get_resources(struct pnp_dev *dev, struct pnp_resource_table *
994 return ret; 1000 return ret;
995} 1001}
996 1002
997static int isapnp_set_resources(struct pnp_dev *dev, struct pnp_resource_table * res) 1003static int isapnp_set_resources(struct pnp_dev *dev,
1004 struct pnp_resource_table *res)
998{ 1005{
999 int tmp; 1006 int tmp;
1000 1007
1001 isapnp_cfg_begin(dev->card->number, dev->number); 1008 isapnp_cfg_begin(dev->card->number, dev->number);
1002 dev->active = 1; 1009 dev->active = 1;
1003 for (tmp = 0; tmp < PNP_MAX_PORT && (res->port_resource[tmp].flags & (IORESOURCE_IO | IORESOURCE_UNSET)) == IORESOURCE_IO; tmp++) 1010 for (tmp = 0;
1004 isapnp_write_word(ISAPNP_CFG_PORT+(tmp<<1), res->port_resource[tmp].start); 1011 tmp < PNP_MAX_PORT
1005 for (tmp = 0; tmp < PNP_MAX_IRQ && (res->irq_resource[tmp].flags & (IORESOURCE_IRQ | IORESOURCE_UNSET)) == IORESOURCE_IRQ; tmp++) { 1012 && (res->port_resource[tmp].
1013 flags & (IORESOURCE_IO | IORESOURCE_UNSET)) == IORESOURCE_IO;
1014 tmp++)
1015 isapnp_write_word(ISAPNP_CFG_PORT + (tmp << 1),
1016 res->port_resource[tmp].start);
1017 for (tmp = 0;
1018 tmp < PNP_MAX_IRQ
1019 && (res->irq_resource[tmp].
1020 flags & (IORESOURCE_IRQ | IORESOURCE_UNSET)) == IORESOURCE_IRQ;
1021 tmp++) {
1006 int irq = res->irq_resource[tmp].start; 1022 int irq = res->irq_resource[tmp].start;
1007 if (irq == 2) 1023 if (irq == 2)
1008 irq = 9; 1024 irq = 9;
1009 isapnp_write_byte(ISAPNP_CFG_IRQ+(tmp<<1), irq); 1025 isapnp_write_byte(ISAPNP_CFG_IRQ + (tmp << 1), irq);
1010 } 1026 }
1011 for (tmp = 0; tmp < PNP_MAX_DMA && (res->dma_resource[tmp].flags & (IORESOURCE_DMA | IORESOURCE_UNSET)) == IORESOURCE_DMA; tmp++) 1027 for (tmp = 0;
1012 isapnp_write_byte(ISAPNP_CFG_DMA+tmp, res->dma_resource[tmp].start); 1028 tmp < PNP_MAX_DMA
1013 for (tmp = 0; tmp < PNP_MAX_MEM && (res->mem_resource[tmp].flags & (IORESOURCE_MEM | IORESOURCE_UNSET)) == IORESOURCE_MEM; tmp++) 1029 && (res->dma_resource[tmp].
1014 isapnp_write_word(ISAPNP_CFG_MEM+(tmp<<3), (res->mem_resource[tmp].start >> 8) & 0xffff); 1030 flags & (IORESOURCE_DMA | IORESOURCE_UNSET)) == IORESOURCE_DMA;
1031 tmp++)
1032 isapnp_write_byte(ISAPNP_CFG_DMA + tmp,
1033 res->dma_resource[tmp].start);
1034 for (tmp = 0;
1035 tmp < PNP_MAX_MEM
1036 && (res->mem_resource[tmp].
1037 flags & (IORESOURCE_MEM | IORESOURCE_UNSET)) == IORESOURCE_MEM;
1038 tmp++)
1039 isapnp_write_word(ISAPNP_CFG_MEM + (tmp << 3),
1040 (res->mem_resource[tmp].start >> 8) & 0xffff);
1015 /* FIXME: We aren't handling 32bit mems properly here */ 1041 /* FIXME: We aren't handling 32bit mems properly here */
1016 isapnp_activate(dev->number); 1042 isapnp_activate(dev->number);
1017 isapnp_cfg_end(); 1043 isapnp_cfg_end();
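The configuration loops above all use the same resource-flag idiom: an entry counts as a live I/O (or IRQ/DMA/memory) assignment only when its type bit is set and IORESOURCE_UNSET is clear, which the combined mask test checks in a single expression; the IRQ loop additionally remaps IRQ 2 to 9, the ISA cascade line. A small standalone sketch of the mask test, with stand-in flag values rather than the real ones from <linux/ioport.h>:

#include <stdio.h>

#define IORESOURCE_IO     0x00000100UL  /* stand-in; real bits live in <linux/ioport.h> */
#define IORESOURCE_UNSET  0x20000000UL  /* stand-in */

/* True only when the IO bit is set AND the UNSET bit is clear, i.e. the
 * entry holds a real, assigned port resource -- one mask test instead of two. */
static int is_assigned_io(unsigned long flags)
{
    return (flags & (IORESOURCE_IO | IORESOURCE_UNSET)) == IORESOURCE_IO;
}

int main(void)
{
    printf("%d\n", is_assigned_io(IORESOURCE_IO));                    /* 1 */
    printf("%d\n", is_assigned_io(IORESOURCE_IO | IORESOURCE_UNSET)); /* 0 */
    printf("%d\n", is_assigned_io(0));                                /* 0 */
    return 0;
}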
@@ -1030,9 +1056,9 @@ static int isapnp_disable_resources(struct pnp_dev *dev)
1030} 1056}
1031 1057
1032struct pnp_protocol isapnp_protocol = { 1058struct pnp_protocol isapnp_protocol = {
1033 .name = "ISA Plug and Play", 1059 .name = "ISA Plug and Play",
1034 .get = isapnp_get_resources, 1060 .get = isapnp_get_resources,
1035 .set = isapnp_set_resources, 1061 .set = isapnp_set_resources,
1036 .disable = isapnp_disable_resources, 1062 .disable = isapnp_disable_resources,
1037}; 1063};
1038 1064
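isapnp_protocol is an instance of struct pnp_protocol, the callback table the PnP core dispatches through (pnp_start_dev() calls ->set() and pnp_stop_dev() calls ->disable(), as seen later in manager.c). A minimal sketch of the same dispatch pattern, with hypothetical demo callbacks:

#include <stdio.h>

struct dev;

/* Minimal shape of struct pnp_protocol: a named table of callbacks. */
struct protocol {
    const char *name;
    int (*set)(struct dev *d);
    int (*disable)(struct dev *d);
};

struct dev {
    const struct protocol *protocol;
    int active;
};

static int demo_set(struct dev *d)     { d->active = 1; return 0; }
static int demo_disable(struct dev *d) { d->active = 0; return 0; }

static const struct protocol demo_protocol = {
    .name    = "demo",
    .set     = demo_set,
    .disable = demo_disable,
};

int main(void)
{
    struct dev d = { &demo_protocol, 0 };

    /* Same shape as pnp_start_dev()/pnp_stop_dev(): report failure if
     * the protocol callback returns a negative value. */
    if (d.protocol->set(&d) < 0)
        return 1;
    printf("after set: active=%d\n", d.active);

    if (d.protocol->disable(&d) < 0)
        return 1;
    printf("after disable: active=%d\n", d.active);
    return 0;
}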
@@ -1053,31 +1079,36 @@ static int __init isapnp_init(void)
1053#endif 1079#endif
1054#ifdef ISAPNP_REGION_OK 1080#ifdef ISAPNP_REGION_OK
1055 if (!request_region(_PIDXR, 1, "isapnp index")) { 1081 if (!request_region(_PIDXR, 1, "isapnp index")) {
1056 printk(KERN_ERR "isapnp: Index Register 0x%x already used\n", _PIDXR); 1082 printk(KERN_ERR "isapnp: Index Register 0x%x already used\n",
1083 _PIDXR);
1057 return -EBUSY; 1084 return -EBUSY;
1058 } 1085 }
1059#endif 1086#endif
1060 if (!request_region(_PNPWRP, 1, "isapnp write")) { 1087 if (!request_region(_PNPWRP, 1, "isapnp write")) {
1061 printk(KERN_ERR "isapnp: Write Data Register 0x%x already used\n", _PNPWRP); 1088 printk(KERN_ERR
1089 "isapnp: Write Data Register 0x%x already used\n",
1090 _PNPWRP);
1062#ifdef ISAPNP_REGION_OK 1091#ifdef ISAPNP_REGION_OK
1063 release_region(_PIDXR, 1); 1092 release_region(_PIDXR, 1);
1064#endif 1093#endif
1065 return -EBUSY; 1094 return -EBUSY;
1066 } 1095 }
1067 1096
1068 if(pnp_register_protocol(&isapnp_protocol)<0) 1097 if (pnp_register_protocol(&isapnp_protocol) < 0)
1069 return -EBUSY; 1098 return -EBUSY;
1070 1099
1071 /* 1100 /*
1072 * Print a message. The existing ISAPnP code is hanging machines 1101 * Print a message. The existing ISAPnP code is hanging machines
1073 * so let the user know where. 1102 * so let the user know where.
1074 */ 1103 */
1075 1104
1076 printk(KERN_INFO "isapnp: Scanning for PnP cards...\n"); 1105 printk(KERN_INFO "isapnp: Scanning for PnP cards...\n");
1077 if (isapnp_rdp >= 0x203 && isapnp_rdp <= 0x3ff) { 1106 if (isapnp_rdp >= 0x203 && isapnp_rdp <= 0x3ff) {
1078 isapnp_rdp |= 3; 1107 isapnp_rdp |= 3;
1079 if (!request_region(isapnp_rdp, 1, "isapnp read")) { 1108 if (!request_region(isapnp_rdp, 1, "isapnp read")) {
1080 printk(KERN_ERR "isapnp: Read Data Register 0x%x already used\n", isapnp_rdp); 1109 printk(KERN_ERR
1110 "isapnp: Read Data Register 0x%x already used\n",
1111 isapnp_rdp);
1081#ifdef ISAPNP_REGION_OK 1112#ifdef ISAPNP_REGION_OK
1082 release_region(_PIDXR, 1); 1113 release_region(_PIDXR, 1);
1083#endif 1114#endif
@@ -1089,14 +1120,14 @@ static int __init isapnp_init(void)
1089 isapnp_detected = 1; 1120 isapnp_detected = 1;
1090 if (isapnp_rdp < 0x203 || isapnp_rdp > 0x3ff) { 1121 if (isapnp_rdp < 0x203 || isapnp_rdp > 0x3ff) {
1091 cards = isapnp_isolate(); 1122 cards = isapnp_isolate();
1092 if (cards < 0 || 1123 if (cards < 0 || (isapnp_rdp < 0x203 || isapnp_rdp > 0x3ff)) {
1093 (isapnp_rdp < 0x203 || isapnp_rdp > 0x3ff)) {
1094#ifdef ISAPNP_REGION_OK 1124#ifdef ISAPNP_REGION_OK
1095 release_region(_PIDXR, 1); 1125 release_region(_PIDXR, 1);
1096#endif 1126#endif
1097 release_region(_PNPWRP, 1); 1127 release_region(_PNPWRP, 1);
1098 isapnp_detected = 0; 1128 isapnp_detected = 0;
1099 printk(KERN_INFO "isapnp: No Plug & Play device found\n"); 1129 printk(KERN_INFO
1130 "isapnp: No Plug & Play device found\n");
1100 return 0; 1131 return 0;
1101 } 1132 }
1102 request_region(isapnp_rdp, 1, "isapnp read"); 1133 request_region(isapnp_rdp, 1, "isapnp read");
@@ -1104,19 +1135,23 @@ static int __init isapnp_init(void)
1104 isapnp_build_device_list(); 1135 isapnp_build_device_list();
1105 cards = 0; 1136 cards = 0;
1106 1137
1107 protocol_for_each_card(&isapnp_protocol,card) { 1138 protocol_for_each_card(&isapnp_protocol, card) {
1108 cards++; 1139 cards++;
1109 if (isapnp_verbose) { 1140 if (isapnp_verbose) {
1110 printk(KERN_INFO "isapnp: Card '%s'\n", card->name[0]?card->name:"Unknown"); 1141 printk(KERN_INFO "isapnp: Card '%s'\n",
1142 card->name[0] ? card->name : "Unknown");
1111 if (isapnp_verbose < 2) 1143 if (isapnp_verbose < 2)
1112 continue; 1144 continue;
1113 card_for_each_dev(card,dev) { 1145 card_for_each_dev(card, dev) {
1114 printk(KERN_INFO "isapnp: Device '%s'\n", dev->name[0]?dev->name:"Unknown"); 1146 printk(KERN_INFO "isapnp: Device '%s'\n",
1147 dev->name[0] ? dev->name : "Unknown");
1115 } 1148 }
1116 } 1149 }
1117 } 1150 }
1118 if (cards) { 1151 if (cards) {
1119 printk(KERN_INFO "isapnp: %i Plug & Play card%s detected total\n", cards, cards>1?"s":""); 1152 printk(KERN_INFO
1153 "isapnp: %i Plug & Play card%s detected total\n", cards,
1154 cards > 1 ? "s" : "");
1120 } else { 1155 } else {
1121 printk(KERN_INFO "isapnp: No Plug & Play card found\n"); 1156 printk(KERN_INFO "isapnp: No Plug & Play card found\n");
1122 } 1157 }
@@ -1141,11 +1176,10 @@ __setup("noisapnp", isapnp_setup_disable);
1141 1176
1142static int __init isapnp_setup_isapnp(char *str) 1177static int __init isapnp_setup_isapnp(char *str)
1143{ 1178{
1144 (void)((get_option(&str,&isapnp_rdp) == 2) && 1179 (void)((get_option(&str, &isapnp_rdp) == 2) &&
1145 (get_option(&str,&isapnp_reset) == 2) && 1180 (get_option(&str, &isapnp_reset) == 2) &&
1146 (get_option(&str,&isapnp_verbose) == 2)); 1181 (get_option(&str, &isapnp_verbose) == 2));
1147 return 1; 1182 return 1;
1148} 1183}
1149 1184
1150__setup("isapnp=", isapnp_setup_isapnp); 1185__setup("isapnp=", isapnp_setup_isapnp);
1151
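The isapnp= handler above parses up to three comma-separated integers (read data port, reset flag, verbosity); because get_option() returns 2 only when a comma follows the parsed value, the && chain stops at the first option given without a successor. A standalone sketch of the same short-circuit parse, where simple_get_option() is a stand-in for the kernel's get_option():

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the kernel's get_option(): parses one int, returns 2 if a
 * comma (more values) follows, 1 if it was the last value, 0 on none. */
static int simple_get_option(char **str, int *val)
{
    char *end;
    long v = strtol(*str, &end, 0);

    if (end == *str)
        return 0;
    *val = (int)v;
    if (*end == ',') {
        *str = end + 1;
        return 2;
    }
    *str = end;
    return 1;
}

int main(void)
{
    char buf[] = "0x3ff,1,2";
    char *s = buf;
    int rdp = 0, reset = 0, verbose = 0;

    /* Same short-circuit chain as isapnp_setup_isapnp(): each later
     * option is parsed only if the previous one ended with a comma. */
    (void)((simple_get_option(&s, &rdp) == 2) &&
           (simple_get_option(&s, &reset) == 2) &&
           (simple_get_option(&s, &verbose) == 2));

    printf("rdp=0x%x reset=%d verbose=%d\n", rdp, reset, verbose);
    return 0;
}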
diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c
index 40b724ebe23b..3fbc0f9ffc26 100644
--- a/drivers/pnp/isapnp/proc.c
+++ b/drivers/pnp/isapnp/proc.c
@@ -2,7 +2,6 @@
2 * ISA Plug & Play support 2 * ISA Plug & Play support
3 * Copyright (c) by Jaroslav Kysela <perex@suse.cz> 3 * Copyright (c) by Jaroslav Kysela <perex@suse.cz>
4 * 4 *
5 *
6 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
@@ -16,7 +15,6 @@
16 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 */ 18 */
21 19
22#include <linux/module.h> 20#include <linux/module.h>
@@ -54,7 +52,8 @@ static loff_t isapnp_proc_bus_lseek(struct file *file, loff_t off, int whence)
54 return (file->f_pos = new); 52 return (file->f_pos = new);
55} 53}
56 54
57static ssize_t isapnp_proc_bus_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) 55static ssize_t isapnp_proc_bus_read(struct file *file, char __user * buf,
56 size_t nbytes, loff_t * ppos)
58{ 57{
59 struct inode *ino = file->f_path.dentry->d_inode; 58 struct inode *ino = file->f_path.dentry->d_inode;
60 struct proc_dir_entry *dp = PDE(ino); 59 struct proc_dir_entry *dp = PDE(ino);
@@ -74,7 +73,7 @@ static ssize_t isapnp_proc_bus_read(struct file *file, char __user *buf, size_t
74 return -EINVAL; 73 return -EINVAL;
75 74
76 isapnp_cfg_begin(dev->card->number, dev->number); 75 isapnp_cfg_begin(dev->card->number, dev->number);
77 for ( ; pos < 256 && cnt > 0; pos++, buf++, cnt--) { 76 for (; pos < 256 && cnt > 0; pos++, buf++, cnt--) {
78 unsigned char val; 77 unsigned char val;
79 val = isapnp_read_byte(pos); 78 val = isapnp_read_byte(pos);
80 __put_user(val, buf); 79 __put_user(val, buf);
@@ -85,10 +84,9 @@ static ssize_t isapnp_proc_bus_read(struct file *file, char __user *buf, size_t
85 return nbytes; 84 return nbytes;
86} 85}
87 86
88static const struct file_operations isapnp_proc_bus_file_operations = 87static const struct file_operations isapnp_proc_bus_file_operations = {
89{ 88 .llseek = isapnp_proc_bus_lseek,
90 .llseek = isapnp_proc_bus_lseek, 89 .read = isapnp_proc_bus_read,
91 .read = isapnp_proc_bus_read,
92}; 90};
93 91
94static int isapnp_proc_attach_device(struct pnp_dev *dev) 92static int isapnp_proc_attach_device(struct pnp_dev *dev)
@@ -139,13 +137,14 @@ static int __exit isapnp_proc_detach_bus(struct pnp_card *bus)
139 remove_proc_entry(name, isapnp_proc_bus_dir); 137 remove_proc_entry(name, isapnp_proc_bus_dir);
140 return 0; 138 return 0;
141} 139}
142#endif /* MODULE */ 140#endif /* MODULE */
143 141
144int __init isapnp_proc_init(void) 142int __init isapnp_proc_init(void)
145{ 143{
146 struct pnp_dev *dev; 144 struct pnp_dev *dev;
145
147 isapnp_proc_bus_dir = proc_mkdir("isapnp", proc_bus); 146 isapnp_proc_bus_dir = proc_mkdir("isapnp", proc_bus);
148 protocol_for_each_dev(&isapnp_protocol,dev) { 147 protocol_for_each_dev(&isapnp_protocol, dev) {
149 isapnp_proc_attach_device(dev); 148 isapnp_proc_attach_device(dev);
150 } 149 }
151 return 0; 150 return 0;
@@ -167,4 +166,4 @@ int __exit isapnp_proc_done(void)
167 remove_proc_entry("isapnp", proc_bus); 166 remove_proc_entry("isapnp", proc_bus);
168 return 0; 167 return 0;
169} 168}
170#endif /* MODULE */ 169#endif /* MODULE */
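isapnp_proc_bus_read() exposes a device's 256-byte configuration space, so the copy loop is bounded by pos < 256 and the request has to be clamped to that window before bytes are pushed to user space. A sketch of the clamping arithmetic under that assumption (the window size is taken from the loop bound above; the surrounding proc plumbing is omitted):

#include <stdio.h>

/* Clamp a (pos, nbytes) request to a 256-byte register window before the
 * per-byte copy loop runs. */
static unsigned long clamp_to_window(unsigned long pos, unsigned long nbytes)
{
    const unsigned long size = 256;

    if (pos >= size)
        return 0;
    if (nbytes > size - pos)
        nbytes = size - pos;
    return nbytes;
}

int main(void)
{
    printf("%lu\n", clamp_to_window(0, 300));  /* 256 */
    printf("%lu\n", clamp_to_window(250, 16)); /* 6   */
    printf("%lu\n", clamp_to_window(256, 4));  /* 0   */
    return 0;
}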
diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c
index 57e6ab1004d0..3bda513a6bd3 100644
--- a/drivers/pnp/manager.c
+++ b/drivers/pnp/manager.c
@@ -3,7 +3,6 @@
3 * 3 *
4 * based on isapnp.c resource management (c) Jaroslav Kysela <perex@suse.cz> 4 * based on isapnp.c resource management (c) Jaroslav Kysela <perex@suse.cz>
5 * Copyright 2003 Adam Belay <ambx1@neo.rr.com> 5 * Copyright 2003 Adam Belay <ambx1@neo.rr.com>
6 *
7 */ 6 */
8 7
9#include <linux/errno.h> 8#include <linux/errno.h>
@@ -26,7 +25,8 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx)
26 return -EINVAL; 25 return -EINVAL;
27 26
28 if (idx >= PNP_MAX_PORT) { 27 if (idx >= PNP_MAX_PORT) {
29 pnp_err("More than 4 ports is incompatible with pnp specifications."); 28 pnp_err
29 ("More than 4 ports is incompatible with pnp specifications.");
30 /* pretend we were successful so at least the manager won't try again */ 30 /* pretend we were successful so at least the manager won't try again */
31 return 1; 31 return 1;
32 } 32 }
@@ -41,11 +41,11 @@ static int pnp_assign_port(struct pnp_dev *dev, struct pnp_port *rule, int idx)
41 41
42 /* set the initial values */ 42 /* set the initial values */
43 *flags |= rule->flags | IORESOURCE_IO; 43 *flags |= rule->flags | IORESOURCE_IO;
44 *flags &= ~IORESOURCE_UNSET; 44 *flags &= ~IORESOURCE_UNSET;
45 45
46 if (!rule->size) { 46 if (!rule->size) {
47 *flags |= IORESOURCE_DISABLED; 47 *flags |= IORESOURCE_DISABLED;
48 return 1; /* skip disabled resource requests */ 48 return 1; /* skip disabled resource requests */
49 } 49 }
50 50
51 *start = rule->min; 51 *start = rule->min;
@@ -70,7 +70,8 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
70 return -EINVAL; 70 return -EINVAL;
71 71
72 if (idx >= PNP_MAX_MEM) { 72 if (idx >= PNP_MAX_MEM) {
73 pnp_err("More than 8 mems is incompatible with pnp specifications."); 73 pnp_err
74 ("More than 8 mems is incompatible with pnp specifications.");
74 /* pretend we were successful so at least the manager won't try again */ 75 /* pretend we were successful so at least the manager won't try again */
75 return 1; 76 return 1;
76 } 77 }
@@ -85,7 +86,7 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
85 86
86 /* set the initial values */ 87 /* set the initial values */
87 *flags |= rule->flags | IORESOURCE_MEM; 88 *flags |= rule->flags | IORESOURCE_MEM;
88 *flags &= ~IORESOURCE_UNSET; 89 *flags &= ~IORESOURCE_UNSET;
89 90
90 /* convert pnp flags to standard Linux flags */ 91 /* convert pnp flags to standard Linux flags */
91 if (!(rule->flags & IORESOURCE_MEM_WRITEABLE)) 92 if (!(rule->flags & IORESOURCE_MEM_WRITEABLE))
@@ -99,11 +100,11 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
99 100
100 if (!rule->size) { 101 if (!rule->size) {
101 *flags |= IORESOURCE_DISABLED; 102 *flags |= IORESOURCE_DISABLED;
102 return 1; /* skip disabled resource requests */ 103 return 1; /* skip disabled resource requests */
103 } 104 }
104 105
105 *start = rule->min; 106 *start = rule->min;
106 *end = *start + rule->size -1; 107 *end = *start + rule->size - 1;
107 108
108 /* run through until pnp_check_mem is happy */ 109 /* run through until pnp_check_mem is happy */
109 while (!pnp_check_mem(dev, idx)) { 110 while (!pnp_check_mem(dev, idx)) {
@@ -115,7 +116,7 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
115 return 1; 116 return 1;
116} 117}
117 118
118static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx) 119static int pnp_assign_irq(struct pnp_dev *dev, struct pnp_irq *rule, int idx)
119{ 120{
120 resource_size_t *start, *end; 121 resource_size_t *start, *end;
121 unsigned long *flags; 122 unsigned long *flags;
@@ -130,7 +131,8 @@ static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx)
130 return -EINVAL; 131 return -EINVAL;
131 132
132 if (idx >= PNP_MAX_IRQ) { 133 if (idx >= PNP_MAX_IRQ) {
133 pnp_err("More than 2 irqs is incompatible with pnp specifications."); 134 pnp_err
135 ("More than 2 irqs is incompatible with pnp specifications.");
134 /* pretend we were successful so at least the manager won't try again */ 136 /* pretend we were successful so at least the manager won't try again */
135 return 1; 137 return 1;
136 } 138 }
@@ -145,11 +147,11 @@ static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx)
145 147
146 /* set the initial values */ 148 /* set the initial values */
147 *flags |= rule->flags | IORESOURCE_IRQ; 149 *flags |= rule->flags | IORESOURCE_IRQ;
148 *flags &= ~IORESOURCE_UNSET; 150 *flags &= ~IORESOURCE_UNSET;
149 151
150 if (bitmap_empty(rule->map, PNP_IRQ_NR)) { 152 if (bitmap_empty(rule->map, PNP_IRQ_NR)) {
151 *flags |= IORESOURCE_DISABLED; 153 *flags |= IORESOURCE_DISABLED;
152 return 1; /* skip disabled resource requests */ 154 return 1; /* skip disabled resource requests */
153 } 155 }
154 156
155 /* TBD: need check for >16 IRQ */ 157 /* TBD: need check for >16 IRQ */
@@ -159,9 +161,9 @@ static int pnp_assign_irq(struct pnp_dev * dev, struct pnp_irq *rule, int idx)
159 return 1; 161 return 1;
160 } 162 }
161 for (i = 0; i < 16; i++) { 163 for (i = 0; i < 16; i++) {
162 if(test_bit(xtab[i], rule->map)) { 164 if (test_bit(xtab[i], rule->map)) {
163 *start = *end = xtab[i]; 165 *start = *end = xtab[i];
164 if(pnp_check_irq(dev, idx)) 166 if (pnp_check_irq(dev, idx))
165 return 1; 167 return 1;
166 } 168 }
167 } 169 }
@@ -183,7 +185,8 @@ static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx)
183 return -EINVAL; 185 return -EINVAL;
184 186
185 if (idx >= PNP_MAX_DMA) { 187 if (idx >= PNP_MAX_DMA) {
186 pnp_err("More than 2 dmas is incompatible with pnp specifications."); 188 pnp_err
189 ("More than 2 dmas is incompatible with pnp specifications.");
187 /* pretend we were successful so at least the manager won't try again */ 190 /* pretend we were successful so at least the manager won't try again */
188 return 1; 191 return 1;
189 } 192 }
@@ -198,17 +201,17 @@ static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx)
198 201
199 /* set the initial values */ 202 /* set the initial values */
200 *flags |= rule->flags | IORESOURCE_DMA; 203 *flags |= rule->flags | IORESOURCE_DMA;
201 *flags &= ~IORESOURCE_UNSET; 204 *flags &= ~IORESOURCE_UNSET;
202 205
203 if (!rule->map) { 206 if (!rule->map) {
204 *flags |= IORESOURCE_DISABLED; 207 *flags |= IORESOURCE_DISABLED;
205 return 1; /* skip disabled resource requests */ 208 return 1; /* skip disabled resource requests */
206 } 209 }
207 210
208 for (i = 0; i < 8; i++) { 211 for (i = 0; i < 8; i++) {
209 if(rule->map & (1<<xtab[i])) { 212 if (rule->map & (1 << xtab[i])) {
210 *start = *end = xtab[i]; 213 *start = *end = xtab[i];
211 if(pnp_check_dma(dev, idx)) 214 if (pnp_check_dma(dev, idx))
212 return 1; 215 return 1;
213 } 216 }
214 } 217 }
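pnp_assign_dma() (like pnp_assign_irq() above it) walks xtab[], a table of channels in preferred order, and takes the first one that is both permitted by the rule's bitmap and passes the conflict check. A userspace sketch of the selection loop, with an illustrative preference table and a stubbed conflict check:

#include <stdio.h>

/* Illustrative preference table: channels are tried in this order. */
static const int xtab[8] = { 1, 3, 5, 6, 7, 0, 2, 4 };

/* Stubbed conflict check standing in for pnp_check_dma(): pretend
 * channel 1 is already claimed by another device. */
static int channel_free(int ch)
{
    return ch != 1;
}

/* First channel that is allowed by 'map' (one bit per channel) and free;
 * mirrors the selection loop in pnp_assign_dma(). Returns -1 if none fit. */
static int pick_channel(unsigned int map)
{
    int i;

    for (i = 0; i < 8; i++)
        if ((map & (1u << xtab[i])) && channel_free(xtab[i]))
            return xtab[i];
    return -1;
}

int main(void)
{
    printf("%d\n", pick_channel(0x0a)); /* channels 1,3 allowed -> 3 */
    printf("%d\n", pick_channel(0x02)); /* only channel 1 -> -1 */
    return 0;
}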
@@ -218,72 +221,80 @@ static int pnp_assign_dma(struct pnp_dev *dev, struct pnp_dma *rule, int idx)
218/** 221/**
219 * pnp_init_resources - Resets a resource table to default values. 222 * pnp_init_resources - Resets a resource table to default values.
220 * @table: pointer to the desired resource table 223 * @table: pointer to the desired resource table
221 *
222 */ 224 */
223void pnp_init_resource_table(struct pnp_resource_table *table) 225void pnp_init_resource_table(struct pnp_resource_table *table)
224{ 226{
225 int idx; 227 int idx;
228
226 for (idx = 0; idx < PNP_MAX_IRQ; idx++) { 229 for (idx = 0; idx < PNP_MAX_IRQ; idx++) {
227 table->irq_resource[idx].name = NULL; 230 table->irq_resource[idx].name = NULL;
228 table->irq_resource[idx].start = -1; 231 table->irq_resource[idx].start = -1;
229 table->irq_resource[idx].end = -1; 232 table->irq_resource[idx].end = -1;
230 table->irq_resource[idx].flags = IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET; 233 table->irq_resource[idx].flags =
234 IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET;
231 } 235 }
232 for (idx = 0; idx < PNP_MAX_DMA; idx++) { 236 for (idx = 0; idx < PNP_MAX_DMA; idx++) {
233 table->dma_resource[idx].name = NULL; 237 table->dma_resource[idx].name = NULL;
234 table->dma_resource[idx].start = -1; 238 table->dma_resource[idx].start = -1;
235 table->dma_resource[idx].end = -1; 239 table->dma_resource[idx].end = -1;
236 table->dma_resource[idx].flags = IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET; 240 table->dma_resource[idx].flags =
241 IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET;
237 } 242 }
238 for (idx = 0; idx < PNP_MAX_PORT; idx++) { 243 for (idx = 0; idx < PNP_MAX_PORT; idx++) {
239 table->port_resource[idx].name = NULL; 244 table->port_resource[idx].name = NULL;
240 table->port_resource[idx].start = 0; 245 table->port_resource[idx].start = 0;
241 table->port_resource[idx].end = 0; 246 table->port_resource[idx].end = 0;
242 table->port_resource[idx].flags = IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET; 247 table->port_resource[idx].flags =
248 IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET;
243 } 249 }
244 for (idx = 0; idx < PNP_MAX_MEM; idx++) { 250 for (idx = 0; idx < PNP_MAX_MEM; idx++) {
245 table->mem_resource[idx].name = NULL; 251 table->mem_resource[idx].name = NULL;
246 table->mem_resource[idx].start = 0; 252 table->mem_resource[idx].start = 0;
247 table->mem_resource[idx].end = 0; 253 table->mem_resource[idx].end = 0;
248 table->mem_resource[idx].flags = IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET; 254 table->mem_resource[idx].flags =
255 IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET;
249 } 256 }
250} 257}
251 258
252/** 259/**
253 * pnp_clean_resources - clears resources that were not manually set 260 * pnp_clean_resources - clears resources that were not manually set
254 * @res: the resources to clean 261 * @res: the resources to clean
255 *
256 */ 262 */
257static void pnp_clean_resource_table(struct pnp_resource_table * res) 263static void pnp_clean_resource_table(struct pnp_resource_table *res)
258{ 264{
259 int idx; 265 int idx;
266
260 for (idx = 0; idx < PNP_MAX_IRQ; idx++) { 267 for (idx = 0; idx < PNP_MAX_IRQ; idx++) {
261 if (!(res->irq_resource[idx].flags & IORESOURCE_AUTO)) 268 if (!(res->irq_resource[idx].flags & IORESOURCE_AUTO))
262 continue; 269 continue;
263 res->irq_resource[idx].start = -1; 270 res->irq_resource[idx].start = -1;
264 res->irq_resource[idx].end = -1; 271 res->irq_resource[idx].end = -1;
265 res->irq_resource[idx].flags = IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET; 272 res->irq_resource[idx].flags =
273 IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET;
266 } 274 }
267 for (idx = 0; idx < PNP_MAX_DMA; idx++) { 275 for (idx = 0; idx < PNP_MAX_DMA; idx++) {
268 if (!(res->dma_resource[idx].flags & IORESOURCE_AUTO)) 276 if (!(res->dma_resource[idx].flags & IORESOURCE_AUTO))
269 continue; 277 continue;
270 res->dma_resource[idx].start = -1; 278 res->dma_resource[idx].start = -1;
271 res->dma_resource[idx].end = -1; 279 res->dma_resource[idx].end = -1;
272 res->dma_resource[idx].flags = IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET; 280 res->dma_resource[idx].flags =
281 IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET;
273 } 282 }
274 for (idx = 0; idx < PNP_MAX_PORT; idx++) { 283 for (idx = 0; idx < PNP_MAX_PORT; idx++) {
275 if (!(res->port_resource[idx].flags & IORESOURCE_AUTO)) 284 if (!(res->port_resource[idx].flags & IORESOURCE_AUTO))
276 continue; 285 continue;
277 res->port_resource[idx].start = 0; 286 res->port_resource[idx].start = 0;
278 res->port_resource[idx].end = 0; 287 res->port_resource[idx].end = 0;
279 res->port_resource[idx].flags = IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET; 288 res->port_resource[idx].flags =
289 IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET;
280 } 290 }
281 for (idx = 0; idx < PNP_MAX_MEM; idx++) { 291 for (idx = 0; idx < PNP_MAX_MEM; idx++) {
282 if (!(res->mem_resource[idx].flags & IORESOURCE_AUTO)) 292 if (!(res->mem_resource[idx].flags & IORESOURCE_AUTO))
283 continue; 293 continue;
284 res->mem_resource[idx].start = 0; 294 res->mem_resource[idx].start = 0;
285 res->mem_resource[idx].end = 0; 295 res->mem_resource[idx].end = 0;
286 res->mem_resource[idx].flags = IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET; 296 res->mem_resource[idx].flags =
297 IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET;
287 } 298 }
288} 299}
289 300
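pnp_clean_resource_table() resets only entries that carry IORESOURCE_AUTO, so values a driver configured manually survive an automatic re-assignment pass. A compact sketch of that rule, again with stand-in flag values:

#include <stdio.h>

#define IORESOURCE_IRQ    0x00000400UL  /* stand-in flag bits */
#define IORESOURCE_UNSET  0x20000000UL
#define IORESOURCE_AUTO   0x40000000UL

struct res { long start, end; unsigned long flags; };

/* Reset only auto-assigned entries, as pnp_clean_resource_table() does;
 * entries without the AUTO bit were set manually and are left intact. */
static void clean(struct res *r, int n)
{
    int i;

    for (i = 0; i < n; i++) {
        if (!(r[i].flags & IORESOURCE_AUTO))
            continue;
        r[i].start = r[i].end = -1;
        r[i].flags = IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET;
    }
}

int main(void)
{
    struct res r[2] = {
        { 5, 5, IORESOURCE_IRQ | IORESOURCE_AUTO }, /* auto: cleaned */
        { 7, 7, IORESOURCE_IRQ },                   /* manual: kept  */
    };

    clean(r, 2);
    printf("%ld %ld\n", r[0].start, r[1].start); /* -1 7 */
    return 0;
}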
@@ -306,7 +317,7 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum)
306 return -ENODEV; 317 return -ENODEV;
307 318
308 down(&pnp_res_mutex); 319 down(&pnp_res_mutex);
309 pnp_clean_resource_table(&dev->res); /* start with a fresh slate */ 320 pnp_clean_resource_table(&dev->res); /* start with a fresh slate */
310 if (dev->independent) { 321 if (dev->independent) {
311 port = dev->independent->port; 322 port = dev->independent->port;
312 mem = dev->independent->mem; 323 mem = dev->independent->mem;
@@ -341,10 +352,11 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum)
341 if (depnum) { 352 if (depnum) {
342 struct pnp_option *dep; 353 struct pnp_option *dep;
343 int i; 354 int i;
344 for (i=1,dep=dev->dependent; i<depnum; i++, dep=dep->next) 355 for (i = 1, dep = dev->dependent; i < depnum;
345 if(!dep) 356 i++, dep = dep->next)
357 if (!dep)
346 goto fail; 358 goto fail;
347 port =dep->port; 359 port = dep->port;
348 mem = dep->mem; 360 mem = dep->mem;
349 irq = dep->irq; 361 irq = dep->irq;
350 dma = dep->dma; 362 dma = dep->dma;
@@ -378,7 +390,7 @@ static int pnp_assign_resources(struct pnp_dev *dev, int depnum)
378 up(&pnp_res_mutex); 390 up(&pnp_res_mutex);
379 return 1; 391 return 1;
380 392
381fail: 393 fail:
382 pnp_clean_resource_table(&dev->res); 394 pnp_clean_resource_table(&dev->res);
383 up(&pnp_res_mutex); 395 up(&pnp_res_mutex);
384 return 0; 396 return 0;
@@ -392,10 +404,12 @@ fail:
392 * 404 *
393 * This function can be used by drivers that want to manually set their resources. 405 * This function can be used by drivers that want to manually set their resources.
394 */ 406 */
395int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table * res, int mode) 407int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res,
408 int mode)
396{ 409{
397 int i; 410 int i;
398 struct pnp_resource_table * bak; 411 struct pnp_resource_table *bak;
412
399 if (!dev || !res) 413 if (!dev || !res)
400 return -EINVAL; 414 return -EINVAL;
401 if (!pnp_can_configure(dev)) 415 if (!pnp_can_configure(dev))
@@ -409,19 +423,19 @@ int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table * res,
409 dev->res = *res; 423 dev->res = *res;
410 if (!(mode & PNP_CONFIG_FORCE)) { 424 if (!(mode & PNP_CONFIG_FORCE)) {
411 for (i = 0; i < PNP_MAX_PORT; i++) { 425 for (i = 0; i < PNP_MAX_PORT; i++) {
412 if(!pnp_check_port(dev,i)) 426 if (!pnp_check_port(dev, i))
413 goto fail; 427 goto fail;
414 } 428 }
415 for (i = 0; i < PNP_MAX_MEM; i++) { 429 for (i = 0; i < PNP_MAX_MEM; i++) {
416 if(!pnp_check_mem(dev,i)) 430 if (!pnp_check_mem(dev, i))
417 goto fail; 431 goto fail;
418 } 432 }
419 for (i = 0; i < PNP_MAX_IRQ; i++) { 433 for (i = 0; i < PNP_MAX_IRQ; i++) {
420 if(!pnp_check_irq(dev,i)) 434 if (!pnp_check_irq(dev, i))
421 goto fail; 435 goto fail;
422 } 436 }
423 for (i = 0; i < PNP_MAX_DMA; i++) { 437 for (i = 0; i < PNP_MAX_DMA; i++) {
424 if(!pnp_check_dma(dev,i)) 438 if (!pnp_check_dma(dev, i))
425 goto fail; 439 goto fail;
426 } 440 }
427 } 441 }
@@ -430,7 +444,7 @@ int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table * res,
430 kfree(bak); 444 kfree(bak);
431 return 0; 445 return 0;
432 446
433fail: 447 fail:
434 dev->res = *bak; 448 dev->res = *bak;
435 up(&pnp_res_mutex); 449 up(&pnp_res_mutex);
436 kfree(bak); 450 kfree(bak);
@@ -440,18 +454,18 @@ fail:
440/** 454/**
441 * pnp_auto_config_dev - automatically assigns resources to a device 455 * pnp_auto_config_dev - automatically assigns resources to a device
442 * @dev: pointer to the desired device 456 * @dev: pointer to the desired device
443 *
444 */ 457 */
445int pnp_auto_config_dev(struct pnp_dev *dev) 458int pnp_auto_config_dev(struct pnp_dev *dev)
446{ 459{
447 struct pnp_option *dep; 460 struct pnp_option *dep;
448 int i = 1; 461 int i = 1;
449 462
450 if(!dev) 463 if (!dev)
451 return -EINVAL; 464 return -EINVAL;
452 465
453 if(!pnp_can_configure(dev)) { 466 if (!pnp_can_configure(dev)) {
454 pnp_dbg("Device %s does not support resource configuration.", dev->dev.bus_id); 467 pnp_dbg("Device %s does not support resource configuration.",
468 dev->dev.bus_id);
455 return -ENODEV; 469 return -ENODEV;
456 } 470 }
457 471
@@ -476,23 +490,22 @@ int pnp_auto_config_dev(struct pnp_dev *dev)
476 * pnp_start_dev - low-level start of the PnP device 490 * pnp_start_dev - low-level start of the PnP device
477 * @dev: pointer to the desired device 491 * @dev: pointer to the desired device
478 * 492 *
479 * assumes that resources have alread been allocated 493 * assumes that resources have already been allocated
480 */ 494 */
481
482int pnp_start_dev(struct pnp_dev *dev) 495int pnp_start_dev(struct pnp_dev *dev)
483{ 496{
484 if (!pnp_can_write(dev)) { 497 if (!pnp_can_write(dev)) {
485 pnp_dbg("Device %s does not support activation.", dev->dev.bus_id); 498 pnp_dbg("Device %s does not support activation.",
499 dev->dev.bus_id);
486 return -EINVAL; 500 return -EINVAL;
487 } 501 }
488 502
489 if (dev->protocol->set(dev, &dev->res)<0) { 503 if (dev->protocol->set(dev, &dev->res) < 0) {
490 pnp_err("Failed to activate device %s.", dev->dev.bus_id); 504 pnp_err("Failed to activate device %s.", dev->dev.bus_id);
491 return -EIO; 505 return -EIO;
492 } 506 }
493 507
494 pnp_info("Device %s activated.", dev->dev.bus_id); 508 pnp_info("Device %s activated.", dev->dev.bus_id);
495
496 return 0; 509 return 0;
497} 510}
498 511
@@ -502,20 +515,19 @@ int pnp_start_dev(struct pnp_dev *dev)
502 * 515 *
503 * does not free resources 516 * does not free resources
504 */ 517 */
505
506int pnp_stop_dev(struct pnp_dev *dev) 518int pnp_stop_dev(struct pnp_dev *dev)
507{ 519{
508 if (!pnp_can_disable(dev)) { 520 if (!pnp_can_disable(dev)) {
509 pnp_dbg("Device %s does not support disabling.", dev->dev.bus_id); 521 pnp_dbg("Device %s does not support disabling.",
522 dev->dev.bus_id);
510 return -EINVAL; 523 return -EINVAL;
511 } 524 }
512 if (dev->protocol->disable(dev)<0) { 525 if (dev->protocol->disable(dev) < 0) {
513 pnp_err("Failed to disable device %s.", dev->dev.bus_id); 526 pnp_err("Failed to disable device %s.", dev->dev.bus_id);
514 return -EIO; 527 return -EIO;
515 } 528 }
516 529
517 pnp_info("Device %s disabled.", dev->dev.bus_id); 530 pnp_info("Device %s disabled.", dev->dev.bus_id);
518
519 return 0; 531 return 0;
520} 532}
521 533
@@ -531,9 +543,8 @@ int pnp_activate_dev(struct pnp_dev *dev)
531 543
532 if (!dev) 544 if (!dev)
533 return -EINVAL; 545 return -EINVAL;
534 if (dev->active) { 546 if (dev->active)
535 return 0; /* the device is already active */ 547 return 0; /* the device is already active */
536 }
537 548
538 /* ensure resources are allocated */ 549 /* ensure resources are allocated */
539 if (pnp_auto_config_dev(dev)) 550 if (pnp_auto_config_dev(dev))
@@ -544,7 +555,6 @@ int pnp_activate_dev(struct pnp_dev *dev)
544 return error; 555 return error;
545 556
546 dev->active = 1; 557 dev->active = 1;
547
548 return 1; 558 return 1;
549} 559}
550 560
@@ -558,11 +568,10 @@ int pnp_disable_dev(struct pnp_dev *dev)
558{ 568{
559 int error; 569 int error;
560 570
561 if (!dev) 571 if (!dev)
562 return -EINVAL; 572 return -EINVAL;
563 if (!dev->active) { 573 if (!dev->active)
564 return 0; /* the device is already disabled */ 574 return 0; /* the device is already disabled */
565 }
566 575
567 error = pnp_stop_dev(dev); 576 error = pnp_stop_dev(dev);
568 if (error) 577 if (error)
@@ -583,10 +592,9 @@ int pnp_disable_dev(struct pnp_dev *dev)
583 * @resource: pointer to resource to be changed 592 * @resource: pointer to resource to be changed
584 * @start: start of region 593 * @start: start of region
585 * @size: size of region 594 * @size: size of region
586 *
587 */ 595 */
588void pnp_resource_change(struct resource *resource, resource_size_t start, 596void pnp_resource_change(struct resource *resource, resource_size_t start,
589 resource_size_t size) 597 resource_size_t size)
590{ 598{
591 if (resource == NULL) 599 if (resource == NULL)
592 return; 600 return;
@@ -595,11 +603,7 @@ void pnp_resource_change(struct resource *resource, resource_size_t start,
595 resource->end = start + size - 1; 603 resource->end = start + size - 1;
596} 604}
597 605
598
599EXPORT_SYMBOL(pnp_manual_config_dev); 606EXPORT_SYMBOL(pnp_manual_config_dev);
600#if 0
601EXPORT_SYMBOL(pnp_auto_config_dev);
602#endif
603EXPORT_SYMBOL(pnp_start_dev); 607EXPORT_SYMBOL(pnp_start_dev);
604EXPORT_SYMBOL(pnp_stop_dev); 608EXPORT_SYMBOL(pnp_stop_dev);
605EXPORT_SYMBOL(pnp_activate_dev); 609EXPORT_SYMBOL(pnp_activate_dev);
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index fcd32ac575c3..6a2a3c2f4d5e 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -34,13 +34,13 @@ static int num = 0;
34 * used by the kernel (PCI root, ...), as it is harmless and they were 34 * used by the kernel (PCI root, ...), as it is harmless and they were
35 * already present in pnpbios. But there is an exception for devices that 35 * already present in pnpbios. But there is an exception for devices that
36 * have irqs (PIC, Timer) because we call acpi_register_gsi. 36 * have irqs (PIC, Timer) because we call acpi_register_gsi.
37 * Finaly only devices that have a CRS method need to be in this list. 37 * Finally, only devices that have a CRS method need to be in this list.
38 */ 38 */
39static __initdata struct acpi_device_id excluded_id_list[] ={ 39static struct __initdata acpi_device_id excluded_id_list[] = {
40 {"PNP0C09", 0}, /* EC */ 40 {"PNP0C09", 0}, /* EC */
41 {"PNP0C0F", 0}, /* Link device */ 41 {"PNP0C0F", 0}, /* Link device */
42 {"PNP0000", 0}, /* PIC */ 42 {"PNP0000", 0}, /* PIC */
43 {"PNP0100", 0}, /* Timer */ 43 {"PNP0100", 0}, /* Timer */
44 {"", 0}, 44 {"", 0},
45}; 45};
46 46
@@ -84,15 +84,18 @@ static void __init pnpidacpi_to_pnpid(char *id, char *str)
84 str[7] = '\0'; 84 str[7] = '\0';
85} 85}
86 86
87static int pnpacpi_get_resources(struct pnp_dev * dev, struct pnp_resource_table * res) 87static int pnpacpi_get_resources(struct pnp_dev *dev,
88 struct pnp_resource_table *res)
88{ 89{
89 acpi_status status; 90 acpi_status status;
90 status = pnpacpi_parse_allocated_resource((acpi_handle)dev->data, 91
91 &dev->res); 92 status = pnpacpi_parse_allocated_resource((acpi_handle) dev->data,
93 &dev->res);
92 return ACPI_FAILURE(status) ? -ENODEV : 0; 94 return ACPI_FAILURE(status) ? -ENODEV : 0;
93} 95}
94 96
95static int pnpacpi_set_resources(struct pnp_dev * dev, struct pnp_resource_table * res) 97static int pnpacpi_set_resources(struct pnp_dev *dev,
98 struct pnp_resource_table *res)
96{ 99{
97 acpi_handle handle = dev->data; 100 acpi_handle handle = dev->data;
98 struct acpi_buffer buffer; 101 struct acpi_buffer buffer;
@@ -119,27 +122,29 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev)
119 acpi_status status; 122 acpi_status status;
120 123
121 /* acpi_unregister_gsi(pnp_irq(dev, 0)); */ 124 /* acpi_unregister_gsi(pnp_irq(dev, 0)); */
122 status = acpi_evaluate_object((acpi_handle)dev->data, 125 status = acpi_evaluate_object((acpi_handle) dev->data,
123 "_DIS", NULL, NULL); 126 "_DIS", NULL, NULL);
124 return ACPI_FAILURE(status) ? -ENODEV : 0; 127 return ACPI_FAILURE(status) ? -ENODEV : 0;
125} 128}
126 129
127static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state) 130static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
128{ 131{
129 return acpi_bus_set_power((acpi_handle)dev->data, 132 return acpi_bus_set_power((acpi_handle) dev->data,
130 acpi_pm_device_sleep_state(&dev->dev, 133 acpi_pm_device_sleep_state(&dev->dev,
131 device_may_wakeup(&dev->dev), NULL)); 134 device_may_wakeup
135 (&dev->dev),
136 NULL));
132} 137}
133 138
134static int pnpacpi_resume(struct pnp_dev *dev) 139static int pnpacpi_resume(struct pnp_dev *dev)
135{ 140{
136 return acpi_bus_set_power((acpi_handle)dev->data, ACPI_STATE_D0); 141 return acpi_bus_set_power((acpi_handle) dev->data, ACPI_STATE_D0);
137} 142}
138 143
139static struct pnp_protocol pnpacpi_protocol = { 144static struct pnp_protocol pnpacpi_protocol = {
140 .name = "Plug and Play ACPI", 145 .name = "Plug and Play ACPI",
141 .get = pnpacpi_get_resources, 146 .get = pnpacpi_get_resources,
142 .set = pnpacpi_set_resources, 147 .set = pnpacpi_set_resources,
143 .disable = pnpacpi_disable_resources, 148 .disable = pnpacpi_disable_resources,
144 .suspend = pnpacpi_suspend, 149 .suspend = pnpacpi_suspend,
145 .resume = pnpacpi_resume, 150 .resume = pnpacpi_resume,
@@ -154,17 +159,17 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
154 159
155 status = acpi_get_handle(device->handle, "_CRS", &temp); 160 status = acpi_get_handle(device->handle, "_CRS", &temp);
156 if (ACPI_FAILURE(status) || !ispnpidacpi(acpi_device_hid(device)) || 161 if (ACPI_FAILURE(status) || !ispnpidacpi(acpi_device_hid(device)) ||
157 is_exclusive_device(device)) 162 is_exclusive_device(device))
158 return 0; 163 return 0;
159 164
160 pnp_dbg("ACPI device : hid %s", acpi_device_hid(device)); 165 pnp_dbg("ACPI device : hid %s", acpi_device_hid(device));
161 dev = kzalloc(sizeof(struct pnp_dev), GFP_KERNEL); 166 dev = kzalloc(sizeof(struct pnp_dev), GFP_KERNEL);
162 if (!dev) { 167 if (!dev) {
163 pnp_err("Out of memory"); 168 pnp_err("Out of memory");
164 return -ENOMEM; 169 return -ENOMEM;
165 } 170 }
166 dev->data = device->handle; 171 dev->data = device->handle;
167 /* .enabled means if the device can decode the resources */ 172 /* .enabled means the device can decode the resources */
168 dev->active = device->status.enabled; 173 dev->active = device->status.enabled;
169 status = acpi_get_handle(device->handle, "_SRS", &temp); 174 status = acpi_get_handle(device->handle, "_SRS", &temp);
170 if (ACPI_SUCCESS(status)) 175 if (ACPI_SUCCESS(status))
@@ -194,20 +199,23 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
194 pnpidacpi_to_pnpid(acpi_device_hid(device), dev_id->id); 199 pnpidacpi_to_pnpid(acpi_device_hid(device), dev_id->id);
195 pnp_add_id(dev_id, dev); 200 pnp_add_id(dev_id, dev);
196 201
197 if(dev->active) { 202 if (dev->active) {
198 /* parse allocated resource */ 203 /* parse allocated resource */
199 status = pnpacpi_parse_allocated_resource(device->handle, &dev->res); 204 status = pnpacpi_parse_allocated_resource(device->handle,
205 &dev->res);
200 if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { 206 if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) {
201 pnp_err("PnPACPI: METHOD_NAME__CRS failure for %s", dev_id->id); 207 pnp_err("PnPACPI: METHOD_NAME__CRS failure for %s",
208 dev_id->id);
202 goto err1; 209 goto err1;
203 } 210 }
204 } 211 }
205 212
206 if(dev->capabilities & PNP_CONFIGURABLE) { 213 if (dev->capabilities & PNP_CONFIGURABLE) {
207 status = pnpacpi_parse_resource_option_data(device->handle, 214 status = pnpacpi_parse_resource_option_data(device->handle,
208 dev); 215 dev);
209 if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) { 216 if (ACPI_FAILURE(status) && (status != AE_NOT_FOUND)) {
210 pnp_err("PnPACPI: METHOD_NAME__PRS failure for %s", dev_id->id); 217 pnp_err("PnPACPI: METHOD_NAME__PRS failure for %s",
218 dev_id->id);
211 goto err1; 219 goto err1;
212 } 220 }
213 } 221 }
@@ -233,18 +241,19 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
233 if (!dev->active) 241 if (!dev->active)
234 pnp_init_resource_table(&dev->res); 242 pnp_init_resource_table(&dev->res);
235 pnp_add_device(dev); 243 pnp_add_device(dev);
236 num ++; 244 num++;
237 245
238 return AE_OK; 246 return AE_OK;
239err1: 247 err1:
240 kfree(dev_id); 248 kfree(dev_id);
241err: 249 err:
242 kfree(dev); 250 kfree(dev);
243 return -EINVAL; 251 return -EINVAL;
244} 252}
245 253
246static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle, 254static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle,
247 u32 lvl, void *context, void **rv) 255 u32 lvl, void *context,
256 void **rv)
248{ 257{
249 struct acpi_device *device; 258 struct acpi_device *device;
250 259
@@ -257,23 +266,22 @@ static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle,
257 266
258static int __init acpi_pnp_match(struct device *dev, void *_pnp) 267static int __init acpi_pnp_match(struct device *dev, void *_pnp)
259{ 268{
260 struct acpi_device *acpi = to_acpi_device(dev); 269 struct acpi_device *acpi = to_acpi_device(dev);
261 struct pnp_dev *pnp = _pnp; 270 struct pnp_dev *pnp = _pnp;
262 271
263 /* true means it matched */ 272 /* true means it matched */
264 return acpi->flags.hardware_id 273 return acpi->flags.hardware_id
265 && !acpi_get_physical_device(acpi->handle) 274 && !acpi_get_physical_device(acpi->handle)
266 && compare_pnp_id(pnp->id, acpi->pnp.hardware_id); 275 && compare_pnp_id(pnp->id, acpi->pnp.hardware_id);
267} 276}
268 277
269static int __init acpi_pnp_find_device(struct device *dev, acpi_handle *handle) 278static int __init acpi_pnp_find_device(struct device *dev, acpi_handle * handle)
270{ 279{
271 struct device *adev; 280 struct device *adev;
272 struct acpi_device *acpi; 281 struct acpi_device *acpi;
273 282
274 adev = bus_find_device(&acpi_bus_type, NULL, 283 adev = bus_find_device(&acpi_bus_type, NULL,
275 to_pnp_dev(dev), 284 to_pnp_dev(dev), acpi_pnp_match);
276 acpi_pnp_match);
277 if (!adev) 285 if (!adev)
278 return -ENODEV; 286 return -ENODEV;
279 287
@@ -287,7 +295,7 @@ static int __init acpi_pnp_find_device(struct device *dev, acpi_handle *handle)
287 * pnpdev->dev.archdata.acpi_handle point to its ACPI sibling. 295 * pnpdev->dev.archdata.acpi_handle point to its ACPI sibling.
288 */ 296 */
289static struct acpi_bus_type __initdata acpi_pnp_bus = { 297static struct acpi_bus_type __initdata acpi_pnp_bus = {
290 .bus = &pnp_bus_type, 298 .bus = &pnp_bus_type,
291 .find_device = acpi_pnp_find_device, 299 .find_device = acpi_pnp_find_device,
292}; 300};
293 301
@@ -307,6 +315,7 @@ static int __init pnpacpi_init(void)
307 pnp_platform_devices = 1; 315 pnp_platform_devices = 1;
308 return 0; 316 return 0;
309} 317}
318
310subsys_initcall(pnpacpi_init); 319subsys_initcall(pnpacpi_init);
311 320
312static int __init pnpacpi_setup(char *str) 321static int __init pnpacpi_setup(char *str)
@@ -317,8 +326,5 @@ static int __init pnpacpi_setup(char *str)
317 pnpacpi_disabled = 1; 326 pnpacpi_disabled = 1;
318 return 1; 327 return 1;
319} 328}
320__setup("pnpacpi=", pnpacpi_setup);
321 329
322#if 0 330__setup("pnpacpi=", pnpacpi_setup);
323EXPORT_SYMBOL(pnpacpi_protocol);
324#endif
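excluded_id_list[] above is terminated by an entry with an empty id string, the usual sentinel convention for ACPI/PNP id tables, and is_exclusive_device()-style checks walk it until that sentinel. A standalone sketch of the pattern using the same four ids:

#include <stdio.h>
#include <string.h>

/* Same shape as excluded_id_list[]: the table ends with an empty id. */
struct id { const char *hid; };

static const struct id excluded[] = {
    { "PNP0C09" }, /* EC */
    { "PNP0C0F" }, /* Link device */
    { "PNP0000" }, /* PIC */
    { "PNP0100" }, /* Timer */
    { "" },        /* sentinel */
};

/* Walk the table until the sentinel; the usual pattern behind
 * is_exclusive_device()-style lookups. */
static int is_excluded(const char *hid)
{
    const struct id *p;

    for (p = excluded; p->hid[0]; p++)
        if (strcmp(p->hid, hid) == 0)
            return 1;
    return 0;
}

int main(void)
{
    printf("%d %d\n", is_excluded("PNP0000"), is_excluded("PNP0A03")); /* 1 0 */
    return 0;
}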
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 118ac9779b3c..ce5027feb3da 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -40,8 +40,7 @@ static int irq_flags(int triggering, int polarity)
40 flag = IORESOURCE_IRQ_LOWLEVEL; 40 flag = IORESOURCE_IRQ_LOWLEVEL;
41 else 41 else
42 flag = IORESOURCE_IRQ_HIGHLEVEL; 42 flag = IORESOURCE_IRQ_HIGHLEVEL;
43 } 43 } else {
44 else {
45 if (polarity == ACPI_ACTIVE_LOW) 44 if (polarity == ACPI_ACTIVE_LOW)
46 flag = IORESOURCE_IRQ_LOWEDGE; 45 flag = IORESOURCE_IRQ_LOWEDGE;
47 else 46 else
@@ -72,9 +71,9 @@ static void decode_irq_flags(int flag, int *triggering, int *polarity)
72 } 71 }
73} 72}
74 73
75static void 74static void pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res,
76pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi, 75 u32 gsi, int triggering,
77 int triggering, int polarity, int shareable) 76 int polarity, int shareable)
78{ 77{
79 int i = 0; 78 int i = 0;
80 int irq; 79 int irq;
@@ -83,12 +82,12 @@ pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi,
83 return; 82 return;
84 83
85 while (!(res->irq_resource[i].flags & IORESOURCE_UNSET) && 84 while (!(res->irq_resource[i].flags & IORESOURCE_UNSET) &&
86 i < PNP_MAX_IRQ) 85 i < PNP_MAX_IRQ)
87 i++; 86 i++;
88 if (i >= PNP_MAX_IRQ) 87 if (i >= PNP_MAX_IRQ)
89 return; 88 return;
90 89
91 res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag 90 res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag
92 res->irq_resource[i].flags |= irq_flags(triggering, polarity); 91 res->irq_resource[i].flags |= irq_flags(triggering, polarity);
93 irq = acpi_register_gsi(gsi, triggering, polarity); 92 irq = acpi_register_gsi(gsi, triggering, polarity);
94 if (irq < 0) { 93 if (irq < 0) {
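Worth noting: the slot search above evaluates res->irq_resource[i].flags before testing i < PNP_MAX_IRQ, so when the table is full the flags of the element one past the end are read once before the loop stops; the DMA variant in the next hunk puts the bounds test first, while the I/O and memory loops share the IRQ ordering. A sketch of the safer operand order (sizes and flag bits are illustrative):

#include <stdio.h>

#define MAX_SLOTS 2           /* illustrative size */
#define UNSET     0x1u        /* illustrative flag bit */

static unsigned int flags[MAX_SLOTS] = { 0, 0 }; /* both slots occupied */

/* Bounds test first, array access second: with a full table the loop
 * stops at i == MAX_SLOTS without ever reading flags[MAX_SLOTS]. */
static int first_unset(void)
{
    int i = 0;

    while (i < MAX_SLOTS && !(flags[i] & UNSET))
        i++;
    return i < MAX_SLOTS ? i : -1;
}

int main(void)
{
    printf("%d\n", first_unset()); /* -1: table full, no out-of-bounds read */
    return 0;
}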
@@ -147,17 +146,19 @@ static int dma_flags(int type, int bus_master, int transfer)
147 return flags; 146 return flags;
148} 147}
149 148
150static void 149static void pnpacpi_parse_allocated_dmaresource(struct pnp_resource_table *res,
151pnpacpi_parse_allocated_dmaresource(struct pnp_resource_table *res, u32 dma, 150 u32 dma, int type,
152 int type, int bus_master, int transfer) 151 int bus_master, int transfer)
153{ 152{
154 int i = 0; 153 int i = 0;
154
155 while (i < PNP_MAX_DMA && 155 while (i < PNP_MAX_DMA &&
156 !(res->dma_resource[i].flags & IORESOURCE_UNSET)) 156 !(res->dma_resource[i].flags & IORESOURCE_UNSET))
157 i++; 157 i++;
158 if (i < PNP_MAX_DMA) { 158 if (i < PNP_MAX_DMA) {
159 res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag 159 res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag
160 res->dma_resource[i].flags |= dma_flags(type, bus_master, transfer); 160 res->dma_resource[i].flags |=
161 dma_flags(type, bus_master, transfer);
161 if (dma == -1) { 162 if (dma == -1) {
162 res->dma_resource[i].flags |= IORESOURCE_DISABLED; 163 res->dma_resource[i].flags |= IORESOURCE_DISABLED;
163 return; 164 return;
@@ -167,19 +168,19 @@ pnpacpi_parse_allocated_dmaresource(struct pnp_resource_table *res, u32 dma,
167 } 168 }
168} 169}
169 170
170static void 171static void pnpacpi_parse_allocated_ioresource(struct pnp_resource_table *res,
171pnpacpi_parse_allocated_ioresource(struct pnp_resource_table *res, 172 u64 io, u64 len, int io_decode)
172 u64 io, u64 len, int io_decode)
173{ 173{
174 int i = 0; 174 int i = 0;
175
175 while (!(res->port_resource[i].flags & IORESOURCE_UNSET) && 176 while (!(res->port_resource[i].flags & IORESOURCE_UNSET) &&
176 i < PNP_MAX_PORT) 177 i < PNP_MAX_PORT)
177 i++; 178 i++;
178 if (i < PNP_MAX_PORT) { 179 if (i < PNP_MAX_PORT) {
179 res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag 180 res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag
180 if (io_decode == ACPI_DECODE_16) 181 if (io_decode == ACPI_DECODE_16)
181 res->port_resource[i].flags |= PNP_PORT_FLAG_16BITADDR; 182 res->port_resource[i].flags |= PNP_PORT_FLAG_16BITADDR;
182 if (len <= 0 || (io + len -1) >= 0x10003) { 183 if (len <= 0 || (io + len - 1) >= 0x10003) {
183 res->port_resource[i].flags |= IORESOURCE_DISABLED; 184 res->port_resource[i].flags |= IORESOURCE_DISABLED;
184 return; 185 return;
185 } 186 }
@@ -188,21 +189,22 @@ pnpacpi_parse_allocated_ioresource(struct pnp_resource_table *res,
188 } 189 }
189} 190}
190 191
191static void 192static void pnpacpi_parse_allocated_memresource(struct pnp_resource_table *res,
192pnpacpi_parse_allocated_memresource(struct pnp_resource_table *res, 193 u64 mem, u64 len,
193 u64 mem, u64 len, int write_protect) 194 int write_protect)
194{ 195{
195 int i = 0; 196 int i = 0;
197
196 while (!(res->mem_resource[i].flags & IORESOURCE_UNSET) && 198 while (!(res->mem_resource[i].flags & IORESOURCE_UNSET) &&
197 (i < PNP_MAX_MEM)) 199 (i < PNP_MAX_MEM))
198 i++; 200 i++;
199 if (i < PNP_MAX_MEM) { 201 if (i < PNP_MAX_MEM) {
200 res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag 202 res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag
201 if (len <= 0) { 203 if (len <= 0) {
202 res->mem_resource[i].flags |= IORESOURCE_DISABLED; 204 res->mem_resource[i].flags |= IORESOURCE_DISABLED;
203 return; 205 return;
204 } 206 }
205 if(write_protect == ACPI_READ_WRITE_MEMORY) 207 if (write_protect == ACPI_READ_WRITE_MEMORY)
206 res->mem_resource[i].flags |= IORESOURCE_MEM_WRITEABLE; 208 res->mem_resource[i].flags |= IORESOURCE_MEM_WRITEABLE;
207 209
208 res->mem_resource[i].start = mem; 210 res->mem_resource[i].start = mem;
@@ -210,9 +212,8 @@ pnpacpi_parse_allocated_memresource(struct pnp_resource_table *res,
210 } 212 }
211} 213}
212 214
213static void 215static void pnpacpi_parse_allocated_address_space(struct pnp_resource_table *res_table,
214pnpacpi_parse_allocated_address_space(struct pnp_resource_table *res_table, 216 struct acpi_resource *res)
215 struct acpi_resource *res)
216{ 217{
217 struct acpi_resource_address64 addr, *p = &addr; 218 struct acpi_resource_address64 addr, *p = &addr;
218 acpi_status status; 219 acpi_status status;
@@ -220,7 +221,7 @@ pnpacpi_parse_allocated_address_space(struct pnp_resource_table *res_table,
220 status = acpi_resource_to_address64(res, p); 221 status = acpi_resource_to_address64(res, p);
221 if (!ACPI_SUCCESS(status)) { 222 if (!ACPI_SUCCESS(status)) {
222 pnp_warn("PnPACPI: failed to convert resource type %d", 223 pnp_warn("PnPACPI: failed to convert resource type %d",
223 res->type); 224 res->type);
224 return; 225 return;
225 } 226 }
226 227
@@ -229,17 +230,20 @@ pnpacpi_parse_allocated_address_space(struct pnp_resource_table *res_table,
229 230
230 if (p->resource_type == ACPI_MEMORY_RANGE) 231 if (p->resource_type == ACPI_MEMORY_RANGE)
231 pnpacpi_parse_allocated_memresource(res_table, 232 pnpacpi_parse_allocated_memresource(res_table,
232 p->minimum, p->address_length, p->info.mem.write_protect); 233 p->minimum, p->address_length,
234 p->info.mem.write_protect);
233 else if (p->resource_type == ACPI_IO_RANGE) 235 else if (p->resource_type == ACPI_IO_RANGE)
234 pnpacpi_parse_allocated_ioresource(res_table, 236 pnpacpi_parse_allocated_ioresource(res_table,
235 p->minimum, p->address_length, 237 p->minimum, p->address_length,
236 p->granularity == 0xfff ? ACPI_DECODE_10 : ACPI_DECODE_16); 238 p->granularity == 0xfff ? ACPI_DECODE_10 :
239 ACPI_DECODE_16);
237} 240}
238 241
239static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, 242static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
240 void *data) 243 void *data)
241{ 244{
242 struct pnp_resource_table *res_table = (struct pnp_resource_table *)data; 245 struct pnp_resource_table *res_table =
246 (struct pnp_resource_table *)data;
243 int i; 247 int i;
244 248
245 switch (res->type) { 249 switch (res->type) {
@@ -260,17 +264,17 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
260 case ACPI_RESOURCE_TYPE_DMA: 264 case ACPI_RESOURCE_TYPE_DMA:
261 if (res->data.dma.channel_count > 0) 265 if (res->data.dma.channel_count > 0)
262 pnpacpi_parse_allocated_dmaresource(res_table, 266 pnpacpi_parse_allocated_dmaresource(res_table,
263 res->data.dma.channels[0], 267 res->data.dma.channels[0],
264 res->data.dma.type, 268 res->data.dma.type,
265 res->data.dma.bus_master, 269 res->data.dma.bus_master,
266 res->data.dma.transfer); 270 res->data.dma.transfer);
267 break; 271 break;
268 272
269 case ACPI_RESOURCE_TYPE_IO: 273 case ACPI_RESOURCE_TYPE_IO:
270 pnpacpi_parse_allocated_ioresource(res_table, 274 pnpacpi_parse_allocated_ioresource(res_table,
271 res->data.io.minimum, 275 res->data.io.minimum,
272 res->data.io.address_length, 276 res->data.io.address_length,
273 res->data.io.io_decode); 277 res->data.io.io_decode);
274 break; 278 break;
275 279
276 case ACPI_RESOURCE_TYPE_START_DEPENDENT: 280 case ACPI_RESOURCE_TYPE_START_DEPENDENT:
@@ -279,9 +283,9 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
279 283
280 case ACPI_RESOURCE_TYPE_FIXED_IO: 284 case ACPI_RESOURCE_TYPE_FIXED_IO:
281 pnpacpi_parse_allocated_ioresource(res_table, 285 pnpacpi_parse_allocated_ioresource(res_table,
282 res->data.fixed_io.address, 286 res->data.fixed_io.address,
283 res->data.fixed_io.address_length, 287 res->data.fixed_io.address_length,
284 ACPI_DECODE_10); 288 ACPI_DECODE_10);
285 break; 289 break;
286 290
287 case ACPI_RESOURCE_TYPE_VENDOR: 291 case ACPI_RESOURCE_TYPE_VENDOR:
@@ -292,21 +296,21 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
292 296
293 case ACPI_RESOURCE_TYPE_MEMORY24: 297 case ACPI_RESOURCE_TYPE_MEMORY24:
294 pnpacpi_parse_allocated_memresource(res_table, 298 pnpacpi_parse_allocated_memresource(res_table,
295 res->data.memory24.minimum, 299 res->data.memory24.minimum,
296 res->data.memory24.address_length, 300 res->data.memory24.address_length,
297 res->data.memory24.write_protect); 301 res->data.memory24.write_protect);
298 break; 302 break;
299 case ACPI_RESOURCE_TYPE_MEMORY32: 303 case ACPI_RESOURCE_TYPE_MEMORY32:
300 pnpacpi_parse_allocated_memresource(res_table, 304 pnpacpi_parse_allocated_memresource(res_table,
301 res->data.memory32.minimum, 305 res->data.memory32.minimum,
302 res->data.memory32.address_length, 306 res->data.memory32.address_length,
303 res->data.memory32.write_protect); 307 res->data.memory32.write_protect);
304 break; 308 break;
305 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: 309 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
306 pnpacpi_parse_allocated_memresource(res_table, 310 pnpacpi_parse_allocated_memresource(res_table,
307 res->data.fixed_memory32.address, 311 res->data.fixed_memory32.address,
308 res->data.fixed_memory32.address_length, 312 res->data.fixed_memory32.address_length,
309 res->data.fixed_memory32.write_protect); 313 res->data.fixed_memory32.write_protect);
310 break; 314 break;
311 case ACPI_RESOURCE_TYPE_ADDRESS16: 315 case ACPI_RESOURCE_TYPE_ADDRESS16:
312 case ACPI_RESOURCE_TYPE_ADDRESS32: 316 case ACPI_RESOURCE_TYPE_ADDRESS32:
@@ -343,18 +347,21 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
343 return AE_OK; 347 return AE_OK;
344} 348}
345 349
346acpi_status pnpacpi_parse_allocated_resource(acpi_handle handle, struct pnp_resource_table *res) 350acpi_status pnpacpi_parse_allocated_resource(acpi_handle handle,
351 struct pnp_resource_table * res)
347{ 352{
348 /* Blank the resource table values */ 353 /* Blank the resource table values */
349 pnp_init_resource_table(res); 354 pnp_init_resource_table(res);
350 355
351 return acpi_walk_resources(handle, METHOD_NAME__CRS, pnpacpi_allocated_resource, res); 356 return acpi_walk_resources(handle, METHOD_NAME__CRS,
357 pnpacpi_allocated_resource, res);
352} 358}
353 359
354static void pnpacpi_parse_dma_option(struct pnp_option *option, struct acpi_resource_dma *p) 360static void pnpacpi_parse_dma_option(struct pnp_option *option,
361 struct acpi_resource_dma *p)
355{ 362{
356 int i; 363 int i;
357 struct pnp_dma * dma; 364 struct pnp_dma *dma;
358 365
359 if (p->channel_count == 0) 366 if (p->channel_count == 0)
360 return; 367 return;
@@ -362,18 +369,16 @@ static void pnpacpi_parse_dma_option(struct pnp_option *option, struct acpi_reso
362 if (!dma) 369 if (!dma)
363 return; 370 return;
364 371
365 for(i = 0; i < p->channel_count; i++) 372 for (i = 0; i < p->channel_count; i++)
366 dma->map |= 1 << p->channels[i]; 373 dma->map |= 1 << p->channels[i];
367 374
368 dma->flags = dma_flags(p->type, p->bus_master, p->transfer); 375 dma->flags = dma_flags(p->type, p->bus_master, p->transfer);
369 376
370 pnp_register_dma_resource(option, dma); 377 pnp_register_dma_resource(option, dma);
371 return;
372} 378}
373 379
374
375static void pnpacpi_parse_irq_option(struct pnp_option *option, 380static void pnpacpi_parse_irq_option(struct pnp_option *option,
376 struct acpi_resource_irq *p) 381 struct acpi_resource_irq *p)
377{ 382{
378 int i; 383 int i;
379 struct pnp_irq *irq; 384 struct pnp_irq *irq;
@@ -384,17 +389,16 @@ static void pnpacpi_parse_irq_option(struct pnp_option *option,
384 if (!irq) 389 if (!irq)
385 return; 390 return;
386 391
387 for(i = 0; i < p->interrupt_count; i++) 392 for (i = 0; i < p->interrupt_count; i++)
388 if (p->interrupts[i]) 393 if (p->interrupts[i])
389 __set_bit(p->interrupts[i], irq->map); 394 __set_bit(p->interrupts[i], irq->map);
390 irq->flags = irq_flags(p->triggering, p->polarity); 395 irq->flags = irq_flags(p->triggering, p->polarity);
391 396
392 pnp_register_irq_resource(option, irq); 397 pnp_register_irq_resource(option, irq);
393 return;
394} 398}
395 399
396static void pnpacpi_parse_ext_irq_option(struct pnp_option *option, 400static void pnpacpi_parse_ext_irq_option(struct pnp_option *option,
397 struct acpi_resource_extended_irq *p) 401 struct acpi_resource_extended_irq *p)
398{ 402{
399 int i; 403 int i;
400 struct pnp_irq *irq; 404 struct pnp_irq *irq;
@@ -405,18 +409,16 @@ static void pnpacpi_parse_ext_irq_option(struct pnp_option *option,
405 if (!irq) 409 if (!irq)
406 return; 410 return;
407 411
408 for(i = 0; i < p->interrupt_count; i++) 412 for (i = 0; i < p->interrupt_count; i++)
409 if (p->interrupts[i]) 413 if (p->interrupts[i])
410 __set_bit(p->interrupts[i], irq->map); 414 __set_bit(p->interrupts[i], irq->map);
411 irq->flags = irq_flags(p->triggering, p->polarity); 415 irq->flags = irq_flags(p->triggering, p->polarity);
412 416
413 pnp_register_irq_resource(option, irq); 417 pnp_register_irq_resource(option, irq);
414 return;
415} 418}
416 419
417static void 420static void pnpacpi_parse_port_option(struct pnp_option *option,
418pnpacpi_parse_port_option(struct pnp_option *option, 421 struct acpi_resource_io *io)
419 struct acpi_resource_io *io)
420{ 422{
421 struct pnp_port *port; 423 struct pnp_port *port;
422 424
@@ -430,14 +432,12 @@ pnpacpi_parse_port_option(struct pnp_option *option,
430 port->align = io->alignment; 432 port->align = io->alignment;
431 port->size = io->address_length; 433 port->size = io->address_length;
432 port->flags = ACPI_DECODE_16 == io->io_decode ? 434 port->flags = ACPI_DECODE_16 == io->io_decode ?
433 PNP_PORT_FLAG_16BITADDR : 0; 435 PNP_PORT_FLAG_16BITADDR : 0;
434 pnp_register_port_resource(option, port); 436 pnp_register_port_resource(option, port);
435 return;
436} 437}
437 438
438static void 439static void pnpacpi_parse_fixed_port_option(struct pnp_option *option,
439pnpacpi_parse_fixed_port_option(struct pnp_option *option, 440 struct acpi_resource_fixed_io *io)
440 struct acpi_resource_fixed_io *io)
441{ 441{
442 struct pnp_port *port; 442 struct pnp_port *port;
443 443
@@ -451,12 +451,10 @@ pnpacpi_parse_fixed_port_option(struct pnp_option *option,
451 port->align = 0; 451 port->align = 0;
452 port->flags = PNP_PORT_FLAG_FIXED; 452 port->flags = PNP_PORT_FLAG_FIXED;
453 pnp_register_port_resource(option, port); 453 pnp_register_port_resource(option, port);
454 return;
455} 454}
456 455
457static void 456static void pnpacpi_parse_mem24_option(struct pnp_option *option,
458pnpacpi_parse_mem24_option(struct pnp_option *option, 457 struct acpi_resource_memory24 *p)
459 struct acpi_resource_memory24 *p)
460{ 458{
461 struct pnp_mem *mem; 459 struct pnp_mem *mem;
462 460
@@ -471,15 +469,13 @@ pnpacpi_parse_mem24_option(struct pnp_option *option,
471 mem->size = p->address_length; 469 mem->size = p->address_length;
472 470
473 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? 471 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ?
474 IORESOURCE_MEM_WRITEABLE : 0; 472 IORESOURCE_MEM_WRITEABLE : 0;
475 473
476 pnp_register_mem_resource(option, mem); 474 pnp_register_mem_resource(option, mem);
477 return;
478} 475}
479 476
480static void 477static void pnpacpi_parse_mem32_option(struct pnp_option *option,
481pnpacpi_parse_mem32_option(struct pnp_option *option, 478 struct acpi_resource_memory32 *p)
482 struct acpi_resource_memory32 *p)
483{ 479{
484 struct pnp_mem *mem; 480 struct pnp_mem *mem;
485 481
@@ -494,15 +490,13 @@ pnpacpi_parse_mem32_option(struct pnp_option *option,
494 mem->size = p->address_length; 490 mem->size = p->address_length;
495 491
496 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? 492 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ?
497 IORESOURCE_MEM_WRITEABLE : 0; 493 IORESOURCE_MEM_WRITEABLE : 0;
498 494
499 pnp_register_mem_resource(option, mem); 495 pnp_register_mem_resource(option, mem);
500 return;
501} 496}
502 497
503static void 498static void pnpacpi_parse_fixed_mem32_option(struct pnp_option *option,
504pnpacpi_parse_fixed_mem32_option(struct pnp_option *option, 499 struct acpi_resource_fixed_memory32 *p)
505 struct acpi_resource_fixed_memory32 *p)
506{ 500{
507 struct pnp_mem *mem; 501 struct pnp_mem *mem;
508 502
@@ -516,14 +510,13 @@ pnpacpi_parse_fixed_mem32_option(struct pnp_option *option,
516 mem->align = 0; 510 mem->align = 0;
517 511
518 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ? 512 mem->flags = (ACPI_READ_WRITE_MEMORY == p->write_protect) ?
519 IORESOURCE_MEM_WRITEABLE : 0; 513 IORESOURCE_MEM_WRITEABLE : 0;
520 514
521 pnp_register_mem_resource(option, mem); 515 pnp_register_mem_resource(option, mem);
522 return;
523} 516}
524 517
525static void 518static void pnpacpi_parse_address_option(struct pnp_option *option,
526pnpacpi_parse_address_option(struct pnp_option *option, struct acpi_resource *r) 519 struct acpi_resource *r)
527{ 520{
528 struct acpi_resource_address64 addr, *p = &addr; 521 struct acpi_resource_address64 addr, *p = &addr;
529 acpi_status status; 522 acpi_status status;
@@ -532,7 +525,8 @@ pnpacpi_parse_address_option(struct pnp_option *option, struct acpi_resource *r)
532 525
533 status = acpi_resource_to_address64(r, p); 526 status = acpi_resource_to_address64(r, p);
534 if (!ACPI_SUCCESS(status)) { 527 if (!ACPI_SUCCESS(status)) {
535 pnp_warn("PnPACPI: failed to convert resource type %d", r->type); 528 pnp_warn("PnPACPI: failed to convert resource type %d",
529 r->type);
536 return; 530 return;
537 } 531 }
538 532
@@ -547,7 +541,8 @@ pnpacpi_parse_address_option(struct pnp_option *option, struct acpi_resource *r)
547 mem->size = p->address_length; 541 mem->size = p->address_length;
548 mem->align = 0; 542 mem->align = 0;
549 mem->flags = (p->info.mem.write_protect == 543 mem->flags = (p->info.mem.write_protect ==
550 ACPI_READ_WRITE_MEMORY) ? IORESOURCE_MEM_WRITEABLE : 0; 544 ACPI_READ_WRITE_MEMORY) ? IORESOURCE_MEM_WRITEABLE
545 : 0;
551 pnp_register_mem_resource(option, mem); 546 pnp_register_mem_resource(option, mem);
552 } else if (p->resource_type == ACPI_IO_RANGE) { 547 } else if (p->resource_type == ACPI_IO_RANGE) {
553 port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); 548 port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
@@ -568,109 +563,108 @@ struct acpipnp_parse_option_s {
568}; 563};
569 564
570static acpi_status pnpacpi_option_resource(struct acpi_resource *res, 565static acpi_status pnpacpi_option_resource(struct acpi_resource *res,
571 void *data) 566 void *data)
572{ 567{
573 int priority = 0; 568 int priority = 0;
574 struct acpipnp_parse_option_s *parse_data = (struct acpipnp_parse_option_s *)data; 569 struct acpipnp_parse_option_s *parse_data =
570 (struct acpipnp_parse_option_s *)data;
575 struct pnp_dev *dev = parse_data->dev; 571 struct pnp_dev *dev = parse_data->dev;
576 struct pnp_option *option = parse_data->option; 572 struct pnp_option *option = parse_data->option;
577 573
578 switch (res->type) { 574 switch (res->type) {
579 case ACPI_RESOURCE_TYPE_IRQ: 575 case ACPI_RESOURCE_TYPE_IRQ:
580 pnpacpi_parse_irq_option(option, &res->data.irq); 576 pnpacpi_parse_irq_option(option, &res->data.irq);
581 break; 577 break;
582 578
583 case ACPI_RESOURCE_TYPE_DMA: 579 case ACPI_RESOURCE_TYPE_DMA:
584 pnpacpi_parse_dma_option(option, &res->data.dma); 580 pnpacpi_parse_dma_option(option, &res->data.dma);
585 break; 581 break;
586 582
587 case ACPI_RESOURCE_TYPE_START_DEPENDENT: 583 case ACPI_RESOURCE_TYPE_START_DEPENDENT:
588 switch (res->data.start_dpf.compatibility_priority) { 584 switch (res->data.start_dpf.compatibility_priority) {
589 case ACPI_GOOD_CONFIGURATION: 585 case ACPI_GOOD_CONFIGURATION:
590 priority = PNP_RES_PRIORITY_PREFERRED; 586 priority = PNP_RES_PRIORITY_PREFERRED;
591 break;
592
593 case ACPI_ACCEPTABLE_CONFIGURATION:
594 priority = PNP_RES_PRIORITY_ACCEPTABLE;
595 break;
596
597 case ACPI_SUB_OPTIMAL_CONFIGURATION:
598 priority = PNP_RES_PRIORITY_FUNCTIONAL;
599 break;
600 default:
601 priority = PNP_RES_PRIORITY_INVALID;
602 break;
603 }
604 /* TBD: Considering performace/robustness bits */
605 option = pnp_register_dependent_option(dev, priority);
606 if (!option)
607 return AE_ERROR;
608 parse_data->option = option;
609 break; 587 break;
610 588
611 case ACPI_RESOURCE_TYPE_END_DEPENDENT: 589 case ACPI_ACCEPTABLE_CONFIGURATION:
612 /*only one EndDependentFn is allowed*/ 590 priority = PNP_RES_PRIORITY_ACCEPTABLE;
613 if (!parse_data->option_independent) {
614 pnp_warn("PnPACPI: more than one EndDependentFn");
615 return AE_ERROR;
616 }
617 parse_data->option = parse_data->option_independent;
618 parse_data->option_independent = NULL;
619 break; 591 break;
620 592
621 case ACPI_RESOURCE_TYPE_IO: 593 case ACPI_SUB_OPTIMAL_CONFIGURATION:
622 pnpacpi_parse_port_option(option, &res->data.io); 594 priority = PNP_RES_PRIORITY_FUNCTIONAL;
623 break; 595 break;
624 596 default:
625 case ACPI_RESOURCE_TYPE_FIXED_IO: 597 priority = PNP_RES_PRIORITY_INVALID;
626 pnpacpi_parse_fixed_port_option(option,
627 &res->data.fixed_io);
628 break; 598 break;
599 }
600 /* TBD: Consider performance/robustness bits */
601 option = pnp_register_dependent_option(dev, priority);
602 if (!option)
603 return AE_ERROR;
604 parse_data->option = option;
605 break;
629 606
630 case ACPI_RESOURCE_TYPE_VENDOR: 607 case ACPI_RESOURCE_TYPE_END_DEPENDENT:
 631 case ACPI_RESOURCE_TYPE_END_TAG: 608 /* only one EndDependentFn is allowed */
632 break; 609 if (!parse_data->option_independent) {
610 pnp_warn("PnPACPI: more than one EndDependentFn");
611 return AE_ERROR;
612 }
613 parse_data->option = parse_data->option_independent;
614 parse_data->option_independent = NULL;
615 break;
633 616
634 case ACPI_RESOURCE_TYPE_MEMORY24: 617 case ACPI_RESOURCE_TYPE_IO:
635 pnpacpi_parse_mem24_option(option, &res->data.memory24); 618 pnpacpi_parse_port_option(option, &res->data.io);
636 break; 619 break;
637 620
638 case ACPI_RESOURCE_TYPE_MEMORY32: 621 case ACPI_RESOURCE_TYPE_FIXED_IO:
639 pnpacpi_parse_mem32_option(option, &res->data.memory32); 622 pnpacpi_parse_fixed_port_option(option, &res->data.fixed_io);
640 break; 623 break;
641 624
642 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: 625 case ACPI_RESOURCE_TYPE_VENDOR:
643 pnpacpi_parse_fixed_mem32_option(option, 626 case ACPI_RESOURCE_TYPE_END_TAG:
644 &res->data.fixed_memory32); 627 break;
645 break;
646 628
647 case ACPI_RESOURCE_TYPE_ADDRESS16: 629 case ACPI_RESOURCE_TYPE_MEMORY24:
648 case ACPI_RESOURCE_TYPE_ADDRESS32: 630 pnpacpi_parse_mem24_option(option, &res->data.memory24);
649 case ACPI_RESOURCE_TYPE_ADDRESS64: 631 break;
650 pnpacpi_parse_address_option(option, res);
651 break;
652 632
653 case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64: 633 case ACPI_RESOURCE_TYPE_MEMORY32:
654 break; 634 pnpacpi_parse_mem32_option(option, &res->data.memory32);
635 break;
655 636
656 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: 637 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
657 pnpacpi_parse_ext_irq_option(option, 638 pnpacpi_parse_fixed_mem32_option(option,
658 &res->data.extended_irq); 639 &res->data.fixed_memory32);
659 break; 640 break;
660 641
661 case ACPI_RESOURCE_TYPE_GENERIC_REGISTER: 642 case ACPI_RESOURCE_TYPE_ADDRESS16:
662 break; 643 case ACPI_RESOURCE_TYPE_ADDRESS32:
644 case ACPI_RESOURCE_TYPE_ADDRESS64:
645 pnpacpi_parse_address_option(option, res);
646 break;
663 647
664 default: 648 case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
665 pnp_warn("PnPACPI: unknown resource type %d", res->type); 649 break;
666 return AE_ERROR; 650
651 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
652 pnpacpi_parse_ext_irq_option(option, &res->data.extended_irq);
653 break;
654
655 case ACPI_RESOURCE_TYPE_GENERIC_REGISTER:
656 break;
657
658 default:
659 pnp_warn("PnPACPI: unknown resource type %d", res->type);
660 return AE_ERROR;
667 } 661 }
668 662
669 return AE_OK; 663 return AE_OK;
670} 664}
671 665
672acpi_status pnpacpi_parse_resource_option_data(acpi_handle handle, 666acpi_status pnpacpi_parse_resource_option_data(acpi_handle handle,
 673 struct pnp_dev *dev) 667 struct pnp_dev *dev)
674{ 668{
675 acpi_status status; 669 acpi_status status;
676 struct acpipnp_parse_option_s parse_data; 670 struct acpipnp_parse_option_s parse_data;
@@ -681,7 +675,7 @@ acpi_status pnpacpi_parse_resource_option_data(acpi_handle handle,
681 parse_data.option_independent = parse_data.option; 675 parse_data.option_independent = parse_data.option;
682 parse_data.dev = dev; 676 parse_data.dev = dev;
683 status = acpi_walk_resources(handle, METHOD_NAME__PRS, 677 status = acpi_walk_resources(handle, METHOD_NAME__PRS,
684 pnpacpi_option_resource, &parse_data); 678 pnpacpi_option_resource, &parse_data);
685 679
686 return status; 680 return status;
687} 681}
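
Note on the hunk above: each StartDependentFn opens a new dependent option set whose PnP priority comes from the ACPI compatibility field. Restated as a standalone helper for clarity (an illustrative sketch only; the driver open-codes this switch inside pnpacpi_option_resource, and all constants are the ones visible in the hunk):

static int dpf_priority(u8 compatibility_priority)
{
	/* ACPI good/acceptable/sub-optimal map onto the PnP layer's
	 * preferred/acceptable/functional priorities; anything else
	 * is treated as invalid, exactly as in the hunk above. */
	switch (compatibility_priority) {
	case ACPI_GOOD_CONFIGURATION:
		return PNP_RES_PRIORITY_PREFERRED;
	case ACPI_ACCEPTABLE_CONFIGURATION:
		return PNP_RES_PRIORITY_ACCEPTABLE;
	case ACPI_SUB_OPTIMAL_CONFIGURATION:
		return PNP_RES_PRIORITY_FUNCTIONAL;
	default:
		return PNP_RES_PRIORITY_INVALID;
	}
}
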
@@ -709,7 +703,7 @@ static int pnpacpi_supported_resource(struct acpi_resource *res)
709 * Set resource 703 * Set resource
710 */ 704 */
711static acpi_status pnpacpi_count_resources(struct acpi_resource *res, 705static acpi_status pnpacpi_count_resources(struct acpi_resource *res,
712 void *data) 706 void *data)
713{ 707{
714 int *res_cnt = (int *)data; 708 int *res_cnt = (int *)data;
715 709
@@ -732,14 +726,14 @@ static acpi_status pnpacpi_type_resources(struct acpi_resource *res, void *data)
732} 726}
733 727
734int pnpacpi_build_resource_template(acpi_handle handle, 728int pnpacpi_build_resource_template(acpi_handle handle,
735 struct acpi_buffer *buffer) 729 struct acpi_buffer *buffer)
736{ 730{
737 struct acpi_resource *resource; 731 struct acpi_resource *resource;
738 int res_cnt = 0; 732 int res_cnt = 0;
739 acpi_status status; 733 acpi_status status;
740 734
741 status = acpi_walk_resources(handle, METHOD_NAME__CRS, 735 status = acpi_walk_resources(handle, METHOD_NAME__CRS,
742 pnpacpi_count_resources, &res_cnt); 736 pnpacpi_count_resources, &res_cnt);
743 if (ACPI_FAILURE(status)) { 737 if (ACPI_FAILURE(status)) {
744 pnp_err("Evaluate _CRS failed"); 738 pnp_err("Evaluate _CRS failed");
745 return -EINVAL; 739 return -EINVAL;
@@ -753,7 +747,7 @@ int pnpacpi_build_resource_template(acpi_handle handle,
753 pnp_dbg("Res cnt %d", res_cnt); 747 pnp_dbg("Res cnt %d", res_cnt);
754 resource = (struct acpi_resource *)buffer->pointer; 748 resource = (struct acpi_resource *)buffer->pointer;
755 status = acpi_walk_resources(handle, METHOD_NAME__CRS, 749 status = acpi_walk_resources(handle, METHOD_NAME__CRS,
756 pnpacpi_type_resources, &resource); 750 pnpacpi_type_resources, &resource);
757 if (ACPI_FAILURE(status)) { 751 if (ACPI_FAILURE(status)) {
758 kfree(buffer->pointer); 752 kfree(buffer->pointer);
759 pnp_err("Evaluate _CRS failed"); 753 pnp_err("Evaluate _CRS failed");
@@ -766,7 +760,7 @@ int pnpacpi_build_resource_template(acpi_handle handle,
766} 760}
767 761
768static void pnpacpi_encode_irq(struct acpi_resource *resource, 762static void pnpacpi_encode_irq(struct acpi_resource *resource,
769 struct resource *p) 763 struct resource *p)
770{ 764{
771 int triggering, polarity; 765 int triggering, polarity;
772 766
@@ -782,7 +776,7 @@ static void pnpacpi_encode_irq(struct acpi_resource *resource,
782} 776}
783 777
784static void pnpacpi_encode_ext_irq(struct acpi_resource *resource, 778static void pnpacpi_encode_ext_irq(struct acpi_resource *resource,
785 struct resource *p) 779 struct resource *p)
786{ 780{
787 int triggering, polarity; 781 int triggering, polarity;
788 782
@@ -799,32 +793,32 @@ static void pnpacpi_encode_ext_irq(struct acpi_resource *resource,
799} 793}
800 794
801static void pnpacpi_encode_dma(struct acpi_resource *resource, 795static void pnpacpi_encode_dma(struct acpi_resource *resource,
802 struct resource *p) 796 struct resource *p)
803{ 797{
804 /* Note: pnp_assign_dma will copy pnp_dma->flags into p->flags */ 798 /* Note: pnp_assign_dma will copy pnp_dma->flags into p->flags */
805 switch (p->flags & IORESOURCE_DMA_SPEED_MASK) { 799 switch (p->flags & IORESOURCE_DMA_SPEED_MASK) {
806 case IORESOURCE_DMA_TYPEA: 800 case IORESOURCE_DMA_TYPEA:
807 resource->data.dma.type = ACPI_TYPE_A; 801 resource->data.dma.type = ACPI_TYPE_A;
808 break; 802 break;
809 case IORESOURCE_DMA_TYPEB: 803 case IORESOURCE_DMA_TYPEB:
810 resource->data.dma.type = ACPI_TYPE_B; 804 resource->data.dma.type = ACPI_TYPE_B;
811 break; 805 break;
812 case IORESOURCE_DMA_TYPEF: 806 case IORESOURCE_DMA_TYPEF:
813 resource->data.dma.type = ACPI_TYPE_F; 807 resource->data.dma.type = ACPI_TYPE_F;
814 break; 808 break;
815 default: 809 default:
816 resource->data.dma.type = ACPI_COMPATIBILITY; 810 resource->data.dma.type = ACPI_COMPATIBILITY;
817 } 811 }
818 812
819 switch (p->flags & IORESOURCE_DMA_TYPE_MASK) { 813 switch (p->flags & IORESOURCE_DMA_TYPE_MASK) {
820 case IORESOURCE_DMA_8BIT: 814 case IORESOURCE_DMA_8BIT:
821 resource->data.dma.transfer = ACPI_TRANSFER_8; 815 resource->data.dma.transfer = ACPI_TRANSFER_8;
822 break; 816 break;
823 case IORESOURCE_DMA_8AND16BIT: 817 case IORESOURCE_DMA_8AND16BIT:
824 resource->data.dma.transfer = ACPI_TRANSFER_8_16; 818 resource->data.dma.transfer = ACPI_TRANSFER_8_16;
825 break; 819 break;
826 default: 820 default:
827 resource->data.dma.transfer = ACPI_TRANSFER_16; 821 resource->data.dma.transfer = ACPI_TRANSFER_16;
828 } 822 }
829 823
830 resource->data.dma.bus_master = !!(p->flags & IORESOURCE_DMA_MASTER); 824 resource->data.dma.bus_master = !!(p->flags & IORESOURCE_DMA_MASTER);
@@ -833,31 +827,31 @@ static void pnpacpi_encode_dma(struct acpi_resource *resource,
833} 827}
834 828
835static void pnpacpi_encode_io(struct acpi_resource *resource, 829static void pnpacpi_encode_io(struct acpi_resource *resource,
836 struct resource *p) 830 struct resource *p)
837{ 831{
838 /* Note: pnp_assign_port will copy pnp_port->flags into p->flags */ 832 /* Note: pnp_assign_port will copy pnp_port->flags into p->flags */
839 resource->data.io.io_decode = (p->flags & PNP_PORT_FLAG_16BITADDR)? 833 resource->data.io.io_decode = (p->flags & PNP_PORT_FLAG_16BITADDR) ?
840 ACPI_DECODE_16 : ACPI_DECODE_10; 834 ACPI_DECODE_16 : ACPI_DECODE_10;
841 resource->data.io.minimum = p->start; 835 resource->data.io.minimum = p->start;
842 resource->data.io.maximum = p->end; 836 resource->data.io.maximum = p->end;
843 resource->data.io.alignment = 0; /* Correct? */ 837 resource->data.io.alignment = 0; /* Correct? */
844 resource->data.io.address_length = p->end - p->start + 1; 838 resource->data.io.address_length = p->end - p->start + 1;
845} 839}
846 840
847static void pnpacpi_encode_fixed_io(struct acpi_resource *resource, 841static void pnpacpi_encode_fixed_io(struct acpi_resource *resource,
848 struct resource *p) 842 struct resource *p)
849{ 843{
850 resource->data.fixed_io.address = p->start; 844 resource->data.fixed_io.address = p->start;
851 resource->data.fixed_io.address_length = p->end - p->start + 1; 845 resource->data.fixed_io.address_length = p->end - p->start + 1;
852} 846}
853 847
854static void pnpacpi_encode_mem24(struct acpi_resource *resource, 848static void pnpacpi_encode_mem24(struct acpi_resource *resource,
855 struct resource *p) 849 struct resource *p)
856{ 850{
857 /* Note: pnp_assign_mem will copy pnp_mem->flags into p->flags */ 851 /* Note: pnp_assign_mem will copy pnp_mem->flags into p->flags */
858 resource->data.memory24.write_protect = 852 resource->data.memory24.write_protect =
859 (p->flags & IORESOURCE_MEM_WRITEABLE) ? 853 (p->flags & IORESOURCE_MEM_WRITEABLE) ?
860 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; 854 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
861 resource->data.memory24.minimum = p->start; 855 resource->data.memory24.minimum = p->start;
862 resource->data.memory24.maximum = p->end; 856 resource->data.memory24.maximum = p->end;
863 resource->data.memory24.alignment = 0; 857 resource->data.memory24.alignment = 0;
@@ -865,11 +859,11 @@ static void pnpacpi_encode_mem24(struct acpi_resource *resource,
865} 859}
866 860
867static void pnpacpi_encode_mem32(struct acpi_resource *resource, 861static void pnpacpi_encode_mem32(struct acpi_resource *resource,
868 struct resource *p) 862 struct resource *p)
869{ 863{
870 resource->data.memory32.write_protect = 864 resource->data.memory32.write_protect =
871 (p->flags & IORESOURCE_MEM_WRITEABLE) ? 865 (p->flags & IORESOURCE_MEM_WRITEABLE) ?
872 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; 866 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
873 resource->data.memory32.minimum = p->start; 867 resource->data.memory32.minimum = p->start;
874 resource->data.memory32.maximum = p->end; 868 resource->data.memory32.maximum = p->end;
875 resource->data.memory32.alignment = 0; 869 resource->data.memory32.alignment = 0;
@@ -877,74 +871,77 @@ static void pnpacpi_encode_mem32(struct acpi_resource *resource,
877} 871}
878 872
879static void pnpacpi_encode_fixed_mem32(struct acpi_resource *resource, 873static void pnpacpi_encode_fixed_mem32(struct acpi_resource *resource,
880 struct resource *p) 874 struct resource *p)
881{ 875{
882 resource->data.fixed_memory32.write_protect = 876 resource->data.fixed_memory32.write_protect =
883 (p->flags & IORESOURCE_MEM_WRITEABLE) ? 877 (p->flags & IORESOURCE_MEM_WRITEABLE) ?
884 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY; 878 ACPI_READ_WRITE_MEMORY : ACPI_READ_ONLY_MEMORY;
885 resource->data.fixed_memory32.address = p->start; 879 resource->data.fixed_memory32.address = p->start;
886 resource->data.fixed_memory32.address_length = p->end - p->start + 1; 880 resource->data.fixed_memory32.address_length = p->end - p->start + 1;
887} 881}
888 882
889int pnpacpi_encode_resources(struct pnp_resource_table *res_table, 883int pnpacpi_encode_resources(struct pnp_resource_table *res_table,
890 struct acpi_buffer *buffer) 884 struct acpi_buffer *buffer)
891{ 885{
892 int i = 0; 886 int i = 0;
893 /* pnpacpi_build_resource_template allocates extra mem */ 887 /* pnpacpi_build_resource_template allocates extra mem */
894 int res_cnt = (buffer->length - 1)/sizeof(struct acpi_resource) - 1; 888 int res_cnt = (buffer->length - 1) / sizeof(struct acpi_resource) - 1;
895 struct acpi_resource *resource = (struct acpi_resource*)buffer->pointer; 889 struct acpi_resource *resource =
890 (struct acpi_resource *)buffer->pointer;
896 int port = 0, irq = 0, dma = 0, mem = 0; 891 int port = 0, irq = 0, dma = 0, mem = 0;
897 892
898 pnp_dbg("res cnt %d", res_cnt); 893 pnp_dbg("res cnt %d", res_cnt);
899 while (i < res_cnt) { 894 while (i < res_cnt) {
900 switch(resource->type) { 895 switch (resource->type) {
901 case ACPI_RESOURCE_TYPE_IRQ: 896 case ACPI_RESOURCE_TYPE_IRQ:
902 pnp_dbg("Encode irq"); 897 pnp_dbg("Encode irq");
903 pnpacpi_encode_irq(resource, 898 pnpacpi_encode_irq(resource,
904 &res_table->irq_resource[irq]); 899 &res_table->irq_resource[irq]);
905 irq++; 900 irq++;
906 break; 901 break;
907 902
908 case ACPI_RESOURCE_TYPE_DMA: 903 case ACPI_RESOURCE_TYPE_DMA:
909 pnp_dbg("Encode dma"); 904 pnp_dbg("Encode dma");
910 pnpacpi_encode_dma(resource, 905 pnpacpi_encode_dma(resource,
911 &res_table->dma_resource[dma]); 906 &res_table->dma_resource[dma]);
912 dma++; 907 dma++;
913 break; 908 break;
914 case ACPI_RESOURCE_TYPE_IO: 909 case ACPI_RESOURCE_TYPE_IO:
915 pnp_dbg("Encode io"); 910 pnp_dbg("Encode io");
916 pnpacpi_encode_io(resource, 911 pnpacpi_encode_io(resource,
917 &res_table->port_resource[port]); 912 &res_table->port_resource[port]);
918 port++; 913 port++;
919 break; 914 break;
920 case ACPI_RESOURCE_TYPE_FIXED_IO: 915 case ACPI_RESOURCE_TYPE_FIXED_IO:
921 pnp_dbg("Encode fixed io"); 916 pnp_dbg("Encode fixed io");
922 pnpacpi_encode_fixed_io(resource, 917 pnpacpi_encode_fixed_io(resource,
 923 &res_table->port_resource[port]); 918 &res_table->port_resource[port]);
924 port++; 920 port++;
925 break; 921 break;
926 case ACPI_RESOURCE_TYPE_MEMORY24: 922 case ACPI_RESOURCE_TYPE_MEMORY24:
927 pnp_dbg("Encode mem24"); 923 pnp_dbg("Encode mem24");
928 pnpacpi_encode_mem24(resource, 924 pnpacpi_encode_mem24(resource,
929 &res_table->mem_resource[mem]); 925 &res_table->mem_resource[mem]);
930 mem++; 926 mem++;
931 break; 927 break;
932 case ACPI_RESOURCE_TYPE_MEMORY32: 928 case ACPI_RESOURCE_TYPE_MEMORY32:
933 pnp_dbg("Encode mem32"); 929 pnp_dbg("Encode mem32");
934 pnpacpi_encode_mem32(resource, 930 pnpacpi_encode_mem32(resource,
935 &res_table->mem_resource[mem]); 931 &res_table->mem_resource[mem]);
936 mem++; 932 mem++;
937 break; 933 break;
938 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: 934 case ACPI_RESOURCE_TYPE_FIXED_MEMORY32:
939 pnp_dbg("Encode fixed mem32"); 935 pnp_dbg("Encode fixed mem32");
940 pnpacpi_encode_fixed_mem32(resource, 936 pnpacpi_encode_fixed_mem32(resource,
 941 &res_table->mem_resource[mem]); 937 &res_table->mem_resource[mem]);
942 mem++; 939 mem++;
943 break; 940 break;
944 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: 941 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
945 pnp_dbg("Encode ext irq"); 942 pnp_dbg("Encode ext irq");
946 pnpacpi_encode_ext_irq(resource, 943 pnpacpi_encode_ext_irq(resource,
947 &res_table->irq_resource[irq]); 944 &res_table->irq_resource[irq]);
948 irq++; 945 irq++;
949 break; 946 break;
950 case ACPI_RESOURCE_TYPE_START_DEPENDENT: 947 case ACPI_RESOURCE_TYPE_START_DEPENDENT:
@@ -956,7 +953,7 @@ int pnpacpi_encode_resources(struct pnp_resource_table *res_table,
956 case ACPI_RESOURCE_TYPE_ADDRESS64: 953 case ACPI_RESOURCE_TYPE_ADDRESS64:
957 case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64: 954 case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64:
958 case ACPI_RESOURCE_TYPE_GENERIC_REGISTER: 955 case ACPI_RESOURCE_TYPE_GENERIC_REGISTER:
959 default: /* other type */ 956 default: /* other type */
960 pnp_warn("unknown resource type %d", resource->type); 957 pnp_warn("unknown resource type %d", resource->type);
961 return -EINVAL; 958 return -EINVAL;
962 } 959 }
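
Taken together, the encode half of rsparser.c exists to feed _SRS: a template sized from _CRS is filled from a pnp_resource_table and handed back to ACPICA. A minimal sketch of that pipeline, assuming a kernel caller; acpi_set_current_resources is ACPICA's standard _SRS entry point, while sketch_set_resources itself is hypothetical and mirrors how pnpacpi's core is expected to drive these helpers:

static int sketch_set_resources(acpi_handle handle,
				struct pnp_resource_table *res)
{
	struct acpi_buffer buffer = { 0, NULL };	/* length, pointer */
	int ret;

	/* Two _CRS walks inside: one counts descriptors, one stamps
	 * their types into the freshly allocated template. */
	ret = pnpacpi_build_resource_template(handle, &buffer);
	if (ret)
		return ret;

	/* Translate the PnP resource table into the template. */
	ret = pnpacpi_encode_resources(res, &buffer);
	if (ret) {
		kfree(buffer.pointer);
		return ret;
	}

	/* Program the device via _SRS. */
	if (ACPI_FAILURE(acpi_set_current_resources(handle, &buffer)))
		ret = -EINVAL;
	kfree(buffer.pointer);
	return ret;
}
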
diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index a1f0b0ba2bfe..5dba68fe33f5 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * bioscalls.c - the lowlevel layer of the PnPBIOS driver 2 * bioscalls.c - the lowlevel layer of the PnPBIOS driver
3 *
4 */ 3 */
5 4
6#include <linux/types.h> 5#include <linux/types.h>
@@ -26,11 +25,10 @@
26#include "pnpbios.h" 25#include "pnpbios.h"
27 26
28static struct { 27static struct {
29 u16 offset; 28 u16 offset;
30 u16 segment; 29 u16 segment;
31} pnp_bios_callpoint; 30} pnp_bios_callpoint;
32 31
33
34/* 32/*
35 * These are some opcodes for a "static asmlinkage" 33 * These are some opcodes for a "static asmlinkage"
36 * As this code is *not* executed inside the linux kernel segment, but in a 34 * As this code is *not* executed inside the linux kernel segment, but in a
@@ -44,8 +42,7 @@ static struct {
44 42
45asmlinkage void pnp_bios_callfunc(void); 43asmlinkage void pnp_bios_callfunc(void);
46 44
47__asm__( 45__asm__(".text \n"
48 ".text \n"
49 __ALIGN_STR "\n" 46 __ALIGN_STR "\n"
50 "pnp_bios_callfunc:\n" 47 "pnp_bios_callfunc:\n"
51 " pushl %edx \n" 48 " pushl %edx \n"
@@ -55,8 +52,7 @@ __asm__(
55 " lcallw *pnp_bios_callpoint\n" 52 " lcallw *pnp_bios_callpoint\n"
56 " addl $16, %esp \n" 53 " addl $16, %esp \n"
57 " lret \n" 54 " lret \n"
58 ".previous \n" 55 ".previous \n");
59);
60 56
61#define Q2_SET_SEL(cpu, selname, address, size) \ 57#define Q2_SET_SEL(cpu, selname, address, size) \
62do { \ 58do { \
@@ -78,7 +74,6 @@ u32 pnp_bios_is_utter_crap = 0;
78 74
79static spinlock_t pnp_bios_lock; 75static spinlock_t pnp_bios_lock;
80 76
81
82/* 77/*
83 * Support Functions 78 * Support Functions
84 */ 79 */
@@ -97,7 +92,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
97 * PnP BIOSes are generally not terribly re-entrant. 92 * PnP BIOSes are generally not terribly re-entrant.
98 * Also, don't rely on them to save everything correctly. 93 * Also, don't rely on them to save everything correctly.
99 */ 94 */
100 if(pnp_bios_is_utter_crap) 95 if (pnp_bios_is_utter_crap)
101 return PNP_FUNCTION_NOT_SUPPORTED; 96 return PNP_FUNCTION_NOT_SUPPORTED;
102 97
103 cpu = get_cpu(); 98 cpu = get_cpu();
@@ -113,112 +108,128 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
113 if (ts2_size) 108 if (ts2_size)
114 Q2_SET_SEL(smp_processor_id(), PNP_TS2, ts2_base, ts2_size); 109 Q2_SET_SEL(smp_processor_id(), PNP_TS2, ts2_base, ts2_size);
115 110
116 __asm__ __volatile__( 111 __asm__ __volatile__("pushl %%ebp\n\t"
117 "pushl %%ebp\n\t" 112 "pushl %%edi\n\t"
118 "pushl %%edi\n\t" 113 "pushl %%esi\n\t"
119 "pushl %%esi\n\t" 114 "pushl %%ds\n\t"
120 "pushl %%ds\n\t" 115 "pushl %%es\n\t"
121 "pushl %%es\n\t" 116 "pushl %%fs\n\t"
122 "pushl %%fs\n\t" 117 "pushl %%gs\n\t"
123 "pushl %%gs\n\t" 118 "pushfl\n\t"
124 "pushfl\n\t" 119 "movl %%esp, pnp_bios_fault_esp\n\t"
125 "movl %%esp, pnp_bios_fault_esp\n\t" 120 "movl $1f, pnp_bios_fault_eip\n\t"
126 "movl $1f, pnp_bios_fault_eip\n\t" 121 "lcall %5,%6\n\t"
127 "lcall %5,%6\n\t" 122 "1:popfl\n\t"
128 "1:popfl\n\t" 123 "popl %%gs\n\t"
129 "popl %%gs\n\t" 124 "popl %%fs\n\t"
130 "popl %%fs\n\t" 125 "popl %%es\n\t"
131 "popl %%es\n\t" 126 "popl %%ds\n\t"
132 "popl %%ds\n\t" 127 "popl %%esi\n\t"
133 "popl %%esi\n\t" 128 "popl %%edi\n\t"
134 "popl %%edi\n\t" 129 "popl %%ebp\n\t":"=a"(status)
135 "popl %%ebp\n\t" 130 :"0"((func) | (((u32) arg1) << 16)),
136 : "=a" (status) 131 "b"((arg2) | (((u32) arg3) << 16)),
137 : "0" ((func) | (((u32)arg1) << 16)), 132 "c"((arg4) | (((u32) arg5) << 16)),
138 "b" ((arg2) | (((u32)arg3) << 16)), 133 "d"((arg6) | (((u32) arg7) << 16)),
139 "c" ((arg4) | (((u32)arg5) << 16)), 134 "i"(PNP_CS32), "i"(0)
140 "d" ((arg6) | (((u32)arg7) << 16)), 135 :"memory");
141 "i" (PNP_CS32),
142 "i" (0)
143 : "memory"
144 );
145 spin_unlock_irqrestore(&pnp_bios_lock, flags); 136 spin_unlock_irqrestore(&pnp_bios_lock, flags);
146 137
147 get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40; 138 get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
148 put_cpu(); 139 put_cpu();
149 140
150 /* If we get here and this is set then the PnP BIOS faulted on us. */ 141 /* If we get here and this is set then the PnP BIOS faulted on us. */
151 if(pnp_bios_is_utter_crap) 142 if (pnp_bios_is_utter_crap) {
152 { 143 printk(KERN_ERR
153 printk(KERN_ERR "PnPBIOS: Warning! Your PnP BIOS caused a fatal error. Attempting to continue\n"); 144 "PnPBIOS: Warning! Your PnP BIOS caused a fatal error. Attempting to continue\n");
154 printk(KERN_ERR "PnPBIOS: You may need to reboot with the \"pnpbios=off\" option to operate stably\n"); 145 printk(KERN_ERR
155 printk(KERN_ERR "PnPBIOS: Check with your vendor for an updated BIOS\n"); 146 "PnPBIOS: You may need to reboot with the \"pnpbios=off\" option to operate stably\n");
147 printk(KERN_ERR
148 "PnPBIOS: Check with your vendor for an updated BIOS\n");
156 } 149 }
157 150
158 return status; 151 return status;
159} 152}
160 153
161void pnpbios_print_status(const char * module, u16 status) 154void pnpbios_print_status(const char *module, u16 status)
162{ 155{
163 switch(status) { 156 switch (status) {
164 case PNP_SUCCESS: 157 case PNP_SUCCESS:
165 printk(KERN_ERR "PnPBIOS: %s: function successful\n", module); 158 printk(KERN_ERR "PnPBIOS: %s: function successful\n", module);
166 break; 159 break;
167 case PNP_NOT_SET_STATICALLY: 160 case PNP_NOT_SET_STATICALLY:
168 printk(KERN_ERR "PnPBIOS: %s: unable to set static resources\n", module); 161 printk(KERN_ERR "PnPBIOS: %s: unable to set static resources\n",
162 module);
169 break; 163 break;
170 case PNP_UNKNOWN_FUNCTION: 164 case PNP_UNKNOWN_FUNCTION:
171 printk(KERN_ERR "PnPBIOS: %s: invalid function number passed\n", module); 165 printk(KERN_ERR "PnPBIOS: %s: invalid function number passed\n",
166 module);
172 break; 167 break;
173 case PNP_FUNCTION_NOT_SUPPORTED: 168 case PNP_FUNCTION_NOT_SUPPORTED:
174 printk(KERN_ERR "PnPBIOS: %s: function not supported on this system\n", module); 169 printk(KERN_ERR
170 "PnPBIOS: %s: function not supported on this system\n",
171 module);
175 break; 172 break;
176 case PNP_INVALID_HANDLE: 173 case PNP_INVALID_HANDLE:
177 printk(KERN_ERR "PnPBIOS: %s: invalid handle\n", module); 174 printk(KERN_ERR "PnPBIOS: %s: invalid handle\n", module);
178 break; 175 break;
179 case PNP_BAD_PARAMETER: 176 case PNP_BAD_PARAMETER:
180 printk(KERN_ERR "PnPBIOS: %s: invalid parameters were passed\n", module); 177 printk(KERN_ERR "PnPBIOS: %s: invalid parameters were passed\n",
178 module);
181 break; 179 break;
182 case PNP_SET_FAILED: 180 case PNP_SET_FAILED:
183 printk(KERN_ERR "PnPBIOS: %s: unable to set resources\n", module); 181 printk(KERN_ERR "PnPBIOS: %s: unable to set resources\n",
182 module);
184 break; 183 break;
185 case PNP_EVENTS_NOT_PENDING: 184 case PNP_EVENTS_NOT_PENDING:
186 printk(KERN_ERR "PnPBIOS: %s: no events are pending\n", module); 185 printk(KERN_ERR "PnPBIOS: %s: no events are pending\n", module);
187 break; 186 break;
188 case PNP_SYSTEM_NOT_DOCKED: 187 case PNP_SYSTEM_NOT_DOCKED:
189 printk(KERN_ERR "PnPBIOS: %s: the system is not docked\n", module); 188 printk(KERN_ERR "PnPBIOS: %s: the system is not docked\n",
189 module);
190 break; 190 break;
191 case PNP_NO_ISA_PNP_CARDS: 191 case PNP_NO_ISA_PNP_CARDS:
192 printk(KERN_ERR "PnPBIOS: %s: no isapnp cards are installed on this system\n", module); 192 printk(KERN_ERR
193 "PnPBIOS: %s: no isapnp cards are installed on this system\n",
194 module);
193 break; 195 break;
194 case PNP_UNABLE_TO_DETERMINE_DOCK_CAPABILITIES: 196 case PNP_UNABLE_TO_DETERMINE_DOCK_CAPABILITIES:
195 printk(KERN_ERR "PnPBIOS: %s: cannot determine the capabilities of the docking station\n", module); 197 printk(KERN_ERR
198 "PnPBIOS: %s: cannot determine the capabilities of the docking station\n",
199 module);
196 break; 200 break;
197 case PNP_CONFIG_CHANGE_FAILED_NO_BATTERY: 201 case PNP_CONFIG_CHANGE_FAILED_NO_BATTERY:
198 printk(KERN_ERR "PnPBIOS: %s: unable to undock, the system does not have a battery\n", module); 202 printk(KERN_ERR
203 "PnPBIOS: %s: unable to undock, the system does not have a battery\n",
204 module);
199 break; 205 break;
200 case PNP_CONFIG_CHANGE_FAILED_RESOURCE_CONFLICT: 206 case PNP_CONFIG_CHANGE_FAILED_RESOURCE_CONFLICT:
201 printk(KERN_ERR "PnPBIOS: %s: could not dock due to resource conflicts\n", module); 207 printk(KERN_ERR
208 "PnPBIOS: %s: could not dock due to resource conflicts\n",
209 module);
202 break; 210 break;
203 case PNP_BUFFER_TOO_SMALL: 211 case PNP_BUFFER_TOO_SMALL:
204 printk(KERN_ERR "PnPBIOS: %s: the buffer passed is too small\n", module); 212 printk(KERN_ERR "PnPBIOS: %s: the buffer passed is too small\n",
213 module);
205 break; 214 break;
206 case PNP_USE_ESCD_SUPPORT: 215 case PNP_USE_ESCD_SUPPORT:
207 printk(KERN_ERR "PnPBIOS: %s: use ESCD instead\n", module); 216 printk(KERN_ERR "PnPBIOS: %s: use ESCD instead\n", module);
208 break; 217 break;
209 case PNP_MESSAGE_NOT_SUPPORTED: 218 case PNP_MESSAGE_NOT_SUPPORTED:
210 printk(KERN_ERR "PnPBIOS: %s: the message is unsupported\n", module); 219 printk(KERN_ERR "PnPBIOS: %s: the message is unsupported\n",
220 module);
211 break; 221 break;
212 case PNP_HARDWARE_ERROR: 222 case PNP_HARDWARE_ERROR:
 213 printk(KERN_ERR "PnPBIOS: %s: a hardware failure has occured\n", module); 223 printk(KERN_ERR "PnPBIOS: %s: a hardware failure has occurred\n",
224 module);
214 break; 225 break;
215 default: 226 default:
216 printk(KERN_ERR "PnPBIOS: %s: unexpected status 0x%x\n", module, status); 227 printk(KERN_ERR "PnPBIOS: %s: unexpected status 0x%x\n", module,
228 status);
217 break; 229 break;
218 } 230 }
219} 231}
220 232
221
222/* 233/*
223 * PnP BIOS Low Level Calls 234 * PnP BIOS Low Level Calls
224 */ 235 */
@@ -243,19 +254,22 @@ void pnpbios_print_status(const char * module, u16 status)
243static int __pnp_bios_dev_node_info(struct pnp_dev_node_info *data) 254static int __pnp_bios_dev_node_info(struct pnp_dev_node_info *data)
244{ 255{
245 u16 status; 256 u16 status;
257
246 if (!pnp_bios_present()) 258 if (!pnp_bios_present())
247 return PNP_FUNCTION_NOT_SUPPORTED; 259 return PNP_FUNCTION_NOT_SUPPORTED;
248 status = call_pnp_bios(PNP_GET_NUM_SYS_DEV_NODES, 0, PNP_TS1, 2, PNP_TS1, PNP_DS, 0, 0, 260 status = call_pnp_bios(PNP_GET_NUM_SYS_DEV_NODES, 0, PNP_TS1, 2,
249 data, sizeof(struct pnp_dev_node_info), NULL, 0); 261 PNP_TS1, PNP_DS, 0, 0, data,
262 sizeof(struct pnp_dev_node_info), NULL, 0);
250 data->no_nodes &= 0xff; 263 data->no_nodes &= 0xff;
251 return status; 264 return status;
252} 265}
253 266
254int pnp_bios_dev_node_info(struct pnp_dev_node_info *data) 267int pnp_bios_dev_node_info(struct pnp_dev_node_info *data)
255{ 268{
256 int status = __pnp_bios_dev_node_info( data ); 269 int status = __pnp_bios_dev_node_info(data);
257 if ( status ) 270
258 pnpbios_print_status( "dev_node_info", status ); 271 if (status)
272 pnpbios_print_status("dev_node_info", status);
259 return status; 273 return status;
260} 274}
261 275
@@ -273,17 +287,20 @@ int pnp_bios_dev_node_info(struct pnp_dev_node_info *data)
273 * or volatile current (0) config 287 * or volatile current (0) config
274 * Output: *nodenum=next node or 0xff if no more nodes 288 * Output: *nodenum=next node or 0xff if no more nodes
275 */ 289 */
276static int __pnp_bios_get_dev_node(u8 *nodenum, char boot, struct pnp_bios_node *data) 290static int __pnp_bios_get_dev_node(u8 *nodenum, char boot,
291 struct pnp_bios_node *data)
277{ 292{
278 u16 status; 293 u16 status;
279 u16 tmp_nodenum; 294 u16 tmp_nodenum;
295
280 if (!pnp_bios_present()) 296 if (!pnp_bios_present())
281 return PNP_FUNCTION_NOT_SUPPORTED; 297 return PNP_FUNCTION_NOT_SUPPORTED;
282 if ( !boot && pnpbios_dont_use_current_config ) 298 if (!boot && pnpbios_dont_use_current_config)
283 return PNP_FUNCTION_NOT_SUPPORTED; 299 return PNP_FUNCTION_NOT_SUPPORTED;
284 tmp_nodenum = *nodenum; 300 tmp_nodenum = *nodenum;
285 status = call_pnp_bios(PNP_GET_SYS_DEV_NODE, 0, PNP_TS1, 0, PNP_TS2, boot ? 2 : 1, PNP_DS, 0, 301 status = call_pnp_bios(PNP_GET_SYS_DEV_NODE, 0, PNP_TS1, 0, PNP_TS2,
286 &tmp_nodenum, sizeof(tmp_nodenum), data, 65536); 302 boot ? 2 : 1, PNP_DS, 0, &tmp_nodenum,
303 sizeof(tmp_nodenum), data, 65536);
287 *nodenum = tmp_nodenum; 304 *nodenum = tmp_nodenum;
288 return status; 305 return status;
289} 306}
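
The exported wrapper follows in the next hunk. A typical consumer walks node numbers until the BIOS reports 0xff; the sketch below is hypothetical but mirrors the enumeration the pnpbios core performs when building its device list (max_node_size bounds the node buffer, and the stuck-node guard protects against BIOSes that fail to advance):

static int sketch_enumerate_nodes(void)
{
	struct pnp_dev_node_info info;
	struct pnp_bios_node *node;
	u8 nodenum = 0;

	if (pnp_bios_dev_node_info(&info))
		return -EIO;

	/* max_node_size bounds every node the BIOS can hand back */
	node = kzalloc(info.max_node_size, GFP_KERNEL);
	if (!node)
		return -ENOMEM;

	while (nodenum != 0xff) {
		u8 this_node = nodenum;

		if (pnp_bios_get_dev_node(&nodenum, (char)PNPMODE_DYNAMIC,
					  node))
			break;	/* BIOS error: stop scanning */

		/* ... inspect node->handle, node->eisa_id, node->data ... */

		if (nodenum <= this_node)
			break;	/* broken BIOS failed to advance */
	}
	kfree(node);
	return 0;
}
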
@@ -291,104 +308,66 @@ static int __pnp_bios_get_dev_node(u8 *nodenum, char boot, struct pnp_bios_node
291int pnp_bios_get_dev_node(u8 *nodenum, char boot, struct pnp_bios_node *data) 308int pnp_bios_get_dev_node(u8 *nodenum, char boot, struct pnp_bios_node *data)
292{ 309{
293 int status; 310 int status;
294 status = __pnp_bios_get_dev_node( nodenum, boot, data ); 311
295 if ( status ) 312 status = __pnp_bios_get_dev_node(nodenum, boot, data);
296 pnpbios_print_status( "get_dev_node", status ); 313 if (status)
314 pnpbios_print_status("get_dev_node", status);
297 return status; 315 return status;
298} 316}
299 317
300
301/* 318/*
302 * Call PnP BIOS with function 0x02, "set system device node" 319 * Call PnP BIOS with function 0x02, "set system device node"
303 * Input: *nodenum = desired node, 320 * Input: *nodenum = desired node,
304 * boot = whether to set nonvolatile boot (!=0) 321 * boot = whether to set nonvolatile boot (!=0)
305 * or volatile current (0) config 322 * or volatile current (0) config
306 */ 323 */
307static int __pnp_bios_set_dev_node(u8 nodenum, char boot, struct pnp_bios_node *data) 324static int __pnp_bios_set_dev_node(u8 nodenum, char boot,
325 struct pnp_bios_node *data)
308{ 326{
309 u16 status; 327 u16 status;
328
310 if (!pnp_bios_present()) 329 if (!pnp_bios_present())
311 return PNP_FUNCTION_NOT_SUPPORTED; 330 return PNP_FUNCTION_NOT_SUPPORTED;
312 if ( !boot && pnpbios_dont_use_current_config ) 331 if (!boot && pnpbios_dont_use_current_config)
313 return PNP_FUNCTION_NOT_SUPPORTED; 332 return PNP_FUNCTION_NOT_SUPPORTED;
314 status = call_pnp_bios(PNP_SET_SYS_DEV_NODE, nodenum, 0, PNP_TS1, boot ? 2 : 1, PNP_DS, 0, 0, 333 status = call_pnp_bios(PNP_SET_SYS_DEV_NODE, nodenum, 0, PNP_TS1,
315 data, 65536, NULL, 0); 334 boot ? 2 : 1, PNP_DS, 0, 0, data, 65536, NULL,
335 0);
316 return status; 336 return status;
317} 337}
318 338
319int pnp_bios_set_dev_node(u8 nodenum, char boot, struct pnp_bios_node *data) 339int pnp_bios_set_dev_node(u8 nodenum, char boot, struct pnp_bios_node *data)
320{ 340{
321 int status; 341 int status;
322 status = __pnp_bios_set_dev_node( nodenum, boot, data ); 342
323 if ( status ) { 343 status = __pnp_bios_set_dev_node(nodenum, boot, data);
324 pnpbios_print_status( "set_dev_node", status ); 344 if (status) {
345 pnpbios_print_status("set_dev_node", status);
325 return status; 346 return status;
326 } 347 }
327 if ( !boot ) { /* Update devlist */ 348 if (!boot) { /* Update devlist */
328 status = pnp_bios_get_dev_node( &nodenum, boot, data ); 349 status = pnp_bios_get_dev_node(&nodenum, boot, data);
329 if ( status ) 350 if (status)
330 return status; 351 return status;
331 } 352 }
332 return status; 353 return status;
333} 354}
334 355
335#if needed
336/*
337 * Call PnP BIOS with function 0x03, "get event"
338 */
339static int pnp_bios_get_event(u16 *event)
340{
341 u16 status;
342 if (!pnp_bios_present())
343 return PNP_FUNCTION_NOT_SUPPORTED;
344 status = call_pnp_bios(PNP_GET_EVENT, 0, PNP_TS1, PNP_DS, 0, 0 ,0 ,0,
345 event, sizeof(u16), NULL, 0);
346 return status;
347}
348#endif
349
350#if needed
351/*
352 * Call PnP BIOS with function 0x04, "send message"
353 */
354static int pnp_bios_send_message(u16 message)
355{
356 u16 status;
357 if (!pnp_bios_present())
358 return PNP_FUNCTION_NOT_SUPPORTED;
359 status = call_pnp_bios(PNP_SEND_MESSAGE, message, PNP_DS, 0, 0, 0, 0, 0, 0, 0, 0, 0);
360 return status;
361}
362#endif
363
364/* 356/*
365 * Call PnP BIOS with function 0x05, "get docking station information" 357 * Call PnP BIOS with function 0x05, "get docking station information"
366 */ 358 */
367int pnp_bios_dock_station_info(struct pnp_docking_station_info *data) 359int pnp_bios_dock_station_info(struct pnp_docking_station_info *data)
368{ 360{
369 u16 status; 361 u16 status;
370 if (!pnp_bios_present())
371 return PNP_FUNCTION_NOT_SUPPORTED;
372 status = call_pnp_bios(PNP_GET_DOCKING_STATION_INFORMATION, 0, PNP_TS1, PNP_DS, 0, 0, 0, 0,
373 data, sizeof(struct pnp_docking_station_info), NULL, 0);
374 return status;
375}
376 362
377#if needed
378/*
379 * Call PnP BIOS with function 0x09, "set statically allocated resource
380 * information"
381 */
382static int pnp_bios_set_stat_res(char *info)
383{
384 u16 status;
385 if (!pnp_bios_present()) 363 if (!pnp_bios_present())
386 return PNP_FUNCTION_NOT_SUPPORTED; 364 return PNP_FUNCTION_NOT_SUPPORTED;
387 status = call_pnp_bios(PNP_SET_STATIC_ALLOCED_RES_INFO, 0, PNP_TS1, PNP_DS, 0, 0, 0, 0, 365 status = call_pnp_bios(PNP_GET_DOCKING_STATION_INFORMATION, 0, PNP_TS1,
388 info, *((u16 *) info), 0, 0); 366 PNP_DS, 0, 0, 0, 0, data,
367 sizeof(struct pnp_docking_station_info), NULL,
368 0);
389 return status; 369 return status;
390} 370}
391#endif
392 371
393/* 372/*
394 * Call PnP BIOS with function 0x0a, "get statically allocated resource 373 * Call PnP BIOS with function 0x0a, "get statically allocated resource
@@ -397,36 +376,23 @@ static int pnp_bios_set_stat_res(char *info)
397static int __pnp_bios_get_stat_res(char *info) 376static int __pnp_bios_get_stat_res(char *info)
398{ 377{
399 u16 status; 378 u16 status;
379
400 if (!pnp_bios_present()) 380 if (!pnp_bios_present())
401 return PNP_FUNCTION_NOT_SUPPORTED; 381 return PNP_FUNCTION_NOT_SUPPORTED;
402 status = call_pnp_bios(PNP_GET_STATIC_ALLOCED_RES_INFO, 0, PNP_TS1, PNP_DS, 0, 0, 0, 0, 382 status = call_pnp_bios(PNP_GET_STATIC_ALLOCED_RES_INFO, 0, PNP_TS1,
403 info, 65536, NULL, 0); 383 PNP_DS, 0, 0, 0, 0, info, 65536, NULL, 0);
404 return status; 384 return status;
405} 385}
406 386
407int pnp_bios_get_stat_res(char *info) 387int pnp_bios_get_stat_res(char *info)
408{ 388{
409 int status; 389 int status;
410 status = __pnp_bios_get_stat_res( info );
411 if ( status )
412 pnpbios_print_status( "get_stat_res", status );
413 return status;
414}
415 390
416#if needed 391 status = __pnp_bios_get_stat_res(info);
417/* 392 if (status)
418 * Call PnP BIOS with function 0x0b, "get APM id table" 393 pnpbios_print_status("get_stat_res", status);
419 */
420static int pnp_bios_apm_id_table(char *table, u16 *size)
421{
422 u16 status;
423 if (!pnp_bios_present())
424 return PNP_FUNCTION_NOT_SUPPORTED;
425 status = call_pnp_bios(PNP_GET_APM_ID_TABLE, 0, PNP_TS2, 0, PNP_TS1, PNP_DS, 0, 0,
426 table, *size, size, sizeof(u16));
427 return status; 394 return status;
428} 395}
429#endif
430 396
431/* 397/*
432 * Call PnP BIOS with function 0x40, "get isa pnp configuration structure" 398 * Call PnP BIOS with function 0x40, "get isa pnp configuration structure"
@@ -434,19 +400,22 @@ static int pnp_bios_apm_id_table(char *table, u16 *size)
434static int __pnp_bios_isapnp_config(struct pnp_isa_config_struc *data) 400static int __pnp_bios_isapnp_config(struct pnp_isa_config_struc *data)
435{ 401{
436 u16 status; 402 u16 status;
403
437 if (!pnp_bios_present()) 404 if (!pnp_bios_present())
438 return PNP_FUNCTION_NOT_SUPPORTED; 405 return PNP_FUNCTION_NOT_SUPPORTED;
439 status = call_pnp_bios(PNP_GET_PNP_ISA_CONFIG_STRUC, 0, PNP_TS1, PNP_DS, 0, 0, 0, 0, 406 status = call_pnp_bios(PNP_GET_PNP_ISA_CONFIG_STRUC, 0, PNP_TS1, PNP_DS,
440 data, sizeof(struct pnp_isa_config_struc), NULL, 0); 407 0, 0, 0, 0, data,
408 sizeof(struct pnp_isa_config_struc), NULL, 0);
441 return status; 409 return status;
442} 410}
443 411
444int pnp_bios_isapnp_config(struct pnp_isa_config_struc *data) 412int pnp_bios_isapnp_config(struct pnp_isa_config_struc *data)
445{ 413{
446 int status; 414 int status;
447 status = __pnp_bios_isapnp_config( data ); 415
448 if ( status ) 416 status = __pnp_bios_isapnp_config(data);
449 pnpbios_print_status( "isapnp_config", status ); 417 if (status)
418 pnpbios_print_status("isapnp_config", status);
450 return status; 419 return status;
451} 420}
452 421
@@ -456,19 +425,22 @@ int pnp_bios_isapnp_config(struct pnp_isa_config_struc *data)
456static int __pnp_bios_escd_info(struct escd_info_struc *data) 425static int __pnp_bios_escd_info(struct escd_info_struc *data)
457{ 426{
458 u16 status; 427 u16 status;
428
459 if (!pnp_bios_present()) 429 if (!pnp_bios_present())
460 return ESCD_FUNCTION_NOT_SUPPORTED; 430 return ESCD_FUNCTION_NOT_SUPPORTED;
461 status = call_pnp_bios(PNP_GET_ESCD_INFO, 0, PNP_TS1, 2, PNP_TS1, 4, PNP_TS1, PNP_DS, 431 status = call_pnp_bios(PNP_GET_ESCD_INFO, 0, PNP_TS1, 2, PNP_TS1, 4,
462 data, sizeof(struct escd_info_struc), NULL, 0); 432 PNP_TS1, PNP_DS, data,
433 sizeof(struct escd_info_struc), NULL, 0);
463 return status; 434 return status;
464} 435}
465 436
466int pnp_bios_escd_info(struct escd_info_struc *data) 437int pnp_bios_escd_info(struct escd_info_struc *data)
467{ 438{
468 int status; 439 int status;
469 status = __pnp_bios_escd_info( data ); 440
470 if ( status ) 441 status = __pnp_bios_escd_info(data);
471 pnpbios_print_status( "escd_info", status ); 442 if (status)
443 pnpbios_print_status("escd_info", status);
472 return status; 444 return status;
473} 445}
474 446
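
The ESCD calls pair naturally: function 0x41 above reports size and NVRAM base, and function 0x42 (pnp_bios_read_escd, next hunk) copies the blob out. A hypothetical usage sketch, assuming escd_size and nv_storage_base are the struct escd_info_struc fields the BIOS fills in:

static char *sketch_fetch_escd(unsigned int *sizep)
{
	struct escd_info_struc info;
	char *data;

	if (pnp_bios_escd_info(&info))
		return NULL;
	if (!info.escd_size)
		return NULL;	/* no ESCD on this machine */

	data = kmalloc(info.escd_size, GFP_KERNEL);
	if (!data)
		return NULL;

	/* The BIOS copies the ESCD out of NVRAM through the 64 KiB
	 * transfer segments set up by call_pnp_bios. */
	if (pnp_bios_read_escd(data, info.nv_storage_base)) {
		kfree(data);
		return NULL;
	}
	*sizep = info.escd_size;
	return data;
}
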
@@ -479,57 +451,42 @@ int pnp_bios_escd_info(struct escd_info_struc *data)
479static int __pnp_bios_read_escd(char *data, u32 nvram_base) 451static int __pnp_bios_read_escd(char *data, u32 nvram_base)
480{ 452{
481 u16 status; 453 u16 status;
454
482 if (!pnp_bios_present()) 455 if (!pnp_bios_present())
483 return ESCD_FUNCTION_NOT_SUPPORTED; 456 return ESCD_FUNCTION_NOT_SUPPORTED;
484 status = call_pnp_bios(PNP_READ_ESCD, 0, PNP_TS1, PNP_TS2, PNP_DS, 0, 0, 0, 457 status = call_pnp_bios(PNP_READ_ESCD, 0, PNP_TS1, PNP_TS2, PNP_DS, 0, 0,
485 data, 65536, __va(nvram_base), 65536); 458 0, data, 65536, __va(nvram_base), 65536);
486 return status; 459 return status;
487} 460}
488 461
489int pnp_bios_read_escd(char *data, u32 nvram_base) 462int pnp_bios_read_escd(char *data, u32 nvram_base)
490{ 463{
491 int status; 464 int status;
492 status = __pnp_bios_read_escd( data, nvram_base );
493 if ( status )
494 pnpbios_print_status( "read_escd", status );
495 return status;
496}
497 465
498#if needed 466 status = __pnp_bios_read_escd(data, nvram_base);
499/* 467 if (status)
500 * Call PnP BIOS function 0x43, "write ESCD" 468 pnpbios_print_status("read_escd", status);
501 */
502static int pnp_bios_write_escd(char *data, u32 nvram_base)
503{
504 u16 status;
505 if (!pnp_bios_present())
506 return ESCD_FUNCTION_NOT_SUPPORTED;
507 status = call_pnp_bios(PNP_WRITE_ESCD, 0, PNP_TS1, PNP_TS2, PNP_DS, 0, 0, 0,
508 data, 65536, __va(nvram_base), 65536);
509 return status; 469 return status;
510} 470}
511#endif
512
513
514/*
515 * Initialization
516 */
517 471
518void pnpbios_calls_init(union pnp_bios_install_struct *header) 472void pnpbios_calls_init(union pnp_bios_install_struct *header)
519{ 473{
520 int i; 474 int i;
475
521 spin_lock_init(&pnp_bios_lock); 476 spin_lock_init(&pnp_bios_lock);
522 pnp_bios_callpoint.offset = header->fields.pm16offset; 477 pnp_bios_callpoint.offset = header->fields.pm16offset;
523 pnp_bios_callpoint.segment = PNP_CS16; 478 pnp_bios_callpoint.segment = PNP_CS16;
524 479
525 set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); 480 set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
526 _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); 481 _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
527 for (i = 0; i < NR_CPUS; i++) { 482 for (i = 0; i < NR_CPUS; i++) {
528 struct desc_struct *gdt = get_cpu_gdt_table(i); 483 struct desc_struct *gdt = get_cpu_gdt_table(i);
529 if (!gdt) 484 if (!gdt)
530 continue; 485 continue;
531 set_base(gdt[GDT_ENTRY_PNPBIOS_CS32], &pnp_bios_callfunc); 486 set_base(gdt[GDT_ENTRY_PNPBIOS_CS32], &pnp_bios_callfunc);
532 set_base(gdt[GDT_ENTRY_PNPBIOS_CS16], __va(header->fields.pm16cseg)); 487 set_base(gdt[GDT_ENTRY_PNPBIOS_CS16],
533 set_base(gdt[GDT_ENTRY_PNPBIOS_DS], __va(header->fields.pm16dseg)); 488 __va(header->fields.pm16cseg));
534 } 489 set_base(gdt[GDT_ENTRY_PNPBIOS_DS],
490 __va(header->fields.pm16dseg));
491 }
535} 492}
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index ed112ee16012..3692a099b45f 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -32,7 +32,7 @@
32 * along with this program; if not, write to the Free Software 32 * along with this program; if not, write to the Free Software
33 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 33 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
34 */ 34 */
35 35
36/* Change Log 36/* Change Log
37 * 37 *
38 * Adam Belay - <ambx1@neo.rr.com> - March 16, 2003 38 * Adam Belay - <ambx1@neo.rr.com> - March 16, 2003
@@ -71,14 +71,13 @@
71 71
72#include "pnpbios.h" 72#include "pnpbios.h"
73 73
74
75/* 74/*
76 * 75 *
77 * PnP BIOS INTERFACE 76 * PnP BIOS INTERFACE
78 * 77 *
79 */ 78 */
80 79
81static union pnp_bios_install_struct * pnp_bios_install = NULL; 80static union pnp_bios_install_struct *pnp_bios_install = NULL;
82 81
83int pnp_bios_present(void) 82int pnp_bios_present(void)
84{ 83{
@@ -101,36 +100,35 @@ static struct completion unload_sem;
101/* 100/*
102 * (Much of this belongs in a shared routine somewhere) 101 * (Much of this belongs in a shared routine somewhere)
103 */ 102 */
104
105static int pnp_dock_event(int dock, struct pnp_docking_station_info *info) 103static int pnp_dock_event(int dock, struct pnp_docking_station_info *info)
106{ 104{
107 char *argv [3], **envp, *buf, *scratch; 105 char *argv[3], **envp, *buf, *scratch;
108 int i = 0, value; 106 int i = 0, value;
109 107
110 if (!current->fs->root) { 108 if (!current->fs->root)
111 return -EAGAIN; 109 return -EAGAIN;
112 } 110 if (!(envp = kcalloc(20, sizeof(char *), GFP_KERNEL)))
113 if (!(envp = kcalloc(20, sizeof (char *), GFP_KERNEL))) {
114 return -ENOMEM; 111 return -ENOMEM;
115 }
116 if (!(buf = kzalloc(256, GFP_KERNEL))) { 112 if (!(buf = kzalloc(256, GFP_KERNEL))) {
117 kfree (envp); 113 kfree(envp);
118 return -ENOMEM; 114 return -ENOMEM;
119 } 115 }
120 116
121 /* FIXME: if there are actual users of this, it should be integrated into 117 /* FIXME: if there are actual users of this, it should be
122 * the driver core and use the usual infrastructure like sysfs and uevents */ 118 * integrated into the driver core and use the usual infrastructure
123 argv [0] = "/sbin/pnpbios"; 119 * like sysfs and uevents
124 argv [1] = "dock"; 120 */
125 argv [2] = NULL; 121 argv[0] = "/sbin/pnpbios";
122 argv[1] = "dock";
123 argv[2] = NULL;
126 124
127 /* minimal command environment */ 125 /* minimal command environment */
128 envp [i++] = "HOME=/"; 126 envp[i++] = "HOME=/";
129 envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; 127 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
130 128
131#ifdef DEBUG 129#ifdef DEBUG
132 /* hint that policy agent should enter no-stdout debug mode */ 130 /* hint that policy agent should enter no-stdout debug mode */
133 envp [i++] = "DEBUG=kernel"; 131 envp[i++] = "DEBUG=kernel";
134#endif 132#endif
135 /* extensible set of named bus-specific parameters, 133 /* extensible set of named bus-specific parameters,
136 * supporting multiple driver selection algorithms. 134 * supporting multiple driver selection algorithms.
@@ -138,33 +136,33 @@ static int pnp_dock_event(int dock, struct pnp_docking_station_info *info)
138 scratch = buf; 136 scratch = buf;
139 137
140 /* action: add, remove */ 138 /* action: add, remove */
141 envp [i++] = scratch; 139 envp[i++] = scratch;
142 scratch += sprintf (scratch, "ACTION=%s", dock?"add":"remove") + 1; 140 scratch += sprintf(scratch, "ACTION=%s", dock ? "add" : "remove") + 1;
143 141
144 /* Report the ident for the dock */ 142 /* Report the ident for the dock */
145 envp [i++] = scratch; 143 envp[i++] = scratch;
146 scratch += sprintf (scratch, "DOCK=%x/%x/%x", 144 scratch += sprintf(scratch, "DOCK=%x/%x/%x",
147 info->location_id, info->serial, info->capabilities); 145 info->location_id, info->serial, info->capabilities);
148 envp[i] = NULL; 146 envp[i] = NULL;
149 147
 150 value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC); 148 value = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
151 kfree (buf); 149 kfree(buf);
152 kfree (envp); 150 kfree(envp);
153 return 0; 151 return 0;
154} 152}
155 153
156/* 154/*
157 * Poll the PnP docking at regular intervals 155 * Poll the PnP docking at regular intervals
158 */ 156 */
159static int pnp_dock_thread(void * unused) 157static int pnp_dock_thread(void *unused)
160{ 158{
161 static struct pnp_docking_station_info now; 159 static struct pnp_docking_station_info now;
162 int docked = -1, d = 0; 160 int docked = -1, d = 0;
161
163 set_freezable(); 162 set_freezable();
164 while (!unloading) 163 while (!unloading) {
165 {
166 int status; 164 int status;
167 165
168 /* 166 /*
169 * Poll every 2 seconds 167 * Poll every 2 seconds
170 */ 168 */
@@ -175,30 +173,29 @@ static int pnp_dock_thread(void * unused)
175 173
176 status = pnp_bios_dock_station_info(&now); 174 status = pnp_bios_dock_station_info(&now);
177 175
178 switch(status) 176 switch (status) {
179 {
180 /* 177 /*
181 * No dock to manage 178 * No dock to manage
182 */ 179 */
183 case PNP_FUNCTION_NOT_SUPPORTED: 180 case PNP_FUNCTION_NOT_SUPPORTED:
184 complete_and_exit(&unload_sem, 0); 181 complete_and_exit(&unload_sem, 0);
185 case PNP_SYSTEM_NOT_DOCKED: 182 case PNP_SYSTEM_NOT_DOCKED:
186 d = 0; 183 d = 0;
187 break; 184 break;
188 case PNP_SUCCESS: 185 case PNP_SUCCESS:
189 d = 1; 186 d = 1;
190 break; 187 break;
191 default: 188 default:
192 pnpbios_print_status( "pnp_dock_thread", status ); 189 pnpbios_print_status("pnp_dock_thread", status);
193 continue; 190 continue;
194 } 191 }
195 if(d != docked) 192 if (d != docked) {
196 { 193 if (pnp_dock_event(d, &now) == 0) {
197 if(pnp_dock_event(d, &now)==0)
198 {
199 docked = d; 194 docked = d;
200#if 0 195#if 0
201 printk(KERN_INFO "PnPBIOS: Docking station %stached\n", docked?"at":"de"); 196 printk(KERN_INFO
197 "PnPBIOS: Docking station %stached\n",
198 docked ? "at" : "de");
202#endif 199#endif
203 } 200 }
204 } 201 }
@@ -206,21 +203,21 @@ static int pnp_dock_thread(void * unused)
206 complete_and_exit(&unload_sem, 0); 203 complete_and_exit(&unload_sem, 0);
207} 204}
208 205
209#endif /* CONFIG_HOTPLUG */ 206#endif /* CONFIG_HOTPLUG */
210 207
211static int pnpbios_get_resources(struct pnp_dev * dev, struct pnp_resource_table * res) 208static int pnpbios_get_resources(struct pnp_dev *dev,
209 struct pnp_resource_table *res)
212{ 210{
213 u8 nodenum = dev->number; 211 u8 nodenum = dev->number;
214 struct pnp_bios_node * node; 212 struct pnp_bios_node *node;
215 213
216 /* just in case */ 214 if (!pnpbios_is_dynamic(dev))
217 if(!pnpbios_is_dynamic(dev))
218 return -EPERM; 215 return -EPERM;
219 216
220 node = kzalloc(node_info.max_node_size, GFP_KERNEL); 217 node = kzalloc(node_info.max_node_size, GFP_KERNEL);
221 if (!node) 218 if (!node)
222 return -1; 219 return -1;
223 if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_DYNAMIC, node)) { 220 if (pnp_bios_get_dev_node(&nodenum, (char)PNPMODE_DYNAMIC, node)) {
224 kfree(node); 221 kfree(node);
225 return -ENODEV; 222 return -ENODEV;
226 } 223 }
@@ -230,24 +227,24 @@ static int pnpbios_get_resources(struct pnp_dev * dev, struct pnp_resource_table
230 return 0; 227 return 0;
231} 228}
232 229
233static int pnpbios_set_resources(struct pnp_dev * dev, struct pnp_resource_table * res) 230static int pnpbios_set_resources(struct pnp_dev *dev,
231 struct pnp_resource_table *res)
234{ 232{
235 u8 nodenum = dev->number; 233 u8 nodenum = dev->number;
236 struct pnp_bios_node * node; 234 struct pnp_bios_node *node;
237 int ret; 235 int ret;
238 236
239 /* just in case */
240 if (!pnpbios_is_dynamic(dev)) 237 if (!pnpbios_is_dynamic(dev))
241 return -EPERM; 238 return -EPERM;
242 239
243 node = kzalloc(node_info.max_node_size, GFP_KERNEL); 240 node = kzalloc(node_info.max_node_size, GFP_KERNEL);
244 if (!node) 241 if (!node)
245 return -1; 242 return -1;
246 if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_DYNAMIC, node)) { 243 if (pnp_bios_get_dev_node(&nodenum, (char)PNPMODE_DYNAMIC, node)) {
247 kfree(node); 244 kfree(node);
248 return -ENODEV; 245 return -ENODEV;
249 } 246 }
250 if(pnpbios_write_resources_to_node(res, node)<0) { 247 if (pnpbios_write_resources_to_node(res, node) < 0) {
251 kfree(node); 248 kfree(node);
252 return -1; 249 return -1;
253 } 250 }
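
The get/set pair above is a read-modify-write through the BIOS: fetch the node image into a max_node_size buffer, rewrite its resource data stream in place, then hand the whole node back. A shape sketch under that assumption, with hypothetical bios_get_node()/bios_set_node() helpers standing in for the 16-bit BIOS calls:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NODE_SIZE 64

static unsigned char nvram[NODE_SIZE];   /* stands in for BIOS-held state */

static int bios_get_node(unsigned char *node)        /* hypothetical */
{
    memcpy(node, nvram, NODE_SIZE);
    return 0;
}

static int bios_set_node(const unsigned char *node)  /* hypothetical */
{
    memcpy(nvram, node, NODE_SIZE);
    return 0;
}

int main(void)
{
    unsigned char *node = calloc(1, NODE_SIZE);

    if (!node)
        return -1;
    if (bios_get_node(node)) {
        free(node);
        return -1;
    }
    node[0] = 0x22;                      /* rewrite the resource stream here */
    if (bios_set_node(node)) {
        free(node);
        return -1;
    }
    printf("node updated, first byte 0x%02x\n", nvram[0]);
    free(node);
    return 0;
}
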
@@ -258,18 +255,19 @@ static int pnpbios_set_resources(struct pnp_dev * dev, struct pnp_resource_table
258 return ret; 255 return ret;
259} 256}
260 257
261static void pnpbios_zero_data_stream(struct pnp_bios_node * node) 258static void pnpbios_zero_data_stream(struct pnp_bios_node *node)
262{ 259{
263 unsigned char * p = (char *)node->data; 260 unsigned char *p = (char *)node->data;
264 unsigned char * end = (char *)(node->data + node->size); 261 unsigned char *end = (char *)(node->data + node->size);
265 unsigned int len; 262 unsigned int len;
266 int i; 263 int i;
264
267 while ((char *)p < (char *)end) { 265 while ((char *)p < (char *)end) {
268 if(p[0] & 0x80) { /* large tag */ 266 if (p[0] & 0x80) { /* large tag */
269 len = (p[2] << 8) | p[1]; 267 len = (p[2] << 8) | p[1];
270 p += 3; 268 p += 3;
271 } else { 269 } else {
272 if (((p[0]>>3) & 0x0f) == 0x0f) 270 if (((p[0] >> 3) & 0x0f) == 0x0f)
273 return; 271 return;
274 len = p[0] & 0x07; 272 len = p[0] & 0x07;
275 p += 1; 273 p += 1;
@@ -278,24 +276,24 @@ static void pnpbios_zero_data_stream(struct pnp_bios_node * node)
278 p[i] = 0; 276 p[i] = 0;
279 p += len; 277 p += len;
280 } 278 }
281 printk(KERN_ERR "PnPBIOS: Resource structure did not contain an end tag.\n"); 279 printk(KERN_ERR
280 "PnPBIOS: Resource structure did not contain an end tag.\n");
282} 281}
283 282
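
The zeroing helper above walks the same tag grammar every parser in this driver uses: if bit 7 of the first byte is set it is a large tag with a 16-bit little-endian length in the next two bytes, otherwise bits 6..3 carry the tag and bits 2..0 the length, with small tag 0x0f ending the stream. A self-contained sketch of that walk (the sample stream is made up: one small IRQ tag, then an end tag with its checksum byte):

#include <stdio.h>

static void walk(const unsigned char *p, const unsigned char *end)
{
    while (p < end) {
        unsigned int len, tag;

        if (p[0] & 0x80) {               /* large tag: 16-bit LE length */
            tag = p[0];
            len = (p[2] << 8) | p[1];
            p += 3;
        } else {                         /* small tag: tag in bits 6..3 */
            tag = (p[0] >> 3) & 0x0f;
            len = p[0] & 0x07;
            p += 1;
            if (tag == 0x0f)             /* SMALL_TAG_END */
                return;
        }
        printf("tag 0x%02x, %u data byte(s)\n", tag, len);
        p += len;                        /* skip the tag body */
    }
    printf("no end tag found\n");
}

int main(void)
{
    /* small IRQ tag (type 4, len 2), mask 0x0020, then end tag + checksum */
    const unsigned char stream[] = { 0x22, 0x20, 0x00, 0x79, 0x00 };

    walk(stream, stream + sizeof(stream));
    return 0;
}
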
284static int pnpbios_disable_resources(struct pnp_dev *dev) 283static int pnpbios_disable_resources(struct pnp_dev *dev)
285{ 284{
286 struct pnp_bios_node * node; 285 struct pnp_bios_node *node;
287 u8 nodenum = dev->number; 286 u8 nodenum = dev->number;
288 int ret; 287 int ret;
289 288
290 /* just in case */ 289 if (dev->flags & PNPBIOS_NO_DISABLE || !pnpbios_is_dynamic(dev))
291 if(dev->flags & PNPBIOS_NO_DISABLE || !pnpbios_is_dynamic(dev))
292 return -EPERM; 290 return -EPERM;
293 291
294 node = kzalloc(node_info.max_node_size, GFP_KERNEL); 292 node = kzalloc(node_info.max_node_size, GFP_KERNEL);
295 if (!node) 293 if (!node)
296 return -ENOMEM; 294 return -ENOMEM;
297 295
298 if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_DYNAMIC, node)) { 296 if (pnp_bios_get_dev_node(&nodenum, (char)PNPMODE_DYNAMIC, node)) {
299 kfree(node); 297 kfree(node);
300 return -ENODEV; 298 return -ENODEV;
301 } 299 }
@@ -311,22 +309,22 @@ static int pnpbios_disable_resources(struct pnp_dev *dev)
311/* PnP Layer support */ 309/* PnP Layer support */
312 310
313struct pnp_protocol pnpbios_protocol = { 311struct pnp_protocol pnpbios_protocol = {
314 .name = "Plug and Play BIOS", 312 .name = "Plug and Play BIOS",
315 .get = pnpbios_get_resources, 313 .get = pnpbios_get_resources,
316 .set = pnpbios_set_resources, 314 .set = pnpbios_set_resources,
317 .disable = pnpbios_disable_resources, 315 .disable = pnpbios_disable_resources,
318}; 316};
319 317
320static int insert_device(struct pnp_dev *dev, struct pnp_bios_node * node) 318static int insert_device(struct pnp_dev *dev, struct pnp_bios_node *node)
321{ 319{
322 struct list_head * pos; 320 struct list_head *pos;
323 struct pnp_dev * pnp_dev; 321 struct pnp_dev *pnp_dev;
324 struct pnp_id *dev_id; 322 struct pnp_id *dev_id;
325 char id[8]; 323 char id[8];
326 324
327 /* check if the device is already added */ 325 /* check if the device is already added */
328 dev->number = node->handle; 326 dev->number = node->handle;
329 list_for_each (pos, &pnpbios_protocol.devices){ 327 list_for_each(pos, &pnpbios_protocol.devices) {
330 pnp_dev = list_entry(pos, struct pnp_dev, protocol_list); 328 pnp_dev = list_entry(pos, struct pnp_dev, protocol_list);
331 if (dev->number == pnp_dev->number) 329 if (dev->number == pnp_dev->number)
332 return -1; 330 return -1;
@@ -336,8 +334,8 @@ static int insert_device(struct pnp_dev *dev, struct pnp_bios_node * node)
336 dev_id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL); 334 dev_id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
337 if (!dev_id) 335 if (!dev_id)
338 return -1; 336 return -1;
339 pnpid32_to_pnpid(node->eisa_id,id); 337 pnpid32_to_pnpid(node->eisa_id, id);
340 memcpy(dev_id->id,id,7); 338 memcpy(dev_id->id, id, 7);
341 pnp_add_id(dev_id, dev); 339 pnp_add_id(dev_id, dev);
342 pnpbios_parse_data_stream(dev, node); 340 pnpbios_parse_data_stream(dev, node);
343 dev->active = pnp_is_active(dev); 341 dev->active = pnp_is_active(dev);
@@ -375,35 +373,41 @@ static void __init build_devlist(void)
375 if (!node) 373 if (!node)
376 return; 374 return;
377 375
378 for(nodenum=0; nodenum<0xff; ) { 376 for (nodenum = 0; nodenum < 0xff;) {
379 u8 thisnodenum = nodenum; 377 u8 thisnodenum = nodenum;
380 /* eventually we will want to use PNPMODE_STATIC here but for now 378 /* eventually we will want to use PNPMODE_STATIC here but for now
381 * dynamic will help us catch buggy bioses to add to the blacklist. 379 * dynamic will help us catch buggy bioses to add to the blacklist.
382 */ 380 */
383 if (!pnpbios_dont_use_current_config) { 381 if (!pnpbios_dont_use_current_config) {
384 if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_DYNAMIC, node)) 382 if (pnp_bios_get_dev_node
383 (&nodenum, (char)PNPMODE_DYNAMIC, node))
385 break; 384 break;
386 } else { 385 } else {
387 if (pnp_bios_get_dev_node(&nodenum, (char )PNPMODE_STATIC, node)) 386 if (pnp_bios_get_dev_node
387 (&nodenum, (char)PNPMODE_STATIC, node))
388 break; 388 break;
389 } 389 }
390 nodes_got++; 390 nodes_got++;
391 dev = kzalloc(sizeof (struct pnp_dev), GFP_KERNEL); 391 dev = kzalloc(sizeof(struct pnp_dev), GFP_KERNEL);
392 if (!dev) 392 if (!dev)
393 break; 393 break;
394 if(insert_device(dev,node)<0) 394 if (insert_device(dev, node) < 0)
395 kfree(dev); 395 kfree(dev);
396 else 396 else
397 devs++; 397 devs++;
398 if (nodenum <= thisnodenum) { 398 if (nodenum <= thisnodenum) {
399 printk(KERN_ERR "PnPBIOS: build_devlist: Node number 0x%x is out of sequence following node 0x%x. Aborting.\n", (unsigned int)nodenum, (unsigned int)thisnodenum); 399 printk(KERN_ERR
400 "PnPBIOS: build_devlist: Node number 0x%x is out of sequence following node 0x%x. Aborting.\n",
401 (unsigned int)nodenum,
402 (unsigned int)thisnodenum);
400 break; 403 break;
401 } 404 }
402 } 405 }
403 kfree(node); 406 kfree(node);
404 407
405 printk(KERN_INFO "PnPBIOS: %i node%s reported by PnP BIOS; %i recorded by driver\n", 408 printk(KERN_INFO
406 nodes_got, nodes_got != 1 ? "s" : "", devs); 409 "PnPBIOS: %i node%s reported by PnP BIOS; %i recorded by driver\n",
410 nodes_got, nodes_got != 1 ? "s" : "", devs);
407} 411}
408 412
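
The "out of sequence" check in build_devlist() is an infinite-loop guard: pnp_bios_get_dev_node() is expected to advance the cursor past the node it returns, and firmware that fails to do so would otherwise spin forever. A toy sketch of that cursor-style enumeration, with get_node() a hypothetical stand-in and a made-up handle chain:

#include <stdio.h>

static int get_node(unsigned char *cursor)    /* hypothetical stand-in */
{
    static const unsigned char next[256] = { [0] = 3, [3] = 7, [7] = 0xff };

    *cursor = next[*cursor];
    return 0;
}

int main(void)
{
    unsigned char nodenum;

    for (nodenum = 0; nodenum < 0xff;) {
        unsigned char thisnodenum = nodenum;

        if (get_node(&nodenum))
            break;
        printf("got node 0x%02x, cursor now 0x%02x\n", thisnodenum, nodenum);
        if (nodenum <= thisnodenum) {    /* firmware failed to advance */
            printf("out of sequence, aborting\n");
            break;
        }
    }
    return 0;
}
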
409/* 413/*
@@ -412,8 +416,8 @@ static void __init build_devlist(void)
412 * 416 *
413 */ 417 */
414 418
415static int pnpbios_disabled; /* = 0 */ 419static int pnpbios_disabled;
416int pnpbios_dont_use_current_config; /* = 0 */ 420int pnpbios_dont_use_current_config;
417 421
418#ifndef MODULE 422#ifndef MODULE
419static int __init pnpbios_setup(char *str) 423static int __init pnpbios_setup(char *str)
@@ -422,9 +426,9 @@ static int __init pnpbios_setup(char *str)
422 426
423 while ((str != NULL) && (*str != '\0')) { 427 while ((str != NULL) && (*str != '\0')) {
424 if (strncmp(str, "off", 3) == 0) 428 if (strncmp(str, "off", 3) == 0)
425 pnpbios_disabled=1; 429 pnpbios_disabled = 1;
426 if (strncmp(str, "on", 2) == 0) 430 if (strncmp(str, "on", 2) == 0)
427 pnpbios_disabled=0; 431 pnpbios_disabled = 0;
428 invert = (strncmp(str, "no-", 3) == 0); 432 invert = (strncmp(str, "no-", 3) == 0);
429 if (invert) 433 if (invert)
430 str += 3; 434 str += 3;
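
The setup function above is the usual comma-separated boot-option scanner: "off"/"on" flip the disable flag, and a "no-" prefix records inversion for whichever option follows. A rough userspace sketch of that flow; "curr" is a hypothetical option name here, standing in for the driver's real option handling, which falls outside this hunk:

#include <stdio.h>
#include <string.h>

static int parse(const char *str, int *use_current)
{
    int disabled = 0;

    while (str && *str) {
        int invert;

        if (strncmp(str, "off", 3) == 0)
            disabled = 1;
        if (strncmp(str, "on", 2) == 0)
            disabled = 0;
        invert = (strncmp(str, "no-", 3) == 0);
        if (invert)
            str += 3;
        if (strncmp(str, "curr", 4) == 0)    /* hypothetical option */
            *use_current = !invert;
        str = strchr(str, ',');              /* next comma-separated token */
        if (str)
            str++;
    }
    return disabled;
}

int main(void)
{
    int use_current = 1;
    int disabled = parse("no-curr,on", &use_current);

    printf("disabled=%d use_current=%d\n", disabled, use_current);
    return 0;
}
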
@@ -453,35 +457,41 @@ static int __init pnpbios_probe_system(void)
453 printk(KERN_INFO "PnPBIOS: Scanning system for PnP BIOS support...\n"); 457 printk(KERN_INFO "PnPBIOS: Scanning system for PnP BIOS support...\n");
454 458
455 /* 459 /*
456 * Search the defined area (0xf0000-0xffff0) for a valid PnP BIOS 460 * Search the defined area (0xf0000-0xffff0) for a valid PnP BIOS
457 * structure and, if one is found, sets up the selectors and 461 * structure and, if one is found, sets up the selectors and
458 * entry points 462 * entry points
459 */ 463 */
460 for (check = (union pnp_bios_install_struct *) __va(0xf0000); 464 for (check = (union pnp_bios_install_struct *)__va(0xf0000);
461 check < (union pnp_bios_install_struct *) __va(0xffff0); 465 check < (union pnp_bios_install_struct *)__va(0xffff0);
462 check = (void *)check + 16) { 466 check = (void *)check + 16) {
463 if (check->fields.signature != PNP_SIGNATURE) 467 if (check->fields.signature != PNP_SIGNATURE)
464 continue; 468 continue;
465 printk(KERN_INFO "PnPBIOS: Found PnP BIOS installation structure at 0x%p\n", check); 469 printk(KERN_INFO
470 "PnPBIOS: Found PnP BIOS installation structure at 0x%p\n",
471 check);
466 length = check->fields.length; 472 length = check->fields.length;
467 if (!length) { 473 if (!length) {
468 printk(KERN_ERR "PnPBIOS: installation structure is invalid, skipping\n"); 474 printk(KERN_ERR
475 "PnPBIOS: installation structure is invalid, skipping\n");
469 continue; 476 continue;
470 } 477 }
471 for (sum = 0, i = 0; i < length; i++) 478 for (sum = 0, i = 0; i < length; i++)
472 sum += check->chars[i]; 479 sum += check->chars[i];
473 if (sum) { 480 if (sum) {
474 printk(KERN_ERR "PnPBIOS: installation structure is corrupted, skipping\n"); 481 printk(KERN_ERR
482 "PnPBIOS: installation structure is corrupted, skipping\n");
475 continue; 483 continue;
476 } 484 }
477 if (check->fields.version < 0x10) { 485 if (check->fields.version < 0x10) {
478 printk(KERN_WARNING "PnPBIOS: PnP BIOS version %d.%d is not supported\n", 486 printk(KERN_WARNING
487 "PnPBIOS: PnP BIOS version %d.%d is not supported\n",
479 check->fields.version >> 4, 488 check->fields.version >> 4,
480 check->fields.version & 15); 489 check->fields.version & 15);
481 continue; 490 continue;
482 } 491 }
483 printk(KERN_INFO "PnPBIOS: PnP BIOS version %d.%d, entry 0x%x:0x%x, dseg 0x%x\n", 492 printk(KERN_INFO
484 check->fields.version >> 4, check->fields.version & 15, 493 "PnPBIOS: PnP BIOS version %d.%d, entry 0x%x:0x%x, dseg 0x%x\n",
494 check->fields.version >> 4, check->fields.version & 15,
485 check->fields.pm16cseg, check->fields.pm16offset, 495 check->fields.pm16cseg, check->fields.pm16offset,
486 check->fields.pm16dseg); 496 check->fields.pm16dseg);
487 pnp_bios_install = check; 497 pnp_bios_install = check;
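
The probe loop above scans 0xf0000-0xffff0 on 16-byte boundaries for the installation structure and requires all of its bytes to sum to zero mod 256. A self-contained sketch of that validation, assuming the PnP BIOS 1.0A layout ("$PnP" at offset 0, packed BCD version at offset 4, structure length at offset 5); the main() builds a synthetic structure with a checksum fix-up byte so the test passes:

#include <stdio.h>
#include <string.h>

static int pnp_struct_ok(const unsigned char *s)
{
    unsigned char sum = 0;
    unsigned char length = s[5];
    int i;

    if (memcmp(s, "$PnP", 4) != 0 || length == 0)
        return 0;
    for (i = 0; i < length; i++)
        sum += s[i];
    return sum == 0;                     /* whole structure sums to zero */
}

int main(void)
{
    unsigned char s[33] = "$PnP";
    unsigned char sum = 0;
    int i;

    s[4] = 0x10;                         /* version 1.0 */
    s[5] = sizeof(s);
    for (i = 0; i < (int)sizeof(s) - 1; i++)
        sum += s[i];
    s[sizeof(s) - 1] = (unsigned char)(0x100 - sum);   /* checksum fix-up */
    printf("valid: %d\n", pnp_struct_ok(s));           /* prints 1 */
    return 0;
}
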
@@ -499,25 +509,25 @@ static int __init exploding_pnp_bios(struct dmi_system_id *d)
499} 509}
500 510
501static struct dmi_system_id pnpbios_dmi_table[] __initdata = { 511static struct dmi_system_id pnpbios_dmi_table[] __initdata = {
502 { /* PnPBIOS GPF on boot */ 512 { /* PnPBIOS GPF on boot */
503 .callback = exploding_pnp_bios, 513 .callback = exploding_pnp_bios,
504 .ident = "Higraded P14H", 514 .ident = "Higraded P14H",
505 .matches = { 515 .matches = {
506 DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), 516 DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
507 DMI_MATCH(DMI_BIOS_VERSION, "07.00T"), 517 DMI_MATCH(DMI_BIOS_VERSION, "07.00T"),
508 DMI_MATCH(DMI_SYS_VENDOR, "Higraded"), 518 DMI_MATCH(DMI_SYS_VENDOR, "Higraded"),
509 DMI_MATCH(DMI_PRODUCT_NAME, "P14H"), 519 DMI_MATCH(DMI_PRODUCT_NAME, "P14H"),
510 }, 520 },
511 }, 521 },
512 { /* PnPBIOS GPF on boot */ 522 { /* PnPBIOS GPF on boot */
513 .callback = exploding_pnp_bios, 523 .callback = exploding_pnp_bios,
514 .ident = "ASUS P4P800", 524 .ident = "ASUS P4P800",
515 .matches = { 525 .matches = {
516 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc."), 526 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc."),
517 DMI_MATCH(DMI_BOARD_NAME, "P4P800"), 527 DMI_MATCH(DMI_BOARD_NAME, "P4P800"),
518 }, 528 },
519 }, 529 },
520 { } 530 {}
521}; 531};
522 532
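
DMI blacklist entries like the ones above match by substring, so the P4P800 entry would also catch board variants such as a P4P800-X. A userspace sketch of how such a table is consumed, with dmi_value() a hypothetical stand-in for the machine's DMI data and the callback firing on the first entry whose match strings all hit:

#include <stdio.h>
#include <string.h>

struct match { const char *field; const char *substr; };

struct entry {
    int (*callback)(const struct entry *e);
    const char *ident;
    struct match matches[3];
};

static const char *dmi_value(const char *field)   /* hypothetical */
{
    if (strcmp(field, "board_vendor") == 0)
        return "ASUSTeK Computer Inc.";
    if (strcmp(field, "board_name") == 0)
        return "P4P800-X";
    return "";
}

static int blacklisted(const struct entry *e)
{
    printf("%s matched, disabling\n", e->ident);
    return 1;
}

int main(void)
{
    static const struct entry table[] = {
        { blacklisted, "ASUS P4P800", {
            { "board_vendor", "ASUSTeK Computer Inc." },
            { "board_name", "P4P800" },
            { NULL, NULL },
        } },
        { NULL, NULL, { { NULL, NULL } } },
    };
    const struct entry *e;

    for (e = table; e->callback; e++) {
        const struct match *m;
        int ok = 1;

        for (m = e->matches; ok && m->field; m++)
            ok = strstr(dmi_value(m->field), m->substr) != NULL;
        if (ok && e->callback(e))
            break;
    }
    return 0;
}
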
523static int __init pnpbios_init(void) 533static int __init pnpbios_init(void)
@@ -533,14 +543,13 @@ static int __init pnpbios_init(void)
533 printk(KERN_INFO "PnPBIOS: Disabled\n"); 543 printk(KERN_INFO "PnPBIOS: Disabled\n");
534 return -ENODEV; 544 return -ENODEV;
535 } 545 }
536
537#ifdef CONFIG_PNPACPI 546#ifdef CONFIG_PNPACPI
538 if (!acpi_disabled && !pnpacpi_disabled) { 547 if (!acpi_disabled && !pnpacpi_disabled) {
539 pnpbios_disabled = 1; 548 pnpbios_disabled = 1;
540 printk(KERN_INFO "PnPBIOS: Disabled by ACPI PNP\n"); 549 printk(KERN_INFO "PnPBIOS: Disabled by ACPI PNP\n");
541 return -ENODEV; 550 return -ENODEV;
542 } 551 }
543#endif /* CONFIG_ACPI */ 552#endif /* CONFIG_ACPI */
544 553
545 /* scan the system for pnpbios support */ 554 /* scan the system for pnpbios support */
546 if (!pnpbios_probe_system()) 555 if (!pnpbios_probe_system())
@@ -552,14 +561,16 @@ static int __init pnpbios_init(void)
552 /* read the node info */ 561 /* read the node info */
553 ret = pnp_bios_dev_node_info(&node_info); 562 ret = pnp_bios_dev_node_info(&node_info);
554 if (ret) { 563 if (ret) {
555 printk(KERN_ERR "PnPBIOS: Unable to get node info. Aborting.\n"); 564 printk(KERN_ERR
565 "PnPBIOS: Unable to get node info. Aborting.\n");
556 return ret; 566 return ret;
557 } 567 }
558 568
559 /* register with the pnp layer */ 569 /* register with the pnp layer */
560 ret = pnp_register_protocol(&pnpbios_protocol); 570 ret = pnp_register_protocol(&pnpbios_protocol);
561 if (ret) { 571 if (ret) {
562 printk(KERN_ERR "PnPBIOS: Unable to register driver. Aborting.\n"); 572 printk(KERN_ERR
573 "PnPBIOS: Unable to register driver. Aborting.\n");
563 return ret; 574 return ret;
564 } 575 }
565 576
diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c
index 8027073f7919..9c8c07701b65 100644
--- a/drivers/pnp/pnpbios/proc.c
+++ b/drivers/pnp/pnpbios/proc.c
@@ -18,9 +18,6 @@
18 * The other files are human-readable. 18 * The other files are human-readable.
19 */ 19 */
20 20
21//#include <pcmcia/config.h>
22//#include <pcmcia/k_compat.h>
23
24#include <linux/module.h> 21#include <linux/module.h>
25#include <linux/kernel.h> 22#include <linux/kernel.h>
26#include <linux/slab.h> 23#include <linux/slab.h>
@@ -37,42 +34,37 @@ static struct proc_dir_entry *proc_pnp = NULL;
37static struct proc_dir_entry *proc_pnp_boot = NULL; 34static struct proc_dir_entry *proc_pnp_boot = NULL;
38 35
39static int proc_read_pnpconfig(char *buf, char **start, off_t pos, 36static int proc_read_pnpconfig(char *buf, char **start, off_t pos,
40 int count, int *eof, void *data) 37 int count, int *eof, void *data)
41{ 38{
42 struct pnp_isa_config_struc pnps; 39 struct pnp_isa_config_struc pnps;
43 40
44 if (pnp_bios_isapnp_config(&pnps)) 41 if (pnp_bios_isapnp_config(&pnps))
45 return -EIO; 42 return -EIO;
46 return snprintf(buf, count, 43 return snprintf(buf, count,
47 "structure_revision %d\n" 44 "structure_revision %d\n"
48 "number_of_CSNs %d\n" 45 "number_of_CSNs %d\n"
49 "ISA_read_data_port 0x%x\n", 46 "ISA_read_data_port 0x%x\n",
50 pnps.revision, 47 pnps.revision, pnps.no_csns, pnps.isa_rd_data_port);
51 pnps.no_csns,
52 pnps.isa_rd_data_port
53 );
54} 48}
55 49
56static int proc_read_escdinfo(char *buf, char **start, off_t pos, 50static int proc_read_escdinfo(char *buf, char **start, off_t pos,
57 int count, int *eof, void *data) 51 int count, int *eof, void *data)
58{ 52{
59 struct escd_info_struc escd; 53 struct escd_info_struc escd;
60 54
61 if (pnp_bios_escd_info(&escd)) 55 if (pnp_bios_escd_info(&escd))
62 return -EIO; 56 return -EIO;
63 return snprintf(buf, count, 57 return snprintf(buf, count,
64 "min_ESCD_write_size %d\n" 58 "min_ESCD_write_size %d\n"
65 "ESCD_size %d\n" 59 "ESCD_size %d\n"
66 "NVRAM_base 0x%x\n", 60 "NVRAM_base 0x%x\n",
67 escd.min_escd_write_size, 61 escd.min_escd_write_size,
68 escd.escd_size, 62 escd.escd_size, escd.nv_storage_base);
69 escd.nv_storage_base
70 );
71} 63}
72 64
73#define MAX_SANE_ESCD_SIZE (32*1024) 65#define MAX_SANE_ESCD_SIZE (32*1024)
74static int proc_read_escd(char *buf, char **start, off_t pos, 66static int proc_read_escd(char *buf, char **start, off_t pos,
75 int count, int *eof, void *data) 67 int count, int *eof, void *data)
76{ 68{
77 struct escd_info_struc escd; 69 struct escd_info_struc escd;
78 char *tmpbuf; 70 char *tmpbuf;
@@ -83,30 +75,36 @@ static int proc_read_escd(char *buf, char **start, off_t pos,
83 75
84 /* sanity check */ 76 /* sanity check */
85 if (escd.escd_size > MAX_SANE_ESCD_SIZE) { 77 if (escd.escd_size > MAX_SANE_ESCD_SIZE) {
86 printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size reported by BIOS escd_info call is too great\n"); 78 printk(KERN_ERR
79 "PnPBIOS: proc_read_escd: ESCD size reported by BIOS escd_info call is too great\n");
87 return -EFBIG; 80 return -EFBIG;
88 } 81 }
89 82
90 tmpbuf = kzalloc(escd.escd_size, GFP_KERNEL); 83 tmpbuf = kzalloc(escd.escd_size, GFP_KERNEL);
91 if (!tmpbuf) return -ENOMEM; 84 if (!tmpbuf)
85 return -ENOMEM;
92 86
93 if (pnp_bios_read_escd(tmpbuf, escd.nv_storage_base)) { 87 if (pnp_bios_read_escd(tmpbuf, escd.nv_storage_base)) {
94 kfree(tmpbuf); 88 kfree(tmpbuf);
95 return -EIO; 89 return -EIO;
96 } 90 }
97 91
98 escd_size = (unsigned char)(tmpbuf[0]) + (unsigned char)(tmpbuf[1])*256; 92 escd_size =
93 (unsigned char)(tmpbuf[0]) + (unsigned char)(tmpbuf[1]) * 256;
99 94
100 /* sanity check */ 95 /* sanity check */
101 if (escd_size > MAX_SANE_ESCD_SIZE) { 96 if (escd_size > MAX_SANE_ESCD_SIZE) {
102 printk(KERN_ERR "PnPBIOS: proc_read_escd: ESCD size reported by BIOS read_escd call is too great\n"); 97 printk(KERN_ERR
98 "PnPBIOS: proc_read_escd: ESCD size reported by BIOS read_escd call is too great\n");
103 return -EFBIG; 99 return -EFBIG;
104 } 100 }
105 101
106 escd_left_to_read = escd_size - pos; 102 escd_left_to_read = escd_size - pos;
107 if (escd_left_to_read < 0) escd_left_to_read = 0; 103 if (escd_left_to_read < 0)
108 if (escd_left_to_read == 0) *eof = 1; 104 escd_left_to_read = 0;
109 n = min(count,escd_left_to_read); 105 if (escd_left_to_read == 0)
106 *eof = 1;
107 n = min(count, escd_left_to_read);
110 memcpy(buf, tmpbuf + pos, n); 108 memcpy(buf, tmpbuf + pos, n);
111 kfree(tmpbuf); 109 kfree(tmpbuf);
112 *start = buf; 110 *start = buf;
@@ -114,17 +112,17 @@ static int proc_read_escd(char *buf, char **start, off_t pos,
114} 112}
115 113
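
proc_read_escd() above treats the first two ESCD bytes as a little-endian total size and then serves the buffer out in windows: at most count bytes starting at pos, with EOF flagged once the data is drained. A userspace sketch of that windowing, assuming the MAX_SANE_ESCD_SIZE check has already passed; read_window() is a stand-in for the proc read path:

#include <stdio.h>
#include <string.h>

static long read_window(const unsigned char *escd, long pos, long count,
                        unsigned char *buf, int *eof)
{
    long size = escd[0] + escd[1] * 256;   /* little-endian 16-bit size */
    long left = size - pos;
    long n;

    if (left < 0)
        left = 0;
    if (left == 0)
        *eof = 1;
    n = count < left ? count : left;
    memcpy(buf, escd + pos, n);
    return n;
}

int main(void)
{
    unsigned char escd[16] = { 16, 0 };    /* size header says 16 bytes */
    unsigned char buf[8];
    int eof = 0;
    long pos = 0;

    while (!eof) {
        long n = read_window(escd, pos, (long)sizeof(buf), buf, &eof);

        printf("read %ld byte(s) at offset %ld, eof=%d\n", n, pos, eof);
        pos += n;
    }
    return 0;
}
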
116static int proc_read_legacyres(char *buf, char **start, off_t pos, 114static int proc_read_legacyres(char *buf, char **start, off_t pos,
117 int count, int *eof, void *data) 115 int count, int *eof, void *data)
118{ 116{
119 /* Assume that the following won't overflow the buffer */ 117 /* Assume that the following won't overflow the buffer */
120 if (pnp_bios_get_stat_res(buf)) 118 if (pnp_bios_get_stat_res(buf))
121 return -EIO; 119 return -EIO;
122 120
123 return count; // FIXME: Return actual length 121 return count; // FIXME: Return actual length
124} 122}
125 123
126static int proc_read_devices(char *buf, char **start, off_t pos, 124static int proc_read_devices(char *buf, char **start, off_t pos,
127 int count, int *eof, void *data) 125 int count, int *eof, void *data)
128{ 126{
129 struct pnp_bios_node *node; 127 struct pnp_bios_node *node;
130 u8 nodenum; 128 u8 nodenum;
@@ -134,9 +132,10 @@ static int proc_read_devices(char *buf, char **start, off_t pos,
134 return 0; 132 return 0;
135 133
136 node = kzalloc(node_info.max_node_size, GFP_KERNEL); 134 node = kzalloc(node_info.max_node_size, GFP_KERNEL);
137 if (!node) return -ENOMEM; 135 if (!node)
136 return -ENOMEM;
138 137
139 for (nodenum=pos; nodenum<0xff; ) { 138 for (nodenum = pos; nodenum < 0xff;) {
140 u8 thisnodenum = nodenum; 139 u8 thisnodenum = nodenum;
141 /* 26 = the number of characters per line sprintf'ed */ 140 /* 26 = the number of characters per line sprintf'ed */
142 if ((p - buf + 26) > count) 141 if ((p - buf + 26) > count)
@@ -148,7 +147,11 @@ static int proc_read_devices(char *buf, char **start, off_t pos,
148 node->type_code[0], node->type_code[1], 147 node->type_code[0], node->type_code[1],
149 node->type_code[2], node->flags); 148 node->type_code[2], node->flags);
150 if (nodenum <= thisnodenum) { 149 if (nodenum <= thisnodenum) {
151 printk(KERN_ERR "%s Node number 0x%x is out of sequence following node 0x%x. Aborting.\n", "PnPBIOS: proc_read_devices:", (unsigned int)nodenum, (unsigned int)thisnodenum); 150 printk(KERN_ERR
151 "%s Node number 0x%x is out of sequence following node 0x%x. Aborting.\n",
152 "PnPBIOS: proc_read_devices:",
153 (unsigned int)nodenum,
154 (unsigned int)thisnodenum);
152 *eof = 1; 155 *eof = 1;
153 break; 156 break;
154 } 157 }
@@ -156,12 +159,12 @@ static int proc_read_devices(char *buf, char **start, off_t pos,
156 kfree(node); 159 kfree(node);
157 if (nodenum == 0xff) 160 if (nodenum == 0xff)
158 *eof = 1; 161 *eof = 1;
159 *start = (char *)((off_t)nodenum - pos); 162 *start = (char *)((off_t) nodenum - pos);
160 return p - buf; 163 return p - buf;
161} 164}
162 165
163static int proc_read_node(char *buf, char **start, off_t pos, 166static int proc_read_node(char *buf, char **start, off_t pos,
164 int count, int *eof, void *data) 167 int count, int *eof, void *data)
165{ 168{
166 struct pnp_bios_node *node; 169 struct pnp_bios_node *node;
167 int boot = (long)data >> 8; 170 int boot = (long)data >> 8;
@@ -169,7 +172,8 @@ static int proc_read_node(char *buf, char **start, off_t pos,
169 int len; 172 int len;
170 173
171 node = kzalloc(node_info.max_node_size, GFP_KERNEL); 174 node = kzalloc(node_info.max_node_size, GFP_KERNEL);
172 if (!node) return -ENOMEM; 175 if (!node)
176 return -ENOMEM;
173 if (pnp_bios_get_dev_node(&nodenum, boot, node)) { 177 if (pnp_bios_get_dev_node(&nodenum, boot, node)) {
174 kfree(node); 178 kfree(node);
175 return -EIO; 179 return -EIO;
@@ -180,8 +184,8 @@ static int proc_read_node(char *buf, char **start, off_t pos,
180 return len; 184 return len;
181} 185}
182 186
183static int proc_write_node(struct file *file, const char __user *buf, 187static int proc_write_node(struct file *file, const char __user * buf,
184 unsigned long count, void *data) 188 unsigned long count, void *data)
185{ 189{
186 struct pnp_bios_node *node; 190 struct pnp_bios_node *node;
187 int boot = (long)data >> 8; 191 int boot = (long)data >> 8;
@@ -208,12 +212,12 @@ static int proc_write_node(struct file *file, const char __user *buf,
208 goto out; 212 goto out;
209 } 213 }
210 ret = count; 214 ret = count;
211out: 215 out:
212 kfree(node); 216 kfree(node);
213 return ret; 217 return ret;
214} 218}
215 219
216int pnpbios_interface_attach_device(struct pnp_bios_node * node) 220int pnpbios_interface_attach_device(struct pnp_bios_node *node)
217{ 221{
218 char name[3]; 222 char name[3];
219 struct proc_dir_entry *ent; 223 struct proc_dir_entry *ent;
@@ -222,7 +226,7 @@ int pnpbios_interface_attach_device(struct pnp_bios_node * node)
222 226
223 if (!proc_pnp) 227 if (!proc_pnp)
224 return -EIO; 228 return -EIO;
225 if ( !pnpbios_dont_use_current_config ) { 229 if (!pnpbios_dont_use_current_config) {
226 ent = create_proc_entry(name, 0, proc_pnp); 230 ent = create_proc_entry(name, 0, proc_pnp);
227 if (ent) { 231 if (ent) {
228 ent->read_proc = proc_read_node; 232 ent->read_proc = proc_read_node;
@@ -237,7 +241,7 @@ int pnpbios_interface_attach_device(struct pnp_bios_node * node)
237 if (ent) { 241 if (ent) {
238 ent->read_proc = proc_read_node; 242 ent->read_proc = proc_read_node;
239 ent->write_proc = proc_write_node; 243 ent->write_proc = proc_write_node;
240 ent->data = (void *)(long)(node->handle+0x100); 244 ent->data = (void *)(long)(node->handle + 0x100);
241 return 0; 245 return 0;
242 } 246 }
243 247
@@ -249,7 +253,7 @@ int pnpbios_interface_attach_device(struct pnp_bios_node * node)
249 * work and the pnpbios_dont_use_current_config flag 253 * work and the pnpbios_dont_use_current_config flag
250 * should already have been set to the appropriate value 254 * should already have been set to the appropriate value
251 */ 255 */
252int __init pnpbios_proc_init( void ) 256int __init pnpbios_proc_init(void)
253{ 257{
254 proc_pnp = proc_mkdir("pnp", proc_bus); 258 proc_pnp = proc_mkdir("pnp", proc_bus);
255 if (!proc_pnp) 259 if (!proc_pnp)
@@ -258,10 +262,13 @@ int __init pnpbios_proc_init( void )
258 if (!proc_pnp_boot) 262 if (!proc_pnp_boot)
259 return -EIO; 263 return -EIO;
260 create_proc_read_entry("devices", 0, proc_pnp, proc_read_devices, NULL); 264 create_proc_read_entry("devices", 0, proc_pnp, proc_read_devices, NULL);
261 create_proc_read_entry("configuration_info", 0, proc_pnp, proc_read_pnpconfig, NULL); 265 create_proc_read_entry("configuration_info", 0, proc_pnp,
262 create_proc_read_entry("escd_info", 0, proc_pnp, proc_read_escdinfo, NULL); 266 proc_read_pnpconfig, NULL);
267 create_proc_read_entry("escd_info", 0, proc_pnp, proc_read_escdinfo,
268 NULL);
263 create_proc_read_entry("escd", S_IRUSR, proc_pnp, proc_read_escd, NULL); 269 create_proc_read_entry("escd", S_IRUSR, proc_pnp, proc_read_escd, NULL);
264 create_proc_read_entry("legacy_device_resources", 0, proc_pnp, proc_read_legacyres, NULL); 270 create_proc_read_entry("legacy_device_resources", 0, proc_pnp,
271 proc_read_legacyres, NULL);
265 272
266 return 0; 273 return 0;
267} 274}
@@ -274,9 +281,9 @@ void __exit pnpbios_proc_exit(void)
274 if (!proc_pnp) 281 if (!proc_pnp)
275 return; 282 return;
276 283
277 for (i=0; i<0xff; i++) { 284 for (i = 0; i < 0xff; i++) {
278 sprintf(name, "%02x", i); 285 sprintf(name, "%02x", i);
279 if ( !pnpbios_dont_use_current_config ) 286 if (!pnpbios_dont_use_current_config)
280 remove_proc_entry(name, proc_pnp); 287 remove_proc_entry(name, proc_pnp);
281 remove_proc_entry(name, proc_pnp_boot); 288 remove_proc_entry(name, proc_pnp_boot);
282 } 289 }
@@ -287,6 +294,4 @@ void __exit pnpbios_proc_exit(void)
287 remove_proc_entry("devices", proc_pnp); 294 remove_proc_entry("devices", proc_pnp);
288 remove_proc_entry("boot", proc_pnp); 295 remove_proc_entry("boot", proc_pnp);
289 remove_proc_entry("pnp", proc_bus); 296 remove_proc_entry("pnp", proc_bus);
290
291 return;
292} 297}
diff --git a/drivers/pnp/pnpbios/rsparser.c b/drivers/pnp/pnpbios/rsparser.c
index 3c2ab8394e3f..04ecd7b67230 100644
--- a/drivers/pnp/pnpbios/rsparser.c
+++ b/drivers/pnp/pnpbios/rsparser.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * rsparser.c - parses and encodes pnpbios resource data streams 2 * rsparser.c - parses and encodes pnpbios resource data streams
3 *
4 */ 3 */
5 4
6#include <linux/ctype.h> 5#include <linux/ctype.h>
@@ -12,8 +11,10 @@
12#ifdef CONFIG_PCI 11#ifdef CONFIG_PCI
13#include <linux/pci.h> 12#include <linux/pci.h>
14#else 13#else
15inline void pcibios_penalize_isa_irq(int irq, int active) {} 14inline void pcibios_penalize_isa_irq(int irq, int active)
16#endif /* CONFIG_PCI */ 15{
16}
17#endif /* CONFIG_PCI */
17 18
18#include "pnpbios.h" 19#include "pnpbios.h"
19 20
@@ -52,75 +53,88 @@ inline void pcibios_penalize_isa_irq(int irq, int active) {}
52 * Allocated Resources 53 * Allocated Resources
53 */ 54 */
54 55
55static void 56static void pnpbios_parse_allocated_irqresource(struct pnp_resource_table *res,
56pnpbios_parse_allocated_irqresource(struct pnp_resource_table * res, int irq) 57 int irq)
57{ 58{
58 int i = 0; 59 int i = 0;
59 while (!(res->irq_resource[i].flags & IORESOURCE_UNSET) && i < PNP_MAX_IRQ) i++; 60
61 while (!(res->irq_resource[i].flags & IORESOURCE_UNSET)
62 && i < PNP_MAX_IRQ)
63 i++;
60 if (i < PNP_MAX_IRQ) { 64 if (i < PNP_MAX_IRQ) {
61 res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag 65 res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag
62 if (irq == -1) { 66 if (irq == -1) {
63 res->irq_resource[i].flags |= IORESOURCE_DISABLED; 67 res->irq_resource[i].flags |= IORESOURCE_DISABLED;
64 return; 68 return;
65 } 69 }
66 res->irq_resource[i].start = 70 res->irq_resource[i].start =
67 res->irq_resource[i].end = (unsigned long) irq; 71 res->irq_resource[i].end = (unsigned long)irq;
68 pcibios_penalize_isa_irq(irq, 1); 72 pcibios_penalize_isa_irq(irq, 1);
69 } 73 }
70} 74}
71 75
72static void 76static void pnpbios_parse_allocated_dmaresource(struct pnp_resource_table *res,
73pnpbios_parse_allocated_dmaresource(struct pnp_resource_table * res, int dma) 77 int dma)
74{ 78{
75 int i = 0; 79 int i = 0;
80
76 while (i < PNP_MAX_DMA && 81 while (i < PNP_MAX_DMA &&
77 !(res->dma_resource[i].flags & IORESOURCE_UNSET)) 82 !(res->dma_resource[i].flags & IORESOURCE_UNSET))
78 i++; 83 i++;
79 if (i < PNP_MAX_DMA) { 84 if (i < PNP_MAX_DMA) {
80 res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag 85 res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag
81 if (dma == -1) { 86 if (dma == -1) {
82 res->dma_resource[i].flags |= IORESOURCE_DISABLED; 87 res->dma_resource[i].flags |= IORESOURCE_DISABLED;
83 return; 88 return;
84 } 89 }
85 res->dma_resource[i].start = 90 res->dma_resource[i].start =
86 res->dma_resource[i].end = (unsigned long) dma; 91 res->dma_resource[i].end = (unsigned long)dma;
87 } 92 }
88} 93}
89 94
90static void 95static void pnpbios_parse_allocated_ioresource(struct pnp_resource_table *res,
91pnpbios_parse_allocated_ioresource(struct pnp_resource_table * res, int io, int len) 96 int io, int len)
92{ 97{
93 int i = 0; 98 int i = 0;
94 while (!(res->port_resource[i].flags & IORESOURCE_UNSET) && i < PNP_MAX_PORT) i++; 99
100 while (!(res->port_resource[i].flags & IORESOURCE_UNSET)
101 && i < PNP_MAX_PORT)
102 i++;
95 if (i < PNP_MAX_PORT) { 103 if (i < PNP_MAX_PORT) {
96 res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag 104 res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag
97 if (len <= 0 || (io + len -1) >= 0x10003) { 105 if (len <= 0 || (io + len - 1) >= 0x10003) {
98 res->port_resource[i].flags |= IORESOURCE_DISABLED; 106 res->port_resource[i].flags |= IORESOURCE_DISABLED;
99 return; 107 return;
100 } 108 }
101 res->port_resource[i].start = (unsigned long) io; 109 res->port_resource[i].start = (unsigned long)io;
102 res->port_resource[i].end = (unsigned long)(io + len - 1); 110 res->port_resource[i].end = (unsigned long)(io + len - 1);
103 } 111 }
104} 112}
105 113
106static void 114static void pnpbios_parse_allocated_memresource(struct pnp_resource_table *res,
107pnpbios_parse_allocated_memresource(struct pnp_resource_table * res, int mem, int len) 115 int mem, int len)
108{ 116{
109 int i = 0; 117 int i = 0;
110 while (!(res->mem_resource[i].flags & IORESOURCE_UNSET) && i < PNP_MAX_MEM) i++; 118
119 while (!(res->mem_resource[i].flags & IORESOURCE_UNSET)
120 && i < PNP_MAX_MEM)
121 i++;
111 if (i < PNP_MAX_MEM) { 122 if (i < PNP_MAX_MEM) {
112 res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag 123 res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag
113 if (len <= 0) { 124 if (len <= 0) {
114 res->mem_resource[i].flags |= IORESOURCE_DISABLED; 125 res->mem_resource[i].flags |= IORESOURCE_DISABLED;
115 return; 126 return;
116 } 127 }
117 res->mem_resource[i].start = (unsigned long) mem; 128 res->mem_resource[i].start = (unsigned long)mem;
118 res->mem_resource[i].end = (unsigned long)(mem + len - 1); 129 res->mem_resource[i].end = (unsigned long)(mem + len - 1);
119 } 130 }
120} 131}
121 132
122static unsigned char * 133static unsigned char *pnpbios_parse_allocated_resource_data(unsigned char *p,
123pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, struct pnp_resource_table * res) 134 unsigned char *end,
135 struct
136 pnp_resource_table
137 *res)
124{ 138{
125 unsigned int len, tag; 139 unsigned int len, tag;
126 int io, size, mask, i; 140 int io, size, mask, i;
@@ -134,12 +148,12 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
134 while ((char *)p < (char *)end) { 148 while ((char *)p < (char *)end) {
135 149
136 /* determine the type of tag */ 150 /* determine the type of tag */
137 if (p[0] & LARGE_TAG) { /* large tag */ 151 if (p[0] & LARGE_TAG) { /* large tag */
138 len = (p[2] << 8) | p[1]; 152 len = (p[2] << 8) | p[1];
139 tag = p[0]; 153 tag = p[0];
140 } else { /* small tag */ 154 } else { /* small tag */
141 len = p[0] & 0x07; 155 len = p[0] & 0x07;
142 tag = ((p[0]>>3) & 0x0f); 156 tag = ((p[0] >> 3) & 0x0f);
143 } 157 }
144 158
145 switch (tag) { 159 switch (tag) {
@@ -147,8 +161,8 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
147 case LARGE_TAG_MEM: 161 case LARGE_TAG_MEM:
148 if (len != 9) 162 if (len != 9)
149 goto len_err; 163 goto len_err;
150 io = *(short *) &p[4]; 164 io = *(short *)&p[4];
151 size = *(short *) &p[10]; 165 size = *(short *)&p[10];
152 pnpbios_parse_allocated_memresource(res, io, size); 166 pnpbios_parse_allocated_memresource(res, io, size);
153 break; 167 break;
154 168
@@ -163,16 +177,16 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
163 case LARGE_TAG_MEM32: 177 case LARGE_TAG_MEM32:
164 if (len != 17) 178 if (len != 17)
165 goto len_err; 179 goto len_err;
166 io = *(int *) &p[4]; 180 io = *(int *)&p[4];
167 size = *(int *) &p[16]; 181 size = *(int *)&p[16];
168 pnpbios_parse_allocated_memresource(res, io, size); 182 pnpbios_parse_allocated_memresource(res, io, size);
169 break; 183 break;
170 184
171 case LARGE_TAG_FIXEDMEM32: 185 case LARGE_TAG_FIXEDMEM32:
172 if (len != 9) 186 if (len != 9)
173 goto len_err; 187 goto len_err;
174 io = *(int *) &p[4]; 188 io = *(int *)&p[4];
175 size = *(int *) &p[8]; 189 size = *(int *)&p[8];
176 pnpbios_parse_allocated_memresource(res, io, size); 190 pnpbios_parse_allocated_memresource(res, io, size);
177 break; 191 break;
178 192
@@ -180,9 +194,10 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
180 if (len < 2 || len > 3) 194 if (len < 2 || len > 3)
181 goto len_err; 195 goto len_err;
182 io = -1; 196 io = -1;
183 mask= p[1] + p[2]*256; 197 mask = p[1] + p[2] * 256;
184 for (i=0;i<16;i++, mask=mask>>1) 198 for (i = 0; i < 16; i++, mask = mask >> 1)
185 if(mask & 0x01) io=i; 199 if (mask & 0x01)
200 io = i;
186 pnpbios_parse_allocated_irqresource(res, io); 201 pnpbios_parse_allocated_irqresource(res, io);
187 break; 202 break;
188 203
@@ -191,15 +206,16 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
191 goto len_err; 206 goto len_err;
192 io = -1; 207 io = -1;
193 mask = p[1]; 208 mask = p[1];
194 for (i=0;i<8;i++, mask = mask>>1) 209 for (i = 0; i < 8; i++, mask = mask >> 1)
195 if(mask & 0x01) io=i; 210 if (mask & 0x01)
211 io = i;
196 pnpbios_parse_allocated_dmaresource(res, io); 212 pnpbios_parse_allocated_dmaresource(res, io);
197 break; 213 break;
198 214
199 case SMALL_TAG_PORT: 215 case SMALL_TAG_PORT:
200 if (len != 7) 216 if (len != 7)
201 goto len_err; 217 goto len_err;
202 io = p[2] + p[3] *256; 218 io = p[2] + p[3] * 256;
203 size = p[7]; 219 size = p[7];
204 pnpbios_parse_allocated_ioresource(res, io, size); 220 pnpbios_parse_allocated_ioresource(res, io, size);
205 break; 221 break;
@@ -218,12 +234,14 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
218 234
219 case SMALL_TAG_END: 235 case SMALL_TAG_END:
220 p = p + 2; 236 p = p + 2;
221 return (unsigned char *)p; 237 return (unsigned char *)p;
222 break; 238 break;
223 239
224 default: /* an unkown tag */ 240 default: /* an unkown tag */
225 len_err: 241 len_err:
226 printk(KERN_ERR "PnPBIOS: Unknown tag '0x%x', length '%d'.\n", tag, len); 242 printk(KERN_ERR
243 "PnPBIOS: Unknown tag '0x%x', length '%d'.\n",
244 tag, len);
227 break; 245 break;
228 } 246 }
229 247
@@ -234,20 +252,21 @@ pnpbios_parse_allocated_resource_data(unsigned char * p, unsigned char * end, st
234 p += len + 1; 252 p += len + 1;
235 } 253 }
236 254
237 printk(KERN_ERR "PnPBIOS: Resource structure does not contain an end tag.\n"); 255 printk(KERN_ERR
256 "PnPBIOS: Resource structure does not contain an end tag.\n");
238 257
239 return NULL; 258 return NULL;
240} 259}
241 260
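
The SMALL_TAG_IRQ case above rebuilds the little-endian 16-bit mask from the two data bytes and keeps the highest set bit; in a well-formed allocated descriptor exactly one bit is set, so this simply recovers the assigned IRQ. A worked decode with made-up bytes (tag byte, then mask 0x04A0, whose highest set bit is 10):

#include <stdio.h>

int main(void)
{
    /* small IRQ descriptor: tag byte, then LE mask 0x04A0 (made up here;
     * an allocated descriptor should have exactly one bit set) */
    const unsigned char p[] = { 0x22, 0xA0, 0x04 };
    int mask = p[1] + p[2] * 256;
    int io = -1;
    int i;

    for (i = 0; i < 16; i++, mask >>= 1)
        if (mask & 0x01)
            io = i;                      /* highest set bit wins */
    printf("allocated IRQ: %d\n", io);   /* prints 10 */
    return 0;
}
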
242
243/* 261/*
244 * Resource Configuration Options 262 * Resource Configuration Options
245 */ 263 */
246 264
247static void 265static void pnpbios_parse_mem_option(unsigned char *p, int size,
248pnpbios_parse_mem_option(unsigned char *p, int size, struct pnp_option *option) 266 struct pnp_option *option)
249{ 267{
250 struct pnp_mem * mem; 268 struct pnp_mem *mem;
269
251 mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); 270 mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
252 if (!mem) 271 if (!mem)
253 return; 272 return;
@@ -256,14 +275,14 @@ pnpbios_parse_mem_option(unsigned char *p, int size, struct pnp_option *option)
256 mem->align = (p[9] << 8) | p[8]; 275 mem->align = (p[9] << 8) | p[8];
257 mem->size = ((p[11] << 8) | p[10]) << 8; 276 mem->size = ((p[11] << 8) | p[10]) << 8;
258 mem->flags = p[3]; 277 mem->flags = p[3];
259 pnp_register_mem_resource(option,mem); 278 pnp_register_mem_resource(option, mem);
260 return;
261} 279}
262 280
263static void 281static void pnpbios_parse_mem32_option(unsigned char *p, int size,
264pnpbios_parse_mem32_option(unsigned char *p, int size, struct pnp_option *option) 282 struct pnp_option *option)
265{ 283{
266 struct pnp_mem * mem; 284 struct pnp_mem *mem;
285
267 mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); 286 mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
268 if (!mem) 287 if (!mem)
269 return; 288 return;
@@ -272,14 +291,13 @@ pnpbios_parse_mem32_option(unsigned char *p, int size, struct pnp_option *option
272 mem->align = (p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12]; 291 mem->align = (p[15] << 24) | (p[14] << 16) | (p[13] << 8) | p[12];
273 mem->size = (p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16]; 292 mem->size = (p[19] << 24) | (p[18] << 16) | (p[17] << 8) | p[16];
274 mem->flags = p[3]; 293 mem->flags = p[3];
275 pnp_register_mem_resource(option,mem); 294 pnp_register_mem_resource(option, mem);
276 return;
277} 295}
278 296
279static void 297static void pnpbios_parse_fixed_mem32_option(unsigned char *p, int size,
280pnpbios_parse_fixed_mem32_option(unsigned char *p, int size, struct pnp_option *option) 298 struct pnp_option *option)
281{ 299{
282 struct pnp_mem * mem; 300 struct pnp_mem *mem;
283 mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL); 301 mem = kzalloc(sizeof(struct pnp_mem), GFP_KERNEL);
284 if (!mem) 302 if (!mem)
285 return; 303 return;
@@ -287,14 +305,13 @@ pnpbios_parse_fixed_mem32_option(unsigned char *p, int size, struct pnp_option *
287 mem->size = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8]; 305 mem->size = (p[11] << 24) | (p[10] << 16) | (p[9] << 8) | p[8];
288 mem->align = 0; 306 mem->align = 0;
289 mem->flags = p[3]; 307 mem->flags = p[3];
290 pnp_register_mem_resource(option,mem); 308 pnp_register_mem_resource(option, mem);
291 return;
292} 309}
293 310
294static void 311static void pnpbios_parse_irq_option(unsigned char *p, int size,
295pnpbios_parse_irq_option(unsigned char *p, int size, struct pnp_option *option) 312 struct pnp_option *option)
296{ 313{
297 struct pnp_irq * irq; 314 struct pnp_irq *irq;
298 unsigned long bits; 315 unsigned long bits;
299 316
300 irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL); 317 irq = kzalloc(sizeof(struct pnp_irq), GFP_KERNEL);
@@ -306,27 +323,27 @@ pnpbios_parse_irq_option(unsigned char *p, int size, struct pnp_option *option)
306 irq->flags = p[3]; 323 irq->flags = p[3];
307 else 324 else
308 irq->flags = IORESOURCE_IRQ_HIGHEDGE; 325 irq->flags = IORESOURCE_IRQ_HIGHEDGE;
309 pnp_register_irq_resource(option,irq); 326 pnp_register_irq_resource(option, irq);
310 return;
311} 327}
312 328
313static void 329static void pnpbios_parse_dma_option(unsigned char *p, int size,
314pnpbios_parse_dma_option(unsigned char *p, int size, struct pnp_option *option) 330 struct pnp_option *option)
315{ 331{
316 struct pnp_dma * dma; 332 struct pnp_dma *dma;
333
317 dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL); 334 dma = kzalloc(sizeof(struct pnp_dma), GFP_KERNEL);
318 if (!dma) 335 if (!dma)
319 return; 336 return;
320 dma->map = p[1]; 337 dma->map = p[1];
321 dma->flags = p[2]; 338 dma->flags = p[2];
322 pnp_register_dma_resource(option,dma); 339 pnp_register_dma_resource(option, dma);
323 return;
324} 340}
325 341
326static void 342static void pnpbios_parse_port_option(unsigned char *p, int size,
327pnpbios_parse_port_option(unsigned char *p, int size, struct pnp_option *option) 343 struct pnp_option *option)
328{ 344{
329 struct pnp_port * port; 345 struct pnp_port *port;
346
330 port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); 347 port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
331 if (!port) 348 if (!port)
332 return; 349 return;
@@ -335,14 +352,14 @@ pnpbios_parse_port_option(unsigned char *p, int size, struct pnp_option *option)
335 port->align = p[6]; 352 port->align = p[6];
336 port->size = p[7]; 353 port->size = p[7];
337 port->flags = p[1] ? PNP_PORT_FLAG_16BITADDR : 0; 354 port->flags = p[1] ? PNP_PORT_FLAG_16BITADDR : 0;
338 pnp_register_port_resource(option,port); 355 pnp_register_port_resource(option, port);
339 return;
340} 356}
341 357
342static void 358static void pnpbios_parse_fixed_port_option(unsigned char *p, int size,
343pnpbios_parse_fixed_port_option(unsigned char *p, int size, struct pnp_option *option) 359 struct pnp_option *option)
344{ 360{
345 struct pnp_port * port; 361 struct pnp_port *port;
362
346 port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL); 363 port = kzalloc(sizeof(struct pnp_port), GFP_KERNEL);
347 if (!port) 364 if (!port)
348 return; 365 return;
@@ -350,12 +367,12 @@ pnpbios_parse_fixed_port_option(unsigned char *p, int size, struct pnp_option *o
350 port->size = p[3]; 367 port->size = p[3];
351 port->align = 0; 368 port->align = 0;
352 port->flags = PNP_PORT_FLAG_FIXED; 369 port->flags = PNP_PORT_FLAG_FIXED;
353 pnp_register_port_resource(option,port); 370 pnp_register_port_resource(option, port);
354 return;
355} 371}
356 372
357static unsigned char * 373static unsigned char *pnpbios_parse_resource_option_data(unsigned char *p,
358pnpbios_parse_resource_option_data(unsigned char * p, unsigned char * end, struct pnp_dev *dev) 374 unsigned char *end,
375 struct pnp_dev *dev)
359{ 376{
360 unsigned int len, tag; 377 unsigned int len, tag;
361 int priority = 0; 378 int priority = 0;
@@ -371,12 +388,12 @@ pnpbios_parse_resource_option_data(unsigned char * p, unsigned char * end, struc
371 while ((char *)p < (char *)end) { 388 while ((char *)p < (char *)end) {
372 389
373 /* determine the type of tag */ 390 /* determine the type of tag */
374 if (p[0] & LARGE_TAG) { /* large tag */ 391 if (p[0] & LARGE_TAG) { /* large tag */
375 len = (p[2] << 8) | p[1]; 392 len = (p[2] << 8) | p[1];
376 tag = p[0]; 393 tag = p[0];
377 } else { /* small tag */ 394 } else { /* small tag */
378 len = p[0] & 0x07; 395 len = p[0] & 0x07;
379 tag = ((p[0]>>3) & 0x0f); 396 tag = ((p[0] >> 3) & 0x0f);
380 } 397 }
381 398
382 switch (tag) { 399 switch (tag) {
@@ -442,16 +459,19 @@ pnpbios_parse_resource_option_data(unsigned char * p, unsigned char * end, struc
442 if (len != 0) 459 if (len != 0)
443 goto len_err; 460 goto len_err;
444 if (option_independent == option) 461 if (option_independent == option)
445 printk(KERN_WARNING "PnPBIOS: Missing SMALL_TAG_STARTDEP tag\n"); 462 printk(KERN_WARNING
463 "PnPBIOS: Missing SMALL_TAG_STARTDEP tag\n");
446 option = option_independent; 464 option = option_independent;
447 break; 465 break;
448 466
449 case SMALL_TAG_END: 467 case SMALL_TAG_END:
450 return p + 2; 468 return p + 2;
451 469
452 default: /* an unkown tag */ 470 default: /* an unkown tag */
453 len_err: 471 len_err:
454 printk(KERN_ERR "PnPBIOS: Unknown tag '0x%x', length '%d'.\n", tag, len); 472 printk(KERN_ERR
473 "PnPBIOS: Unknown tag '0x%x', length '%d'.\n",
474 tag, len);
455 break; 475 break;
456 } 476 }
457 477
@@ -462,19 +482,18 @@ pnpbios_parse_resource_option_data(unsigned char * p, unsigned char * end, struc
462 p += len + 1; 482 p += len + 1;
463 } 483 }
464 484
465 printk(KERN_ERR "PnPBIOS: Resource structure does not contain an end tag.\n"); 485 printk(KERN_ERR
486 "PnPBIOS: Resource structure does not contain an end tag.\n");
466 487
467 return NULL; 488 return NULL;
468} 489}
469 490
470
471/* 491/*
472 * Compatible Device IDs 492 * Compatible Device IDs
473 */ 493 */
474 494
475#define HEX(id,a) hex[((id)>>a) & 15] 495#define HEX(id,a) hex[((id)>>a) & 15]
476#define CHAR(id,a) (0x40 + (((id)>>a) & 31)) 496#define CHAR(id,a) (0x40 + (((id)>>a) & 31))
477//
478 497
479void pnpid32_to_pnpid(u32 id, char *str) 498void pnpid32_to_pnpid(u32 id, char *str)
480{ 499{
@@ -483,21 +502,20 @@ void pnpid32_to_pnpid(u32 id, char *str)
483 id = be32_to_cpu(id); 502 id = be32_to_cpu(id);
484 str[0] = CHAR(id, 26); 503 str[0] = CHAR(id, 26);
485 str[1] = CHAR(id, 21); 504 str[1] = CHAR(id, 21);
486 str[2] = CHAR(id,16); 505 str[2] = CHAR(id, 16);
487 str[3] = HEX(id, 12); 506 str[3] = HEX(id, 12);
488 str[4] = HEX(id, 8); 507 str[4] = HEX(id, 8);
489 str[5] = HEX(id, 4); 508 str[5] = HEX(id, 4);
490 str[6] = HEX(id, 0); 509 str[6] = HEX(id, 0);
491 str[7] = '\0'; 510 str[7] = '\0';
492
493 return;
494} 511}
495// 512
496#undef CHAR 513#undef CHAR
497#undef HEX 514#undef HEX
498 515
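
The CHAR/HEX macros above unpack a compressed EISA ID: after the be32_to_cpu() swap, bits 30..16 hold three 5-bit letters biased by 0x40 and bits 15..0 hold four hex nibbles. A worked userspace decode of the sample value 0x41d00c01, which comes out as PNP0C01:

#include <stdio.h>

#define HEX(id, a)  hex[((id) >> (a)) & 15]
#define CHAR(id, a) (0x40 + (((id) >> (a)) & 31))

int main(void)
{
    static const char hex[] = "0123456789ABCDEF";
    unsigned int id = 0x41d00c01;        /* sample value, host byte order */
    char str[8];

    str[0] = CHAR(id, 26);               /* three 5-bit compressed letters */
    str[1] = CHAR(id, 21);
    str[2] = CHAR(id, 16);
    str[3] = HEX(id, 12);                /* four hex digits */
    str[4] = HEX(id, 8);
    str[5] = HEX(id, 4);
    str[6] = HEX(id, 0);
    str[7] = '\0';
    printf("%s\n", str);                 /* prints PNP0C01 */
    return 0;
}
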
499static unsigned char * 516static unsigned char *pnpbios_parse_compatible_ids(unsigned char *p,
500pnpbios_parse_compatible_ids(unsigned char *p, unsigned char *end, struct pnp_dev *dev) 517 unsigned char *end,
518 struct pnp_dev *dev)
501{ 519{
502 int len, tag; 520 int len, tag;
503 char id[8]; 521 char id[8];
@@ -509,40 +527,45 @@ pnpbios_parse_compatible_ids(unsigned char *p, unsigned char *end, struct pnp_de
509 while ((char *)p < (char *)end) { 527 while ((char *)p < (char *)end) {
510 528
511 /* determine the type of tag */ 529 /* determine the type of tag */
512 if (p[0] & LARGE_TAG) { /* large tag */ 530 if (p[0] & LARGE_TAG) { /* large tag */
513 len = (p[2] << 8) | p[1]; 531 len = (p[2] << 8) | p[1];
514 tag = p[0]; 532 tag = p[0];
515 } else { /* small tag */ 533 } else { /* small tag */
516 len = p[0] & 0x07; 534 len = p[0] & 0x07;
517 tag = ((p[0]>>3) & 0x0f); 535 tag = ((p[0] >> 3) & 0x0f);
518 } 536 }
519 537
520 switch (tag) { 538 switch (tag) {
521 539
522 case LARGE_TAG_ANSISTR: 540 case LARGE_TAG_ANSISTR:
523 strncpy(dev->name, p + 3, len >= PNP_NAME_LEN ? PNP_NAME_LEN - 2 : len); 541 strncpy(dev->name, p + 3,
524 dev->name[len >= PNP_NAME_LEN ? PNP_NAME_LEN - 1 : len] = '\0'; 542 len >= PNP_NAME_LEN ? PNP_NAME_LEN - 2 : len);
543 dev->name[len >=
544 PNP_NAME_LEN ? PNP_NAME_LEN - 1 : len] = '\0';
525 break; 545 break;
526 546
527 case SMALL_TAG_COMPATDEVID: /* compatible ID */ 547 case SMALL_TAG_COMPATDEVID: /* compatible ID */
528 if (len != 4) 548 if (len != 4)
529 goto len_err; 549 goto len_err;
530 dev_id = kzalloc(sizeof (struct pnp_id), GFP_KERNEL); 550 dev_id = kzalloc(sizeof(struct pnp_id), GFP_KERNEL);
531 if (!dev_id) 551 if (!dev_id)
532 return NULL; 552 return NULL;
533 pnpid32_to_pnpid(p[1] | p[2] << 8 | p[3] << 16 | p[4] << 24,id); 553 pnpid32_to_pnpid(p[1] | p[2] << 8 | p[3] << 16 | p[4] <<
554 24, id);
534 memcpy(&dev_id->id, id, 7); 555 memcpy(&dev_id->id, id, 7);
535 pnp_add_id(dev_id, dev); 556 pnp_add_id(dev_id, dev);
536 break; 557 break;
537 558
538 case SMALL_TAG_END: 559 case SMALL_TAG_END:
539 p = p + 2; 560 p = p + 2;
540 return (unsigned char *)p; 561 return (unsigned char *)p;
541 break; 562 break;
542 563
543 default: /* an unkown tag */ 564 default: /* an unkown tag */
544 len_err: 565 len_err:
545 printk(KERN_ERR "PnPBIOS: Unknown tag '0x%x', length '%d'.\n", tag, len); 566 printk(KERN_ERR
567 "PnPBIOS: Unknown tag '0x%x', length '%d'.\n",
568 tag, len);
546 break; 569 break;
547 } 570 }
548 571
@@ -553,33 +576,34 @@ pnpbios_parse_compatible_ids(unsigned char *p, unsigned char *end, struct pnp_de
553 p += len + 1; 576 p += len + 1;
554 } 577 }
555 578
556 printk(KERN_ERR "PnPBIOS: Resource structure does not contain an end tag.\n"); 579 printk(KERN_ERR
580 "PnPBIOS: Resource structure does not contain an end tag.\n");
557 581
558 return NULL; 582 return NULL;
559} 583}
560 584
561
562/* 585/*
563 * Allocated Resource Encoding 586 * Allocated Resource Encoding
564 */ 587 */
565 588
566static void pnpbios_encode_mem(unsigned char *p, struct resource * res) 589static void pnpbios_encode_mem(unsigned char *p, struct resource *res)
567{ 590{
568 unsigned long base = res->start; 591 unsigned long base = res->start;
569 unsigned long len = res->end - res->start + 1; 592 unsigned long len = res->end - res->start + 1;
593
570 p[4] = (base >> 8) & 0xff; 594 p[4] = (base >> 8) & 0xff;
571 p[5] = ((base >> 8) >> 8) & 0xff; 595 p[5] = ((base >> 8) >> 8) & 0xff;
572 p[6] = (base >> 8) & 0xff; 596 p[6] = (base >> 8) & 0xff;
573 p[7] = ((base >> 8) >> 8) & 0xff; 597 p[7] = ((base >> 8) >> 8) & 0xff;
574 p[10] = (len >> 8) & 0xff; 598 p[10] = (len >> 8) & 0xff;
575 p[11] = ((len >> 8) >> 8) & 0xff; 599 p[11] = ((len >> 8) >> 8) & 0xff;
576 return;
577} 600}
578 601
579static void pnpbios_encode_mem32(unsigned char *p, struct resource * res) 602static void pnpbios_encode_mem32(unsigned char *p, struct resource *res)
580{ 603{
581 unsigned long base = res->start; 604 unsigned long base = res->start;
582 unsigned long len = res->end - res->start + 1; 605 unsigned long len = res->end - res->start + 1;
606
583 p[4] = base & 0xff; 607 p[4] = base & 0xff;
584 p[5] = (base >> 8) & 0xff; 608 p[5] = (base >> 8) & 0xff;
585 p[6] = (base >> 16) & 0xff; 609 p[6] = (base >> 16) & 0xff;
@@ -592,12 +616,13 @@ static void pnpbios_encode_mem32(unsigned char *p, struct resource * res)
592 p[17] = (len >> 8) & 0xff; 616 p[17] = (len >> 8) & 0xff;
593 p[18] = (len >> 16) & 0xff; 617 p[18] = (len >> 16) & 0xff;
594 p[19] = (len >> 24) & 0xff; 618 p[19] = (len >> 24) & 0xff;
595 return;
596} 619}
597 620
598static void pnpbios_encode_fixed_mem32(unsigned char *p, struct resource * res) 621static void pnpbios_encode_fixed_mem32(unsigned char *p, struct resource *res)
599{ unsigned long base = res->start; 622{
623 unsigned long base = res->start;
600 unsigned long len = res->end - res->start + 1; 624 unsigned long len = res->end - res->start + 1;
625
601 p[4] = base & 0xff; 626 p[4] = base & 0xff;
602 p[5] = (base >> 8) & 0xff; 627 p[5] = (base >> 8) & 0xff;
603 p[6] = (base >> 16) & 0xff; 628 p[6] = (base >> 16) & 0xff;
@@ -606,50 +631,52 @@ static void pnpbios_encode_fixed_mem32(unsigned char *p, struct resource * res)
606 p[9] = (len >> 8) & 0xff; 631 p[9] = (len >> 8) & 0xff;
607 p[10] = (len >> 16) & 0xff; 632 p[10] = (len >> 16) & 0xff;
608 p[11] = (len >> 24) & 0xff; 633 p[11] = (len >> 24) & 0xff;
609 return;
610} 634}
611 635
612static void pnpbios_encode_irq(unsigned char *p, struct resource * res) 636static void pnpbios_encode_irq(unsigned char *p, struct resource *res)
613{ 637{
614 unsigned long map = 0; 638 unsigned long map = 0;
639
615 map = 1 << res->start; 640 map = 1 << res->start;
616 p[1] = map & 0xff; 641 p[1] = map & 0xff;
617 p[2] = (map >> 8) & 0xff; 642 p[2] = (map >> 8) & 0xff;
618 return;
619} 643}
620 644
621static void pnpbios_encode_dma(unsigned char *p, struct resource * res) 645static void pnpbios_encode_dma(unsigned char *p, struct resource *res)
622{ 646{
623 unsigned long map = 0; 647 unsigned long map = 0;
648
624 map = 1 << res->start; 649 map = 1 << res->start;
625 p[1] = map & 0xff; 650 p[1] = map & 0xff;
626 return;
627} 651}
628 652
629static void pnpbios_encode_port(unsigned char *p, struct resource * res) 653static void pnpbios_encode_port(unsigned char *p, struct resource *res)
630{ 654{
631 unsigned long base = res->start; 655 unsigned long base = res->start;
632 unsigned long len = res->end - res->start + 1; 656 unsigned long len = res->end - res->start + 1;
657
633 p[2] = base & 0xff; 658 p[2] = base & 0xff;
634 p[3] = (base >> 8) & 0xff; 659 p[3] = (base >> 8) & 0xff;
635 p[4] = base & 0xff; 660 p[4] = base & 0xff;
636 p[5] = (base >> 8) & 0xff; 661 p[5] = (base >> 8) & 0xff;
637 p[7] = len & 0xff; 662 p[7] = len & 0xff;
638 return;
639} 663}
640 664
641static void pnpbios_encode_fixed_port(unsigned char *p, struct resource * res) 665static void pnpbios_encode_fixed_port(unsigned char *p, struct resource *res)
642{ 666{
643 unsigned long base = res->start; 667 unsigned long base = res->start;
644 unsigned long len = res->end - res->start + 1; 668 unsigned long len = res->end - res->start + 1;
669
645 p[1] = base & 0xff; 670 p[1] = base & 0xff;
646 p[2] = (base >> 8) & 0xff; 671 p[2] = (base >> 8) & 0xff;
647 p[3] = len & 0xff; 672 p[3] = len & 0xff;
648 return;
649} 673}
650 674
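
The >>8 shifts in pnpbios_encode_mem() are there because the 24-bit memory range descriptor expresses base and length in 256-byte units, stored little-endian (per the ISA PnP resource format; the 32-bit variants store full byte values). A worked packing of a sample range under that assumption:

#include <stdio.h>

int main(void)
{
    unsigned long base = 0xd0000, len = 0x4000;   /* sample range */
    unsigned char p[12] = { 0 };

    p[4] = (base >> 8) & 0xff;           /* range minimum, 256-byte units */
    p[5] = (base >> 16) & 0xff;
    p[6] = (base >> 8) & 0xff;           /* range maximum, same for a fit */
    p[7] = (base >> 16) & 0xff;
    p[10] = (len >> 8) & 0xff;           /* length, 256-byte units */
    p[11] = (len >> 16) & 0xff;

    printf("min 0x%02x%02x00, len 0x%02x%02x00\n", p[5], p[4], p[11], p[10]);
    return 0;
}
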
651static unsigned char * 675static unsigned char *pnpbios_encode_allocated_resource_data(unsigned char *p,
652pnpbios_encode_allocated_resource_data(unsigned char * p, unsigned char * end, struct pnp_resource_table * res) 676 unsigned char *end,
677 struct
678 pnp_resource_table
679 *res)
653{ 680{
654 unsigned int len, tag; 681 unsigned int len, tag;
655 int port = 0, irq = 0, dma = 0, mem = 0; 682 int port = 0, irq = 0, dma = 0, mem = 0;
@@ -660,12 +687,12 @@ pnpbios_encode_allocated_resource_data(unsigned char * p, unsigned char * end, s
660 while ((char *)p < (char *)end) { 687 while ((char *)p < (char *)end) {
661 688
662 /* determine the type of tag */ 689 /* determine the type of tag */
663 if (p[0] & LARGE_TAG) { /* large tag */ 690 if (p[0] & LARGE_TAG) { /* large tag */
664 len = (p[2] << 8) | p[1]; 691 len = (p[2] << 8) | p[1];
665 tag = p[0]; 692 tag = p[0];
666 } else { /* small tag */ 693 } else { /* small tag */
667 len = p[0] & 0x07; 694 len = p[0] & 0x07;
668 tag = ((p[0]>>3) & 0x0f); 695 tag = ((p[0] >> 3) & 0x0f);
669 } 696 }
670 697
671 switch (tag) { 698 switch (tag) {
@@ -725,12 +752,14 @@ pnpbios_encode_allocated_resource_data(unsigned char * p, unsigned char * end, s
725 752
726 case SMALL_TAG_END: 753 case SMALL_TAG_END:
727 p = p + 2; 754 p = p + 2;
728 return (unsigned char *)p; 755 return (unsigned char *)p;
729 break; 756 break;
730 757
731 default: /* an unkown tag */ 758 default: /* an unkown tag */
732 len_err: 759 len_err:
733 printk(KERN_ERR "PnPBIOS: Unknown tag '0x%x', length '%d'.\n", tag, len); 760 printk(KERN_ERR
761 "PnPBIOS: Unknown tag '0x%x', length '%d'.\n",
762 tag, len);
734 break; 763 break;
735 } 764 }
736 765
@@ -741,52 +770,52 @@ pnpbios_encode_allocated_resource_data(unsigned char * p, unsigned char * end, s
741 p += len + 1; 770 p += len + 1;
742 } 771 }
743 772
744 printk(KERN_ERR "PnPBIOS: Resource structure does not contain an end tag.\n"); 773 printk(KERN_ERR
774 "PnPBIOS: Resource structure does not contain an end tag.\n");
745 775
746 return NULL; 776 return NULL;
747} 777}
748 778
749
750/* 779/*
751 * Core Parsing Functions 780 * Core Parsing Functions
752 */ 781 */
753 782
754int 783int pnpbios_parse_data_stream(struct pnp_dev *dev, struct pnp_bios_node *node)
755pnpbios_parse_data_stream(struct pnp_dev *dev, struct pnp_bios_node * node)
756{ 784{
757 unsigned char * p = (char *)node->data; 785 unsigned char *p = (char *)node->data;
758 unsigned char * end = (char *)(node->data + node->size); 786 unsigned char *end = (char *)(node->data + node->size);
759 p = pnpbios_parse_allocated_resource_data(p,end,&dev->res); 787
788 p = pnpbios_parse_allocated_resource_data(p, end, &dev->res);
760 if (!p) 789 if (!p)
761 return -EIO; 790 return -EIO;
762 p = pnpbios_parse_resource_option_data(p,end,dev); 791 p = pnpbios_parse_resource_option_data(p, end, dev);
763 if (!p) 792 if (!p)
764 return -EIO; 793 return -EIO;
765 p = pnpbios_parse_compatible_ids(p,end,dev); 794 p = pnpbios_parse_compatible_ids(p, end, dev);
766 if (!p) 795 if (!p)
767 return -EIO; 796 return -EIO;
768 return 0; 797 return 0;
769} 798}
770 799
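
pnpbios_parse_data_stream() above is a three-stage pipeline over one buffer: each parser consumes its portion of the node's data and returns the advanced cursor, or NULL on a malformed stream, so the stages chain with early -EIO bailout. A toy sketch of that chaining pattern (stage_ok() is a made-up stage that just consumes one byte):

#include <stdio.h>

typedef unsigned char *(*stage_fn)(unsigned char *p, unsigned char *end);

static unsigned char *stage_ok(unsigned char *p, unsigned char *end)
{
    return p < end ? p + 1 : NULL;       /* NULL means truncated stream */
}

int main(void)
{
    unsigned char data[3];
    unsigned char *p = data, *end = data + sizeof(data);
    stage_fn stages[] = { stage_ok, stage_ok, stage_ok };
    unsigned int i;

    for (i = 0; i < 3; i++) {
        p = stages[i](p, end);
        if (!p) {
            fprintf(stderr, "stage %u failed\n", i);
            return 1;                    /* mirrors the -EIO early returns */
        }
    }
    printf("all stages parsed, %ld byte(s) left\n", (long)(end - p));
    return 0;
}
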
771int 800int pnpbios_read_resources_from_node(struct pnp_resource_table *res,
772pnpbios_read_resources_from_node(struct pnp_resource_table *res, 801 struct pnp_bios_node *node)
773 struct pnp_bios_node * node)
774{ 802{
775 unsigned char * p = (char *)node->data; 803 unsigned char *p = (char *)node->data;
776 unsigned char * end = (char *)(node->data + node->size); 804 unsigned char *end = (char *)(node->data + node->size);
777 p = pnpbios_parse_allocated_resource_data(p,end,res); 805
806 p = pnpbios_parse_allocated_resource_data(p, end, res);
778 if (!p) 807 if (!p)
779 return -EIO; 808 return -EIO;
780 return 0; 809 return 0;
781} 810}
782 811
783int 812int pnpbios_write_resources_to_node(struct pnp_resource_table *res,
784pnpbios_write_resources_to_node(struct pnp_resource_table *res, 813 struct pnp_bios_node *node)
785 struct pnp_bios_node * node)
786{ 814{
787 unsigned char * p = (char *)node->data; 815 unsigned char *p = (char *)node->data;
788 unsigned char * end = (char *)(node->data + node->size); 816 unsigned char *end = (char *)(node->data + node->size);
789 p = pnpbios_encode_allocated_resource_data(p,end,res); 817
818 p = pnpbios_encode_allocated_resource_data(p, end, res);
790 if (!p) 819 if (!p)
791 return -EIO; 820 return -EIO;
792 return 0; 821 return 0;
diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c
index 7c3236690cc3..90755d4cdb9f 100644
--- a/drivers/pnp/quirks.c
+++ b/drivers/pnp/quirks.c
@@ -19,7 +19,6 @@
19#include <linux/io.h> 19#include <linux/io.h>
20#include "base.h" 20#include "base.h"
21 21
22
23static void quirk_awe32_resources(struct pnp_dev *dev) 22static void quirk_awe32_resources(struct pnp_dev *dev)
24{ 23{
25 struct pnp_port *port, *port2, *port3; 24 struct pnp_port *port, *port2, *port3;
@@ -31,7 +30,7 @@ static void quirk_awe32_resources(struct pnp_dev *dev)
31 * two extra ports (at offset 0x400 and 0x800 from the one given) by 30 * two extra ports (at offset 0x400 and 0x800 from the one given) by
32 * hand. 31 * hand.
33 */ 32 */
34 for ( ; res ; res = res->next ) { 33 for (; res; res = res->next) {
35 port2 = pnp_alloc(sizeof(struct pnp_port)); 34 port2 = pnp_alloc(sizeof(struct pnp_port));
36 if (!port2) 35 if (!port2)
37 return; 36 return;
@@ -58,18 +57,19 @@ static void quirk_cmi8330_resources(struct pnp_dev *dev)
58 struct pnp_option *res = dev->dependent; 57 struct pnp_option *res = dev->dependent;
59 unsigned long tmp; 58 unsigned long tmp;
60 59
61 for ( ; res ; res = res->next ) { 60 for (; res; res = res->next) {
62 61
63 struct pnp_irq *irq; 62 struct pnp_irq *irq;
64 struct pnp_dma *dma; 63 struct pnp_dma *dma;
65 64
66 for( irq = res->irq; irq; irq = irq->next ) { // Valid irqs are 5, 7, 10 65 for (irq = res->irq; irq; irq = irq->next) { // Valid irqs are 5, 7, 10
67 tmp = 0x04A0; 66 tmp = 0x04A0;
68 bitmap_copy(irq->map, &tmp, 16); // 0000 0100 1010 0000 67 bitmap_copy(irq->map, &tmp, 16); // 0000 0100 1010 0000
69 } 68 }
70 69
71 for( dma = res->dma; dma; dma = dma->next ) // Valid 8bit dma channels are 1,3 70 for (dma = res->dma; dma; dma = dma->next) // Valid 8bit dma channels are 1,3
72 if( ( dma->flags & IORESOURCE_DMA_TYPE_MASK ) == IORESOURCE_DMA_8BIT ) 71 if ((dma->flags & IORESOURCE_DMA_TYPE_MASK) ==
72 IORESOURCE_DMA_8BIT)
73 dma->map = 0x000A; 73 dma->map = 0x000A;
74 } 74 }
75 printk(KERN_INFO "pnp: CMI8330 quirk - fixing interrupts and dma\n"); 75 printk(KERN_INFO "pnp: CMI8330 quirk - fixing interrupts and dma\n");
@@ -79,7 +79,7 @@ static void quirk_sb16audio_resources(struct pnp_dev *dev)
79{ 79{
80 struct pnp_port *port; 80 struct pnp_port *port;
81 struct pnp_option *res = dev->dependent; 81 struct pnp_option *res = dev->dependent;
82 int changed = 0; 82 int changed = 0;
83 83
84 /* 84 /*
85 * The default range on the mpu port for these devices is 0x388-0x388. 85 * The default range on the mpu port for these devices is 0x388-0x388.
@@ -87,24 +87,24 @@ static void quirk_sb16audio_resources(struct pnp_dev *dev)
87 * auto-configured. 87 * auto-configured.
88 */ 88 */
89 89
90 for( ; res ; res = res->next ) { 90 for (; res; res = res->next) {
91 port = res->port; 91 port = res->port;
92 if(!port) 92 if (!port)
93 continue; 93 continue;
94 port = port->next; 94 port = port->next;
95 if(!port) 95 if (!port)
96 continue; 96 continue;
97 port = port->next; 97 port = port->next;
98 if(!port) 98 if (!port)
99 continue; 99 continue;
100 if(port->min != port->max) 100 if (port->min != port->max)
101 continue; 101 continue;
102 port->max += 0x70; 102 port->max += 0x70;
103 changed = 1; 103 changed = 1;
104 } 104 }
105 if(changed) 105 if (changed)
106 printk(KERN_INFO "pnp: SB audio device quirk - increasing port range\n"); 106 printk(KERN_INFO
107 return; 107 "pnp: SB audio device quirk - increasing port range\n");
108} 108}
109 109
110static int quirk_smc_fir_enabled(struct pnp_dev *dev) 110static int quirk_smc_fir_enabled(struct pnp_dev *dev)
@@ -124,7 +124,7 @@ static int quirk_smc_fir_enabled(struct pnp_dev *dev)
124 outb(bank, firbase + 7); 124 outb(bank, firbase + 7);
125 125
126 high = inb(firbase + 0); 126 high = inb(firbase + 0);
127 low = inb(firbase + 1); 127 low = inb(firbase + 1);
128 chip = inb(firbase + 2); 128 chip = inb(firbase + 2);
129 129
130 /* This corresponds to the check in smsc_ircc_present() */ 130 /* This corresponds to the check in smsc_ircc_present() */
@@ -153,8 +153,8 @@ static void quirk_smc_enable(struct pnp_dev *dev)
153 */ 153 */
154 dev_err(&dev->dev, "%s not responding at SIR 0x%lx, FIR 0x%lx; " 154 dev_err(&dev->dev, "%s not responding at SIR 0x%lx, FIR 0x%lx; "
155 "auto-configuring\n", dev->id->id, 155 "auto-configuring\n", dev->id->id,
156 (unsigned long) pnp_port_start(dev, 0), 156 (unsigned long)pnp_port_start(dev, 0),
157 (unsigned long) pnp_port_start(dev, 1)); 157 (unsigned long)pnp_port_start(dev, 1));
158 158
159 pnp_disable_dev(dev); 159 pnp_disable_dev(dev);
160 pnp_init_resource_table(&dev->res); 160 pnp_init_resource_table(&dev->res);
@@ -162,8 +162,8 @@ static void quirk_smc_enable(struct pnp_dev *dev)
162 pnp_activate_dev(dev); 162 pnp_activate_dev(dev);
163 if (quirk_smc_fir_enabled(dev)) { 163 if (quirk_smc_fir_enabled(dev)) {
164 dev_err(&dev->dev, "responds at SIR 0x%lx, FIR 0x%lx\n", 164 dev_err(&dev->dev, "responds at SIR 0x%lx, FIR 0x%lx\n",
165 (unsigned long) pnp_port_start(dev, 0), 165 (unsigned long)pnp_port_start(dev, 0),
166 (unsigned long) pnp_port_start(dev, 1)); 166 (unsigned long)pnp_port_start(dev, 1));
167 return; 167 return;
168 } 168 }
169 169
@@ -175,8 +175,8 @@ static void quirk_smc_enable(struct pnp_dev *dev)
175 */ 175 */
176 dev_err(&dev->dev, "not responding at SIR 0x%lx, FIR 0x%lx; " 176 dev_err(&dev->dev, "not responding at SIR 0x%lx, FIR 0x%lx; "
177 "swapping SIR/FIR and reconfiguring\n", 177 "swapping SIR/FIR and reconfiguring\n",
178 (unsigned long) pnp_port_start(dev, 0), 178 (unsigned long)pnp_port_start(dev, 0),
179 (unsigned long) pnp_port_start(dev, 1)); 179 (unsigned long)pnp_port_start(dev, 1));
180 180
181 /* 181 /*
182 * Clear IORESOURCE_AUTO so pnp_activate_dev() doesn't reassign 182 * Clear IORESOURCE_AUTO so pnp_activate_dev() doesn't reassign
@@ -200,8 +200,8 @@ static void quirk_smc_enable(struct pnp_dev *dev)
200 200
201 if (quirk_smc_fir_enabled(dev)) { 201 if (quirk_smc_fir_enabled(dev)) {
202 dev_err(&dev->dev, "responds at SIR 0x%lx, FIR 0x%lx\n", 202 dev_err(&dev->dev, "responds at SIR 0x%lx, FIR 0x%lx\n",
203 (unsigned long) pnp_port_start(dev, 0), 203 (unsigned long)pnp_port_start(dev, 0),
204 (unsigned long) pnp_port_start(dev, 1)); 204 (unsigned long)pnp_port_start(dev, 1));
205 return; 205 return;
206 } 206 }
207 207
@@ -209,7 +209,6 @@ static void quirk_smc_enable(struct pnp_dev *dev)
209 "email bjorn.helgaas@hp.com\n"); 209 "email bjorn.helgaas@hp.com\n");
210} 210}
211 211
212
213/* 212/*
214 * PnP Quirks 213 * PnP Quirks
215 * Cards or devices that need some tweaking due to incomplete resource info 214 * Cards or devices that need some tweaking due to incomplete resource info
@@ -217,21 +216,21 @@ static void quirk_smc_enable(struct pnp_dev *dev)
217 216
218static struct pnp_fixup pnp_fixups[] = { 217static struct pnp_fixup pnp_fixups[] = {
219 /* Soundblaster awe io port quirk */ 218 /* Soundblaster awe io port quirk */
220 { "CTL0021", quirk_awe32_resources }, 219 {"CTL0021", quirk_awe32_resources},
221 { "CTL0022", quirk_awe32_resources }, 220 {"CTL0022", quirk_awe32_resources},
222 { "CTL0023", quirk_awe32_resources }, 221 {"CTL0023", quirk_awe32_resources},
223 /* CMI 8330 interrupt and dma fix */ 222 /* CMI 8330 interrupt and dma fix */
224 { "@X@0001", quirk_cmi8330_resources }, 223 {"@X@0001", quirk_cmi8330_resources},
225 /* Soundblaster audio device io port range quirk */ 224 /* Soundblaster audio device io port range quirk */
226 { "CTL0001", quirk_sb16audio_resources }, 225 {"CTL0001", quirk_sb16audio_resources},
227 { "CTL0031", quirk_sb16audio_resources }, 226 {"CTL0031", quirk_sb16audio_resources},
228 { "CTL0041", quirk_sb16audio_resources }, 227 {"CTL0041", quirk_sb16audio_resources},
229 { "CTL0042", quirk_sb16audio_resources }, 228 {"CTL0042", quirk_sb16audio_resources},
230 { "CTL0043", quirk_sb16audio_resources }, 229 {"CTL0043", quirk_sb16audio_resources},
231 { "CTL0044", quirk_sb16audio_resources }, 230 {"CTL0044", quirk_sb16audio_resources},
232 { "CTL0045", quirk_sb16audio_resources }, 231 {"CTL0045", quirk_sb16audio_resources},
233 { "SMCf010", quirk_smc_enable }, 232 {"SMCf010", quirk_smc_enable},
234 { "" } 233 {""}
235}; 234};
236 235
237void pnp_fixup_device(struct pnp_dev *dev) 236void pnp_fixup_device(struct pnp_dev *dev)
@@ -239,9 +238,8 @@ void pnp_fixup_device(struct pnp_dev *dev)
239 int i = 0; 238 int i = 0;
240 239
241 while (*pnp_fixups[i].id) { 240 while (*pnp_fixups[i].id) {
242 if (compare_pnp_id(dev->id,pnp_fixups[i].id)) { 241 if (compare_pnp_id(dev->id, pnp_fixups[i].id)) {
243 pnp_dbg("Calling quirk for %s", 242 pnp_dbg("Calling quirk for %s", dev->dev.bus_id);
244 dev->dev.bus_id);
245 pnp_fixups[i].quirk_function(dev); 243 pnp_fixups[i].quirk_function(dev);
246 } 244 }
247 i++; 245 i++;
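
For context on the table above: pnp_fixup_device() walks pnp_fixups[] until the empty-string sentinel and applies every quirk whose ID matches the device. A small self-contained sketch of that sentinel-terminated dispatch, assuming a plain strcmp() where the kernel uses compare_pnp_id():

#include <stdio.h>
#include <string.h>

struct fixup {
        const char *id;
        void (*quirk)(const char *id);
};

static void quirk_demo(const char *id)
{
        printf("applying quirk for %s\n", id);
}

/* Sentinel-terminated table, like pnp_fixups[] ending in {""}. */
static const struct fixup fixups[] = {
        { "CTL0021", quirk_demo },
        { "CTL0001", quirk_demo },
        { "" },
};

static void fixup_device(const char *dev_id)
{
        int i;

        for (i = 0; *fixups[i].id; i++)
                if (!strcmp(fixups[i].id, dev_id))
                        fixups[i].quirk(dev_id);
}

int main(void)
{
        fixup_device("CTL0021");
        return 0;
}
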
diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c
index a685fbec4604..ea6ec14a0559 100644
--- a/drivers/pnp/resource.c
+++ b/drivers/pnp/resource.c
@@ -3,7 +3,6 @@
3 * 3 *
4 * based on isapnp.c resource management (c) Jaroslav Kysela <perex@suse.cz> 4 * based on isapnp.c resource management (c) Jaroslav Kysela <perex@suse.cz>
5 * Copyright 2003 Adam Belay <ambx1@neo.rr.com> 5 * Copyright 2003 Adam Belay <ambx1@neo.rr.com>
6 *
7 */ 6 */
8 7
9#include <linux/module.h> 8#include <linux/module.h>
@@ -20,21 +19,19 @@
20#include <linux/pnp.h> 19#include <linux/pnp.h>
21#include "base.h" 20#include "base.h"
22 21
23static int pnp_reserve_irq[16] = { [0 ... 15] = -1 }; /* reserve (don't use) some IRQ */ 22static int pnp_reserve_irq[16] = {[0 ... 15] = -1 }; /* reserve (don't use) some IRQ */
24static int pnp_reserve_dma[8] = { [0 ... 7] = -1 }; /* reserve (don't use) some DMA */ 23static int pnp_reserve_dma[8] = {[0 ... 7] = -1 }; /* reserve (don't use) some DMA */
25static int pnp_reserve_io[16] = { [0 ... 15] = -1 }; /* reserve (don't use) some I/O region */ 24static int pnp_reserve_io[16] = {[0 ... 15] = -1 }; /* reserve (don't use) some I/O region */
26static int pnp_reserve_mem[16] = { [0 ... 15] = -1 }; /* reserve (don't use) some memory region */ 25static int pnp_reserve_mem[16] = {[0 ... 15] = -1 }; /* reserve (don't use) some memory region */
27
28 26
29/* 27/*
30 * option registration 28 * option registration
31 */ 29 */
32 30
33static struct pnp_option * pnp_build_option(int priority) 31static struct pnp_option *pnp_build_option(int priority)
34{ 32{
35 struct pnp_option *option = pnp_alloc(sizeof(struct pnp_option)); 33 struct pnp_option *option = pnp_alloc(sizeof(struct pnp_option));
36 34
37 /* check if pnp_alloc ran out of memory */
38 if (!option) 35 if (!option)
39 return NULL; 36 return NULL;
40 37
@@ -46,9 +43,10 @@ static struct pnp_option * pnp_build_option(int priority)
46 return option; 43 return option;
47} 44}
48 45
49struct pnp_option * pnp_register_independent_option(struct pnp_dev *dev) 46struct pnp_option *pnp_register_independent_option(struct pnp_dev *dev)
50{ 47{
51 struct pnp_option *option; 48 struct pnp_option *option;
49
52 if (!dev) 50 if (!dev)
53 return NULL; 51 return NULL;
54 52
@@ -61,9 +59,11 @@ struct pnp_option * pnp_register_independent_option(struct pnp_dev *dev)
61 return option; 59 return option;
62} 60}
63 61
64struct pnp_option * pnp_register_dependent_option(struct pnp_dev *dev, int priority) 62struct pnp_option *pnp_register_dependent_option(struct pnp_dev *dev,
63 int priority)
65{ 64{
66 struct pnp_option *option; 65 struct pnp_option *option;
66
67 if (!dev) 67 if (!dev)
68 return NULL; 68 return NULL;
69 69
@@ -82,6 +82,7 @@ struct pnp_option * pnp_register_dependent_option(struct pnp_dev *dev, int prior
82int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data) 82int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data)
83{ 83{
84 struct pnp_irq *ptr; 84 struct pnp_irq *ptr;
85
85 if (!option) 86 if (!option)
86 return -EINVAL; 87 return -EINVAL;
87 if (!data) 88 if (!data)
@@ -110,6 +111,7 @@ int pnp_register_irq_resource(struct pnp_option *option, struct pnp_irq *data)
110int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data) 111int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data)
111{ 112{
112 struct pnp_dma *ptr; 113 struct pnp_dma *ptr;
114
113 if (!option) 115 if (!option)
114 return -EINVAL; 116 return -EINVAL;
115 if (!data) 117 if (!data)
@@ -129,6 +131,7 @@ int pnp_register_dma_resource(struct pnp_option *option, struct pnp_dma *data)
129int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data) 131int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data)
130{ 132{
131 struct pnp_port *ptr; 133 struct pnp_port *ptr;
134
132 if (!option) 135 if (!option)
133 return -EINVAL; 136 return -EINVAL;
134 if (!data) 137 if (!data)
@@ -148,6 +151,7 @@ int pnp_register_port_resource(struct pnp_option *option, struct pnp_port *data)
148int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data) 151int pnp_register_mem_resource(struct pnp_option *option, struct pnp_mem *data)
149{ 152{
150 struct pnp_mem *ptr; 153 struct pnp_mem *ptr;
154
151 if (!option) 155 if (!option)
152 return -EINVAL; 156 return -EINVAL;
153 if (!data) 157 if (!data)
@@ -222,7 +226,6 @@ void pnp_free_option(struct pnp_option *option)
222 } 226 }
223} 227}
224 228
225
226/* 229/*
227 * resource validity checking 230 * resource validity checking
228 */ 231 */
@@ -236,11 +239,12 @@ void pnp_free_option(struct pnp_option *option)
236#define cannot_compare(flags) \ 239#define cannot_compare(flags) \
237((flags) & (IORESOURCE_UNSET | IORESOURCE_DISABLED)) 240((flags) & (IORESOURCE_UNSET | IORESOURCE_DISABLED))
238 241
239int pnp_check_port(struct pnp_dev * dev, int idx) 242int pnp_check_port(struct pnp_dev *dev, int idx)
240{ 243{
241 int tmp; 244 int tmp;
242 struct pnp_dev *tdev; 245 struct pnp_dev *tdev;
243 resource_size_t *port, *end, *tport, *tend; 246 resource_size_t *port, *end, *tport, *tend;
247
244 port = &dev->res.port_resource[idx].start; 248 port = &dev->res.port_resource[idx].start;
245 end = &dev->res.port_resource[idx].end; 249 end = &dev->res.port_resource[idx].end;
246 250
@@ -250,8 +254,8 @@ int pnp_check_port(struct pnp_dev * dev, int idx)
250 254
251 /* check if the resource is already in use, skip if the 255 /* check if the resource is already in use, skip if the
252 * device is active because it itself may be in use */ 256 * device is active because it itself may be in use */
253 if(!dev->active) { 257 if (!dev->active) {
254 if (__check_region(&ioport_resource, *port, length(port,end))) 258 if (__check_region(&ioport_resource, *port, length(port, end)))
255 return 0; 259 return 0;
256 } 260 }
257 261
@@ -259,7 +263,7 @@ int pnp_check_port(struct pnp_dev * dev, int idx)
259 for (tmp = 0; tmp < 8; tmp++) { 263 for (tmp = 0; tmp < 8; tmp++) {
260 int rport = pnp_reserve_io[tmp << 1]; 264 int rport = pnp_reserve_io[tmp << 1];
261 int rend = pnp_reserve_io[(tmp << 1) + 1] + rport - 1; 265 int rend = pnp_reserve_io[(tmp << 1) + 1] + rport - 1;
262 if (ranged_conflict(port,end,&rport,&rend)) 266 if (ranged_conflict(port, end, &rport, &rend))
263 return 0; 267 return 0;
264 } 268 }
265 269
@@ -268,7 +272,7 @@ int pnp_check_port(struct pnp_dev * dev, int idx)
268 if (dev->res.port_resource[tmp].flags & IORESOURCE_IO) { 272 if (dev->res.port_resource[tmp].flags & IORESOURCE_IO) {
269 tport = &dev->res.port_resource[tmp].start; 273 tport = &dev->res.port_resource[tmp].start;
270 tend = &dev->res.port_resource[tmp].end; 274 tend = &dev->res.port_resource[tmp].end;
271 if (ranged_conflict(port,end,tport,tend)) 275 if (ranged_conflict(port, end, tport, tend))
272 return 0; 276 return 0;
273 } 277 }
274 } 278 }
@@ -279,11 +283,12 @@ int pnp_check_port(struct pnp_dev * dev, int idx)
279 continue; 283 continue;
280 for (tmp = 0; tmp < PNP_MAX_PORT; tmp++) { 284 for (tmp = 0; tmp < PNP_MAX_PORT; tmp++) {
281 if (tdev->res.port_resource[tmp].flags & IORESOURCE_IO) { 285 if (tdev->res.port_resource[tmp].flags & IORESOURCE_IO) {
282 if (cannot_compare(tdev->res.port_resource[tmp].flags)) 286 if (cannot_compare
287 (tdev->res.port_resource[tmp].flags))
283 continue; 288 continue;
284 tport = &tdev->res.port_resource[tmp].start; 289 tport = &tdev->res.port_resource[tmp].start;
285 tend = &tdev->res.port_resource[tmp].end; 290 tend = &tdev->res.port_resource[tmp].end;
286 if (ranged_conflict(port,end,tport,tend)) 291 if (ranged_conflict(port, end, tport, tend))
287 return 0; 292 return 0;
288 } 293 }
289 } 294 }
@@ -292,11 +297,12 @@ int pnp_check_port(struct pnp_dev * dev, int idx)
292 return 1; 297 return 1;
293} 298}
294 299
295int pnp_check_mem(struct pnp_dev * dev, int idx) 300int pnp_check_mem(struct pnp_dev *dev, int idx)
296{ 301{
297 int tmp; 302 int tmp;
298 struct pnp_dev *tdev; 303 struct pnp_dev *tdev;
299 resource_size_t *addr, *end, *taddr, *tend; 304 resource_size_t *addr, *end, *taddr, *tend;
305
300 addr = &dev->res.mem_resource[idx].start; 306 addr = &dev->res.mem_resource[idx].start;
301 end = &dev->res.mem_resource[idx].end; 307 end = &dev->res.mem_resource[idx].end;
302 308
@@ -306,8 +312,8 @@ int pnp_check_mem(struct pnp_dev * dev, int idx)
306 312
307 /* check if the resource is already in use, skip if the 313 /* check if the resource is already in use, skip if the
308 * device is active because it itself may be in use */ 314 * device is active because it itself may be in use */
309 if(!dev->active) { 315 if (!dev->active) {
310 if (check_mem_region(*addr, length(addr,end))) 316 if (check_mem_region(*addr, length(addr, end)))
311 return 0; 317 return 0;
312 } 318 }
313 319
@@ -315,7 +321,7 @@ int pnp_check_mem(struct pnp_dev * dev, int idx)
315 for (tmp = 0; tmp < 8; tmp++) { 321 for (tmp = 0; tmp < 8; tmp++) {
316 int raddr = pnp_reserve_mem[tmp << 1]; 322 int raddr = pnp_reserve_mem[tmp << 1];
317 int rend = pnp_reserve_mem[(tmp << 1) + 1] + raddr - 1; 323 int rend = pnp_reserve_mem[(tmp << 1) + 1] + raddr - 1;
318 if (ranged_conflict(addr,end,&raddr,&rend)) 324 if (ranged_conflict(addr, end, &raddr, &rend))
319 return 0; 325 return 0;
320 } 326 }
321 327
@@ -324,7 +330,7 @@ int pnp_check_mem(struct pnp_dev * dev, int idx)
324 if (dev->res.mem_resource[tmp].flags & IORESOURCE_MEM) { 330 if (dev->res.mem_resource[tmp].flags & IORESOURCE_MEM) {
325 taddr = &dev->res.mem_resource[tmp].start; 331 taddr = &dev->res.mem_resource[tmp].start;
326 tend = &dev->res.mem_resource[tmp].end; 332 tend = &dev->res.mem_resource[tmp].end;
327 if (ranged_conflict(addr,end,taddr,tend)) 333 if (ranged_conflict(addr, end, taddr, tend))
328 return 0; 334 return 0;
329 } 335 }
330 } 336 }
@@ -335,11 +341,12 @@ int pnp_check_mem(struct pnp_dev * dev, int idx)
335 continue; 341 continue;
336 for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) { 342 for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) {
337 if (tdev->res.mem_resource[tmp].flags & IORESOURCE_MEM) { 343 if (tdev->res.mem_resource[tmp].flags & IORESOURCE_MEM) {
338 if (cannot_compare(tdev->res.mem_resource[tmp].flags)) 344 if (cannot_compare
345 (tdev->res.mem_resource[tmp].flags))
339 continue; 346 continue;
340 taddr = &tdev->res.mem_resource[tmp].start; 347 taddr = &tdev->res.mem_resource[tmp].start;
341 tend = &tdev->res.mem_resource[tmp].end; 348 tend = &tdev->res.mem_resource[tmp].end;
342 if (ranged_conflict(addr,end,taddr,tend)) 349 if (ranged_conflict(addr, end, taddr, tend))
343 return 0; 350 return 0;
344 } 351 }
345 } 352 }
@@ -353,11 +360,11 @@ static irqreturn_t pnp_test_handler(int irq, void *dev_id)
353 return IRQ_HANDLED; 360 return IRQ_HANDLED;
354} 361}
355 362
356int pnp_check_irq(struct pnp_dev * dev, int idx) 363int pnp_check_irq(struct pnp_dev *dev, int idx)
357{ 364{
358 int tmp; 365 int tmp;
359 struct pnp_dev *tdev; 366 struct pnp_dev *tdev;
360 resource_size_t * irq = &dev->res.irq_resource[idx].start; 367 resource_size_t *irq = &dev->res.irq_resource[idx].start;
361 368
362 /* if the resource doesn't exist, don't complain about it */ 369 /* if the resource doesn't exist, don't complain about it */
363 if (cannot_compare(dev->res.irq_resource[idx].flags)) 370 if (cannot_compare(dev->res.irq_resource[idx].flags))
@@ -394,9 +401,9 @@ int pnp_check_irq(struct pnp_dev * dev, int idx)
394 401
395 /* check if the resource is already in use, skip if the 402 /* check if the resource is already in use, skip if the
396 * device is active because it itself may be in use */ 403 * device is active because it itself may be in use */
397 if(!dev->active) { 404 if (!dev->active) {
398 if (request_irq(*irq, pnp_test_handler, 405 if (request_irq(*irq, pnp_test_handler,
399 IRQF_DISABLED|IRQF_PROBE_SHARED, "pnp", NULL)) 406 IRQF_DISABLED | IRQF_PROBE_SHARED, "pnp", NULL))
400 return 0; 407 return 0;
401 free_irq(*irq, NULL); 408 free_irq(*irq, NULL);
402 } 409 }
@@ -407,7 +414,8 @@ int pnp_check_irq(struct pnp_dev * dev, int idx)
407 continue; 414 continue;
408 for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) { 415 for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) {
409 if (tdev->res.irq_resource[tmp].flags & IORESOURCE_IRQ) { 416 if (tdev->res.irq_resource[tmp].flags & IORESOURCE_IRQ) {
410 if (cannot_compare(tdev->res.irq_resource[tmp].flags)) 417 if (cannot_compare
418 (tdev->res.irq_resource[tmp].flags))
411 continue; 419 continue;
412 if ((tdev->res.irq_resource[tmp].start == *irq)) 420 if ((tdev->res.irq_resource[tmp].start == *irq))
413 return 0; 421 return 0;
@@ -418,12 +426,12 @@ int pnp_check_irq(struct pnp_dev * dev, int idx)
418 return 1; 426 return 1;
419} 427}
420 428
421int pnp_check_dma(struct pnp_dev * dev, int idx) 429int pnp_check_dma(struct pnp_dev *dev, int idx)
422{ 430{
423#ifndef CONFIG_IA64 431#ifndef CONFIG_IA64
424 int tmp; 432 int tmp;
425 struct pnp_dev *tdev; 433 struct pnp_dev *tdev;
426 resource_size_t * dma = &dev->res.dma_resource[idx].start; 434 resource_size_t *dma = &dev->res.dma_resource[idx].start;
427 435
428 /* if the resource doesn't exist, don't complain about it */ 436 /* if the resource doesn't exist, don't complain about it */
429 if (cannot_compare(dev->res.dma_resource[idx].flags)) 437 if (cannot_compare(dev->res.dma_resource[idx].flags))
@@ -449,7 +457,7 @@ int pnp_check_dma(struct pnp_dev * dev, int idx)
449 457
450 /* check if the resource is already in use, skip if the 458 /* check if the resource is already in use, skip if the
451 * device is active because it itself may be in use */ 459 * device is active because it itself may be in use */
452 if(!dev->active) { 460 if (!dev->active) {
453 if (request_dma(*dma, "pnp")) 461 if (request_dma(*dma, "pnp"))
454 return 0; 462 return 0;
455 free_dma(*dma); 463 free_dma(*dma);
@@ -461,7 +469,8 @@ int pnp_check_dma(struct pnp_dev * dev, int idx)
461 continue; 469 continue;
462 for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) { 470 for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) {
463 if (tdev->res.dma_resource[tmp].flags & IORESOURCE_DMA) { 471 if (tdev->res.dma_resource[tmp].flags & IORESOURCE_DMA) {
464 if (cannot_compare(tdev->res.dma_resource[tmp].flags)) 472 if (cannot_compare
473 (tdev->res.dma_resource[tmp].flags))
465 continue; 474 continue;
466 if ((tdev->res.dma_resource[tmp].start == *dma)) 475 if ((tdev->res.dma_resource[tmp].start == *dma))
467 return 0; 476 return 0;
@@ -471,30 +480,18 @@ int pnp_check_dma(struct pnp_dev * dev, int idx)
471 480
472 return 1; 481 return 1;
473#else 482#else
474 /* IA64 hasn't legacy DMA */ 483 /* IA64 does not have legacy DMA */
475 return 0; 484 return 0;
476#endif 485#endif
477} 486}
478 487
479
480#if 0
481EXPORT_SYMBOL(pnp_register_dependent_option);
482EXPORT_SYMBOL(pnp_register_independent_option);
483EXPORT_SYMBOL(pnp_register_irq_resource);
484EXPORT_SYMBOL(pnp_register_dma_resource);
485EXPORT_SYMBOL(pnp_register_port_resource);
486EXPORT_SYMBOL(pnp_register_mem_resource);
487#endif /* 0 */
488
489
490/* format is: pnp_reserve_irq=irq1[,irq2] .... */ 488/* format is: pnp_reserve_irq=irq1[,irq2] .... */
491
492static int __init pnp_setup_reserve_irq(char *str) 489static int __init pnp_setup_reserve_irq(char *str)
493{ 490{
494 int i; 491 int i;
495 492
496 for (i = 0; i < 16; i++) 493 for (i = 0; i < 16; i++)
497 if (get_option(&str,&pnp_reserve_irq[i]) != 2) 494 if (get_option(&str, &pnp_reserve_irq[i]) != 2)
498 break; 495 break;
499 return 1; 496 return 1;
500} 497}
@@ -502,13 +499,12 @@ static int __init pnp_setup_reserve_irq(char *str)
502__setup("pnp_reserve_irq=", pnp_setup_reserve_irq); 499__setup("pnp_reserve_irq=", pnp_setup_reserve_irq);
503 500
504/* format is: pnp_reserve_dma=dma1[,dma2] .... */ 501/* format is: pnp_reserve_dma=dma1[,dma2] .... */
505
506static int __init pnp_setup_reserve_dma(char *str) 502static int __init pnp_setup_reserve_dma(char *str)
507{ 503{
508 int i; 504 int i;
509 505
510 for (i = 0; i < 8; i++) 506 for (i = 0; i < 8; i++)
511 if (get_option(&str,&pnp_reserve_dma[i]) != 2) 507 if (get_option(&str, &pnp_reserve_dma[i]) != 2)
512 break; 508 break;
513 return 1; 509 return 1;
514} 510}
@@ -516,13 +512,12 @@ static int __init pnp_setup_reserve_dma(char *str)
516__setup("pnp_reserve_dma=", pnp_setup_reserve_dma); 512__setup("pnp_reserve_dma=", pnp_setup_reserve_dma);
517 513
518/* format is: pnp_reserve_io=io1,size1[,io2,size2] .... */ 514/* format is: pnp_reserve_io=io1,size1[,io2,size2] .... */
519
520static int __init pnp_setup_reserve_io(char *str) 515static int __init pnp_setup_reserve_io(char *str)
521{ 516{
522 int i; 517 int i;
523 518
524 for (i = 0; i < 16; i++) 519 for (i = 0; i < 16; i++)
525 if (get_option(&str,&pnp_reserve_io[i]) != 2) 520 if (get_option(&str, &pnp_reserve_io[i]) != 2)
526 break; 521 break;
527 return 1; 522 return 1;
528} 523}
@@ -530,13 +525,12 @@ static int __init pnp_setup_reserve_io(char *str)
530__setup("pnp_reserve_io=", pnp_setup_reserve_io); 525__setup("pnp_reserve_io=", pnp_setup_reserve_io);
531 526
532/* format is: pnp_reserve_mem=mem1,size1[,mem2,size2] .... */ 527/* format is: pnp_reserve_mem=mem1,size1[,mem2,size2] .... */
533
534static int __init pnp_setup_reserve_mem(char *str) 528static int __init pnp_setup_reserve_mem(char *str)
535{ 529{
536 int i; 530 int i;
537 531
538 for (i = 0; i < 16; i++) 532 for (i = 0; i < 16; i++)
539 if (get_option(&str,&pnp_reserve_mem[i]) != 2) 533 if (get_option(&str, &pnp_reserve_mem[i]) != 2)
540 break; 534 break;
541 return 1; 535 return 1;
542} 536}
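
The four __setup() handlers above share one shape: pull comma-separated integers off the command line while get_option() reports that another value follows (return value 2). A rough userspace equivalent, with a simplified get_opt() standing in for the kernel's get_option():

#include <stdio.h>
#include <stdlib.h>

/* Rough stand-in for the kernel's get_option(): parse one int,
 * advance *str, return 2 if a comma (another value) follows,
 * 1 on a final value, 0 if nothing could be parsed. */
static int get_opt(char **str, int *val)
{
        char *end;

        *val = (int)strtol(*str, &end, 0);
        if (end == *str)
                return 0;
        if (*end == ',') {
                *str = end + 1;
                return 2;
        }
        *str = end;
        return 1;
}

int main(void)
{
        int reserve_irq[16] = { [0 ... 15] = -1 };
        char cmdline[] = "5,7,10";      /* e.g. pnp_reserve_irq=5,7,10 */
        char *s = cmdline;
        int i;

        for (i = 0; i < 16; i++)
                if (get_opt(&s, &reserve_irq[i]) != 2)
                        break;
        printf("reserved irqs: %d %d %d\n",
               reserve_irq[0], reserve_irq[1], reserve_irq[2]);
        return 0;
}
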
diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c
index 946a0dcd627d..13c608f5fb30 100644
--- a/drivers/pnp/support.c
+++ b/drivers/pnp/support.c
@@ -1,8 +1,7 @@
1/* 1/*
2 * support.c - provides standard pnp functions for the use of pnp protocol drivers, 2 * support.c - standard functions for the use of pnp protocol drivers
3 * 3 *
4 * Copyright 2003 Adam Belay <ambx1@neo.rr.com> 4 * Copyright 2003 Adam Belay <ambx1@neo.rr.com>
5 *
6 */ 5 */
7 6
8#include <linux/module.h> 7#include <linux/module.h>
@@ -11,22 +10,18 @@
11#include "base.h" 10#include "base.h"
12 11
13/** 12/**
14 * pnp_is_active - Determines if a device is active based on its current resources 13 * pnp_is_active - Determines if a device is active based on its current
14 * resources
15 * @dev: pointer to the desired PnP device 15 * @dev: pointer to the desired PnP device
16 *
17 */ 16 */
18 17int pnp_is_active(struct pnp_dev *dev)
19int pnp_is_active(struct pnp_dev * dev)
20{ 18{
21 if (!pnp_port_start(dev, 0) && pnp_port_len(dev, 0) <= 1 && 19 if (!pnp_port_start(dev, 0) && pnp_port_len(dev, 0) <= 1 &&
22 !pnp_mem_start(dev, 0) && pnp_mem_len(dev, 0) <= 1 && 20 !pnp_mem_start(dev, 0) && pnp_mem_len(dev, 0) <= 1 &&
23 pnp_irq(dev, 0) == -1 && 21 pnp_irq(dev, 0) == -1 && pnp_dma(dev, 0) == -1)
24 pnp_dma(dev, 0) == -1) 22 return 0;
25 return 0;
26 else 23 else
27 return 1; 24 return 1;
28} 25}
29 26
30
31
32EXPORT_SYMBOL(pnp_is_active); 27EXPORT_SYMBOL(pnp_is_active);
diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c
index a8a95540b1ef..a06f980b3ac9 100644
--- a/drivers/pnp/system.c
+++ b/drivers/pnp/system.c
@@ -16,13 +16,14 @@
16 16
17static const struct pnp_device_id pnp_dev_table[] = { 17static const struct pnp_device_id pnp_dev_table[] = {
18 /* General ID for reserving resources */ 18 /* General ID for reserving resources */
19 { "PNP0c02", 0 }, 19 {"PNP0c02", 0},
20 /* memory controller */ 20 /* memory controller */
21 { "PNP0c01", 0 }, 21 {"PNP0c01", 0},
22 { "", 0 } 22 {"", 0}
23}; 23};
24 24
25static void reserve_range(const char *pnpid, resource_size_t start, resource_size_t end, int port) 25static void reserve_range(const char *pnpid, resource_size_t start,
26 resource_size_t end, int port)
26{ 27{
27 struct resource *res; 28 struct resource *res;
28 char *regionid; 29 char *regionid;
@@ -32,9 +33,9 @@ static void reserve_range(const char *pnpid, resource_size_t start, resource_siz
32 return; 33 return;
33 snprintf(regionid, 16, "pnp %s", pnpid); 34 snprintf(regionid, 16, "pnp %s", pnpid);
34 if (port) 35 if (port)
35 res = request_region(start, end-start+1, regionid); 36 res = request_region(start, end - start + 1, regionid);
36 else 37 else
37 res = request_mem_region(start, end-start+1, regionid); 38 res = request_mem_region(start, end - start + 1, regionid);
38 if (res == NULL) 39 if (res == NULL)
39 kfree(regionid); 40 kfree(regionid);
40 else 41 else
@@ -44,11 +45,10 @@ static void reserve_range(const char *pnpid, resource_size_t start, resource_siz
44 * example do reserve stuff they know about too, so we may well 45 * example do reserve stuff they know about too, so we may well
45 * have double reservations. 46 * have double reservations.
46 */ 47 */
47 printk(KERN_INFO 48 printk(KERN_INFO "pnp: %s: %s range 0x%llx-0x%llx %s reserved\n",
48 "pnp: %s: %s range 0x%llx-0x%llx %s reserved\n", 49 pnpid, port ? "ioport" : "iomem",
49 pnpid, port ? "ioport" : "iomem", 50 (unsigned long long)start, (unsigned long long)end,
50 (unsigned long long)start, (unsigned long long)end, 51 NULL != res ? "has been" : "could not be");
51 NULL != res ? "has been" : "could not be");
52} 52}
53 53
54static void reserve_resources_of_dev(const struct pnp_dev *dev) 54static void reserve_resources_of_dev(const struct pnp_dev *dev)
@@ -74,7 +74,7 @@ static void reserve_resources_of_dev(const struct pnp_dev *dev)
74 continue; /* invalid */ 74 continue; /* invalid */
75 75
76 reserve_range(dev->dev.bus_id, pnp_port_start(dev, i), 76 reserve_range(dev->dev.bus_id, pnp_port_start(dev, i),
77 pnp_port_end(dev, i), 1); 77 pnp_port_end(dev, i), 1);
78 } 78 }
79 79
80 for (i = 0; i < PNP_MAX_MEM; i++) { 80 for (i = 0; i < PNP_MAX_MEM; i++) {
@@ -82,24 +82,22 @@ static void reserve_resources_of_dev(const struct pnp_dev *dev)
82 continue; 82 continue;
83 83
84 reserve_range(dev->dev.bus_id, pnp_mem_start(dev, i), 84 reserve_range(dev->dev.bus_id, pnp_mem_start(dev, i),
85 pnp_mem_end(dev, i), 0); 85 pnp_mem_end(dev, i), 0);
86 } 86 }
87
88 return;
89} 87}
90 88
91static int system_pnp_probe(struct pnp_dev * dev, const struct pnp_device_id *dev_id) 89static int system_pnp_probe(struct pnp_dev *dev,
90 const struct pnp_device_id *dev_id)
92{ 91{
93 reserve_resources_of_dev(dev); 92 reserve_resources_of_dev(dev);
94 return 0; 93 return 0;
95} 94}
96 95
97static struct pnp_driver system_pnp_driver = { 96static struct pnp_driver system_pnp_driver = {
98 .name = "system", 97 .name = "system",
99 .id_table = pnp_dev_table, 98 .id_table = pnp_dev_table,
100 .flags = PNP_DRIVER_RES_DO_NOT_CHANGE, 99 .flags = PNP_DRIVER_RES_DO_NOT_CHANGE,
101 .probe = system_pnp_probe, 100 .probe = system_pnp_probe,
102 .remove = NULL,
103}; 101};
104 102
105static int __init pnp_system_init(void) 103static int __init pnp_system_init(void)
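
reserve_range() above also encodes an ownership rule worth noting: the 16-byte regionid string is freed only when request_region()/request_mem_region() fails, because on success the resource keeps pointing at it. A minimal sketch of that free-on-failure pattern, with a stub claim() in place of the kernel request calls:

#include <stdio.h>
#include <stdlib.h>

struct resource {
        const char *name;
};

/* Stub for request_region()/request_mem_region(): claims the range
 * and keeps a reference to 'name', or returns NULL on conflict. */
static struct resource *claim(unsigned long start, unsigned long n,
                              const char *name)
{
        static struct resource res;

        if (n == 0)
                return NULL;            /* simulated conflict */
        res.name = name;
        return &res;
}

static void reserve(const char *pnpid, unsigned long start,
                    unsigned long end)
{
        char *regionid = malloc(16);
        struct resource *res;

        if (!regionid)
                return;
        snprintf(regionid, 16, "pnp %s", pnpid);
        res = claim(start, end - start + 1, regionid);
        if (!res)
                free(regionid);         /* nobody else holds the ID */
        /* on success the resource owns regionid, as in system.c */
        printf("pnp: %s range 0x%lx-0x%lx %s reserved\n", pnpid,
               start, end, res ? "has been" : "could not be");
}

int main(void)
{
        reserve("PNP0c02", 0x80, 0x8f);
        return 0;
}
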
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 7ede9e725360..d3a33aa2696f 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -15,34 +15,36 @@ rtc-core-$(CONFIG_RTC_INTF_DEV) += rtc-dev.o
15rtc-core-$(CONFIG_RTC_INTF_PROC) += rtc-proc.o 15rtc-core-$(CONFIG_RTC_INTF_PROC) += rtc-proc.o
16rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o 16rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o
17 17
18# Keep the list ordered.
19
20obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o
21obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o
22obj-$(CONFIG_RTC_DRV_BFIN) += rtc-bfin.o
18obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o 23obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o
19obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o 24obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o
20obj-$(CONFIG_RTC_DRV_ISL1208) += rtc-isl1208.o
21obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o
22obj-$(CONFIG_RTC_DRV_AT32AP700X) += rtc-at32ap700x.o
23obj-$(CONFIG_RTC_DRV_DS1307) += rtc-ds1307.o 25obj-$(CONFIG_RTC_DRV_DS1307) += rtc-ds1307.o
26obj-$(CONFIG_RTC_DRV_DS1553) += rtc-ds1553.o
24obj-$(CONFIG_RTC_DRV_DS1672) += rtc-ds1672.o 27obj-$(CONFIG_RTC_DRV_DS1672) += rtc-ds1672.o
25obj-$(CONFIG_RTC_DRV_DS1742) += rtc-ds1742.o 28obj-$(CONFIG_RTC_DRV_DS1742) += rtc-ds1742.o
29obj-$(CONFIG_RTC_DRV_EP93XX) += rtc-ep93xx.o
30obj-$(CONFIG_RTC_DRV_ISL1208) += rtc-isl1208.o
31obj-$(CONFIG_RTC_DRV_M41T80) += rtc-m41t80.o
32obj-$(CONFIG_RTC_DRV_M48T59) += rtc-m48t59.o
33obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o
34obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o
35obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o
26obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o 36obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o
27obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o 37obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o
28obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o 38obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o
39obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o
40obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o
41obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o
29obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o 42obj-$(CONFIG_RTC_DRV_RS5C372) += rtc-rs5c372.o
30obj-$(CONFIG_RTC_DRV_S3C) += rtc-s3c.o 43obj-$(CONFIG_RTC_DRV_S3C) += rtc-s3c.o
31obj-$(CONFIG_RTC_DRV_RS5C348) += rtc-rs5c348.o
32obj-$(CONFIG_RTC_DRV_M41T80) += rtc-m41t80.o
33obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o
34obj-$(CONFIG_RTC_DRV_DS1553) += rtc-ds1553.o
35obj-$(CONFIG_RTC_DRV_STK17TA8) += rtc-stk17ta8.o
36obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o
37obj-$(CONFIG_RTC_DRV_EP93XX) += rtc-ep93xx.o
38obj-$(CONFIG_RTC_DRV_SA1100) += rtc-sa1100.o 44obj-$(CONFIG_RTC_DRV_SA1100) += rtc-sa1100.o
39obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o
40obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o
41obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o
42obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o
43obj-$(CONFIG_RTC_DRV_V3020) += rtc-v3020.o
44obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o
45obj-$(CONFIG_RTC_DRV_SH) += rtc-sh.o 45obj-$(CONFIG_RTC_DRV_SH) += rtc-sh.o
46obj-$(CONFIG_RTC_DRV_BFIN) += rtc-bfin.o 46obj-$(CONFIG_RTC_DRV_STK17TA8) += rtc-stk17ta8.o
47obj-$(CONFIG_RTC_DRV_M48T59) += rtc-m48t59.o 47obj-$(CONFIG_RTC_DRV_TEST) += rtc-test.o
48obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o 48obj-$(CONFIG_RTC_DRV_V3020) += rtc-v3020.o
49obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o
50obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 5158a625671f..db6f3f0d8982 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -352,7 +352,7 @@ read_rtc:
352 /* oscillator fault? clear flag, and warn */ 352 /* oscillator fault? clear flag, and warn */
353 if (ds1307->regs[DS1307_REG_CONTROL] & DS1338_BIT_OSF) { 353 if (ds1307->regs[DS1307_REG_CONTROL] & DS1338_BIT_OSF) {
354 i2c_smbus_write_byte_data(client, DS1307_REG_CONTROL, 354 i2c_smbus_write_byte_data(client, DS1307_REG_CONTROL,
355 ds1307->regs[DS1337_REG_CONTROL] 355 ds1307->regs[DS1307_REG_CONTROL]
356 & ~DS1338_BIT_OSF); 356 & ~DS1338_BIT_OSF);
357 dev_warn(&client->dev, "SET TIME!\n"); 357 dev_warn(&client->dev, "SET TIME!\n");
358 goto read_rtc; 358 goto read_rtc;
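
The one-character ds1307 change is a real bug fix rather than reformatting: the oscillator-fault path cleared DS1338_BIT_OSF using the cached DS1337 control register instead of the DS1307 one, so the read-modify-write could push back stale bits. A sketch of the corrected read-modify-write (the register offset and bit value below are illustrative, and write_reg() stands in for i2c_smbus_write_byte_data()):

#include <stdio.h>

#define DS1307_REG_CONTROL  0x07        /* illustrative offset */
#define DS1338_BIT_OSF      0x20        /* illustrative bit */

static unsigned char regs[0x10];        /* hypothetical register cache */

/* Stand-in for i2c_smbus_write_byte_data(client, reg, val). */
static void write_reg(int reg, unsigned char val)
{
        regs[reg] = val;
        printf("write reg 0x%02x = 0x%02x\n", reg, val);
}

int main(void)
{
        regs[DS1307_REG_CONTROL] = DS1338_BIT_OSF | 0x03;

        if (regs[DS1307_REG_CONTROL] & DS1338_BIT_OSF)
                /* clear only OSF, preserving the other control bits;
                 * the fix indexes the cache with DS1307_REG_CONTROL,
                 * matching the register actually written back. */
                write_reg(DS1307_REG_CONTROL,
                          regs[DS1307_REG_CONTROL] & ~DS1338_BIT_OSF);
        return 0;
}
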
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index f10d3facecbe..8288b6b2bf2b 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -258,7 +258,8 @@ static const struct rtc_class_ops stk17ta8_rtc_ops = {
258 .ioctl = stk17ta8_rtc_ioctl, 258 .ioctl = stk17ta8_rtc_ioctl,
259}; 259};
260 260
261static ssize_t stk17ta8_nvram_read(struct kobject *kobj, char *buf, 261static ssize_t stk17ta8_nvram_read(struct kobject *kobj,
262 struct bin_attribute *attr, char *buf,
262 loff_t pos, size_t size) 263 loff_t pos, size_t size)
263{ 264{
264 struct platform_device *pdev = 265 struct platform_device *pdev =
@@ -272,7 +273,8 @@ static ssize_t stk17ta8_nvram_read(struct kobject *kobj, char *buf,
272 return count; 273 return count;
273} 274}
274 275
275static ssize_t stk17ta8_nvram_write(struct kobject *kobj, char *buf, 276static ssize_t stk17ta8_nvram_write(struct kobject *kobj,
277 struct bin_attribute *attr, char *buf,
276 loff_t pos, size_t size) 278 loff_t pos, size_t size)
277{ 279{
278 struct platform_device *pdev = 280 struct platform_device *pdev =
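
The stk17ta8 hunks adapt the NVRAM hooks to the newer sysfs binary-attribute prototype, which hands the struct bin_attribute itself to the handler so it can bound the copy by attr->size. A self-contained sketch of a read hook in that style (the struct below is a simplified stand-in, not the kernel's struct bin_attribute):

#include <stdio.h>
#include <string.h>
#include <sys/types.h>

/* Simplified stand-in for the kernel's struct bin_attribute. */
struct bin_attr {
        const char *name;
        size_t size;
        /* newer-style hook: the attribute itself is passed in */
        ssize_t (*read)(struct bin_attr *attr, char *buf,
                        long pos, size_t size);
};

static char nvram[128];

static ssize_t nvram_read(struct bin_attr *attr, char *buf,
                          long pos, size_t size)
{
        size_t count = size;

        if (pos >= (long)attr->size)
                return 0;
        if (pos + count > attr->size)
                count = attr->size - pos;       /* bound by attr->size */
        memcpy(buf, nvram + pos, count);
        return (ssize_t)count;
}

int main(void)
{
        struct bin_attr attr = { "nvram", sizeof(nvram), nvram_read };
        char buf[16];

        strcpy(nvram, "hello");
        printf("read %zd bytes\n", attr.read(&attr, buf, 0, sizeof(buf)));
        return 0;
}
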
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 7071ff8da63e..5cf48123e0ef 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -28,7 +28,7 @@
28#include <asm/hardware.h> 28#include <asm/hardware.h>
29 29
30#include <asm/arch/regs-gpio.h> 30#include <asm/arch/regs-gpio.h>
31#include <asm/arch/regs-spi.h> 31#include <asm/plat-s3c24xx/regs-spi.h>
32#include <asm/arch/spi.h> 32#include <asm/arch/spi.h>
33 33
34struct s3c24xx_spi { 34struct s3c24xx_spi {
diff --git a/drivers/video/chipsfb.c b/drivers/video/chipsfb.c
index f48e8c534c87..6796ba62c3c6 100644
--- a/drivers/video/chipsfb.c
+++ b/drivers/video/chipsfb.c
@@ -24,6 +24,7 @@
24#include <linux/delay.h> 24#include <linux/delay.h>
25#include <linux/interrupt.h> 25#include <linux/interrupt.h>
26#include <linux/fb.h> 26#include <linux/fb.h>
27#include <linux/pm.h>
27#include <linux/init.h> 28#include <linux/init.h>
28#include <linux/pci.h> 29#include <linux/pci.h>
29#include <linux/console.h> 30#include <linux/console.h>
@@ -458,7 +459,7 @@ static int chipsfb_pci_suspend(struct pci_dev *pdev, pm_message_t state)
458 459
459 if (state.event == pdev->dev.power.power_state.event) 460 if (state.event == pdev->dev.power.power_state.event)
460 return 0; 461 return 0;
461 if (state.event != PM_SUSPEND_MEM) 462 if (state.event != PM_EVENT_SUSPEND)
462 goto done; 463 goto done;
463 464
464 acquire_console_sem(); 465 acquire_console_sem();
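
The chipsfb change is another genuine fix among the cleanups: state.event carries a PM_EVENT_* code, but the old test compared it against PM_SUSPEND_MEM from the unrelated suspend_state_t enum, so it type-checked yet tested the wrong thing. A small sketch of the mix-up (the constant values below are illustrative, not the kernel's):

#include <stdio.h>

/* Two unrelated kernel enums that happen to be plain ints. */
enum { PM_EVENT_FREEZE = 1, PM_EVENT_SUSPEND = 2 };   /* pm_message_t.event */
enum { PM_SUSPEND_STANDBY = 1, PM_SUSPEND_MEM = 3 };  /* suspend_state_t */

struct pm_message { int event; };

static void suspend(struct pm_message state)
{
        /* buggy: compares an event code against a suspend state */
        if (state.event != PM_SUSPEND_MEM)
                printf("old test: skipping real suspend work\n");
        /* fixed: compare within the same enum */
        if (state.event == PM_EVENT_SUSPEND)
                printf("new test: doing suspend work\n");
}

int main(void)
{
        struct pm_message state = { PM_EVENT_SUSPEND };

        suspend(state);
        return 0;
}
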
diff --git a/drivers/video/tgafb.c b/drivers/video/tgafb.c
index 89facb73edfc..d292a37ec7d6 100644
--- a/drivers/video/tgafb.c
+++ b/drivers/video/tgafb.c
@@ -849,7 +849,7 @@ tgafb_clut_imageblit(struct fb_info *info, const struct fb_image *image)
849 u32 *palette = ((u32 *)info->pseudo_palette); 849 u32 *palette = ((u32 *)info->pseudo_palette);
850 unsigned long pos, line_length, i, j; 850 unsigned long pos, line_length, i, j;
851 const unsigned char *data; 851 const unsigned char *data;
852 void *regs_base, *fb_base; 852 void __iomem *regs_base, *fb_base;
853 853
854 dx = image->dx; 854 dx = image->dx;
855 dy = image->dy; 855 dy = image->dy;
diff --git a/drivers/w1/masters/ds1wm.c b/drivers/w1/masters/ds1wm.c
index 763bc73e5070..4b696641ce33 100644
--- a/drivers/w1/masters/ds1wm.c
+++ b/drivers/w1/masters/ds1wm.c
@@ -85,7 +85,7 @@ static struct {
85}; 85};
86 86
87struct ds1wm_data { 87struct ds1wm_data {
88 void *map; 88 void __iomem *map;
89 int bus_shift; /* # of shifts to calc register offsets */ 89 int bus_shift; /* # of shifts to calc register offsets */
90 struct platform_device *pdev; 90 struct platform_device *pdev;
91 struct ds1wm_platform_data *pdata; 91 struct ds1wm_platform_data *pdata;
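
The tgafb and ds1wm hunks annotate MMIO pointers with __iomem so sparse can flag direct dereferences; such pointers should only be accessed through readb()/writeb() and friends. A compilable sketch of the idea, where __iomem expands to nothing outside the kernel and my_readb()/my_writeb() stand in for the real accessors:

#include <stdio.h>

/* In the kernel, __iomem is a sparse-only address-space attribute;
 * outside the kernel it compiles away. */
#define __iomem

static unsigned char fake_mmio[8];      /* stand-in for an ioremap()ed window */

/* Accessors in the style of readb()/writeb(). */
static unsigned char my_readb(const void __iomem *addr)
{
        return *(const volatile unsigned char *)addr;
}

static void my_writeb(unsigned char v, void __iomem *addr)
{
        *(volatile unsigned char *)addr = v;
}

int main(void)
{
        void __iomem *map = fake_mmio; /* as in struct ds1wm_data */

        my_writeb(0x5a, map);
        printf("reg0 = 0x%02x\n", my_readb(map));
        return 0;
}
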
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 9e943fbce81b..227d53b12a5c 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -782,8 +782,8 @@ static int process_msg(void)
782 msg->u.watch.vec = split(body, msg->hdr.len, 782 msg->u.watch.vec = split(body, msg->hdr.len,
783 &msg->u.watch.vec_size); 783 &msg->u.watch.vec_size);
784 if (IS_ERR(msg->u.watch.vec)) { 784 if (IS_ERR(msg->u.watch.vec)) {
785 kfree(msg);
786 err = PTR_ERR(msg->u.watch.vec); 785 err = PTR_ERR(msg->u.watch.vec);
786 kfree(msg);
787 goto out; 787 goto out;
788 } 788 }
789 789
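
The final hunk is a textbook use-after-free fix: PTR_ERR() must read msg->u.watch.vec before kfree(msg) releases the structure that field lives in. A minimal userspace sketch of the error-pointer idiom and the corrected ordering, with is_err()/ptr_err() reimplementing what <linux/err.h> provides:

#include <stdio.h>
#include <stdlib.h>

/* Userspace stand-ins for the kernel's IS_ERR()/PTR_ERR():
 * the top 4095 pointer values encode negative errno codes. */
static int is_err(const void *p)
{
        return (unsigned long)p >= (unsigned long)-4095;
}

static long ptr_err(const void *p)
{
        return (long)p;
}

struct msg {
        char **vec;
};

int main(void)
{
        struct msg *msg = malloc(sizeof(*msg));
        long err = 0;

        if (!msg)
                return 1;
        msg->vec = (char **)(unsigned long)-12; /* simulated -ENOMEM */

        if (is_err(msg->vec)) {
                /* fixed order: read through msg first, free second */
                err = ptr_err(msg->vec);
                free(msg);
        }
        printf("err = %ld\n", err);
        return 0;
}
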