aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/virtio_blk.c10
-rw-r--r--drivers/char/hw_random/virtio-rng.c30
-rw-r--r--drivers/char/virtio_console.c26
-rw-r--r--drivers/ide/at91_ide.c7
-rw-r--r--drivers/ide/au1xxx-ide.c8
-rw-r--r--drivers/ide/buddha.c9
-rw-r--r--drivers/ide/cmd640.c7
-rw-r--r--drivers/ide/cs5520.c4
-rw-r--r--drivers/ide/delkin_cb.c6
-rw-r--r--drivers/ide/falconide.c9
-rw-r--r--drivers/ide/gayle.c9
-rw-r--r--drivers/ide/hpt366.c25
-rw-r--r--drivers/ide/icside.c77
-rw-r--r--drivers/ide/ide-4drives.c6
-rw-r--r--drivers/ide/ide-atapi.c2
-rw-r--r--drivers/ide/ide-cs.c6
-rw-r--r--drivers/ide/ide-disk.c75
-rw-r--r--drivers/ide/ide-dma.c1
-rw-r--r--drivers/ide/ide-eh.c14
-rw-r--r--drivers/ide/ide-gd.c14
-rw-r--r--drivers/ide/ide-generic.c7
-rw-r--r--drivers/ide/ide-h8300.c10
-rw-r--r--drivers/ide/ide-io.c77
-rw-r--r--drivers/ide/ide-iops.c26
-rw-r--r--drivers/ide/ide-legacy.c7
-rw-r--r--drivers/ide/ide-pnp.c6
-rw-r--r--drivers/ide/ide-probe.c95
-rw-r--r--drivers/ide/ide-tape.c90
-rw-r--r--drivers/ide/ide-taskfile.c3
-rw-r--r--drivers/ide/ide.c10
-rw-r--r--drivers/ide/ide_platform.c9
-rw-r--r--drivers/ide/macide.c9
-rw-r--r--drivers/ide/palm_bk3710.c6
-rw-r--r--drivers/ide/pdc202xx_new.c26
-rw-r--r--drivers/ide/pdc202xx_old.c92
-rw-r--r--drivers/ide/pmac.c13
-rw-r--r--drivers/ide/q40ide.c11
-rw-r--r--drivers/ide/rapide.c8
-rw-r--r--drivers/ide/scc_pata.c6
-rw-r--r--drivers/ide/setup-pci.c85
-rw-r--r--drivers/ide/sgiioc4.c7
-rw-r--r--drivers/ide/siimage.c4
-rw-r--r--drivers/ide/sl82c105.c9
-rw-r--r--drivers/ide/tx4938ide.c5
-rw-r--r--drivers/ide/tx4939ide.c5
-rw-r--r--drivers/lguest/Kconfig2
-rw-r--r--drivers/lguest/core.c30
-rw-r--r--drivers/lguest/hypercalls.c14
-rw-r--r--drivers/lguest/interrupts_and_traps.c57
-rw-r--r--drivers/lguest/lg.h28
-rw-r--r--drivers/lguest/lguest_device.c41
-rw-r--r--drivers/lguest/lguest_user.c127
-rw-r--r--drivers/lguest/page_tables.c396
-rw-r--r--drivers/lguest/segments.c2
-rw-r--r--drivers/net/virtio_net.c45
-rw-r--r--drivers/s390/kvm/kvm_virtio.c43
-rw-r--r--drivers/video/aty/aty128fb.c2
-rw-r--r--drivers/video/cyber2000fb.c9
-rw-r--r--drivers/video/uvesafb.c10
-rw-r--r--drivers/virtio/virtio.c29
-rw-r--r--drivers/virtio/virtio_balloon.c27
-rw-r--r--drivers/virtio/virtio_pci.c307
-rw-r--r--drivers/virtio/virtio_ring.c102
63 files changed, 1447 insertions, 795 deletions
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c0facaa55cf4..43db3ea15b54 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -254,7 +254,7 @@ static int index_to_minor(int index)
254 return index << PART_BITS; 254 return index << PART_BITS;
255} 255}
256 256
257static int virtblk_probe(struct virtio_device *vdev) 257static int __devinit virtblk_probe(struct virtio_device *vdev)
258{ 258{
259 struct virtio_blk *vblk; 259 struct virtio_blk *vblk;
260 int err; 260 int err;
@@ -288,7 +288,7 @@ static int virtblk_probe(struct virtio_device *vdev)
288 sg_init_table(vblk->sg, vblk->sg_elems); 288 sg_init_table(vblk->sg, vblk->sg_elems);
289 289
290 /* We expect one virtqueue, for output. */ 290 /* We expect one virtqueue, for output. */
291 vblk->vq = vdev->config->find_vq(vdev, 0, blk_done); 291 vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
292 if (IS_ERR(vblk->vq)) { 292 if (IS_ERR(vblk->vq)) {
293 err = PTR_ERR(vblk->vq); 293 err = PTR_ERR(vblk->vq);
294 goto out_free_vblk; 294 goto out_free_vblk;
@@ -388,14 +388,14 @@ out_put_disk:
388out_mempool: 388out_mempool:
389 mempool_destroy(vblk->pool); 389 mempool_destroy(vblk->pool);
390out_free_vq: 390out_free_vq:
391 vdev->config->del_vq(vblk->vq); 391 vdev->config->del_vqs(vdev);
392out_free_vblk: 392out_free_vblk:
393 kfree(vblk); 393 kfree(vblk);
394out: 394out:
395 return err; 395 return err;
396} 396}
397 397
398static void virtblk_remove(struct virtio_device *vdev) 398static void __devexit virtblk_remove(struct virtio_device *vdev)
399{ 399{
400 struct virtio_blk *vblk = vdev->priv; 400 struct virtio_blk *vblk = vdev->priv;
401 401
@@ -409,7 +409,7 @@ static void virtblk_remove(struct virtio_device *vdev)
409 blk_cleanup_queue(vblk->disk->queue); 409 blk_cleanup_queue(vblk->disk->queue);
410 put_disk(vblk->disk); 410 put_disk(vblk->disk);
411 mempool_destroy(vblk->pool); 411 mempool_destroy(vblk->pool);
412 vdev->config->del_vq(vblk->vq); 412 vdev->config->del_vqs(vdev);
413 kfree(vblk); 413 kfree(vblk);
414} 414}
415 415
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 86e83f883139..32216b623248 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -35,13 +35,13 @@ static DECLARE_COMPLETION(have_data);
35 35
36static void random_recv_done(struct virtqueue *vq) 36static void random_recv_done(struct virtqueue *vq)
37{ 37{
38 int len; 38 unsigned int len;
39 39
40 /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */ 40 /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */
41 if (!vq->vq_ops->get_buf(vq, &len)) 41 if (!vq->vq_ops->get_buf(vq, &len))
42 return; 42 return;
43 43
44 data_left = len / sizeof(random_data[0]); 44 data_left += len;
45 complete(&have_data); 45 complete(&have_data);
46} 46}
47 47
@@ -49,7 +49,7 @@ static void register_buffer(void)
49{ 49{
50 struct scatterlist sg; 50 struct scatterlist sg;
51 51
52 sg_init_one(&sg, random_data, RANDOM_DATA_SIZE); 52 sg_init_one(&sg, random_data+data_left, RANDOM_DATA_SIZE-data_left);
53 /* There should always be room for one buffer. */ 53 /* There should always be room for one buffer. */
54 if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) != 0) 54 if (vq->vq_ops->add_buf(vq, &sg, 0, 1, random_data) != 0)
55 BUG(); 55 BUG();
@@ -59,24 +59,32 @@ static void register_buffer(void)
59/* At least we don't udelay() in a loop like some other drivers. */ 59/* At least we don't udelay() in a loop like some other drivers. */
60static int virtio_data_present(struct hwrng *rng, int wait) 60static int virtio_data_present(struct hwrng *rng, int wait)
61{ 61{
62 if (data_left) 62 if (data_left >= sizeof(u32))
63 return 1; 63 return 1;
64 64
65again:
65 if (!wait) 66 if (!wait)
66 return 0; 67 return 0;
67 68
68 wait_for_completion(&have_data); 69 wait_for_completion(&have_data);
70
71 /* Not enough? Re-register. */
72 if (unlikely(data_left < sizeof(u32))) {
73 register_buffer();
74 goto again;
75 }
76
69 return 1; 77 return 1;
70} 78}
71 79
72/* virtio_data_present() must have succeeded before this is called. */ 80/* virtio_data_present() must have succeeded before this is called. */
73static int virtio_data_read(struct hwrng *rng, u32 *data) 81static int virtio_data_read(struct hwrng *rng, u32 *data)
74{ 82{
75 BUG_ON(!data_left); 83 BUG_ON(data_left < sizeof(u32));
76 84 data_left -= sizeof(u32);
77 *data = random_data[--data_left]; 85 *data = random_data[data_left / 4];
78 86
79 if (!data_left) { 87 if (data_left < sizeof(u32)) {
80 init_completion(&have_data); 88 init_completion(&have_data);
81 register_buffer(); 89 register_buffer();
82 } 90 }
@@ -94,13 +102,13 @@ static int virtrng_probe(struct virtio_device *vdev)
94 int err; 102 int err;
95 103
96 /* We expect a single virtqueue. */ 104 /* We expect a single virtqueue. */
97 vq = vdev->config->find_vq(vdev, 0, random_recv_done); 105 vq = virtio_find_single_vq(vdev, random_recv_done, "input");
98 if (IS_ERR(vq)) 106 if (IS_ERR(vq))
99 return PTR_ERR(vq); 107 return PTR_ERR(vq);
100 108
101 err = hwrng_register(&virtio_hwrng); 109 err = hwrng_register(&virtio_hwrng);
102 if (err) { 110 if (err) {
103 vdev->config->del_vq(vq); 111 vdev->config->del_vqs(vdev);
104 return err; 112 return err;
105 } 113 }
106 114
@@ -112,7 +120,7 @@ static void virtrng_remove(struct virtio_device *vdev)
112{ 120{
113 vdev->config->reset(vdev); 121 vdev->config->reset(vdev);
114 hwrng_unregister(&virtio_hwrng); 122 hwrng_unregister(&virtio_hwrng);
115 vdev->config->del_vq(vq); 123 vdev->config->del_vqs(vdev);
116} 124}
117 125
118static struct virtio_device_id id_table[] = { 126static struct virtio_device_id id_table[] = {
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index ff6f5a4b58fb..c74dacfa6795 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -188,6 +188,9 @@ static void hvc_handle_input(struct virtqueue *vq)
188 * Finally we put our input buffer in the input queue, ready to receive. */ 188 * Finally we put our input buffer in the input queue, ready to receive. */
189static int __devinit virtcons_probe(struct virtio_device *dev) 189static int __devinit virtcons_probe(struct virtio_device *dev)
190{ 190{
191 vq_callback_t *callbacks[] = { hvc_handle_input, NULL};
192 const char *names[] = { "input", "output" };
193 struct virtqueue *vqs[2];
191 int err; 194 int err;
192 195
193 vdev = dev; 196 vdev = dev;
@@ -199,20 +202,15 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
199 goto fail; 202 goto fail;
200 } 203 }
201 204
202 /* Find the input queue. */ 205 /* Find the queues. */
203 /* FIXME: This is why we want to wean off hvc: we do nothing 206 /* FIXME: This is why we want to wean off hvc: we do nothing
204 * when input comes in. */ 207 * when input comes in. */
205 in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input); 208 err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
206 if (IS_ERR(in_vq)) { 209 if (err)
207 err = PTR_ERR(in_vq);
208 goto free; 210 goto free;
209 }
210 211
211 out_vq = vdev->config->find_vq(vdev, 1, NULL); 212 in_vq = vqs[0];
212 if (IS_ERR(out_vq)) { 213 out_vq = vqs[1];
213 err = PTR_ERR(out_vq);
214 goto free_in_vq;
215 }
216 214
217 /* Start using the new console output. */ 215 /* Start using the new console output. */
218 virtio_cons.get_chars = get_chars; 216 virtio_cons.get_chars = get_chars;
@@ -233,17 +231,15 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
233 hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE); 231 hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
234 if (IS_ERR(hvc)) { 232 if (IS_ERR(hvc)) {
235 err = PTR_ERR(hvc); 233 err = PTR_ERR(hvc);
236 goto free_out_vq; 234 goto free_vqs;
237 } 235 }
238 236
239 /* Register the input buffer the first time. */ 237 /* Register the input buffer the first time. */
240 add_inbuf(); 238 add_inbuf();
241 return 0; 239 return 0;
242 240
243free_out_vq: 241free_vqs:
244 vdev->config->del_vq(out_vq); 242 vdev->config->del_vqs(vdev);
245free_in_vq:
246 vdev->config->del_vq(in_vq);
247free: 243free:
248 kfree(inbuf); 244 kfree(inbuf);
249fail: 245fail:
diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index 403d0e4265db..fc0949a8cfde 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -216,6 +216,7 @@ static const struct ide_port_info at91_ide_port_info __initdata = {
216 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA | IDE_HFLAG_SINGLE | 216 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA | IDE_HFLAG_SINGLE |
217 IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_UNMASK_IRQS, 217 IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_UNMASK_IRQS,
218 .pio_mask = ATA_PIO6, 218 .pio_mask = ATA_PIO6,
219 .chipset = ide_generic,
219}; 220};
220 221
221/* 222/*
@@ -246,8 +247,7 @@ irqreturn_t at91_irq_handler(int irq, void *dev_id)
246static int __init at91_ide_probe(struct platform_device *pdev) 247static int __init at91_ide_probe(struct platform_device *pdev)
247{ 248{
248 int ret; 249 int ret;
249 hw_regs_t hw; 250 struct ide_hw hw, *hws[] = { &hw };
250 hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
251 struct ide_host *host; 251 struct ide_host *host;
252 struct resource *res; 252 struct resource *res;
253 unsigned long tf_base = 0, ctl_base = 0; 253 unsigned long tf_base = 0, ctl_base = 0;
@@ -304,10 +304,9 @@ static int __init at91_ide_probe(struct platform_device *pdev)
304 ide_std_init_ports(&hw, tf_base, ctl_base + 6); 304 ide_std_init_ports(&hw, tf_base, ctl_base + 6);
305 305
306 hw.irq = board->irq_pin; 306 hw.irq = board->irq_pin;
307 hw.chipset = ide_generic;
308 hw.dev = &pdev->dev; 307 hw.dev = &pdev->dev;
309 308
310 host = ide_host_alloc(&at91_ide_port_info, hws); 309 host = ide_host_alloc(&at91_ide_port_info, hws, 1);
311 if (!host) { 310 if (!host) {
312 perr("failed to allocate ide host\n"); 311 perr("failed to allocate ide host\n");
313 return -ENOMEM; 312 return -ENOMEM;
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 46013644c965..58121bd6c115 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -449,7 +449,7 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
449} 449}
450#endif 450#endif
451 451
452static void auide_setup_ports(hw_regs_t *hw, _auide_hwif *ahwif) 452static void auide_setup_ports(struct ide_hw *hw, _auide_hwif *ahwif)
453{ 453{
454 int i; 454 int i;
455 unsigned long *ata_regs = hw->io_ports_array; 455 unsigned long *ata_regs = hw->io_ports_array;
@@ -499,6 +499,7 @@ static const struct ide_port_info au1xxx_port_info = {
499#ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA 499#ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
500 .mwdma_mask = ATA_MWDMA2, 500 .mwdma_mask = ATA_MWDMA2,
501#endif 501#endif
502 .chipset = ide_au1xxx,
502}; 503};
503 504
504static int au_ide_probe(struct platform_device *dev) 505static int au_ide_probe(struct platform_device *dev)
@@ -507,7 +508,7 @@ static int au_ide_probe(struct platform_device *dev)
507 struct resource *res; 508 struct resource *res;
508 struct ide_host *host; 509 struct ide_host *host;
509 int ret = 0; 510 int ret = 0;
510 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 511 struct ide_hw hw, *hws[] = { &hw };
511 512
512#if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA) 513#if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA)
513 char *mode = "MWDMA2"; 514 char *mode = "MWDMA2";
@@ -548,9 +549,8 @@ static int au_ide_probe(struct platform_device *dev)
548 auide_setup_ports(&hw, ahwif); 549 auide_setup_ports(&hw, ahwif);
549 hw.irq = ahwif->irq; 550 hw.irq = ahwif->irq;
550 hw.dev = &dev->dev; 551 hw.dev = &dev->dev;
551 hw.chipset = ide_au1xxx;
552 552
553 ret = ide_host_add(&au1xxx_port_info, hws, &host); 553 ret = ide_host_add(&au1xxx_port_info, hws, 1, &host);
554 if (ret) 554 if (ret)
555 goto out; 555 goto out;
556 556
diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
index d028f8864bc1..e3c6a5913305 100644
--- a/drivers/ide/buddha.c
+++ b/drivers/ide/buddha.c
@@ -121,7 +121,7 @@ static int xsurf_ack_intr(ide_hwif_t *hwif)
121 return 1; 121 return 1;
122} 122}
123 123
124static void __init buddha_setup_ports(hw_regs_t *hw, unsigned long base, 124static void __init buddha_setup_ports(struct ide_hw *hw, unsigned long base,
125 unsigned long ctl, unsigned long irq_port, 125 unsigned long ctl, unsigned long irq_port,
126 ide_ack_intr_t *ack_intr) 126 ide_ack_intr_t *ack_intr)
127{ 127{
@@ -139,13 +139,12 @@ static void __init buddha_setup_ports(hw_regs_t *hw, unsigned long base,
139 139
140 hw->irq = IRQ_AMIGA_PORTS; 140 hw->irq = IRQ_AMIGA_PORTS;
141 hw->ack_intr = ack_intr; 141 hw->ack_intr = ack_intr;
142
143 hw->chipset = ide_generic;
144} 142}
145 143
146static const struct ide_port_info buddha_port_info = { 144static const struct ide_port_info buddha_port_info = {
147 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, 145 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
148 .irq_flags = IRQF_SHARED, 146 .irq_flags = IRQF_SHARED,
147 .chipset = ide_generic,
149}; 148};
150 149
151 /* 150 /*
@@ -161,7 +160,7 @@ static int __init buddha_init(void)
161 160
162 while ((z = zorro_find_device(ZORRO_WILDCARD, z))) { 161 while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
163 unsigned long board; 162 unsigned long board;
164 hw_regs_t hw[MAX_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL }; 163 struct ide_hw hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS];
165 164
166 if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) { 165 if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) {
167 buddha_num_hwifs = BUDDHA_NUM_HWIFS; 166 buddha_num_hwifs = BUDDHA_NUM_HWIFS;
@@ -225,7 +224,7 @@ fail_base2:
225 hws[i] = &hw[i]; 224 hws[i] = &hw[i];
226 } 225 }
227 226
228 ide_host_add(&buddha_port_info, hws, NULL); 227 ide_host_add(&buddha_port_info, hws, i, NULL);
229 } 228 }
230 229
231 return 0; 230 return 0;
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index 8890276fef7f..1683ed5c7329 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -708,7 +708,7 @@ static int __init cmd640x_init(void)
708 int second_port_cmd640 = 0, rc; 708 int second_port_cmd640 = 0, rc;
709 const char *bus_type, *port2; 709 const char *bus_type, *port2;
710 u8 b, cfr; 710 u8 b, cfr;
711 hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL }; 711 struct ide_hw hw[2], *hws[2];
712 712
713 if (cmd640_vlb && probe_for_cmd640_vlb()) { 713 if (cmd640_vlb && probe_for_cmd640_vlb()) {
714 bus_type = "VLB"; 714 bus_type = "VLB";
@@ -762,11 +762,9 @@ static int __init cmd640x_init(void)
762 762
763 ide_std_init_ports(&hw[0], 0x1f0, 0x3f6); 763 ide_std_init_ports(&hw[0], 0x1f0, 0x3f6);
764 hw[0].irq = 14; 764 hw[0].irq = 14;
765 hw[0].chipset = ide_cmd640;
766 765
767 ide_std_init_ports(&hw[1], 0x170, 0x376); 766 ide_std_init_ports(&hw[1], 0x170, 0x376);
768 hw[1].irq = 15; 767 hw[1].irq = 15;
769 hw[1].chipset = ide_cmd640;
770 768
771 printk(KERN_INFO "cmd640: buggy cmd640%c interface on %s, config=0x%02x" 769 printk(KERN_INFO "cmd640: buggy cmd640%c interface on %s, config=0x%02x"
772 "\n", 'a' + cmd640_chip_version - 1, bus_type, cfr); 770 "\n", 'a' + cmd640_chip_version - 1, bus_type, cfr);
@@ -824,7 +822,8 @@ static int __init cmd640x_init(void)
824 cmd640_dump_regs(); 822 cmd640_dump_regs();
825#endif 823#endif
826 824
827 return ide_host_add(&cmd640_port_info, hws, NULL); 825 return ide_host_add(&cmd640_port_info, hws, second_port_cmd640 ? 2 : 1,
826 NULL);
828} 827}
829 828
830module_param_named(probe_vlb, cmd640_vlb, bool, 0); 829module_param_named(probe_vlb, cmd640_vlb, bool, 0);
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index 87987a7d36c9..bd066bb9d611 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -110,7 +110,7 @@ static const struct ide_port_info cyrix_chipset __devinitdata = {
110static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id) 110static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
111{ 111{
112 const struct ide_port_info *d = &cyrix_chipset; 112 const struct ide_port_info *d = &cyrix_chipset;
113 hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL }; 113 struct ide_hw hw[2], *hws[] = { NULL, NULL };
114 114
115 ide_setup_pci_noise(dev, d); 115 ide_setup_pci_noise(dev, d);
116 116
@@ -136,7 +136,7 @@ static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_devic
136 ide_pci_setup_ports(dev, d, &hw[0], &hws[0]); 136 ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
137 hw[0].irq = 14; 137 hw[0].irq = 14;
138 138
139 return ide_host_add(d, hws, NULL); 139 return ide_host_add(d, hws, 2, NULL);
140} 140}
141 141
142static const struct pci_device_id cs5520_pci_tbl[] = { 142static const struct pci_device_id cs5520_pci_tbl[] = {
diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c
index f153b95619bb..1e10eba62ceb 100644
--- a/drivers/ide/delkin_cb.c
+++ b/drivers/ide/delkin_cb.c
@@ -68,6 +68,7 @@ static const struct ide_port_info delkin_cb_port_info = {
68 IDE_HFLAG_NO_DMA, 68 IDE_HFLAG_NO_DMA,
69 .irq_flags = IRQF_SHARED, 69 .irq_flags = IRQF_SHARED,
70 .init_chipset = delkin_cb_init_chipset, 70 .init_chipset = delkin_cb_init_chipset,
71 .chipset = ide_pci,
71}; 72};
72 73
73static int __devinit 74static int __devinit
@@ -76,7 +77,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
76 struct ide_host *host; 77 struct ide_host *host;
77 unsigned long base; 78 unsigned long base;
78 int rc; 79 int rc;
79 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 80 struct ide_hw hw, *hws[] = { &hw };
80 81
81 rc = pci_enable_device(dev); 82 rc = pci_enable_device(dev);
82 if (rc) { 83 if (rc) {
@@ -97,9 +98,8 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
97 ide_std_init_ports(&hw, base + 0x10, base + 0x1e); 98 ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
98 hw.irq = dev->irq; 99 hw.irq = dev->irq;
99 hw.dev = &dev->dev; 100 hw.dev = &dev->dev;
100 hw.chipset = ide_pci; /* this enables IRQ sharing */
101 101
102 rc = ide_host_add(&delkin_cb_port_info, hws, &host); 102 rc = ide_host_add(&delkin_cb_port_info, hws, 1, &host);
103 if (rc) 103 if (rc)
104 goto out_disable; 104 goto out_disable;
105 105
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 0e2df6755ec9..22fa27389c3b 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -111,9 +111,10 @@ static const struct ide_port_info falconide_port_info = {
111 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE | 111 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
112 IDE_HFLAG_NO_DMA, 112 IDE_HFLAG_NO_DMA,
113 .irq_flags = IRQF_SHARED, 113 .irq_flags = IRQF_SHARED,
114 .chipset = ide_generic,
114}; 115};
115 116
116static void __init falconide_setup_ports(hw_regs_t *hw) 117static void __init falconide_setup_ports(struct ide_hw *hw)
117{ 118{
118 int i; 119 int i;
119 120
@@ -128,8 +129,6 @@ static void __init falconide_setup_ports(hw_regs_t *hw)
128 129
129 hw->irq = IRQ_MFP_IDE; 130 hw->irq = IRQ_MFP_IDE;
130 hw->ack_intr = NULL; 131 hw->ack_intr = NULL;
131
132 hw->chipset = ide_generic;
133} 132}
134 133
135 /* 134 /*
@@ -139,7 +138,7 @@ static void __init falconide_setup_ports(hw_regs_t *hw)
139static int __init falconide_init(void) 138static int __init falconide_init(void)
140{ 139{
141 struct ide_host *host; 140 struct ide_host *host;
142 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 141 struct ide_hw hw, *hws[] = { &hw };
143 int rc; 142 int rc;
144 143
145 if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE)) 144 if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE))
@@ -154,7 +153,7 @@ static int __init falconide_init(void)
154 153
155 falconide_setup_ports(&hw); 154 falconide_setup_ports(&hw);
156 155
157 host = ide_host_alloc(&falconide_port_info, hws); 156 host = ide_host_alloc(&falconide_port_info, hws, 1);
158 if (host == NULL) { 157 if (host == NULL) {
159 rc = -ENOMEM; 158 rc = -ENOMEM;
160 goto err; 159 goto err;
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index c7119516c5a7..4451a6a5dfe0 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -88,7 +88,7 @@ static int gayle_ack_intr_a1200(ide_hwif_t *hwif)
88 return 1; 88 return 1;
89} 89}
90 90
91static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base, 91static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base,
92 unsigned long ctl, unsigned long irq_port, 92 unsigned long ctl, unsigned long irq_port,
93 ide_ack_intr_t *ack_intr) 93 ide_ack_intr_t *ack_intr)
94{ 94{
@@ -106,14 +106,13 @@ static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base,
106 106
107 hw->irq = IRQ_AMIGA_PORTS; 107 hw->irq = IRQ_AMIGA_PORTS;
108 hw->ack_intr = ack_intr; 108 hw->ack_intr = ack_intr;
109
110 hw->chipset = ide_generic;
111} 109}
112 110
113static const struct ide_port_info gayle_port_info = { 111static const struct ide_port_info gayle_port_info = {
114 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE | 112 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
115 IDE_HFLAG_NO_DMA, 113 IDE_HFLAG_NO_DMA,
116 .irq_flags = IRQF_SHARED, 114 .irq_flags = IRQF_SHARED,
115 .chipset = ide_generic,
117}; 116};
118 117
119 /* 118 /*
@@ -126,7 +125,7 @@ static int __init gayle_init(void)
126 unsigned long base, ctrlport, irqport; 125 unsigned long base, ctrlport, irqport;
127 ide_ack_intr_t *ack_intr; 126 ide_ack_intr_t *ack_intr;
128 int a4000, i, rc; 127 int a4000, i, rc;
129 hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL }; 128 struct ide_hw hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
130 129
131 if (!MACH_IS_AMIGA) 130 if (!MACH_IS_AMIGA)
132 return -ENODEV; 131 return -ENODEV;
@@ -171,7 +170,7 @@ found:
171 hws[i] = &hw[i]; 170 hws[i] = &hw[i];
172 } 171 }
173 172
174 rc = ide_host_add(&gayle_port_info, hws, NULL); 173 rc = ide_host_add(&gayle_port_info, hws, i, NULL);
175 if (rc) 174 if (rc)
176 release_mem_region(res_start, res_n); 175 release_mem_region(res_start, res_n);
177 176
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index 0feb66c720e1..7ce68ef6b904 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -138,14 +138,6 @@
138#undef HPT_RESET_STATE_ENGINE 138#undef HPT_RESET_STATE_ENGINE
139#undef HPT_DELAY_INTERRUPT 139#undef HPT_DELAY_INTERRUPT
140 140
141static const char *quirk_drives[] = {
142 "QUANTUM FIREBALLlct08 08",
143 "QUANTUM FIREBALLP KA6.4",
144 "QUANTUM FIREBALLP LM20.4",
145 "QUANTUM FIREBALLP LM20.5",
146 NULL
147};
148
149static const char *bad_ata100_5[] = { 141static const char *bad_ata100_5[] = {
150 "IBM-DTLA-307075", 142 "IBM-DTLA-307075",
151 "IBM-DTLA-307060", 143 "IBM-DTLA-307060",
@@ -729,27 +721,13 @@ static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
729 hpt3xx_set_mode(drive, XFER_PIO_0 + pio); 721 hpt3xx_set_mode(drive, XFER_PIO_0 + pio);
730} 722}
731 723
732static void hpt3xx_quirkproc(ide_drive_t *drive)
733{
734 char *m = (char *)&drive->id[ATA_ID_PROD];
735 const char **list = quirk_drives;
736
737 while (*list)
738 if (strstr(m, *list++)) {
739 drive->quirk_list = 1;
740 return;
741 }
742
743 drive->quirk_list = 0;
744}
745
746static void hpt3xx_maskproc(ide_drive_t *drive, int mask) 724static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
747{ 725{
748 ide_hwif_t *hwif = drive->hwif; 726 ide_hwif_t *hwif = drive->hwif;
749 struct pci_dev *dev = to_pci_dev(hwif->dev); 727 struct pci_dev *dev = to_pci_dev(hwif->dev);
750 struct hpt_info *info = hpt3xx_get_info(hwif->dev); 728 struct hpt_info *info = hpt3xx_get_info(hwif->dev);
751 729
752 if (drive->quirk_list == 0) 730 if ((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
753 return; 731 return;
754 732
755 if (info->chip_type >= HPT370) { 733 if (info->chip_type >= HPT370) {
@@ -1404,7 +1382,6 @@ static int __devinit hpt36x_init(struct pci_dev *dev, struct pci_dev *dev2)
1404static const struct ide_port_ops hpt3xx_port_ops = { 1382static const struct ide_port_ops hpt3xx_port_ops = {
1405 .set_pio_mode = hpt3xx_set_pio_mode, 1383 .set_pio_mode = hpt3xx_set_pio_mode,
1406 .set_dma_mode = hpt3xx_set_mode, 1384 .set_dma_mode = hpt3xx_set_mode,
1407 .quirkproc = hpt3xx_quirkproc,
1408 .maskproc = hpt3xx_maskproc, 1385 .maskproc = hpt3xx_maskproc,
1409 .mdma_filter = hpt3xx_mdma_filter, 1386 .mdma_filter = hpt3xx_mdma_filter,
1410 .udma_filter = hpt3xx_udma_filter, 1387 .udma_filter = hpt3xx_udma_filter,
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 36da913cc553..5af3d0ffaf0a 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -65,8 +65,6 @@ static struct cardinfo icside_cardinfo_v6_2 = {
65}; 65};
66 66
67struct icside_state { 67struct icside_state {
68 unsigned int channel;
69 unsigned int enabled;
70 void __iomem *irq_port; 68 void __iomem *irq_port;
71 void __iomem *ioc_base; 69 void __iomem *ioc_base;
72 unsigned int sel; 70 unsigned int sel;
@@ -116,18 +114,11 @@ static void icside_irqenable_arcin_v6 (struct expansion_card *ec, int irqnr)
116 struct icside_state *state = ec->irq_data; 114 struct icside_state *state = ec->irq_data;
117 void __iomem *base = state->irq_port; 115 void __iomem *base = state->irq_port;
118 116
119 state->enabled = 1; 117 writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
118 readb(base + ICS_ARCIN_V6_INTROFFSET_2);
120 119
121 switch (state->channel) { 120 writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
122 case 0: 121 readb(base + ICS_ARCIN_V6_INTROFFSET_1);
123 writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
124 readb(base + ICS_ARCIN_V6_INTROFFSET_2);
125 break;
126 case 1:
127 writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
128 readb(base + ICS_ARCIN_V6_INTROFFSET_1);
129 break;
130 }
131} 122}
132 123
133/* Prototype: icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr) 124/* Prototype: icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr)
@@ -137,8 +128,6 @@ static void icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr)
137{ 128{
138 struct icside_state *state = ec->irq_data; 129 struct icside_state *state = ec->irq_data;
139 130
140 state->enabled = 0;
141
142 readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1); 131 readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
143 readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2); 132 readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
144} 133}
@@ -160,44 +149,6 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = {
160 .irqpending = icside_irqpending_arcin_v6, 149 .irqpending = icside_irqpending_arcin_v6,
161}; 150};
162 151
163/*
164 * Handle routing of interrupts. This is called before
165 * we write the command to the drive.
166 */
167static void icside_maskproc(ide_drive_t *drive, int mask)
168{
169 ide_hwif_t *hwif = drive->hwif;
170 struct expansion_card *ec = ECARD_DEV(hwif->dev);
171 struct icside_state *state = ecard_get_drvdata(ec);
172 unsigned long flags;
173
174 local_irq_save(flags);
175
176 state->channel = hwif->channel;
177
178 if (state->enabled && !mask) {
179 switch (hwif->channel) {
180 case 0:
181 writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
182 readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
183 break;
184 case 1:
185 writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
186 readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
187 break;
188 }
189 } else {
190 readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
191 readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
192 }
193
194 local_irq_restore(flags);
195}
196
197static const struct ide_port_ops icside_v6_no_dma_port_ops = {
198 .maskproc = icside_maskproc,
199};
200
201#ifdef CONFIG_BLK_DEV_IDEDMA_ICS 152#ifdef CONFIG_BLK_DEV_IDEDMA_ICS
202/* 153/*
203 * SG-DMA support. 154 * SG-DMA support.
@@ -275,7 +226,6 @@ static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode)
275 226
276static const struct ide_port_ops icside_v6_port_ops = { 227static const struct ide_port_ops icside_v6_port_ops = {
277 .set_dma_mode = icside_set_dma_mode, 228 .set_dma_mode = icside_set_dma_mode,
278 .maskproc = icside_maskproc,
279}; 229};
280 230
281static void icside_dma_host_set(ide_drive_t *drive, int on) 231static void icside_dma_host_set(ide_drive_t *drive, int on)
@@ -320,11 +270,6 @@ static int icside_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
320 BUG_ON(dma_channel_active(ec->dma)); 270 BUG_ON(dma_channel_active(ec->dma));
321 271
322 /* 272 /*
323 * Ensure that we have the right interrupt routed.
324 */
325 icside_maskproc(drive, 0);
326
327 /*
328 * Route the DMA signals to the correct interface. 273 * Route the DMA signals to the correct interface.
329 */ 274 */
330 writeb(state->sel | hwif->channel, state->ioc_base); 275 writeb(state->sel | hwif->channel, state->ioc_base);
@@ -381,7 +326,7 @@ static int icside_dma_off_init(ide_hwif_t *hwif, const struct ide_port_info *d)
381 return -EOPNOTSUPP; 326 return -EOPNOTSUPP;
382} 327}
383 328
384static void icside_setup_ports(hw_regs_t *hw, void __iomem *base, 329static void icside_setup_ports(struct ide_hw *hw, void __iomem *base,
385 struct cardinfo *info, struct expansion_card *ec) 330 struct cardinfo *info, struct expansion_card *ec)
386{ 331{
387 unsigned long port = (unsigned long)base + info->dataoffset; 332 unsigned long port = (unsigned long)base + info->dataoffset;
@@ -398,11 +343,11 @@ static void icside_setup_ports(hw_regs_t *hw, void __iomem *base,
398 343
399 hw->irq = ec->irq; 344 hw->irq = ec->irq;
400 hw->dev = &ec->dev; 345 hw->dev = &ec->dev;
401 hw->chipset = ide_acorn;
402} 346}
403 347
404static const struct ide_port_info icside_v5_port_info = { 348static const struct ide_port_info icside_v5_port_info = {
405 .host_flags = IDE_HFLAG_NO_DMA, 349 .host_flags = IDE_HFLAG_NO_DMA,
350 .chipset = ide_acorn,
406}; 351};
407 352
408static int __devinit 353static int __devinit
@@ -410,7 +355,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
410{ 355{
411 void __iomem *base; 356 void __iomem *base;
412 struct ide_host *host; 357 struct ide_host *host;
413 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 358 struct ide_hw hw, *hws[] = { &hw };
414 int ret; 359 int ret;
415 360
416 base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0); 361 base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
@@ -431,7 +376,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
431 376
432 icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec); 377 icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec);
433 378
434 host = ide_host_alloc(&icside_v5_port_info, hws); 379 host = ide_host_alloc(&icside_v5_port_info, hws, 1);
435 if (host == NULL) 380 if (host == NULL)
436 return -ENODEV; 381 return -ENODEV;
437 382
@@ -452,11 +397,11 @@ err_free:
452 397
453static const struct ide_port_info icside_v6_port_info __initdata = { 398static const struct ide_port_info icside_v6_port_info __initdata = {
454 .init_dma = icside_dma_off_init, 399 .init_dma = icside_dma_off_init,
455 .port_ops = &icside_v6_no_dma_port_ops,
456 .dma_ops = &icside_v6_dma_ops, 400 .dma_ops = &icside_v6_dma_ops,
457 .host_flags = IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO, 401 .host_flags = IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO,
458 .mwdma_mask = ATA_MWDMA2, 402 .mwdma_mask = ATA_MWDMA2,
459 .swdma_mask = ATA_SWDMA2, 403 .swdma_mask = ATA_SWDMA2,
404 .chipset = ide_acorn,
460}; 405};
461 406
462static int __devinit 407static int __devinit
@@ -466,7 +411,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
466 struct ide_host *host; 411 struct ide_host *host;
467 unsigned int sel = 0; 412 unsigned int sel = 0;
468 int ret; 413 int ret;
469 hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1], NULL, NULL }; 414 struct ide_hw hw[2], *hws[] = { &hw[0], &hw[1] };
470 struct ide_port_info d = icside_v6_port_info; 415 struct ide_port_info d = icside_v6_port_info;
471 416
472 ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0); 417 ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
@@ -506,7 +451,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
506 icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec); 451 icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec);
507 icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec); 452 icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec);
508 453
509 host = ide_host_alloc(&d, hws); 454 host = ide_host_alloc(&d, hws, 2);
510 if (host == NULL) 455 if (host == NULL)
511 return -ENODEV; 456 return -ENODEV;
512 457
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
index 78aca75a2c48..979d342c338a 100644
--- a/drivers/ide/ide-4drives.c
+++ b/drivers/ide/ide-4drives.c
@@ -25,12 +25,13 @@ static const struct ide_port_info ide_4drives_port_info = {
25 .port_ops = &ide_4drives_port_ops, 25 .port_ops = &ide_4drives_port_ops,
26 .host_flags = IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA | 26 .host_flags = IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA |
27 IDE_HFLAG_4DRIVES, 27 IDE_HFLAG_4DRIVES,
28 .chipset = ide_4drives,
28}; 29};
29 30
30static int __init ide_4drives_init(void) 31static int __init ide_4drives_init(void)
31{ 32{
32 unsigned long base = 0x1f0, ctl = 0x3f6; 33 unsigned long base = 0x1f0, ctl = 0x3f6;
33 hw_regs_t hw, *hws[] = { &hw, &hw, NULL, NULL }; 34 struct ide_hw hw, *hws[] = { &hw, &hw };
34 35
35 if (probe_4drives == 0) 36 if (probe_4drives == 0)
36 return -ENODEV; 37 return -ENODEV;
@@ -52,9 +53,8 @@ static int __init ide_4drives_init(void)
52 53
53 ide_std_init_ports(&hw, base, ctl); 54 ide_std_init_ports(&hw, base, ctl);
54 hw.irq = 14; 55 hw.irq = 14;
55 hw.chipset = ide_4drives;
56 56
57 return ide_host_add(&ide_4drives_port_info, hws, NULL); 57 return ide_host_add(&ide_4drives_port_info, hws, 2, NULL);
58} 58}
59 59
60module_init(ide_4drives_init); 60module_init(ide_4drives_init);
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 757e5956b132..bbdd2547f12a 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -259,7 +259,7 @@ void ide_retry_pc(ide_drive_t *drive)
259 pc->req_xfer = blk_rq_bytes(sense_rq); 259 pc->req_xfer = blk_rq_bytes(sense_rq);
260 260
261 if (drive->media == ide_tape) 261 if (drive->media == ide_tape)
262 set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); 262 drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
263 263
264 /* 264 /*
265 * Push back the failed request and put request sense on top 265 * Push back the failed request and put request sense on top
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 9e47f3529d55..527908ff298c 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -155,6 +155,7 @@ static const struct ide_port_info idecs_port_info = {
155 .port_ops = &idecs_port_ops, 155 .port_ops = &idecs_port_ops,
156 .host_flags = IDE_HFLAG_NO_DMA, 156 .host_flags = IDE_HFLAG_NO_DMA,
157 .irq_flags = IRQF_SHARED, 157 .irq_flags = IRQF_SHARED,
158 .chipset = ide_pci,
158}; 159};
159 160
160static struct ide_host *idecs_register(unsigned long io, unsigned long ctl, 161static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
@@ -163,7 +164,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
163 struct ide_host *host; 164 struct ide_host *host;
164 ide_hwif_t *hwif; 165 ide_hwif_t *hwif;
165 int i, rc; 166 int i, rc;
166 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 167 struct ide_hw hw, *hws[] = { &hw };
167 168
168 if (!request_region(io, 8, DRV_NAME)) { 169 if (!request_region(io, 8, DRV_NAME)) {
169 printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n", 170 printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
@@ -181,10 +182,9 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
181 memset(&hw, 0, sizeof(hw)); 182 memset(&hw, 0, sizeof(hw));
182 ide_std_init_ports(&hw, io, ctl); 183 ide_std_init_ports(&hw, io, ctl);
183 hw.irq = irq; 184 hw.irq = irq;
184 hw.chipset = ide_pci;
185 hw.dev = &handle->dev; 185 hw.dev = &handle->dev;
186 186
187 rc = ide_host_add(&idecs_port_info, hws, &host); 187 rc = ide_host_add(&idecs_port_info, hws, 1, &host);
188 if (rc) 188 if (rc)
189 goto out_release; 189 goto out_release;
190 190
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index c6f7fcfb9d67..6a1de2169709 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -302,14 +302,12 @@ static const struct drive_list_entry hpa_list[] = {
302 { NULL, NULL } 302 { NULL, NULL }
303}; 303};
304 304
305static void idedisk_check_hpa(ide_drive_t *drive) 305static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48)
306{ 306{
307 unsigned long long capacity, set_max; 307 u64 capacity, set_max;
308 int lba48 = ata_id_lba48_enabled(drive->id);
309 308
310 capacity = drive->capacity64; 309 capacity = drive->capacity64;
311 310 set_max = idedisk_read_native_max_address(drive, lba48);
312 set_max = idedisk_read_native_max_address(drive, lba48);
313 311
314 if (ide_in_drive_list(drive->id, hpa_list)) { 312 if (ide_in_drive_list(drive->id, hpa_list)) {
315 /* 313 /*
@@ -320,9 +318,31 @@ static void idedisk_check_hpa(ide_drive_t *drive)
320 set_max--; 318 set_max--;
321 } 319 }
322 320
321 return set_max;
322}
323
324static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48)
325{
326 set_max = idedisk_set_max_address(drive, set_max, lba48);
327 if (set_max)
328 drive->capacity64 = set_max;
329
330 return set_max;
331}
332
333static void idedisk_check_hpa(ide_drive_t *drive)
334{
335 u64 capacity, set_max;
336 int lba48 = ata_id_lba48_enabled(drive->id);
337
338 capacity = drive->capacity64;
339 set_max = ide_disk_hpa_get_native_capacity(drive, lba48);
340
323 if (set_max <= capacity) 341 if (set_max <= capacity)
324 return; 342 return;
325 343
344 drive->probed_capacity = set_max;
345
326 printk(KERN_INFO "%s: Host Protected Area detected.\n" 346 printk(KERN_INFO "%s: Host Protected Area detected.\n"
327 "\tcurrent capacity is %llu sectors (%llu MB)\n" 347 "\tcurrent capacity is %llu sectors (%llu MB)\n"
328 "\tnative capacity is %llu sectors (%llu MB)\n", 348 "\tnative capacity is %llu sectors (%llu MB)\n",
@@ -330,13 +350,13 @@ static void idedisk_check_hpa(ide_drive_t *drive)
330 capacity, sectors_to_MB(capacity), 350 capacity, sectors_to_MB(capacity),
331 set_max, sectors_to_MB(set_max)); 351 set_max, sectors_to_MB(set_max));
332 352
333 set_max = idedisk_set_max_address(drive, set_max, lba48); 353 if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0)
354 return;
334 355
335 if (set_max) { 356 set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48);
336 drive->capacity64 = set_max; 357 if (set_max)
337 printk(KERN_INFO "%s: Host Protected Area disabled.\n", 358 printk(KERN_INFO "%s: Host Protected Area disabled.\n",
338 drive->name); 359 drive->name);
339 }
340} 360}
341 361
342static int ide_disk_get_capacity(ide_drive_t *drive) 362static int ide_disk_get_capacity(ide_drive_t *drive)
@@ -358,6 +378,8 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
358 drive->capacity64 = drive->cyl * drive->head * drive->sect; 378 drive->capacity64 = drive->cyl * drive->head * drive->sect;
359 } 379 }
360 380
381 drive->probed_capacity = drive->capacity64;
382
361 if (lba) { 383 if (lba) {
362 drive->dev_flags |= IDE_DFLAG_LBA; 384 drive->dev_flags |= IDE_DFLAG_LBA;
363 385
@@ -376,7 +398,7 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
376 "%llu sectors (%llu MB)\n", 398 "%llu sectors (%llu MB)\n",
377 drive->name, (unsigned long long)drive->capacity64, 399 drive->name, (unsigned long long)drive->capacity64,
378 sectors_to_MB(drive->capacity64)); 400 sectors_to_MB(drive->capacity64));
379 drive->capacity64 = 1ULL << 28; 401 drive->probed_capacity = drive->capacity64 = 1ULL << 28;
380 } 402 }
381 403
382 if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && 404 if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
@@ -392,6 +414,34 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
392 return 0; 414 return 0;
393} 415}
394 416
417static u64 ide_disk_set_capacity(ide_drive_t *drive, u64 capacity)
418{
419 u64 set = min(capacity, drive->probed_capacity);
420 u16 *id = drive->id;
421 int lba48 = ata_id_lba48_enabled(id);
422
423 if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 ||
424 ata_id_hpa_enabled(id) == 0)
425 goto out;
426
427 /*
428 * according to the spec the SET MAX ADDRESS command shall be
429 * immediately preceded by a READ NATIVE MAX ADDRESS command
430 */
431 capacity = ide_disk_hpa_get_native_capacity(drive, lba48);
432 if (capacity == 0)
433 goto out;
434
435 set = ide_disk_hpa_set_capacity(drive, set, lba48);
436 if (set) {
437 /* needed for ->resume to disable HPA */
438 drive->dev_flags |= IDE_DFLAG_NOHPA;
439 return set;
440 }
441out:
442 return drive->capacity64;
443}
444
395static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) 445static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
396{ 446{
397 ide_drive_t *drive = q->queuedata; 447 ide_drive_t *drive = q->queuedata;
@@ -428,14 +478,14 @@ static int set_multcount(ide_drive_t *drive, int arg)
428 if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff)) 478 if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
429 return -EINVAL; 479 return -EINVAL;
430 480
431 if (drive->special.b.set_multmode) 481 if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
432 return -EBUSY; 482 return -EBUSY;
433 483
434 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 484 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
435 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 485 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
436 486
437 drive->mult_req = arg; 487 drive->mult_req = arg;
438 drive->special.b.set_multmode = 1; 488 drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
439 error = blk_execute_rq(drive->queue, NULL, rq, 0); 489 error = blk_execute_rq(drive->queue, NULL, rq, 0);
440 blk_put_request(rq); 490 blk_put_request(rq);
441 491
@@ -740,6 +790,7 @@ static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
740 790
741const struct ide_disk_ops ide_ata_disk_ops = { 791const struct ide_disk_ops ide_ata_disk_ops = {
742 .check = ide_disk_check, 792 .check = ide_disk_check,
793 .set_capacity = ide_disk_set_capacity,
743 .get_capacity = ide_disk_get_capacity, 794 .get_capacity = ide_disk_get_capacity,
744 .setup = ide_disk_setup, 795 .setup = ide_disk_setup,
745 .flush = ide_disk_flush, 796 .flush = ide_disk_flush,
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 001f68f0bb28..219e6fb78dc6 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -347,7 +347,6 @@ u8 ide_find_dma_mode(ide_drive_t *drive, u8 req_mode)
347 347
348 return mode; 348 return mode;
349} 349}
350EXPORT_SYMBOL_GPL(ide_find_dma_mode);
351 350
352static int ide_tune_dma(ide_drive_t *drive) 351static int ide_tune_dma(ide_drive_t *drive)
353{ 352{
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 5d5fb961b5ce..2b9141979613 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -52,7 +52,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
52 } 52 }
53 53
54 if ((rq->errors & ERROR_RECAL) == ERROR_RECAL) 54 if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
55 drive->special.b.recalibrate = 1; 55 drive->special_flags |= IDE_SFLAG_RECALIBRATE;
56 56
57 ++rq->errors; 57 ++rq->errors;
58 58
@@ -268,9 +268,8 @@ static void ide_disk_pre_reset(ide_drive_t *drive)
268{ 268{
269 int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1; 269 int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1;
270 270
271 drive->special.all = 0; 271 drive->special_flags =
272 drive->special.b.set_geometry = legacy; 272 legacy ? (IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE) : 0;
273 drive->special.b.recalibrate = legacy;
274 273
275 drive->mult_count = 0; 274 drive->mult_count = 0;
276 drive->dev_flags &= ~IDE_DFLAG_PARKED; 275 drive->dev_flags &= ~IDE_DFLAG_PARKED;
@@ -280,7 +279,7 @@ static void ide_disk_pre_reset(ide_drive_t *drive)
280 drive->mult_req = 0; 279 drive->mult_req = 0;
281 280
282 if (drive->mult_req != drive->mult_count) 281 if (drive->mult_req != drive->mult_count)
283 drive->special.b.set_multmode = 1; 282 drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
284} 283}
285 284
286static void pre_reset(ide_drive_t *drive) 285static void pre_reset(ide_drive_t *drive)
@@ -408,8 +407,9 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
408 /* more than enough time */ 407 /* more than enough time */
409 udelay(10); 408 udelay(10);
410 /* clear SRST, leave nIEN (unless device is on the quirk list) */ 409 /* clear SRST, leave nIEN (unless device is on the quirk list) */
411 tp_ops->write_devctl(hwif, (drive->quirk_list == 2 ? 0 : ATA_NIEN) | 410 tp_ops->write_devctl(hwif,
412 ATA_DEVCTL_OBS); 411 ((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) ? 0 : ATA_NIEN) |
412 ATA_DEVCTL_OBS);
413 /* more than enough time */ 413 /* more than enough time */
414 udelay(10); 414 udelay(10);
415 hwif->poll_timeout = jiffies + WAIT_WORSTCASE; 415 hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 4b6b71e2cdf5..214119026b3f 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -287,6 +287,19 @@ static int ide_gd_media_changed(struct gendisk *disk)
287 return ret; 287 return ret;
288} 288}
289 289
290static unsigned long long ide_gd_set_capacity(struct gendisk *disk,
291 unsigned long long capacity)
292{
293 struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
294 ide_drive_t *drive = idkp->drive;
295 const struct ide_disk_ops *disk_ops = drive->disk_ops;
296
297 if (disk_ops->set_capacity)
298 return disk_ops->set_capacity(drive, capacity);
299
300 return drive->capacity64;
301}
302
290static int ide_gd_revalidate_disk(struct gendisk *disk) 303static int ide_gd_revalidate_disk(struct gendisk *disk)
291{ 304{
292 struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj); 305 struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
@@ -315,6 +328,7 @@ static struct block_device_operations ide_gd_ops = {
315 .locked_ioctl = ide_gd_ioctl, 328 .locked_ioctl = ide_gd_ioctl,
316 .getgeo = ide_gd_getgeo, 329 .getgeo = ide_gd_getgeo,
317 .media_changed = ide_gd_media_changed, 330 .media_changed = ide_gd_media_changed,
331 .set_capacity = ide_gd_set_capacity,
318 .revalidate_disk = ide_gd_revalidate_disk 332 .revalidate_disk = ide_gd_revalidate_disk
319}; 333};
320 334
diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 7812ca0be13b..54d7c4685d23 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -29,6 +29,7 @@ MODULE_PARM_DESC(probe_mask, "probe mask for legacy ISA IDE ports");
29 29
30static const struct ide_port_info ide_generic_port_info = { 30static const struct ide_port_info ide_generic_port_info = {
31 .host_flags = IDE_HFLAG_NO_DMA, 31 .host_flags = IDE_HFLAG_NO_DMA,
32 .chipset = ide_generic,
32}; 33};
33 34
34#ifdef CONFIG_ARM 35#ifdef CONFIG_ARM
@@ -85,7 +86,7 @@ static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary)
85 86
86static int __init ide_generic_init(void) 87static int __init ide_generic_init(void)
87{ 88{
88 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 89 struct ide_hw hw, *hws[] = { &hw };
89 unsigned long io_addr; 90 unsigned long io_addr;
90 int i, rc = 0, primary = 0, secondary = 0; 91 int i, rc = 0, primary = 0, secondary = 0;
91 92
@@ -132,9 +133,7 @@ static int __init ide_generic_init(void)
132#else 133#else
133 hw.irq = legacy_irqs[i]; 134 hw.irq = legacy_irqs[i];
134#endif 135#endif
135 hw.chipset = ide_generic; 136 rc = ide_host_add(&ide_generic_port_info, hws, 1, NULL);
136
137 rc = ide_host_add(&ide_generic_port_info, hws, NULL);
138 if (rc) { 137 if (rc) {
139 release_region(io_addr + 0x206, 1); 138 release_region(io_addr + 0x206, 1);
140 release_region(io_addr, 8); 139 release_region(io_addr, 8);
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index c06ebdc4a130..520f42c5445a 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -64,26 +64,26 @@ static const struct ide_tp_ops h8300_tp_ops = {
64 64
65#define H8300_IDE_GAP (2) 65#define H8300_IDE_GAP (2)
66 66
67static inline void hw_setup(hw_regs_t *hw) 67static inline void hw_setup(struct ide_hw *hw)
68{ 68{
69 int i; 69 int i;
70 70
71 memset(hw, 0, sizeof(hw_regs_t)); 71 memset(hw, 0, sizeof(*hw));
72 for (i = 0; i <= 7; i++) 72 for (i = 0; i <= 7; i++)
73 hw->io_ports_array[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i; 73 hw->io_ports_array[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i;
74 hw->io_ports.ctl_addr = CONFIG_H8300_IDE_ALT; 74 hw->io_ports.ctl_addr = CONFIG_H8300_IDE_ALT;
75 hw->irq = EXT_IRQ0 + CONFIG_H8300_IDE_IRQ; 75 hw->irq = EXT_IRQ0 + CONFIG_H8300_IDE_IRQ;
76 hw->chipset = ide_generic;
77} 76}
78 77
79static const struct ide_port_info h8300_port_info = { 78static const struct ide_port_info h8300_port_info = {
80 .tp_ops = &h8300_tp_ops, 79 .tp_ops = &h8300_tp_ops,
81 .host_flags = IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_NO_DMA, 80 .host_flags = IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_NO_DMA,
81 .chipset = ide_generic,
82}; 82};
83 83
84static int __init h8300_ide_init(void) 84static int __init h8300_ide_init(void)
85{ 85{
86 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 86 struct ide_hw hw, *hws[] = { &hw };
87 87
88 printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n"); 88 printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n");
89 89
@@ -96,7 +96,7 @@ static int __init h8300_ide_init(void)
96 96
97 hw_setup(&hw); 97 hw_setup(&hw);
98 98
99 return ide_host_add(&h8300_port_info, hws, NULL); 99 return ide_host_add(&h8300_port_info, hws, 1, NULL);
100 100
101out_busy: 101out_busy:
102 printk(KERN_ERR "ide-h8300: IDE I/F resource already used.\n"); 102 printk(KERN_ERR "ide-h8300: IDE I/F resource already used.\n");
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index bba4297f2f03..272cc38f6dbe 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -184,29 +184,42 @@ static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
184 tf->command = ATA_CMD_SET_MULTI; 184 tf->command = ATA_CMD_SET_MULTI;
185} 185}
186 186
187static ide_startstop_t ide_disk_special(ide_drive_t *drive) 187/**
188 * do_special - issue some special commands
189 * @drive: drive the command is for
190 *
191 * do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS,
192 * ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive.
193 */
194
195static ide_startstop_t do_special(ide_drive_t *drive)
188{ 196{
189 special_t *s = &drive->special;
190 struct ide_cmd cmd; 197 struct ide_cmd cmd;
191 198
199#ifdef DEBUG
200 printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__,
201 drive->special_flags);
202#endif
203 if (drive->media != ide_disk) {
204 drive->special_flags = 0;
205 drive->mult_req = 0;
206 return ide_stopped;
207 }
208
192 memset(&cmd, 0, sizeof(cmd)); 209 memset(&cmd, 0, sizeof(cmd));
193 cmd.protocol = ATA_PROT_NODATA; 210 cmd.protocol = ATA_PROT_NODATA;
194 211
195 if (s->b.set_geometry) { 212 if (drive->special_flags & IDE_SFLAG_SET_GEOMETRY) {
196 s->b.set_geometry = 0; 213 drive->special_flags &= ~IDE_SFLAG_SET_GEOMETRY;
197 ide_tf_set_specify_cmd(drive, &cmd.tf); 214 ide_tf_set_specify_cmd(drive, &cmd.tf);
198 } else if (s->b.recalibrate) { 215 } else if (drive->special_flags & IDE_SFLAG_RECALIBRATE) {
199 s->b.recalibrate = 0; 216 drive->special_flags &= ~IDE_SFLAG_RECALIBRATE;
200 ide_tf_set_restore_cmd(drive, &cmd.tf); 217 ide_tf_set_restore_cmd(drive, &cmd.tf);
201 } else if (s->b.set_multmode) { 218 } else if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) {
202 s->b.set_multmode = 0; 219 drive->special_flags &= ~IDE_SFLAG_SET_MULTMODE;
203 ide_tf_set_setmult_cmd(drive, &cmd.tf); 220 ide_tf_set_setmult_cmd(drive, &cmd.tf);
204 } else if (s->all) { 221 } else
205 int special = s->all; 222 BUG();
206 s->all = 0;
207 printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special);
208 return ide_stopped;
209 }
210 223
211 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; 224 cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
212 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; 225 cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE;
@@ -217,31 +230,6 @@ static ide_startstop_t ide_disk_special(ide_drive_t *drive)
217 return ide_started; 230 return ide_started;
218} 231}
219 232
220/**
221 * do_special - issue some special commands
222 * @drive: drive the command is for
223 *
224 * do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS,
225 * ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive.
226 *
227 * It used to do much more, but has been scaled back.
228 */
229
230static ide_startstop_t do_special (ide_drive_t *drive)
231{
232 special_t *s = &drive->special;
233
234#ifdef DEBUG
235 printk("%s: do_special: 0x%02x\n", drive->name, s->all);
236#endif
237 if (drive->media == ide_disk)
238 return ide_disk_special(drive);
239
240 s->all = 0;
241 drive->mult_req = 0;
242 return ide_stopped;
243}
244
245void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd) 233void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
246{ 234{
247 ide_hwif_t *hwif = drive->hwif; 235 ide_hwif_t *hwif = drive->hwif;
@@ -351,7 +339,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
351 printk(KERN_ERR "%s: drive not ready for command\n", drive->name); 339 printk(KERN_ERR "%s: drive not ready for command\n", drive->name);
352 return startstop; 340 return startstop;
353 } 341 }
354 if (!drive->special.all) { 342
343 if (drive->special_flags == 0) {
355 struct ide_driver *drv; 344 struct ide_driver *drv;
356 345
357 /* 346 /*
@@ -499,11 +488,15 @@ repeat:
499 488
500 if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) && 489 if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
501 hwif != prev_port) { 490 hwif != prev_port) {
491 ide_drive_t *cur_dev =
492 prev_port ? prev_port->cur_dev : NULL;
493
502 /* 494 /*
503 * set nIEN for previous port, drives in the 495 * set nIEN for previous port, drives in the
504 * quirk_list may not like intr setups/cleanups 496 * quirk list may not like intr setups/cleanups
505 */ 497 */
506 if (prev_port && prev_port->cur_dev->quirk_list == 0) 498 if (cur_dev &&
499 (cur_dev->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
507 prev_port->tp_ops->write_devctl(prev_port, 500 prev_port->tp_ops->write_devctl(prev_port,
508 ATA_NIEN | 501 ATA_NIEN |
509 ATA_DEVCTL_OBS); 502 ATA_DEVCTL_OBS);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 06fe002116ec..fa047150a1c6 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -282,6 +282,29 @@ no_80w:
282 return 0; 282 return 0;
283} 283}
284 284
285static const char *nien_quirk_list[] = {
286 "QUANTUM FIREBALLlct08 08",
287 "QUANTUM FIREBALLP KA6.4",
288 "QUANTUM FIREBALLP KA9.1",
289 "QUANTUM FIREBALLP KX13.6",
290 "QUANTUM FIREBALLP KX20.5",
291 "QUANTUM FIREBALLP KX27.3",
292 "QUANTUM FIREBALLP LM20.4",
293 "QUANTUM FIREBALLP LM20.5",
294 NULL
295};
296
297void ide_check_nien_quirk_list(ide_drive_t *drive)
298{
299 const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
300
301 for (list = nien_quirk_list; *list != NULL; list++)
302 if (strstr(m, *list) != NULL) {
303 drive->dev_flags |= IDE_DFLAG_NIEN_QUIRK;
304 return;
305 }
306}
307
285int ide_driveid_update(ide_drive_t *drive) 308int ide_driveid_update(ide_drive_t *drive)
286{ 309{
287 u16 *id; 310 u16 *id;
@@ -311,7 +334,6 @@ int ide_driveid_update(ide_drive_t *drive)
311 334
312 return 1; 335 return 1;
313out_err: 336out_err:
314 SELECT_MASK(drive, 0);
315 if (rc == 2) 337 if (rc == 2)
316 printk(KERN_ERR "%s: %s: bad status\n", drive->name, __func__); 338 printk(KERN_ERR "%s: %s: bad status\n", drive->name, __func__);
317 kfree(id); 339 kfree(id);
@@ -365,7 +387,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
365 387
366 tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES); 388 tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES);
367 389
368 if (drive->quirk_list == 2) 390 if (drive->dev_flags & IDE_DFLAG_NIEN_QUIRK)
369 tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); 391 tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
370 392
371 error = __ide_wait_stat(drive, drive->ready_stat, 393 error = __ide_wait_stat(drive, drive->ready_stat,
diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c
index 8c5dcbf22547..b9654a7bb7be 100644
--- a/drivers/ide/ide-legacy.c
+++ b/drivers/ide/ide-legacy.c
@@ -1,7 +1,7 @@
1#include <linux/kernel.h> 1#include <linux/kernel.h>
2#include <linux/ide.h> 2#include <linux/ide.h>
3 3
4static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw, 4static void ide_legacy_init_one(struct ide_hw **hws, struct ide_hw *hw,
5 u8 port_no, const struct ide_port_info *d, 5 u8 port_no, const struct ide_port_info *d,
6 unsigned long config) 6 unsigned long config)
7{ 7{
@@ -33,7 +33,6 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
33 33
34 ide_std_init_ports(hw, base, ctl); 34 ide_std_init_ports(hw, base, ctl);
35 hw->irq = irq; 35 hw->irq = irq;
36 hw->chipset = d->chipset;
37 hw->config = config; 36 hw->config = config;
38 37
39 hws[port_no] = hw; 38 hws[port_no] = hw;
@@ -41,7 +40,7 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
41 40
42int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config) 41int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
43{ 42{
44 hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL }; 43 struct ide_hw hw[2], *hws[] = { NULL, NULL };
45 44
46 memset(&hw, 0, sizeof(hw)); 45 memset(&hw, 0, sizeof(hw));
47 46
@@ -53,6 +52,6 @@ int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
53 (d->host_flags & IDE_HFLAG_SINGLE)) 52 (d->host_flags & IDE_HFLAG_SINGLE))
54 return -ENOENT; 53 return -ENOENT;
55 54
56 return ide_host_add(d, hws, NULL); 55 return ide_host_add(d, hws, 2, NULL);
57} 56}
58EXPORT_SYMBOL_GPL(ide_legacy_device_add); 57EXPORT_SYMBOL_GPL(ide_legacy_device_add);
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
index 6e80b774e88a..017b1df3b805 100644
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -29,6 +29,7 @@ static struct pnp_device_id idepnp_devices[] = {
29 29
30static const struct ide_port_info ide_pnp_port_info = { 30static const struct ide_port_info ide_pnp_port_info = {
31 .host_flags = IDE_HFLAG_NO_DMA, 31 .host_flags = IDE_HFLAG_NO_DMA,
32 .chipset = ide_generic,
32}; 33};
33 34
34static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) 35static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
@@ -36,7 +37,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
36 struct ide_host *host; 37 struct ide_host *host;
37 unsigned long base, ctl; 38 unsigned long base, ctl;
38 int rc; 39 int rc;
39 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 40 struct ide_hw hw, *hws[] = { &hw };
40 41
41 printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n"); 42 printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n");
42 43
@@ -62,9 +63,8 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
62 memset(&hw, 0, sizeof(hw)); 63 memset(&hw, 0, sizeof(hw));
63 ide_std_init_ports(&hw, base, ctl); 64 ide_std_init_ports(&hw, base, ctl);
64 hw.irq = pnp_irq(dev, 0); 65 hw.irq = pnp_irq(dev, 0);
65 hw.chipset = ide_generic;
66 66
67 rc = ide_host_add(&ide_pnp_port_info, hws, &host); 67 rc = ide_host_add(&ide_pnp_port_info, hws, 1, &host);
68 if (rc) 68 if (rc)
69 goto out; 69 goto out;
70 70
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index c895ed52b2e8..f371b0de314f 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -97,7 +97,7 @@ static void ide_disk_init_mult_count(ide_drive_t *drive)
97 drive->mult_req = id[ATA_ID_MULTSECT] & 0xff; 97 drive->mult_req = id[ATA_ID_MULTSECT] & 0xff;
98 98
99 if (drive->mult_req) 99 if (drive->mult_req)
100 drive->special.b.set_multmode = 1; 100 drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
101 } 101 }
102} 102}
103 103
@@ -465,23 +465,8 @@ static u8 probe_for_drive(ide_drive_t *drive)
465 int rc; 465 int rc;
466 u8 cmd; 466 u8 cmd;
467 467
468 /*
469 * In order to keep things simple we have an id
470 * block for all drives at all times. If the device
471 * is pre ATA or refuses ATA/ATAPI identify we
472 * will add faked data to this.
473 *
474 * Also note that 0 everywhere means "can't do X"
475 */
476
477 drive->dev_flags &= ~IDE_DFLAG_ID_READ; 468 drive->dev_flags &= ~IDE_DFLAG_ID_READ;
478 469
479 drive->id = kzalloc(SECTOR_SIZE, GFP_KERNEL);
480 if (drive->id == NULL) {
481 printk(KERN_ERR "ide: out of memory for id data.\n");
482 return 0;
483 }
484
485 m = (char *)&drive->id[ATA_ID_PROD]; 470 m = (char *)&drive->id[ATA_ID_PROD];
486 strcpy(m, "UNKNOWN"); 471 strcpy(m, "UNKNOWN");
487 472
@@ -497,7 +482,7 @@ static u8 probe_for_drive(ide_drive_t *drive)
497 } 482 }
498 483
499 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) 484 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
500 goto out_free; 485 return 0;
501 486
502 /* identification failed? */ 487 /* identification failed? */
503 if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) { 488 if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
@@ -521,7 +506,7 @@ static u8 probe_for_drive(ide_drive_t *drive)
521 } 506 }
522 507
523 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) 508 if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
524 goto out_free; 509 return 0;
525 510
526 /* The drive wasn't being helpful. Add generic info only */ 511 /* The drive wasn't being helpful. Add generic info only */
527 if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) { 512 if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
@@ -535,9 +520,6 @@ static u8 probe_for_drive(ide_drive_t *drive)
535 } 520 }
536 521
537 return 1; 522 return 1;
538out_free:
539 kfree(drive->id);
540 return 0;
541} 523}
542 524
543static void hwif_release_dev(struct device *dev) 525static void hwif_release_dev(struct device *dev)
@@ -702,8 +684,14 @@ static int ide_probe_port(ide_hwif_t *hwif)
702 if (irqd) 684 if (irqd)
703 disable_irq(hwif->irq); 685 disable_irq(hwif->irq);
704 686
705 if (ide_port_wait_ready(hwif) == -EBUSY) 687 rc = ide_port_wait_ready(hwif);
706 printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name); 688 if (rc == -ENODEV) {
689 printk(KERN_INFO "%s: no devices on the port\n", hwif->name);
690 goto out;
691 } else if (rc == -EBUSY)
692 printk(KERN_ERR "%s: not ready before the probe\n", hwif->name);
693 else
694 rc = -ENODEV;
707 695
708 /* 696 /*
709 * Second drive should only exist if first drive was found, 697 * Second drive should only exist if first drive was found,
@@ -714,7 +702,7 @@ static int ide_probe_port(ide_hwif_t *hwif)
714 if (drive->dev_flags & IDE_DFLAG_PRESENT) 702 if (drive->dev_flags & IDE_DFLAG_PRESENT)
715 rc = 0; 703 rc = 0;
716 } 704 }
717 705out:
718 /* 706 /*
719 * Use cached IRQ number. It might be (and is...) changed by probe 707 * Use cached IRQ number. It might be (and is...) changed by probe
720 * code above 708 * code above
@@ -732,6 +720,8 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
732 int i; 720 int i;
733 721
734 ide_port_for_each_present_dev(i, drive, hwif) { 722 ide_port_for_each_present_dev(i, drive, hwif) {
723 ide_check_nien_quirk_list(drive);
724
735 if (port_ops && port_ops->quirkproc) 725 if (port_ops && port_ops->quirkproc)
736 port_ops->quirkproc(drive); 726 port_ops->quirkproc(drive);
737 } 727 }
@@ -817,8 +807,6 @@ static int ide_port_setup_devices(ide_hwif_t *hwif)
817 if (ide_init_queue(drive)) { 807 if (ide_init_queue(drive)) {
818 printk(KERN_ERR "ide: failed to init %s\n", 808 printk(KERN_ERR "ide: failed to init %s\n",
819 drive->name); 809 drive->name);
820 kfree(drive->id);
821 drive->id = NULL;
822 drive->dev_flags &= ~IDE_DFLAG_PRESENT; 810 drive->dev_flags &= ~IDE_DFLAG_PRESENT;
823 continue; 811 continue;
824 } 812 }
@@ -947,9 +935,6 @@ static void drive_release_dev (struct device *dev)
947 blk_cleanup_queue(drive->queue); 935 blk_cleanup_queue(drive->queue);
948 drive->queue = NULL; 936 drive->queue = NULL;
949 937
950 kfree(drive->id);
951 drive->id = NULL;
952
953 drive->dev_flags &= ~IDE_DFLAG_PRESENT; 938 drive->dev_flags &= ~IDE_DFLAG_PRESENT;
954 939
955 complete(&drive->gendev_rel_comp); 940 complete(&drive->gendev_rel_comp);
@@ -1035,6 +1020,15 @@ static void ide_port_init_devices(ide_hwif_t *hwif)
1035 if (port_ops && port_ops->init_dev) 1020 if (port_ops && port_ops->init_dev)
1036 port_ops->init_dev(drive); 1021 port_ops->init_dev(drive);
1037 } 1022 }
1023
1024 ide_port_for_each_dev(i, drive, hwif) {
1025 /*
1026 * default to PIO Mode 0 before we figure out
1027 * the most suited mode for the attached device
1028 */
1029 if (port_ops && port_ops->set_pio_mode)
1030 port_ops->set_pio_mode(drive, 0);
1031 }
1038} 1032}
1039 1033
1040static void ide_init_port(ide_hwif_t *hwif, unsigned int port, 1034static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
@@ -1042,8 +1036,7 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
1042{ 1036{
1043 hwif->channel = port; 1037 hwif->channel = port;
1044 1038
1045 if (d->chipset) 1039 hwif->chipset = d->chipset ? d->chipset : ide_pci;
1046 hwif->chipset = d->chipset;
1047 1040
1048 if (d->init_iops) 1041 if (d->init_iops)
1049 d->init_iops(hwif); 1042 d->init_iops(hwif);
@@ -1124,16 +1117,19 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
1124 1117
1125 ide_port_for_each_dev(i, drive, hwif) { 1118 ide_port_for_each_dev(i, drive, hwif) {
1126 u8 j = (hwif->index * MAX_DRIVES) + i; 1119 u8 j = (hwif->index * MAX_DRIVES) + i;
1120 u16 *saved_id = drive->id;
1127 1121
1128 memset(drive, 0, sizeof(*drive)); 1122 memset(drive, 0, sizeof(*drive));
1123 memset(saved_id, 0, SECTOR_SIZE);
1124 drive->id = saved_id;
1129 1125
1130 drive->media = ide_disk; 1126 drive->media = ide_disk;
1131 drive->select = (i << 4) | ATA_DEVICE_OBS; 1127 drive->select = (i << 4) | ATA_DEVICE_OBS;
1132 drive->hwif = hwif; 1128 drive->hwif = hwif;
1133 drive->ready_stat = ATA_DRDY; 1129 drive->ready_stat = ATA_DRDY;
1134 drive->bad_wstat = BAD_W_STAT; 1130 drive->bad_wstat = BAD_W_STAT;
1135 drive->special.b.recalibrate = 1; 1131 drive->special_flags = IDE_SFLAG_RECALIBRATE |
1136 drive->special.b.set_geometry = 1; 1132 IDE_SFLAG_SET_GEOMETRY;
1137 drive->name[0] = 'h'; 1133 drive->name[0] = 'h';
1138 drive->name[1] = 'd'; 1134 drive->name[1] = 'd';
1139 drive->name[2] = 'a' + j; 1135 drive->name[2] = 'a' + j;
@@ -1168,11 +1164,10 @@ static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
1168 ide_port_init_devices_data(hwif); 1164 ide_port_init_devices_data(hwif);
1169} 1165}
1170 1166
1171static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw) 1167static void ide_init_port_hw(ide_hwif_t *hwif, struct ide_hw *hw)
1172{ 1168{
1173 memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports)); 1169 memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
1174 hwif->irq = hw->irq; 1170 hwif->irq = hw->irq;
1175 hwif->chipset = hw->chipset;
1176 hwif->dev = hw->dev; 1171 hwif->dev = hw->dev;
1177 hwif->gendev.parent = hw->parent ? hw->parent : hw->dev; 1172 hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
1178 hwif->ack_intr = hw->ack_intr; 1173 hwif->ack_intr = hw->ack_intr;
@@ -1233,8 +1228,10 @@ static void ide_port_free_devices(ide_hwif_t *hwif)
1233 ide_drive_t *drive; 1228 ide_drive_t *drive;
1234 int i; 1229 int i;
1235 1230
1236 ide_port_for_each_dev(i, drive, hwif) 1231 ide_port_for_each_dev(i, drive, hwif) {
1232 kfree(drive->id);
1237 kfree(drive); 1233 kfree(drive);
1234 }
1238} 1235}
1239 1236
1240static int ide_port_alloc_devices(ide_hwif_t *hwif, int node) 1237static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
@@ -1248,6 +1245,18 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
1248 if (drive == NULL) 1245 if (drive == NULL)
1249 goto out_nomem; 1246 goto out_nomem;
1250 1247
1248 /*
1249 * In order to keep things simple we have an id
1250 * block for all drives at all times. If the device
1251 * is pre ATA or refuses ATA/ATAPI identify we
1252 * will add faked data to this.
1253 *
1254 * Also note that 0 everywhere means "can't do X"
1255 */
1256 drive->id = kzalloc_node(SECTOR_SIZE, GFP_KERNEL, node);
1257 if (drive->id == NULL)
1258 goto out_nomem;
1259
1251 hwif->devices[i] = drive; 1260 hwif->devices[i] = drive;
1252 } 1261 }
1253 return 0; 1262 return 0;
@@ -1257,7 +1266,8 @@ out_nomem:
1257 return -ENOMEM; 1266 return -ENOMEM;
1258} 1267}
1259 1268
1260struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws) 1269struct ide_host *ide_host_alloc(const struct ide_port_info *d,
1270 struct ide_hw **hws, unsigned int n_ports)
1261{ 1271{
1262 struct ide_host *host; 1272 struct ide_host *host;
1263 struct device *dev = hws[0] ? hws[0]->dev : NULL; 1273 struct device *dev = hws[0] ? hws[0]->dev : NULL;
@@ -1268,7 +1278,7 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
1268 if (host == NULL) 1278 if (host == NULL)
1269 return NULL; 1279 return NULL;
1270 1280
1271 for (i = 0; i < MAX_HOST_PORTS; i++) { 1281 for (i = 0; i < n_ports; i++) {
1272 ide_hwif_t *hwif; 1282 ide_hwif_t *hwif;
1273 int idx; 1283 int idx;
1274 1284
@@ -1288,6 +1298,7 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
1288 if (idx < 0) { 1298 if (idx < 0) {
1289 printk(KERN_ERR "%s: no free slot for interface\n", 1299 printk(KERN_ERR "%s: no free slot for interface\n",
1290 d ? d->name : "ide"); 1300 d ? d->name : "ide");
1301 ide_port_free_devices(hwif);
1291 kfree(hwif); 1302 kfree(hwif);
1292 continue; 1303 continue;
1293 } 1304 }
@@ -1344,7 +1355,7 @@ static void ide_disable_port(ide_hwif_t *hwif)
1344} 1355}
1345 1356
1346int ide_host_register(struct ide_host *host, const struct ide_port_info *d, 1357int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
1347 hw_regs_t **hws) 1358 struct ide_hw **hws)
1348{ 1359{
1349 ide_hwif_t *hwif, *mate = NULL; 1360 ide_hwif_t *hwif, *mate = NULL;
1350 int i, j = 0; 1361 int i, j = 0;
@@ -1438,13 +1449,13 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
1438} 1449}
1439EXPORT_SYMBOL_GPL(ide_host_register); 1450EXPORT_SYMBOL_GPL(ide_host_register);
1440 1451
1441int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws, 1452int ide_host_add(const struct ide_port_info *d, struct ide_hw **hws,
1442 struct ide_host **hostp) 1453 unsigned int n_ports, struct ide_host **hostp)
1443{ 1454{
1444 struct ide_host *host; 1455 struct ide_host *host;
1445 int rc; 1456 int rc;
1446 1457
1447 host = ide_host_alloc(d, hws); 1458 host = ide_host_alloc(d, hws, n_ports);
1448 if (host == NULL) 1459 if (host == NULL)
1449 return -ENOMEM; 1460 return -ENOMEM;
1450 1461
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index d9764f0bc82f..4b447a8a49d4 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -240,18 +240,27 @@ static struct class *idetape_sysfs_class;
240 240
241static void ide_tape_release(struct device *); 241static void ide_tape_release(struct device *);
242 242
243static struct ide_tape_obj *ide_tape_get(struct gendisk *disk) 243static struct ide_tape_obj *idetape_devs[MAX_HWIFS * MAX_DRIVES];
244
245static struct ide_tape_obj *ide_tape_get(struct gendisk *disk, bool cdev,
246 unsigned int i)
244{ 247{
245 struct ide_tape_obj *tape = NULL; 248 struct ide_tape_obj *tape = NULL;
246 249
247 mutex_lock(&idetape_ref_mutex); 250 mutex_lock(&idetape_ref_mutex);
248 tape = ide_drv_g(disk, ide_tape_obj); 251
252 if (cdev)
253 tape = idetape_devs[i];
254 else
255 tape = ide_drv_g(disk, ide_tape_obj);
256
249 if (tape) { 257 if (tape) {
250 if (ide_device_get(tape->drive)) 258 if (ide_device_get(tape->drive))
251 tape = NULL; 259 tape = NULL;
252 else 260 else
253 get_device(&tape->dev); 261 get_device(&tape->dev);
254 } 262 }
263
255 mutex_unlock(&idetape_ref_mutex); 264 mutex_unlock(&idetape_ref_mutex);
256 return tape; 265 return tape;
257} 266}
@@ -267,24 +276,6 @@ static void ide_tape_put(struct ide_tape_obj *tape)
267} 276}
268 277
269/* 278/*
270 * The variables below are used for the character device interface. Additional
271 * state variables are defined in our ide_drive_t structure.
272 */
273static struct ide_tape_obj *idetape_devs[MAX_HWIFS * MAX_DRIVES];
274
275static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i)
276{
277 struct ide_tape_obj *tape = NULL;
278
279 mutex_lock(&idetape_ref_mutex);
280 tape = idetape_devs[i];
281 if (tape)
282 get_device(&tape->dev);
283 mutex_unlock(&idetape_ref_mutex);
284 return tape;
285}
286
287/*
288 * called on each failed packet command retry to analyze the request sense. We 279 * called on each failed packet command retry to analyze the request sense. We
289 * currently do not utilize this information. 280 * currently do not utilize this information.
290 */ 281 */
@@ -397,7 +388,8 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
397 if (readpos[0] & 0x4) { 388 if (readpos[0] & 0x4) {
398 printk(KERN_INFO "ide-tape: Block location is unknown" 389 printk(KERN_INFO "ide-tape: Block location is unknown"
399 "to the tape\n"); 390 "to the tape\n");
400 clear_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags); 391 clear_bit(ilog2(IDE_AFLAG_ADDRESS_VALID),
392 &drive->atapi_flags);
401 uptodate = 0; 393 uptodate = 0;
402 err = IDE_DRV_ERROR_GENERAL; 394 err = IDE_DRV_ERROR_GENERAL;
403 } else { 395 } else {
@@ -406,7 +398,8 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
406 398
407 tape->partition = readpos[1]; 399 tape->partition = readpos[1];
408 tape->first_frame = be32_to_cpup((__be32 *)&readpos[4]); 400 tape->first_frame = be32_to_cpup((__be32 *)&readpos[4]);
409 set_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags); 401 set_bit(ilog2(IDE_AFLAG_ADDRESS_VALID),
402 &drive->atapi_flags);
410 } 403 }
411 } 404 }
412 405
@@ -656,15 +649,15 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
656 649
657 if ((drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) == 0 && 650 if ((drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) == 0 &&
658 (rq->cmd[13] & REQ_IDETAPE_PC2) == 0) 651 (rq->cmd[13] & REQ_IDETAPE_PC2) == 0)
659 set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); 652 drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
660 653
661 if (drive->dev_flags & IDE_DFLAG_POST_RESET) { 654 if (drive->dev_flags & IDE_DFLAG_POST_RESET) {
662 set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); 655 drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
663 drive->dev_flags &= ~IDE_DFLAG_POST_RESET; 656 drive->dev_flags &= ~IDE_DFLAG_POST_RESET;
664 } 657 }
665 658
666 if (!test_and_clear_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags) && 659 if (!(drive->atapi_flags & IDE_AFLAG_IGNORE_DSC) &&
667 (stat & ATA_DSC) == 0) { 660 !(stat & ATA_DSC)) {
668 if (postponed_rq == NULL) { 661 if (postponed_rq == NULL) {
669 tape->dsc_polling_start = jiffies; 662 tape->dsc_polling_start = jiffies;
670 tape->dsc_poll_freq = tape->best_dsc_rw_freq; 663 tape->dsc_poll_freq = tape->best_dsc_rw_freq;
@@ -684,7 +677,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
684 tape->dsc_poll_freq = IDETAPE_DSC_MA_SLOW; 677 tape->dsc_poll_freq = IDETAPE_DSC_MA_SLOW;
685 idetape_postpone_request(drive); 678 idetape_postpone_request(drive);
686 return ide_stopped; 679 return ide_stopped;
687 } 680 } else
681 drive->atapi_flags &= ~IDE_AFLAG_IGNORE_DSC;
682
688 if (rq->cmd[13] & REQ_IDETAPE_READ) { 683 if (rq->cmd[13] & REQ_IDETAPE_READ) {
689 pc = &tape->queued_pc; 684 pc = &tape->queued_pc;
690 ide_tape_create_rw_cmd(tape, pc, rq, READ_6); 685 ide_tape_create_rw_cmd(tape, pc, rq, READ_6);
@@ -744,7 +739,7 @@ static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout)
744 int load_attempted = 0; 739 int load_attempted = 0;
745 740
746 /* Wait for the tape to become ready */ 741 /* Wait for the tape to become ready */
747 set_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags); 742 set_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT), &drive->atapi_flags);
748 timeout += jiffies; 743 timeout += jiffies;
749 while (time_before(jiffies, timeout)) { 744 while (time_before(jiffies, timeout)) {
750 if (ide_do_test_unit_ready(drive, disk) == 0) 745 if (ide_do_test_unit_ready(drive, disk) == 0)
@@ -820,7 +815,7 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive)
820 if (tape->chrdev_dir != IDETAPE_DIR_READ) 815 if (tape->chrdev_dir != IDETAPE_DIR_READ)
821 return; 816 return;
822 817
823 clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags); 818 clear_bit(ilog2(IDE_AFLAG_FILEMARK), &drive->atapi_flags);
824 tape->valid = 0; 819 tape->valid = 0;
825 if (tape->buf != NULL) { 820 if (tape->buf != NULL) {
826 kfree(tape->buf); 821 kfree(tape->buf);
@@ -1113,7 +1108,8 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
1113 1108
1114 if (tape->chrdev_dir == IDETAPE_DIR_READ) { 1109 if (tape->chrdev_dir == IDETAPE_DIR_READ) {
1115 tape->valid = 0; 1110 tape->valid = 0;
1116 if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) 1111 if (test_and_clear_bit(ilog2(IDE_AFLAG_FILEMARK),
1112 &drive->atapi_flags))
1117 ++count; 1113 ++count;
1118 ide_tape_discard_merge_buffer(drive, 0); 1114 ide_tape_discard_merge_buffer(drive, 0);
1119 } 1115 }
@@ -1168,7 +1164,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
1168 debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); 1164 debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
1169 1165
1170 if (tape->chrdev_dir != IDETAPE_DIR_READ) { 1166 if (tape->chrdev_dir != IDETAPE_DIR_READ) {
1171 if (test_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags)) 1167 if (test_bit(ilog2(IDE_AFLAG_DETECT_BS), &drive->atapi_flags))
1172 if (count > tape->blk_size && 1168 if (count > tape->blk_size &&
1173 (count % tape->blk_size) == 0) 1169 (count % tape->blk_size) == 0)
1174 tape->user_bs_factor = count / tape->blk_size; 1170 tape->user_bs_factor = count / tape->blk_size;
@@ -1184,7 +1180,8 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
1184 /* refill if staging buffer is empty */ 1180 /* refill if staging buffer is empty */
1185 if (!tape->valid) { 1181 if (!tape->valid) {
1186 /* If we are at a filemark, nothing more to read */ 1182 /* If we are at a filemark, nothing more to read */
1187 if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) 1183 if (test_bit(ilog2(IDE_AFLAG_FILEMARK),
1184 &drive->atapi_flags))
1188 break; 1185 break;
1189 /* read */ 1186 /* read */
1190 if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, 1187 if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ,
@@ -1202,7 +1199,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
1202 done += todo; 1199 done += todo;
1203 } 1200 }
1204 1201
1205 if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { 1202 if (!done && test_bit(ilog2(IDE_AFLAG_FILEMARK), &drive->atapi_flags)) {
1206 debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name); 1203 debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name);
1207 1204
1208 idetape_space_over_filemarks(drive, MTFSF, 1); 1205 idetape_space_over_filemarks(drive, MTFSF, 1);
@@ -1336,7 +1333,8 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
1336 ide_tape_discard_merge_buffer(drive, 0); 1333 ide_tape_discard_merge_buffer(drive, 0);
1337 retval = ide_do_start_stop(drive, disk, !IDETAPE_LU_LOAD_MASK); 1334 retval = ide_do_start_stop(drive, disk, !IDETAPE_LU_LOAD_MASK);
1338 if (!retval) 1335 if (!retval)
1339 clear_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags); 1336 clear_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT),
1337 &drive->atapi_flags);
1340 return retval; 1338 return retval;
1341 case MTNOP: 1339 case MTNOP:
1342 ide_tape_discard_merge_buffer(drive, 0); 1340 ide_tape_discard_merge_buffer(drive, 0);
@@ -1358,9 +1356,11 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
1358 mt_count % tape->blk_size) 1356 mt_count % tape->blk_size)
1359 return -EIO; 1357 return -EIO;
1360 tape->user_bs_factor = mt_count / tape->blk_size; 1358 tape->user_bs_factor = mt_count / tape->blk_size;
1361 clear_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags); 1359 clear_bit(ilog2(IDE_AFLAG_DETECT_BS),
1360 &drive->atapi_flags);
1362 } else 1361 } else
1363 set_bit(IDE_AFLAG_DETECT_BS, &drive->atapi_flags); 1362 set_bit(ilog2(IDE_AFLAG_DETECT_BS),
1363 &drive->atapi_flags);
1364 return 0; 1364 return 0;
1365 case MTSEEK: 1365 case MTSEEK:
1366 ide_tape_discard_merge_buffer(drive, 0); 1366 ide_tape_discard_merge_buffer(drive, 0);
@@ -1486,7 +1486,7 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
1486 return -ENXIO; 1486 return -ENXIO;
1487 1487
1488 lock_kernel(); 1488 lock_kernel();
1489 tape = ide_tape_chrdev_get(i); 1489 tape = ide_tape_get(NULL, true, i);
1490 if (!tape) { 1490 if (!tape) {
1491 unlock_kernel(); 1491 unlock_kernel();
1492 return -ENXIO; 1492 return -ENXIO;
@@ -1505,20 +1505,20 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
1505 1505
1506 filp->private_data = tape; 1506 filp->private_data = tape;
1507 1507
1508 if (test_and_set_bit(IDE_AFLAG_BUSY, &drive->atapi_flags)) { 1508 if (test_and_set_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags)) {
1509 retval = -EBUSY; 1509 retval = -EBUSY;
1510 goto out_put_tape; 1510 goto out_put_tape;
1511 } 1511 }
1512 1512
1513 retval = idetape_wait_ready(drive, 60 * HZ); 1513 retval = idetape_wait_ready(drive, 60 * HZ);
1514 if (retval) { 1514 if (retval) {
1515 clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags); 1515 clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
1516 printk(KERN_ERR "ide-tape: %s: drive not ready\n", tape->name); 1516 printk(KERN_ERR "ide-tape: %s: drive not ready\n", tape->name);
1517 goto out_put_tape; 1517 goto out_put_tape;
1518 } 1518 }
1519 1519
1520 idetape_read_position(drive); 1520 idetape_read_position(drive);
1521 if (!test_bit(IDE_AFLAG_ADDRESS_VALID, &drive->atapi_flags)) 1521 if (!test_bit(ilog2(IDE_AFLAG_ADDRESS_VALID), &drive->atapi_flags))
1522 (void)idetape_rewind_tape(drive); 1522 (void)idetape_rewind_tape(drive);
1523 1523
1524 /* Read block size and write protect status from drive. */ 1524 /* Read block size and write protect status from drive. */
@@ -1534,7 +1534,7 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp)
1534 if (tape->write_prot) { 1534 if (tape->write_prot) {
1535 if ((filp->f_flags & O_ACCMODE) == O_WRONLY || 1535 if ((filp->f_flags & O_ACCMODE) == O_WRONLY ||
1536 (filp->f_flags & O_ACCMODE) == O_RDWR) { 1536 (filp->f_flags & O_ACCMODE) == O_RDWR) {
1537 clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags); 1537 clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
1538 retval = -EROFS; 1538 retval = -EROFS;
1539 goto out_put_tape; 1539 goto out_put_tape;
1540 } 1540 }
@@ -1591,15 +1591,17 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp)
1591 ide_tape_discard_merge_buffer(drive, 1); 1591 ide_tape_discard_merge_buffer(drive, 1);
1592 } 1592 }
1593 1593
1594 if (minor < 128 && test_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags)) 1594 if (minor < 128 && test_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT),
1595 &drive->atapi_flags))
1595 (void) idetape_rewind_tape(drive); 1596 (void) idetape_rewind_tape(drive);
1597
1596 if (tape->chrdev_dir == IDETAPE_DIR_NONE) { 1598 if (tape->chrdev_dir == IDETAPE_DIR_NONE) {
1597 if (tape->door_locked == DOOR_LOCKED) { 1599 if (tape->door_locked == DOOR_LOCKED) {
1598 if (!ide_set_media_lock(drive, tape->disk, 0)) 1600 if (!ide_set_media_lock(drive, tape->disk, 0))
1599 tape->door_locked = DOOR_UNLOCKED; 1601 tape->door_locked = DOOR_UNLOCKED;
1600 } 1602 }
1601 } 1603 }
1602 clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags); 1604 clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
1603 ide_tape_put(tape); 1605 ide_tape_put(tape);
1604 unlock_kernel(); 1606 unlock_kernel();
1605 return 0; 1607 return 0;
@@ -1905,7 +1907,7 @@ static const struct file_operations idetape_fops = {
1905 1907
1906static int idetape_open(struct block_device *bdev, fmode_t mode) 1908static int idetape_open(struct block_device *bdev, fmode_t mode)
1907{ 1909{
1908 struct ide_tape_obj *tape = ide_tape_get(bdev->bd_disk); 1910 struct ide_tape_obj *tape = ide_tape_get(bdev->bd_disk, false, 0);
1909 1911
1910 if (!tape) 1912 if (!tape)
1911 return -ENXIO; 1913 return -ENXIO;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index a0c3e1b2f73c..75b85a8cd2d4 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -98,7 +98,6 @@ ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd)
98 if ((cmd->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) { 98 if ((cmd->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) {
99 ide_tf_dump(drive->name, cmd); 99 ide_tf_dump(drive->name, cmd);
100 tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS); 100 tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
101 SELECT_MASK(drive, 0);
102 101
103 if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) { 102 if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) {
104 u8 data[2] = { cmd->tf.data, cmd->hob.data }; 103 u8 data[2] = { cmd->tf.data, cmd->hob.data };
@@ -166,7 +165,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive)
166 if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) { 165 if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
167 if (custom && tf->command == ATA_CMD_SET_MULTI) { 166 if (custom && tf->command == ATA_CMD_SET_MULTI) {
168 drive->mult_req = drive->mult_count = 0; 167 drive->mult_req = drive->mult_count = 0;
169 drive->special.b.recalibrate = 1; 168 drive->special_flags |= IDE_SFLAG_RECALIBRATE;
170 (void)ide_dump_status(drive, __func__, stat); 169 (void)ide_dump_status(drive, __func__, stat);
171 return ide_stopped; 170 return ide_stopped;
172 } else if (custom && tf->command == ATA_CMD_INIT_DEV_PARAMS) { 171 } else if (custom && tf->command == ATA_CMD_INIT_DEV_PARAMS) {
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 92c9b90931e7..16d056939f9f 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -211,6 +211,11 @@ static unsigned int ide_noflush;
211module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0); 211module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0);
212MODULE_PARM_DESC(noflush, "disable flush requests for a device"); 212MODULE_PARM_DESC(noflush, "disable flush requests for a device");
213 213
214static unsigned int ide_nohpa;
215
216module_param_call(nohpa, ide_set_dev_param_mask, NULL, &ide_nohpa, 0);
217MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device");
218
214static unsigned int ide_noprobe; 219static unsigned int ide_noprobe;
215 220
216module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0); 221module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0);
@@ -281,6 +286,11 @@ static void ide_dev_apply_params(ide_drive_t *drive, u8 unit)
281 drive->name); 286 drive->name);
282 drive->dev_flags |= IDE_DFLAG_NOFLUSH; 287 drive->dev_flags |= IDE_DFLAG_NOFLUSH;
283 } 288 }
289 if (ide_nohpa & (1 << i)) {
290 printk(KERN_INFO "ide: disabling Host Protected Area for %s\n",
291 drive->name);
292 drive->dev_flags |= IDE_DFLAG_NOHPA;
293 }
284 if (ide_noprobe & (1 << i)) { 294 if (ide_noprobe & (1 << i)) {
285 printk(KERN_INFO "ide: skipping probe for %s\n", drive->name); 295 printk(KERN_INFO "ide: skipping probe for %s\n", drive->name);
286 drive->dev_flags |= IDE_DFLAG_NOPROBE; 296 drive->dev_flags |= IDE_DFLAG_NOPROBE;
diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c
index 051b4ab0f359..ee9b55ecc62b 100644
--- a/drivers/ide/ide_platform.c
+++ b/drivers/ide/ide_platform.c
@@ -21,7 +21,7 @@
21#include <linux/platform_device.h> 21#include <linux/platform_device.h>
22#include <linux/io.h> 22#include <linux/io.h>
23 23
24static void __devinit plat_ide_setup_ports(hw_regs_t *hw, 24static void __devinit plat_ide_setup_ports(struct ide_hw *hw,
25 void __iomem *base, 25 void __iomem *base,
26 void __iomem *ctrl, 26 void __iomem *ctrl,
27 struct pata_platform_info *pdata, 27 struct pata_platform_info *pdata,
@@ -40,12 +40,11 @@ static void __devinit plat_ide_setup_ports(hw_regs_t *hw,
40 hw->io_ports.ctl_addr = (unsigned long)ctrl; 40 hw->io_ports.ctl_addr = (unsigned long)ctrl;
41 41
42 hw->irq = irq; 42 hw->irq = irq;
43
44 hw->chipset = ide_generic;
45} 43}
46 44
47static const struct ide_port_info platform_ide_port_info = { 45static const struct ide_port_info platform_ide_port_info = {
48 .host_flags = IDE_HFLAG_NO_DMA, 46 .host_flags = IDE_HFLAG_NO_DMA,
47 .chipset = ide_generic,
49}; 48};
50 49
51static int __devinit plat_ide_probe(struct platform_device *pdev) 50static int __devinit plat_ide_probe(struct platform_device *pdev)
@@ -55,7 +54,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
55 struct pata_platform_info *pdata; 54 struct pata_platform_info *pdata;
56 struct ide_host *host; 55 struct ide_host *host;
57 int ret = 0, mmio = 0; 56 int ret = 0, mmio = 0;
58 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 57 struct ide_hw hw, *hws[] = { &hw };
59 struct ide_port_info d = platform_ide_port_info; 58 struct ide_port_info d = platform_ide_port_info;
60 59
61 pdata = pdev->dev.platform_data; 60 pdata = pdev->dev.platform_data;
@@ -99,7 +98,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
99 if (mmio) 98 if (mmio)
100 d.host_flags |= IDE_HFLAG_MMIO; 99 d.host_flags |= IDE_HFLAG_MMIO;
101 100
102 ret = ide_host_add(&d, hws, &host); 101 ret = ide_host_add(&d, hws, 1, &host);
103 if (ret) 102 if (ret)
104 goto out; 103 goto out;
105 104
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index 4b1718e83283..1447c8c90565 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -62,7 +62,7 @@ int macide_ack_intr(ide_hwif_t* hwif)
62 return 0; 62 return 0;
63} 63}
64 64
65static void __init macide_setup_ports(hw_regs_t *hw, unsigned long base, 65static void __init macide_setup_ports(struct ide_hw *hw, unsigned long base,
66 int irq, ide_ack_intr_t *ack_intr) 66 int irq, ide_ack_intr_t *ack_intr)
67{ 67{
68 int i; 68 int i;
@@ -76,13 +76,12 @@ static void __init macide_setup_ports(hw_regs_t *hw, unsigned long base,
76 76
77 hw->irq = irq; 77 hw->irq = irq;
78 hw->ack_intr = ack_intr; 78 hw->ack_intr = ack_intr;
79
80 hw->chipset = ide_generic;
81} 79}
82 80
83static const struct ide_port_info macide_port_info = { 81static const struct ide_port_info macide_port_info = {
84 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, 82 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
85 .irq_flags = IRQF_SHARED, 83 .irq_flags = IRQF_SHARED,
84 .chipset = ide_generic,
86}; 85};
87 86
88static const char *mac_ide_name[] = 87static const char *mac_ide_name[] =
@@ -97,7 +96,7 @@ static int __init macide_init(void)
97 ide_ack_intr_t *ack_intr; 96 ide_ack_intr_t *ack_intr;
98 unsigned long base; 97 unsigned long base;
99 int irq; 98 int irq;
100 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 99 struct ide_hw hw, *hws[] = { &hw };
101 100
102 if (!MACH_IS_MAC) 101 if (!MACH_IS_MAC)
103 return -ENODEV; 102 return -ENODEV;
@@ -127,7 +126,7 @@ static int __init macide_init(void)
127 126
128 macide_setup_ports(&hw, base, irq, ack_intr); 127 macide_setup_ports(&hw, base, irq, ack_intr);
129 128
130 return ide_host_add(&macide_port_info, hws, NULL); 129 return ide_host_add(&macide_port_info, hws, 1, NULL);
131} 130}
132 131
133module_init(macide_init); 132module_init(macide_init);
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index 09d813d313f4..3c1dc0152153 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -306,6 +306,7 @@ static struct ide_port_info __devinitdata palm_bk3710_port_info = {
306 .host_flags = IDE_HFLAG_MMIO, 306 .host_flags = IDE_HFLAG_MMIO,
307 .pio_mask = ATA_PIO4, 307 .pio_mask = ATA_PIO4,
308 .mwdma_mask = ATA_MWDMA2, 308 .mwdma_mask = ATA_MWDMA2,
309 .chipset = ide_palm3710,
309}; 310};
310 311
311static int __init palm_bk3710_probe(struct platform_device *pdev) 312static int __init palm_bk3710_probe(struct platform_device *pdev)
@@ -315,7 +316,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
315 void __iomem *base; 316 void __iomem *base;
316 unsigned long rate, mem_size; 317 unsigned long rate, mem_size;
317 int i, rc; 318 int i, rc;
318 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 319 struct ide_hw hw, *hws[] = { &hw };
319 320
320 clk = clk_get(&pdev->dev, "IDECLK"); 321 clk = clk_get(&pdev->dev, "IDECLK");
321 if (IS_ERR(clk)) 322 if (IS_ERR(clk))
@@ -363,13 +364,12 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
363 (base + IDE_PALM_ATA_PRI_CTL_OFFSET); 364 (base + IDE_PALM_ATA_PRI_CTL_OFFSET);
364 hw.irq = irq->start; 365 hw.irq = irq->start;
365 hw.dev = &pdev->dev; 366 hw.dev = &pdev->dev;
366 hw.chipset = ide_palm3710;
367 367
368 palm_bk3710_port_info.udma_mask = rate < 100000000 ? ATA_UDMA4 : 368 palm_bk3710_port_info.udma_mask = rate < 100000000 ? ATA_UDMA4 :
369 ATA_UDMA5; 369 ATA_UDMA5;
370 370
371 /* Register the IDE interface with Linux */ 371 /* Register the IDE interface with Linux */
372 rc = ide_host_add(&palm_bk3710_port_info, hws, NULL); 372 rc = ide_host_add(&palm_bk3710_port_info, hws, 1, NULL);
373 if (rc) 373 if (rc)
374 goto out; 374 goto out;
375 375
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
index b68906c3c17e..65ba8239e7b5 100644
--- a/drivers/ide/pdc202xx_new.c
+++ b/drivers/ide/pdc202xx_new.c
@@ -40,18 +40,6 @@
40#define DBG(fmt, args...) 40#define DBG(fmt, args...)
41#endif 41#endif
42 42
43static const char *pdc_quirk_drives[] = {
44 "QUANTUM FIREBALLlct08 08",
45 "QUANTUM FIREBALLP KA6.4",
46 "QUANTUM FIREBALLP KA9.1",
47 "QUANTUM FIREBALLP LM20.4",
48 "QUANTUM FIREBALLP KX13.6",
49 "QUANTUM FIREBALLP KX20.5",
50 "QUANTUM FIREBALLP KX27.3",
51 "QUANTUM FIREBALLP LM20.5",
52 NULL
53};
54
55static u8 max_dma_rate(struct pci_dev *pdev) 43static u8 max_dma_rate(struct pci_dev *pdev)
56{ 44{
57 u8 mode; 45 u8 mode;
@@ -200,19 +188,6 @@ static u8 pdcnew_cable_detect(ide_hwif_t *hwif)
200 return ATA_CBL_PATA80; 188 return ATA_CBL_PATA80;
201} 189}
202 190
203static void pdcnew_quirkproc(ide_drive_t *drive)
204{
205 const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
206
207 for (list = pdc_quirk_drives; *list != NULL; list++)
208 if (strstr(m, *list) != NULL) {
209 drive->quirk_list = 2;
210 return;
211 }
212
213 drive->quirk_list = 0;
214}
215
216static void pdcnew_reset(ide_drive_t *drive) 191static void pdcnew_reset(ide_drive_t *drive)
217{ 192{
218 /* 193 /*
@@ -473,7 +448,6 @@ static struct pci_dev * __devinit pdc20270_get_dev2(struct pci_dev *dev)
473static const struct ide_port_ops pdcnew_port_ops = { 448static const struct ide_port_ops pdcnew_port_ops = {
474 .set_pio_mode = pdcnew_set_pio_mode, 449 .set_pio_mode = pdcnew_set_pio_mode,
475 .set_dma_mode = pdcnew_set_dma_mode, 450 .set_dma_mode = pdcnew_set_dma_mode,
476 .quirkproc = pdcnew_quirkproc,
477 .resetproc = pdcnew_reset, 451 .resetproc = pdcnew_reset,
478 .cable_detect = pdcnew_cable_detect, 452 .cable_detect = pdcnew_cable_detect,
479}; 453};
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index e24ecc87a9b1..b6abf7e52cac 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -23,18 +23,6 @@
23 23
24#define PDC202XX_DEBUG_DRIVE_INFO 0 24#define PDC202XX_DEBUG_DRIVE_INFO 0
25 25
26static const char *pdc_quirk_drives[] = {
27 "QUANTUM FIREBALLlct08 08",
28 "QUANTUM FIREBALLP KA6.4",
29 "QUANTUM FIREBALLP KA9.1",
30 "QUANTUM FIREBALLP LM20.4",
31 "QUANTUM FIREBALLP KX13.6",
32 "QUANTUM FIREBALLP KX20.5",
33 "QUANTUM FIREBALLP KX27.3",
34 "QUANTUM FIREBALLP LM20.5",
35 NULL
36};
37
38static void pdc_old_disable_66MHz_clock(ide_hwif_t *); 26static void pdc_old_disable_66MHz_clock(ide_hwif_t *);
39 27
40static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) 28static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
@@ -151,19 +139,6 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *hwif)
151 outb(clock & ~(hwif->channel ? 0x08 : 0x02), clock_reg); 139 outb(clock & ~(hwif->channel ? 0x08 : 0x02), clock_reg);
152} 140}
153 141
154static void pdc202xx_quirkproc(ide_drive_t *drive)
155{
156 const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
157
158 for (list = pdc_quirk_drives; *list != NULL; list++)
159 if (strstr(m, *list) != NULL) {
160 drive->quirk_list = 2;
161 return;
162 }
163
164 drive->quirk_list = 0;
165}
166
167static void pdc202xx_dma_start(ide_drive_t *drive) 142static void pdc202xx_dma_start(ide_drive_t *drive)
168{ 143{
169 if (drive->current_speed > XFER_UDMA_2) 144 if (drive->current_speed > XFER_UDMA_2)
@@ -203,52 +178,6 @@ static int pdc202xx_dma_end(ide_drive_t *drive)
203 return ide_dma_end(drive); 178 return ide_dma_end(drive);
204} 179}
205 180
206static int pdc202xx_dma_test_irq(ide_drive_t *drive)
207{
208 ide_hwif_t *hwif = drive->hwif;
209 unsigned long high_16 = hwif->extra_base - 16;
210 u8 dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS);
211 u8 sc1d = inb(high_16 + 0x001d);
212
213 if (hwif->channel) {
214 /* bit7: Error, bit6: Interrupting, bit5: FIFO Full, bit4: FIFO Empty */
215 if ((sc1d & 0x50) == 0x50)
216 goto somebody_else;
217 else if ((sc1d & 0x40) == 0x40)
218 return (dma_stat & 4) == 4;
219 } else {
220 /* bit3: Error, bit2: Interrupting, bit1: FIFO Full, bit0: FIFO Empty */
221 if ((sc1d & 0x05) == 0x05)
222 goto somebody_else;
223 else if ((sc1d & 0x04) == 0x04)
224 return (dma_stat & 4) == 4;
225 }
226somebody_else:
227 return (dma_stat & 4) == 4; /* return 1 if INTR asserted */
228}
229
230static void pdc202xx_reset(ide_drive_t *drive)
231{
232 ide_hwif_t *hwif = drive->hwif;
233 unsigned long high_16 = hwif->extra_base - 16;
234 u8 udma_speed_flag = inb(high_16 | 0x001f);
235
236 printk(KERN_WARNING "PDC202xx: software reset...\n");
237
238 outb(udma_speed_flag | 0x10, high_16 | 0x001f);
239 mdelay(100);
240 outb(udma_speed_flag & ~0x10, high_16 | 0x001f);
241 mdelay(2000); /* 2 seconds ?! */
242
243 ide_set_max_pio(drive);
244}
245
246static void pdc202xx_dma_lost_irq(ide_drive_t *drive)
247{
248 pdc202xx_reset(drive);
249 ide_dma_lost_irq(drive);
250}
251
252static int init_chipset_pdc202xx(struct pci_dev *dev) 181static int init_chipset_pdc202xx(struct pci_dev *dev)
253{ 182{
254 unsigned long dmabase = pci_resource_start(dev, 4); 183 unsigned long dmabase = pci_resource_start(dev, 4);
@@ -302,37 +231,22 @@ static void __devinit pdc202ata4_fixup_irq(struct pci_dev *dev,
302static const struct ide_port_ops pdc20246_port_ops = { 231static const struct ide_port_ops pdc20246_port_ops = {
303 .set_pio_mode = pdc202xx_set_pio_mode, 232 .set_pio_mode = pdc202xx_set_pio_mode,
304 .set_dma_mode = pdc202xx_set_mode, 233 .set_dma_mode = pdc202xx_set_mode,
305 .quirkproc = pdc202xx_quirkproc,
306}; 234};
307 235
308static const struct ide_port_ops pdc2026x_port_ops = { 236static const struct ide_port_ops pdc2026x_port_ops = {
309 .set_pio_mode = pdc202xx_set_pio_mode, 237 .set_pio_mode = pdc202xx_set_pio_mode,
310 .set_dma_mode = pdc202xx_set_mode, 238 .set_dma_mode = pdc202xx_set_mode,
311 .quirkproc = pdc202xx_quirkproc,
312 .resetproc = pdc202xx_reset,
313 .cable_detect = pdc2026x_cable_detect, 239 .cable_detect = pdc2026x_cable_detect,
314}; 240};
315 241
316static const struct ide_dma_ops pdc20246_dma_ops = {
317 .dma_host_set = ide_dma_host_set,
318 .dma_setup = ide_dma_setup,
319 .dma_start = ide_dma_start,
320 .dma_end = ide_dma_end,
321 .dma_test_irq = pdc202xx_dma_test_irq,
322 .dma_lost_irq = ide_dma_lost_irq,
323 .dma_timer_expiry = ide_dma_sff_timer_expiry,
324 .dma_sff_read_status = ide_dma_sff_read_status,
325};
326
327static const struct ide_dma_ops pdc2026x_dma_ops = { 242static const struct ide_dma_ops pdc2026x_dma_ops = {
328 .dma_host_set = ide_dma_host_set, 243 .dma_host_set = ide_dma_host_set,
329 .dma_setup = ide_dma_setup, 244 .dma_setup = ide_dma_setup,
330 .dma_start = pdc202xx_dma_start, 245 .dma_start = pdc202xx_dma_start,
331 .dma_end = pdc202xx_dma_end, 246 .dma_end = pdc202xx_dma_end,
332 .dma_test_irq = pdc202xx_dma_test_irq, 247 .dma_test_irq = ide_dma_test_irq,
333 .dma_lost_irq = pdc202xx_dma_lost_irq, 248 .dma_lost_irq = ide_dma_lost_irq,
334 .dma_timer_expiry = ide_dma_sff_timer_expiry, 249 .dma_timer_expiry = ide_dma_sff_timer_expiry,
335 .dma_clear = pdc202xx_reset,
336 .dma_sff_read_status = ide_dma_sff_read_status, 250 .dma_sff_read_status = ide_dma_sff_read_status,
337}; 251};
338 252
@@ -354,7 +268,7 @@ static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
354 .name = DRV_NAME, 268 .name = DRV_NAME,
355 .init_chipset = init_chipset_pdc202xx, 269 .init_chipset = init_chipset_pdc202xx,
356 .port_ops = &pdc20246_port_ops, 270 .port_ops = &pdc20246_port_ops,
357 .dma_ops = &pdc20246_dma_ops, 271 .dma_ops = &sff_dma_ops,
358 .host_flags = IDE_HFLAGS_PDC202XX, 272 .host_flags = IDE_HFLAGS_PDC202XX,
359 .pio_mask = ATA_PIO4, 273 .pio_mask = ATA_PIO4,
360 .mwdma_mask = ATA_MWDMA2, 274 .mwdma_mask = ATA_MWDMA2,
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index f76e4e6b408f..97642a7a79c4 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1023,13 +1023,14 @@ static const struct ide_port_info pmac_port_info = {
1023 * Setup, register & probe an IDE channel driven by this driver, this is 1023 * Setup, register & probe an IDE channel driven by this driver, this is
1024 * called by one of the 2 probe functions (macio or PCI). 1024 * called by one of the 2 probe functions (macio or PCI).
1025 */ 1025 */
1026static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw) 1026static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif,
1027 struct ide_hw *hw)
1027{ 1028{
1028 struct device_node *np = pmif->node; 1029 struct device_node *np = pmif->node;
1029 const int *bidp; 1030 const int *bidp;
1030 struct ide_host *host; 1031 struct ide_host *host;
1031 ide_hwif_t *hwif; 1032 ide_hwif_t *hwif;
1032 hw_regs_t *hws[] = { hw, NULL, NULL, NULL }; 1033 struct ide_hw *hws[] = { hw };
1033 struct ide_port_info d = pmac_port_info; 1034 struct ide_port_info d = pmac_port_info;
1034 int rc; 1035 int rc;
1035 1036
@@ -1077,7 +1078,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
1077 /* Make sure we have sane timings */ 1078 /* Make sure we have sane timings */
1078 sanitize_timings(pmif); 1079 sanitize_timings(pmif);
1079 1080
1080 host = ide_host_alloc(&d, hws); 1081 host = ide_host_alloc(&d, hws, 1);
1081 if (host == NULL) 1082 if (host == NULL)
1082 return -ENOMEM; 1083 return -ENOMEM;
1083 hwif = host->ports[0]; 1084 hwif = host->ports[0];
@@ -1124,7 +1125,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
1124 return 0; 1125 return 0;
1125} 1126}
1126 1127
1127static void __devinit pmac_ide_init_ports(hw_regs_t *hw, unsigned long base) 1128static void __devinit pmac_ide_init_ports(struct ide_hw *hw, unsigned long base)
1128{ 1129{
1129 int i; 1130 int i;
1130 1131
@@ -1144,7 +1145,7 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match)
1144 unsigned long regbase; 1145 unsigned long regbase;
1145 pmac_ide_hwif_t *pmif; 1146 pmac_ide_hwif_t *pmif;
1146 int irq, rc; 1147 int irq, rc;
1147 hw_regs_t hw; 1148 struct ide_hw hw;
1148 1149
1149 pmif = kzalloc(sizeof(*pmif), GFP_KERNEL); 1150 pmif = kzalloc(sizeof(*pmif), GFP_KERNEL);
1150 if (pmif == NULL) 1151 if (pmif == NULL)
@@ -1268,7 +1269,7 @@ pmac_ide_pci_attach(struct pci_dev *pdev, const struct pci_device_id *id)
1268 void __iomem *base; 1269 void __iomem *base;
1269 unsigned long rbase, rlen; 1270 unsigned long rbase, rlen;
1270 int rc; 1271 int rc;
1271 hw_regs_t hw; 1272 struct ide_hw hw;
1272 1273
1273 np = pci_device_to_OF_node(pdev); 1274 np = pci_device_to_OF_node(pdev);
1274 if (np == NULL) { 1275 if (np == NULL) {
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index c79346679244..ab49a97023d9 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -51,11 +51,11 @@ static int q40ide_default_irq(unsigned long base)
51/* 51/*
52 * Addresses are pretranslated for Q40 ISA access. 52 * Addresses are pretranslated for Q40 ISA access.
53 */ 53 */
54static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base, 54static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base,
55 ide_ack_intr_t *ack_intr, 55 ide_ack_intr_t *ack_intr,
56 int irq) 56 int irq)
57{ 57{
58 memset(hw, 0, sizeof(hw_regs_t)); 58 memset(hw, 0, sizeof(*hw));
59 /* BIG FAT WARNING: 59 /* BIG FAT WARNING:
60 assumption: only DATA port is ever used in 16 bit mode */ 60 assumption: only DATA port is ever used in 16 bit mode */
61 hw->io_ports.data_addr = Q40_ISA_IO_W(base); 61 hw->io_ports.data_addr = Q40_ISA_IO_W(base);
@@ -70,8 +70,6 @@ static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base,
70 70
71 hw->irq = irq; 71 hw->irq = irq;
72 hw->ack_intr = ack_intr; 72 hw->ack_intr = ack_intr;
73
74 hw->chipset = ide_generic;
75} 73}
76 74
77static void q40ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, 75static void q40ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd,
@@ -119,6 +117,7 @@ static const struct ide_port_info q40ide_port_info = {
119 .tp_ops = &q40ide_tp_ops, 117 .tp_ops = &q40ide_tp_ops,
120 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, 118 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
121 .irq_flags = IRQF_SHARED, 119 .irq_flags = IRQF_SHARED,
120 .chipset = ide_generic,
122}; 121};
123 122
124/* 123/*
@@ -136,7 +135,7 @@ static const char *q40_ide_names[Q40IDE_NUM_HWIFS]={
136static int __init q40ide_init(void) 135static int __init q40ide_init(void)
137{ 136{
138 int i; 137 int i;
139 hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL }; 138 struct ide_hw hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL };
140 139
141 if (!MACH_IS_Q40) 140 if (!MACH_IS_Q40)
142 return -ENODEV; 141 return -ENODEV;
@@ -163,7 +162,7 @@ static int __init q40ide_init(void)
163 hws[i] = &hw[i]; 162 hws[i] = &hw[i];
164 } 163 }
165 164
166 return ide_host_add(&q40ide_port_info, hws, NULL); 165 return ide_host_add(&q40ide_port_info, hws, Q40IDE_NUM_HWIFS, NULL);
167} 166}
168 167
169module_init(q40ide_init); 168module_init(q40ide_init);
diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c
index d5003ca69801..00f54248f41f 100644
--- a/drivers/ide/rapide.c
+++ b/drivers/ide/rapide.c
@@ -13,9 +13,10 @@
13 13
14static const struct ide_port_info rapide_port_info = { 14static const struct ide_port_info rapide_port_info = {
15 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, 15 .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
16 .chipset = ide_generic,
16}; 17};
17 18
18static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base, 19static void rapide_setup_ports(struct ide_hw *hw, void __iomem *base,
19 void __iomem *ctrl, unsigned int sz, int irq) 20 void __iomem *ctrl, unsigned int sz, int irq)
20{ 21{
21 unsigned long port = (unsigned long)base; 22 unsigned long port = (unsigned long)base;
@@ -35,7 +36,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
35 void __iomem *base; 36 void __iomem *base;
36 struct ide_host *host; 37 struct ide_host *host;
37 int ret; 38 int ret;
38 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 39 struct ide_hw hw, *hws[] = { &hw };
39 40
40 ret = ecard_request_resources(ec); 41 ret = ecard_request_resources(ec);
41 if (ret) 42 if (ret)
@@ -49,10 +50,9 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
49 50
50 memset(&hw, 0, sizeof(hw)); 51 memset(&hw, 0, sizeof(hw));
51 rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq); 52 rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq);
52 hw.chipset = ide_generic;
53 hw.dev = &ec->dev; 53 hw.dev = &ec->dev;
54 54
55 ret = ide_host_add(&rapide_port_info, hws, &host); 55 ret = ide_host_add(&rapide_port_info, hws, 1, &host);
56 if (ret) 56 if (ret)
57 goto release; 57 goto release;
58 58
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 5be41f25204f..1104bb301eb9 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -559,7 +559,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
559{ 559{
560 struct scc_ports *ports = pci_get_drvdata(dev); 560 struct scc_ports *ports = pci_get_drvdata(dev);
561 struct ide_host *host; 561 struct ide_host *host;
562 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 562 struct ide_hw hw, *hws[] = { &hw };
563 int i, rc; 563 int i, rc;
564 564
565 memset(&hw, 0, sizeof(hw)); 565 memset(&hw, 0, sizeof(hw));
@@ -567,9 +567,8 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
567 hw.io_ports_array[i] = ports->dma + 0x20 + i * 4; 567 hw.io_ports_array[i] = ports->dma + 0x20 + i * 4;
568 hw.irq = dev->irq; 568 hw.irq = dev->irq;
569 hw.dev = &dev->dev; 569 hw.dev = &dev->dev;
570 hw.chipset = ide_pci;
571 570
572 rc = ide_host_add(d, hws, &host); 571 rc = ide_host_add(d, hws, 1, &host);
573 if (rc) 572 if (rc)
574 return rc; 573 return rc;
575 574
@@ -823,6 +822,7 @@ static const struct ide_port_info scc_chipset __devinitdata = {
823 .host_flags = IDE_HFLAG_SINGLE, 822 .host_flags = IDE_HFLAG_SINGLE,
824 .irq_flags = IRQF_SHARED, 823 .irq_flags = IRQF_SHARED,
825 .pio_mask = ATA_PIO4, 824 .pio_mask = ATA_PIO4,
825 .chipset = ide_pci,
826}; 826};
827 827
828/** 828/**
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 7a3a12d6e638..ab3db61d2ba0 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org> 2 * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
3 * Copyright (C) 1995-1998 Mark Lord 3 * Copyright (C) 1995-1998 Mark Lord
4 * Copyright (C) 2007 Bartlomiej Zolnierkiewicz 4 * Copyright (C) 2007-2009 Bartlomiej Zolnierkiewicz
5 * 5 *
6 * May be copied or modified under the terms of the GNU General Public License 6 * May be copied or modified under the terms of the GNU General Public License
7 */ 7 */
@@ -301,11 +301,11 @@ static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *
301} 301}
302 302
303/** 303/**
304 * ide_hw_configure - configure a hw_regs_t instance 304 * ide_hw_configure - configure a struct ide_hw instance
305 * @dev: PCI device holding interface 305 * @dev: PCI device holding interface
306 * @d: IDE port info 306 * @d: IDE port info
307 * @port: port number 307 * @port: port number
308 * @hw: hw_regs_t instance corresponding to this port 308 * @hw: struct ide_hw instance corresponding to this port
309 * 309 *
310 * Perform the initial set up for the hardware interface structure. This 310 * Perform the initial set up for the hardware interface structure. This
311 * is done per interface port rather than per PCI device. There may be 311 * is done per interface port rather than per PCI device. There may be
@@ -315,7 +315,7 @@ static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *
315 */ 315 */
316 316
317static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d, 317static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
318 unsigned int port, hw_regs_t *hw) 318 unsigned int port, struct ide_hw *hw)
319{ 319{
320 unsigned long ctl = 0, base = 0; 320 unsigned long ctl = 0, base = 0;
321 321
@@ -344,7 +344,6 @@ static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
344 344
345 memset(hw, 0, sizeof(*hw)); 345 memset(hw, 0, sizeof(*hw));
346 hw->dev = &dev->dev; 346 hw->dev = &dev->dev;
347 hw->chipset = d->chipset ? d->chipset : ide_pci;
348 ide_std_init_ports(hw, base, ctl | 2); 347 ide_std_init_ports(hw, base, ctl | 2);
349 348
350 return 0; 349 return 0;
@@ -446,8 +445,8 @@ out:
446 * ide_pci_setup_ports - configure ports/devices on PCI IDE 445 * ide_pci_setup_ports - configure ports/devices on PCI IDE
447 * @dev: PCI device 446 * @dev: PCI device
448 * @d: IDE port info 447 * @d: IDE port info
449 * @hw: hw_regs_t instances corresponding to this PCI IDE device 448 * @hw: struct ide_hw instances corresponding to this PCI IDE device
450 * @hws: hw_regs_t pointers table to update 449 * @hws: struct ide_hw pointers table to update
451 * 450 *
452 * Scan the interfaces attached to this device and do any 451 * Scan the interfaces attached to this device and do any
453 * necessary per port setup. Attach the devices and ask the 452 * necessary per port setup. Attach the devices and ask the
@@ -459,7 +458,7 @@ out:
459 */ 458 */
460 459
461void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d, 460void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d,
462 hw_regs_t *hw, hw_regs_t **hws) 461 struct ide_hw *hw, struct ide_hw **hws)
463{ 462{
464 int channels = (d->host_flags & IDE_HFLAG_SINGLE) ? 1 : 2, port; 463 int channels = (d->host_flags & IDE_HFLAG_SINGLE) ? 1 : 2, port;
465 u8 tmp; 464 u8 tmp;
@@ -535,61 +534,15 @@ out:
535 return ret; 534 return ret;
536} 535}
537 536
538int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
539 void *priv)
540{
541 struct ide_host *host;
542 hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
543 int ret;
544
545 ret = ide_setup_pci_controller(dev, d, 1);
546 if (ret < 0)
547 goto out;
548
549 ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
550
551 host = ide_host_alloc(d, hws);
552 if (host == NULL) {
553 ret = -ENOMEM;
554 goto out;
555 }
556
557 host->dev[0] = &dev->dev;
558
559 host->host_priv = priv;
560
561 host->irq_flags = IRQF_SHARED;
562
563 pci_set_drvdata(dev, host);
564
565 ret = do_ide_setup_pci_device(dev, d, 1);
566 if (ret < 0)
567 goto out;
568
569 /* fixup IRQ */
570 if (ide_pci_is_in_compatibility_mode(dev)) {
571 hw[0].irq = pci_get_legacy_ide_irq(dev, 0);
572 hw[1].irq = pci_get_legacy_ide_irq(dev, 1);
573 } else
574 hw[1].irq = hw[0].irq = ret;
575
576 ret = ide_host_register(host, d, hws);
577 if (ret)
578 ide_host_free(host);
579out:
580 return ret;
581}
582EXPORT_SYMBOL_GPL(ide_pci_init_one);
583
584int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, 537int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
585 const struct ide_port_info *d, void *priv) 538 const struct ide_port_info *d, void *priv)
586{ 539{
587 struct pci_dev *pdev[] = { dev1, dev2 }; 540 struct pci_dev *pdev[] = { dev1, dev2 };
588 struct ide_host *host; 541 struct ide_host *host;
589 int ret, i; 542 int ret, i, n_ports = dev2 ? 4 : 2;
590 hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL }; 543 struct ide_hw hw[4], *hws[] = { NULL, NULL, NULL, NULL };
591 544
592 for (i = 0; i < 2; i++) { 545 for (i = 0; i < n_ports / 2; i++) {
593 ret = ide_setup_pci_controller(pdev[i], d, !i); 546 ret = ide_setup_pci_controller(pdev[i], d, !i);
594 if (ret < 0) 547 if (ret < 0)
595 goto out; 548 goto out;
@@ -597,23 +550,24 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
597 ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]); 550 ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]);
598 } 551 }
599 552
600 host = ide_host_alloc(d, hws); 553 host = ide_host_alloc(d, hws, n_ports);
601 if (host == NULL) { 554 if (host == NULL) {
602 ret = -ENOMEM; 555 ret = -ENOMEM;
603 goto out; 556 goto out;
604 } 557 }
605 558
606 host->dev[0] = &dev1->dev; 559 host->dev[0] = &dev1->dev;
607 host->dev[1] = &dev2->dev; 560 if (dev2)
561 host->dev[1] = &dev2->dev;
608 562
609 host->host_priv = priv; 563 host->host_priv = priv;
610
611 host->irq_flags = IRQF_SHARED; 564 host->irq_flags = IRQF_SHARED;
612 565
613 pci_set_drvdata(pdev[0], host); 566 pci_set_drvdata(pdev[0], host);
614 pci_set_drvdata(pdev[1], host); 567 if (dev2)
568 pci_set_drvdata(pdev[1], host);
615 569
616 for (i = 0; i < 2; i++) { 570 for (i = 0; i < n_ports / 2; i++) {
617 ret = do_ide_setup_pci_device(pdev[i], d, !i); 571 ret = do_ide_setup_pci_device(pdev[i], d, !i);
618 572
619 /* 573 /*
@@ -639,6 +593,13 @@ out:
639} 593}
640EXPORT_SYMBOL_GPL(ide_pci_init_two); 594EXPORT_SYMBOL_GPL(ide_pci_init_two);
641 595
596int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
597 void *priv)
598{
599 return ide_pci_init_two(dev, NULL, d, priv);
600}
601EXPORT_SYMBOL_GPL(ide_pci_init_one);
602
642void ide_pci_remove(struct pci_dev *dev) 603void ide_pci_remove(struct pci_dev *dev)
643{ 604{
644 struct ide_host *host = pci_get_drvdata(dev); 605 struct ide_host *host = pci_get_drvdata(dev);
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index e5d2a48a84de..5f37f168f944 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -91,7 +91,7 @@ typedef struct {
91 91
92 92
93static void 93static void
94sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port, 94sgiioc4_init_hwif_ports(struct ide_hw *hw, unsigned long data_port,
95 unsigned long ctrl_port, unsigned long irq_port) 95 unsigned long ctrl_port, unsigned long irq_port)
96{ 96{
97 unsigned long reg = data_port; 97 unsigned long reg = data_port;
@@ -546,7 +546,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
546 unsigned long cmd_base, irqport; 546 unsigned long cmd_base, irqport;
547 unsigned long bar0, cmd_phys_base, ctl; 547 unsigned long bar0, cmd_phys_base, ctl;
548 void __iomem *virt_base; 548 void __iomem *virt_base;
549 hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; 549 struct ide_hw hw, *hws[] = { &hw };
550 int rc; 550 int rc;
551 551
552 /* Get the CmdBlk and CtrlBlk Base Registers */ 552 /* Get the CmdBlk and CtrlBlk Base Registers */
@@ -575,13 +575,12 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
575 memset(&hw, 0, sizeof(hw)); 575 memset(&hw, 0, sizeof(hw));
576 sgiioc4_init_hwif_ports(&hw, cmd_base, ctl, irqport); 576 sgiioc4_init_hwif_ports(&hw, cmd_base, ctl, irqport);
577 hw.irq = dev->irq; 577 hw.irq = dev->irq;
578 hw.chipset = ide_pci;
579 hw.dev = &dev->dev; 578 hw.dev = &dev->dev;
580 579
581 /* Initializing chipset IRQ Registers */ 580 /* Initializing chipset IRQ Registers */
582 writel(0x03, (void __iomem *)(irqport + IOC4_INTR_SET * 4)); 581 writel(0x03, (void __iomem *)(irqport + IOC4_INTR_SET * 4));
583 582
584 rc = ide_host_add(&sgiioc4_port_info, hws, NULL); 583 rc = ide_host_add(&sgiioc4_port_info, hws, 1, NULL);
585 if (!rc) 584 if (!rc)
586 return 0; 585 return 0;
587 586
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index e4973cd1fba9..bd82d228608c 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -451,8 +451,8 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
451static void sil_sata_pre_reset(ide_drive_t *drive) 451static void sil_sata_pre_reset(ide_drive_t *drive)
452{ 452{
453 if (drive->media == ide_disk) { 453 if (drive->media == ide_disk) {
454 drive->special.b.set_geometry = 0; 454 drive->special_flags &=
455 drive->special.b.recalibrate = 0; 455 ~(IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE);
456 } 456 }
457} 457}
458 458
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index b0a460625335..0924abff52ff 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -10,7 +10,7 @@
10 * with the timing registers setup. 10 * with the timing registers setup.
11 * -- Benjamin Herrenschmidt (01/11/03) benh@kernel.crashing.org 11 * -- Benjamin Herrenschmidt (01/11/03) benh@kernel.crashing.org
12 * 12 *
13 * Copyright (C) 2006-2007 MontaVista Software, Inc. <source@mvista.com> 13 * Copyright (C) 2006-2007,2009 MontaVista Software, Inc. <source@mvista.com>
14 * Copyright (C) 2007 Bartlomiej Zolnierkiewicz 14 * Copyright (C) 2007 Bartlomiej Zolnierkiewicz
15 */ 15 */
16 16
@@ -146,14 +146,15 @@ static void sl82c105_dma_lost_irq(ide_drive_t *drive)
146 u32 val, mask = hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA; 146 u32 val, mask = hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA;
147 u8 dma_cmd; 147 u8 dma_cmd;
148 148
149 printk("sl82c105: lost IRQ, resetting host\n"); 149 printk(KERN_WARNING "sl82c105: lost IRQ, resetting host\n");
150 150
151 /* 151 /*
152 * Check the raw interrupt from the drive. 152 * Check the raw interrupt from the drive.
153 */ 153 */
154 pci_read_config_dword(dev, 0x40, &val); 154 pci_read_config_dword(dev, 0x40, &val);
155 if (val & mask) 155 if (val & mask)
156 printk("sl82c105: drive was requesting IRQ, but host lost it\n"); 156 printk(KERN_INFO "sl82c105: drive was requesting IRQ, "
157 "but host lost it\n");
157 158
158 /* 159 /*
159 * Was DMA enabled? If so, disable it - we're resetting the 160 * Was DMA enabled? If so, disable it - we're resetting the
@@ -162,7 +163,7 @@ static void sl82c105_dma_lost_irq(ide_drive_t *drive)
162 dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD); 163 dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
163 if (dma_cmd & 1) { 164 if (dma_cmd & 1) {
164 outb(dma_cmd & ~1, hwif->dma_base + ATA_DMA_CMD); 165 outb(dma_cmd & ~1, hwif->dma_base + ATA_DMA_CMD);
165 printk("sl82c105: DMA was enabled\n"); 166 printk(KERN_INFO "sl82c105: DMA was enabled\n");
166 } 167 }
167 168
168 sl82c105_reset_host(dev); 169 sl82c105_reset_host(dev);
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index e33d764e2945..ea89fddeed91 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -130,8 +130,7 @@ static const struct ide_port_info tx4938ide_port_info __initdata = {
130 130
131static int __init tx4938ide_probe(struct platform_device *pdev) 131static int __init tx4938ide_probe(struct platform_device *pdev)
132{ 132{
133 hw_regs_t hw; 133 struct ide_hw hw, *hws[] = { &hw };
134 hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
135 struct ide_host *host; 134 struct ide_host *host;
136 struct resource *res; 135 struct resource *res;
137 struct tx4938ide_platform_info *pdata = pdev->dev.platform_data; 136 struct tx4938ide_platform_info *pdata = pdev->dev.platform_data;
@@ -183,7 +182,7 @@ static int __init tx4938ide_probe(struct platform_device *pdev)
183 tx4938ide_tune_ebusc(pdata->ebus_ch, pdata->gbus_clock, 0); 182 tx4938ide_tune_ebusc(pdata->ebus_ch, pdata->gbus_clock, 0);
184 else 183 else
185 d.port_ops = NULL; 184 d.port_ops = NULL;
186 ret = ide_host_add(&d, hws, &host); 185 ret = ide_host_add(&d, hws, 1, &host);
187 if (!ret) 186 if (!ret)
188 platform_set_drvdata(pdev, host); 187 platform_set_drvdata(pdev, host);
189 return ret; 188 return ret;
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 5ca76224f6d1..64b58ecc3f0e 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -537,8 +537,7 @@ static const struct ide_port_info tx4939ide_port_info __initdata = {
537 537
538static int __init tx4939ide_probe(struct platform_device *pdev) 538static int __init tx4939ide_probe(struct platform_device *pdev)
539{ 539{
540 hw_regs_t hw; 540 struct ide_hw hw, *hws[] = { &hw };
541 hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
542 struct ide_host *host; 541 struct ide_host *host;
543 struct resource *res; 542 struct resource *res;
544 int irq, ret; 543 int irq, ret;
@@ -581,7 +580,7 @@ static int __init tx4939ide_probe(struct platform_device *pdev)
581 hw.dev = &pdev->dev; 580 hw.dev = &pdev->dev;
582 581
583 pr_info("TX4939 IDE interface (base %#lx, irq %d)\n", mapbase, irq); 582 pr_info("TX4939 IDE interface (base %#lx, irq %d)\n", mapbase, irq);
584 host = ide_host_alloc(&tx4939ide_port_info, hws); 583 host = ide_host_alloc(&tx4939ide_port_info, hws, 1);
585 if (!host) 584 if (!host)
586 return -ENOMEM; 585 return -ENOMEM;
587 /* use extra_base for base address of the all registers */ 586 /* use extra_base for base address of the all registers */
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index a3d3cbab359a..0aaa0597a622 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,6 +1,6 @@
1config LGUEST 1config LGUEST
2 tristate "Linux hypervisor example code" 2 tristate "Linux hypervisor example code"
3 depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX 3 depends on X86_32 && EXPERIMENTAL && EVENTFD
4 select HVC_DRIVER 4 select HVC_DRIVER
5 ---help--- 5 ---help---
6 This is a very simple module which allows you to run 6 This is a very simple module which allows you to run
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 4845fb3cf74b..a6974e9b8ebf 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -95,7 +95,7 @@ static __init int map_switcher(void)
95 * array of struct pages. It increments that pointer, but we don't 95 * array of struct pages. It increments that pointer, but we don't
96 * care. */ 96 * care. */
97 pagep = switcher_page; 97 pagep = switcher_page;
98 err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep); 98 err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
99 if (err) { 99 if (err) {
100 printk("lguest: map_vm_area failed: %i\n", err); 100 printk("lguest: map_vm_area failed: %i\n", err);
101 goto free_vma; 101 goto free_vma;
@@ -188,6 +188,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
188{ 188{
189 /* We stop running once the Guest is dead. */ 189 /* We stop running once the Guest is dead. */
190 while (!cpu->lg->dead) { 190 while (!cpu->lg->dead) {
191 unsigned int irq;
192 bool more;
193
191 /* First we run any hypercalls the Guest wants done. */ 194 /* First we run any hypercalls the Guest wants done. */
192 if (cpu->hcall) 195 if (cpu->hcall)
193 do_hypercalls(cpu); 196 do_hypercalls(cpu);
@@ -195,23 +198,23 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
195 /* It's possible the Guest did a NOTIFY hypercall to the 198 /* It's possible the Guest did a NOTIFY hypercall to the
196 * Launcher, in which case we return from the read() now. */ 199 * Launcher, in which case we return from the read() now. */
197 if (cpu->pending_notify) { 200 if (cpu->pending_notify) {
198 if (put_user(cpu->pending_notify, user)) 201 if (!send_notify_to_eventfd(cpu)) {
199 return -EFAULT; 202 if (put_user(cpu->pending_notify, user))
200 return sizeof(cpu->pending_notify); 203 return -EFAULT;
204 return sizeof(cpu->pending_notify);
205 }
201 } 206 }
202 207
203 /* Check for signals */ 208 /* Check for signals */
204 if (signal_pending(current)) 209 if (signal_pending(current))
205 return -ERESTARTSYS; 210 return -ERESTARTSYS;
206 211
207 /* If Waker set break_out, return to Launcher. */
208 if (cpu->break_out)
209 return -EAGAIN;
210
211 /* Check if there are any interrupts which can be delivered now: 212 /* Check if there are any interrupts which can be delivered now:
212 * if so, this sets up the hander to be executed when we next 213 * if so, this sets up the hander to be executed when we next
213 * run the Guest. */ 214 * run the Guest. */
214 maybe_do_interrupt(cpu); 215 irq = interrupt_pending(cpu, &more);
216 if (irq < LGUEST_IRQS)
217 try_deliver_interrupt(cpu, irq, more);
215 218
216 /* All long-lived kernel loops need to check with this horrible 219 /* All long-lived kernel loops need to check with this horrible
217 * thing called the freezer. If the Host is trying to suspend, 220 * thing called the freezer. If the Host is trying to suspend,
@@ -224,10 +227,15 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
224 break; 227 break;
225 228
226 /* If the Guest asked to be stopped, we sleep. The Guest's 229 /* If the Guest asked to be stopped, we sleep. The Guest's
227 * clock timer or LHREQ_BREAK from the Waker will wake us. */ 230 * clock timer will wake us. */
228 if (cpu->halted) { 231 if (cpu->halted) {
229 set_current_state(TASK_INTERRUPTIBLE); 232 set_current_state(TASK_INTERRUPTIBLE);
230 schedule(); 233 /* Just before we sleep, make sure no interrupt snuck in
234 * which we should be doing. */
235 if (interrupt_pending(cpu, &more) < LGUEST_IRQS)
236 set_current_state(TASK_RUNNING);
237 else
238 schedule();
231 continue; 239 continue;
232 } 240 }
233 241
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 54d66f05fefa..c29ffa19cb74 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -37,6 +37,10 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
37 /* This call does nothing, except by breaking out of the Guest 37 /* This call does nothing, except by breaking out of the Guest
38 * it makes us process all the asynchronous hypercalls. */ 38 * it makes us process all the asynchronous hypercalls. */
39 break; 39 break;
40 case LHCALL_SEND_INTERRUPTS:
41 /* This call does nothing too, but by breaking out of the Guest
42 * it makes us process any pending interrupts. */
43 break;
40 case LHCALL_LGUEST_INIT: 44 case LHCALL_LGUEST_INIT:
41 /* You can't get here unless you're already initialized. Don't 45 /* You can't get here unless you're already initialized. Don't
42 * do that. */ 46 * do that. */
@@ -73,11 +77,21 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
73 guest_set_stack(cpu, args->arg1, args->arg2, args->arg3); 77 guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
74 break; 78 break;
75 case LHCALL_SET_PTE: 79 case LHCALL_SET_PTE:
80#ifdef CONFIG_X86_PAE
81 guest_set_pte(cpu, args->arg1, args->arg2,
82 __pte(args->arg3 | (u64)args->arg4 << 32));
83#else
76 guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3)); 84 guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
85#endif
86 break;
87 case LHCALL_SET_PGD:
88 guest_set_pgd(cpu->lg, args->arg1, args->arg2);
77 break; 89 break;
90#ifdef CONFIG_X86_PAE
78 case LHCALL_SET_PMD: 91 case LHCALL_SET_PMD:
79 guest_set_pmd(cpu->lg, args->arg1, args->arg2); 92 guest_set_pmd(cpu->lg, args->arg1, args->arg2);
80 break; 93 break;
94#endif
81 case LHCALL_SET_CLOCKEVENT: 95 case LHCALL_SET_CLOCKEVENT:
82 guest_set_clockevent(cpu, args->arg1); 96 guest_set_clockevent(cpu, args->arg1);
83 break; 97 break;
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 6e99adbe1946..0e9067b0d507 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -128,30 +128,39 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
128/*H:205 128/*H:205
129 * Virtual Interrupts. 129 * Virtual Interrupts.
130 * 130 *
131 * maybe_do_interrupt() gets called before every entry to the Guest, to see if 131 * interrupt_pending() returns the first pending interrupt which isn't blocked
132 * we should divert the Guest to running an interrupt handler. */ 132 * by the Guest. It is called before every entry to the Guest, and just before
133void maybe_do_interrupt(struct lg_cpu *cpu) 133 * we go to sleep when the Guest has halted itself. */
134unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
134{ 135{
135 unsigned int irq; 136 unsigned int irq;
136 DECLARE_BITMAP(blk, LGUEST_IRQS); 137 DECLARE_BITMAP(blk, LGUEST_IRQS);
137 struct desc_struct *idt;
138 138
139 /* If the Guest hasn't even initialized yet, we can do nothing. */ 139 /* If the Guest hasn't even initialized yet, we can do nothing. */
140 if (!cpu->lg->lguest_data) 140 if (!cpu->lg->lguest_data)
141 return; 141 return LGUEST_IRQS;
142 142
143 /* Take our "irqs_pending" array and remove any interrupts the Guest 143 /* Take our "irqs_pending" array and remove any interrupts the Guest
144 * wants blocked: the result ends up in "blk". */ 144 * wants blocked: the result ends up in "blk". */
145 if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, 145 if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
146 sizeof(blk))) 146 sizeof(blk)))
147 return; 147 return LGUEST_IRQS;
148 bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS); 148 bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS);
149 149
150 /* Find the first interrupt. */ 150 /* Find the first interrupt. */
151 irq = find_first_bit(blk, LGUEST_IRQS); 151 irq = find_first_bit(blk, LGUEST_IRQS);
152 /* None? Nothing to do */ 152 *more = find_next_bit(blk, LGUEST_IRQS, irq+1);
153 if (irq >= LGUEST_IRQS) 153
154 return; 154 return irq;
155}
156
157/* This actually diverts the Guest to running an interrupt handler, once an
158 * interrupt has been identified by interrupt_pending(). */
159void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
160{
161 struct desc_struct *idt;
162
163 BUG_ON(irq >= LGUEST_IRQS);
155 164
156 /* They may be in the middle of an iret, where they asked us never to 165 /* They may be in the middle of an iret, where they asked us never to
157 * deliver interrupts. */ 166 * deliver interrupts. */
@@ -170,8 +179,12 @@ void maybe_do_interrupt(struct lg_cpu *cpu)
170 u32 irq_enabled; 179 u32 irq_enabled;
171 if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled)) 180 if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
172 irq_enabled = 0; 181 irq_enabled = 0;
173 if (!irq_enabled) 182 if (!irq_enabled) {
183 /* Make sure they know an IRQ is pending. */
184 put_user(X86_EFLAGS_IF,
185 &cpu->lg->lguest_data->irq_pending);
174 return; 186 return;
187 }
175 } 188 }
176 189
177 /* Look at the IDT entry the Guest gave us for this interrupt. The 190 /* Look at the IDT entry the Guest gave us for this interrupt. The
@@ -194,6 +207,25 @@ void maybe_do_interrupt(struct lg_cpu *cpu)
194 * here is a compromise which means at least it gets updated every 207 * here is a compromise which means at least it gets updated every
195 * timer interrupt. */ 208 * timer interrupt. */
196 write_timestamp(cpu); 209 write_timestamp(cpu);
210
211 /* If there are no other interrupts we want to deliver, clear
212 * the pending flag. */
213 if (!more)
214 put_user(0, &cpu->lg->lguest_data->irq_pending);
215}
216
217/* And this is the routine when we want to set an interrupt for the Guest. */
218void set_interrupt(struct lg_cpu *cpu, unsigned int irq)
219{
220 /* Next time the Guest runs, the core code will see if it can deliver
221 * this interrupt. */
222 set_bit(irq, cpu->irqs_pending);
223
224 /* Make sure it sees it; it might be asleep (eg. halted), or
225 * running the Guest right now, in which case kick_process()
226 * will knock it out. */
227 if (!wake_up_process(cpu->tsk))
228 kick_process(cpu->tsk);
197} 229}
198/*:*/ 230/*:*/
199 231
@@ -510,10 +542,7 @@ static enum hrtimer_restart clockdev_fn(struct hrtimer *timer)
510 struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt); 542 struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt);
511 543
512 /* Remember the first interrupt is the timer interrupt. */ 544 /* Remember the first interrupt is the timer interrupt. */
513 set_bit(0, cpu->irqs_pending); 545 set_interrupt(cpu, 0);
514 /* If the Guest is actually stopped, we need to wake it up. */
515 if (cpu->halted)
516 wake_up_process(cpu->tsk);
517 return HRTIMER_NORESTART; 546 return HRTIMER_NORESTART;
518} 547}
519 548
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index af92a176697f..d4e8979735cb 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -49,7 +49,7 @@ struct lg_cpu {
49 u32 cr2; 49 u32 cr2;
50 int ts; 50 int ts;
51 u32 esp1; 51 u32 esp1;
52 u8 ss1; 52 u16 ss1;
53 53
54 /* Bitmap of what has changed: see CHANGED_* above. */ 54 /* Bitmap of what has changed: see CHANGED_* above. */
55 int changed; 55 int changed;
@@ -71,9 +71,7 @@ struct lg_cpu {
71 /* Virtual clock device */ 71 /* Virtual clock device */
72 struct hrtimer hrt; 72 struct hrtimer hrt;
73 73
74 /* Do we need to stop what we're doing and return to userspace? */ 74 /* Did the Guest tell us to halt? */
75 int break_out;
76 wait_queue_head_t break_wq;
77 int halted; 75 int halted;
78 76
79 /* Pending virtual interrupts */ 77 /* Pending virtual interrupts */
@@ -82,6 +80,16 @@ struct lg_cpu {
82 struct lg_cpu_arch arch; 80 struct lg_cpu_arch arch;
83}; 81};
84 82
83struct lg_eventfd {
84 unsigned long addr;
85 struct file *event;
86};
87
88struct lg_eventfd_map {
89 unsigned int num;
90 struct lg_eventfd map[];
91};
92
85/* The private info the thread maintains about the guest. */ 93/* The private info the thread maintains about the guest. */
86struct lguest 94struct lguest
87{ 95{
@@ -102,6 +110,8 @@ struct lguest
102 unsigned int stack_pages; 110 unsigned int stack_pages;
103 u32 tsc_khz; 111 u32 tsc_khz;
104 112
113 struct lg_eventfd_map *eventfds;
114
105 /* Dead? */ 115 /* Dead? */
106 const char *dead; 116 const char *dead;
107}; 117};
@@ -137,9 +147,13 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
137 * in the kernel. */ 147 * in the kernel. */
138#define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) 148#define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK)
139#define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) 149#define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT)
150#define pmd_flags(x) (pmd_val(x) & ~PAGE_MASK)
151#define pmd_pfn(x) (pmd_val(x) >> PAGE_SHIFT)
140 152
141/* interrupts_and_traps.c: */ 153/* interrupts_and_traps.c: */
142void maybe_do_interrupt(struct lg_cpu *cpu); 154unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
155void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more);
156void set_interrupt(struct lg_cpu *cpu, unsigned int irq);
143bool deliver_trap(struct lg_cpu *cpu, unsigned int num); 157bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
144void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i, 158void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
145 u32 low, u32 hi); 159 u32 low, u32 hi);
@@ -150,6 +164,7 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
150void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, 164void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
151 const unsigned long *def); 165 const unsigned long *def);
152void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta); 166void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
167bool send_notify_to_eventfd(struct lg_cpu *cpu);
153void init_clockdev(struct lg_cpu *cpu); 168void init_clockdev(struct lg_cpu *cpu);
154bool check_syscall_vector(struct lguest *lg); 169bool check_syscall_vector(struct lguest *lg);
155int init_interrupts(void); 170int init_interrupts(void);
@@ -168,7 +183,10 @@ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt);
168int init_guest_pagetable(struct lguest *lg); 183int init_guest_pagetable(struct lguest *lg);
169void free_guest_pagetable(struct lguest *lg); 184void free_guest_pagetable(struct lguest *lg);
170void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable); 185void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
186void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 i);
187#ifdef CONFIG_X86_PAE
171void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i); 188void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
189#endif
172void guest_pagetable_clear_all(struct lg_cpu *cpu); 190void guest_pagetable_clear_all(struct lg_cpu *cpu);
173void guest_pagetable_flush_user(struct lg_cpu *cpu); 191void guest_pagetable_flush_user(struct lg_cpu *cpu);
174void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, 192void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir,
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index df44d962626d..e082cdac88b4 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -228,7 +228,8 @@ extern void lguest_setup_irq(unsigned int irq);
228 * function. */ 228 * function. */
229static struct virtqueue *lg_find_vq(struct virtio_device *vdev, 229static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
230 unsigned index, 230 unsigned index,
231 void (*callback)(struct virtqueue *vq)) 231 void (*callback)(struct virtqueue *vq),
232 const char *name)
232{ 233{
233 struct lguest_device *ldev = to_lgdev(vdev); 234 struct lguest_device *ldev = to_lgdev(vdev);
234 struct lguest_vq_info *lvq; 235 struct lguest_vq_info *lvq;
@@ -263,7 +264,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
263 /* OK, tell virtio_ring.c to set up a virtqueue now we know its size 264 /* OK, tell virtio_ring.c to set up a virtqueue now we know its size
264 * and we've got a pointer to its pages. */ 265 * and we've got a pointer to its pages. */
265 vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, 266 vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
266 vdev, lvq->pages, lg_notify, callback); 267 vdev, lvq->pages, lg_notify, callback, name);
267 if (!vq) { 268 if (!vq) {
268 err = -ENOMEM; 269 err = -ENOMEM;
269 goto unmap; 270 goto unmap;
@@ -312,6 +313,38 @@ static void lg_del_vq(struct virtqueue *vq)
312 kfree(lvq); 313 kfree(lvq);
313} 314}
314 315
316static void lg_del_vqs(struct virtio_device *vdev)
317{
318 struct virtqueue *vq, *n;
319
320 list_for_each_entry_safe(vq, n, &vdev->vqs, list)
321 lg_del_vq(vq);
322}
323
324static int lg_find_vqs(struct virtio_device *vdev, unsigned nvqs,
325 struct virtqueue *vqs[],
326 vq_callback_t *callbacks[],
327 const char *names[])
328{
329 struct lguest_device *ldev = to_lgdev(vdev);
330 int i;
331
332 /* We must have this many virtqueues. */
333 if (nvqs > ldev->desc->num_vq)
334 return -ENOENT;
335
336 for (i = 0; i < nvqs; ++i) {
337 vqs[i] = lg_find_vq(vdev, i, callbacks[i], names[i]);
338 if (IS_ERR(vqs[i]))
339 goto error;
340 }
341 return 0;
342
343error:
344 lg_del_vqs(vdev);
345 return PTR_ERR(vqs[i]);
346}
347
315/* The ops structure which hooks everything together. */ 348/* The ops structure which hooks everything together. */
316static struct virtio_config_ops lguest_config_ops = { 349static struct virtio_config_ops lguest_config_ops = {
317 .get_features = lg_get_features, 350 .get_features = lg_get_features,
@@ -321,8 +354,8 @@ static struct virtio_config_ops lguest_config_ops = {
321 .get_status = lg_get_status, 354 .get_status = lg_get_status,
322 .set_status = lg_set_status, 355 .set_status = lg_set_status,
323 .reset = lg_reset, 356 .reset = lg_reset,
324 .find_vq = lg_find_vq, 357 .find_vqs = lg_find_vqs,
325 .del_vq = lg_del_vq, 358 .del_vqs = lg_del_vqs,
326}; 359};
327 360
328/* The root device for the lguest virtio devices. This makes them appear as 361/* The root device for the lguest virtio devices. This makes them appear as
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index b8ee103eed5f..32e297121058 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -7,32 +7,83 @@
7#include <linux/miscdevice.h> 7#include <linux/miscdevice.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/eventfd.h>
11#include <linux/file.h>
10#include "lg.h" 12#include "lg.h"
11 13
12/*L:055 When something happens, the Waker process needs a way to stop the 14bool send_notify_to_eventfd(struct lg_cpu *cpu)
13 * kernel running the Guest and return to the Launcher. So the Waker writes
14 * LHREQ_BREAK and the value "1" to /dev/lguest to do this. Once the Launcher
15 * has done whatever needs attention, it writes LHREQ_BREAK and "0" to release
16 * the Waker. */
17static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input)
18{ 15{
19 unsigned long on; 16 unsigned int i;
17 struct lg_eventfd_map *map;
18
19 /* lg->eventfds is RCU-protected */
20 rcu_read_lock();
21 map = rcu_dereference(cpu->lg->eventfds);
22 for (i = 0; i < map->num; i++) {
23 if (map->map[i].addr == cpu->pending_notify) {
24 eventfd_signal(map->map[i].event, 1);
25 cpu->pending_notify = 0;
26 break;
27 }
28 }
29 rcu_read_unlock();
30 return cpu->pending_notify == 0;
31}
20 32
21 /* Fetch whether they're turning break on or off. */ 33static int add_eventfd(struct lguest *lg, unsigned long addr, int fd)
22 if (get_user(on, input) != 0) 34{
23 return -EFAULT; 35 struct lg_eventfd_map *new, *old = lg->eventfds;
24 36
25 if (on) { 37 if (!addr)
26 cpu->break_out = 1; 38 return -EINVAL;
27 /* Pop it out of the Guest (may be running on different CPU) */ 39
28 wake_up_process(cpu->tsk); 40 /* Replace the old array with the new one, carefully: others can
29 /* Wait for them to reset it */ 41 * be accessing it at the same time */
30 return wait_event_interruptible(cpu->break_wq, !cpu->break_out); 42 new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1),
31 } else { 43 GFP_KERNEL);
32 cpu->break_out = 0; 44 if (!new)
33 wake_up(&cpu->break_wq); 45 return -ENOMEM;
34 return 0; 46
47 /* First make identical copy. */
48 memcpy(new->map, old->map, sizeof(old->map[0]) * old->num);
49 new->num = old->num;
50
51 /* Now append new entry. */
52 new->map[new->num].addr = addr;
53 new->map[new->num].event = eventfd_fget(fd);
54 if (IS_ERR(new->map[new->num].event)) {
55 kfree(new);
56 return PTR_ERR(new->map[new->num].event);
35 } 57 }
58 new->num++;
59
60 /* Now put new one in place. */
61 rcu_assign_pointer(lg->eventfds, new);
62
63 /* We're not in a big hurry. Wait until noone's looking at old
64 * version, then delete it. */
65 synchronize_rcu();
66 kfree(old);
67
68 return 0;
69}
70
71static int attach_eventfd(struct lguest *lg, const unsigned long __user *input)
72{
73 unsigned long addr, fd;
74 int err;
75
76 if (get_user(addr, input) != 0)
77 return -EFAULT;
78 input++;
79 if (get_user(fd, input) != 0)
80 return -EFAULT;
81
82 mutex_lock(&lguest_lock);
83 err = add_eventfd(lg, addr, fd);
84 mutex_unlock(&lguest_lock);
85
86 return 0;
36} 87}
37 88
38/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt 89/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
@@ -45,9 +96,8 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input)
45 return -EFAULT; 96 return -EFAULT;
46 if (irq >= LGUEST_IRQS) 97 if (irq >= LGUEST_IRQS)
47 return -EINVAL; 98 return -EINVAL;
48 /* Next time the Guest runs, the core code will see if it can deliver 99
49 * this interrupt. */ 100 set_interrupt(cpu, irq);
50 set_bit(irq, cpu->irqs_pending);
51 return 0; 101 return 0;
52} 102}
53 103
@@ -126,9 +176,6 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
126 * address. */ 176 * address. */
127 lguest_arch_setup_regs(cpu, start_ip); 177 lguest_arch_setup_regs(cpu, start_ip);
128 178
129 /* Initialize the queue for the Waker to wait on */
130 init_waitqueue_head(&cpu->break_wq);
131
132 /* We keep a pointer to the Launcher task (ie. current task) for when 179 /* We keep a pointer to the Launcher task (ie. current task) for when
133 * other Guests want to wake this one (eg. console input). */ 180 * other Guests want to wake this one (eg. console input). */
134 cpu->tsk = current; 181 cpu->tsk = current;
@@ -185,6 +232,13 @@ static int initialize(struct file *file, const unsigned long __user *input)
185 goto unlock; 232 goto unlock;
186 } 233 }
187 234
235 lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL);
236 if (!lg->eventfds) {
237 err = -ENOMEM;
238 goto free_lg;
239 }
240 lg->eventfds->num = 0;
241
188 /* Populate the easy fields of our "struct lguest" */ 242 /* Populate the easy fields of our "struct lguest" */
189 lg->mem_base = (void __user *)args[0]; 243 lg->mem_base = (void __user *)args[0];
190 lg->pfn_limit = args[1]; 244 lg->pfn_limit = args[1];
@@ -192,7 +246,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
192 /* This is the first cpu (cpu 0) and it will start booting at args[2] */ 246 /* This is the first cpu (cpu 0) and it will start booting at args[2] */
193 err = lg_cpu_start(&lg->cpus[0], 0, args[2]); 247 err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
194 if (err) 248 if (err)
195 goto release_guest; 249 goto free_eventfds;
196 250
197 /* Initialize the Guest's shadow page tables, using the toplevel 251 /* Initialize the Guest's shadow page tables, using the toplevel
198 * address the Launcher gave us. This allocates memory, so can fail. */ 252 * address the Launcher gave us. This allocates memory, so can fail. */
@@ -211,7 +265,9 @@ static int initialize(struct file *file, const unsigned long __user *input)
211free_regs: 265free_regs:
212 /* FIXME: This should be in free_vcpu */ 266 /* FIXME: This should be in free_vcpu */
213 free_page(lg->cpus[0].regs_page); 267 free_page(lg->cpus[0].regs_page);
214release_guest: 268free_eventfds:
269 kfree(lg->eventfds);
270free_lg:
215 kfree(lg); 271 kfree(lg);
216unlock: 272unlock:
217 mutex_unlock(&lguest_lock); 273 mutex_unlock(&lguest_lock);
@@ -252,11 +308,6 @@ static ssize_t write(struct file *file, const char __user *in,
252 /* Once the Guest is dead, you can only read() why it died. */ 308 /* Once the Guest is dead, you can only read() why it died. */
253 if (lg->dead) 309 if (lg->dead)
254 return -ENOENT; 310 return -ENOENT;
255
256 /* If you're not the task which owns the Guest, all you can do
257 * is break the Launcher out of running the Guest. */
258 if (current != cpu->tsk && req != LHREQ_BREAK)
259 return -EPERM;
260 } 311 }
261 312
262 switch (req) { 313 switch (req) {
@@ -264,8 +315,8 @@ static ssize_t write(struct file *file, const char __user *in,
264 return initialize(file, input); 315 return initialize(file, input);
265 case LHREQ_IRQ: 316 case LHREQ_IRQ:
266 return user_send_irq(cpu, input); 317 return user_send_irq(cpu, input);
267 case LHREQ_BREAK: 318 case LHREQ_EVENTFD:
268 return break_guest_out(cpu, input); 319 return attach_eventfd(lg, input);
269 default: 320 default:
270 return -EINVAL; 321 return -EINVAL;
271 } 322 }
@@ -303,6 +354,12 @@ static int close(struct inode *inode, struct file *file)
303 * the Launcher's memory management structure. */ 354 * the Launcher's memory management structure. */
304 mmput(lg->cpus[i].mm); 355 mmput(lg->cpus[i].mm);
305 } 356 }
357
358 /* Release any eventfds they registered. */
359 for (i = 0; i < lg->eventfds->num; i++)
360 fput(lg->eventfds->map[i].event);
361 kfree(lg->eventfds);
362
306 /* If lg->dead doesn't contain an error code it will be NULL or a 363 /* If lg->dead doesn't contain an error code it will be NULL or a
307 * kmalloc()ed string, either of which is ok to hand to kfree(). */ 364 * kmalloc()ed string, either of which is ok to hand to kfree(). */
308 if (!IS_ERR(lg->dead)) 365 if (!IS_ERR(lg->dead))
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index a059cf9980f7..a6fe1abda240 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -53,6 +53,17 @@
53 * page. */ 53 * page. */
54#define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1) 54#define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1)
55 55
56/* For PAE we need the PMD index as well. We use the last 2MB, so we
57 * will need the last pmd entry of the last pmd page. */
58#ifdef CONFIG_X86_PAE
59#define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1)
60#define RESERVE_MEM 2U
61#define CHECK_GPGD_MASK _PAGE_PRESENT
62#else
63#define RESERVE_MEM 4U
64#define CHECK_GPGD_MASK _PAGE_TABLE
65#endif
66
56/* We actually need a separate PTE page for each CPU. Remember that after the 67/* We actually need a separate PTE page for each CPU. Remember that after the
57 * Switcher code itself comes two pages for each CPU, and we don't want this 68 * Switcher code itself comes two pages for each CPU, and we don't want this
58 * CPU's guest to see the pages of any other CPU. */ 69 * CPU's guest to see the pages of any other CPU. */
@@ -73,24 +84,59 @@ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
73{ 84{
74 unsigned int index = pgd_index(vaddr); 85 unsigned int index = pgd_index(vaddr);
75 86
87#ifndef CONFIG_X86_PAE
76 /* We kill any Guest trying to touch the Switcher addresses. */ 88 /* We kill any Guest trying to touch the Switcher addresses. */
77 if (index >= SWITCHER_PGD_INDEX) { 89 if (index >= SWITCHER_PGD_INDEX) {
78 kill_guest(cpu, "attempt to access switcher pages"); 90 kill_guest(cpu, "attempt to access switcher pages");
79 index = 0; 91 index = 0;
80 } 92 }
93#endif
81 /* Return a pointer index'th pgd entry for the i'th page table. */ 94 /* Return a pointer index'th pgd entry for the i'th page table. */
82 return &cpu->lg->pgdirs[i].pgdir[index]; 95 return &cpu->lg->pgdirs[i].pgdir[index];
83} 96}
84 97
98#ifdef CONFIG_X86_PAE
99/* This routine then takes the PGD entry given above, which contains the
100 * address of the PMD page. It then returns a pointer to the PMD entry for the
101 * given address. */
102static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
103{
104 unsigned int index = pmd_index(vaddr);
105 pmd_t *page;
106
107 /* We kill any Guest trying to touch the Switcher addresses. */
108 if (pgd_index(vaddr) == SWITCHER_PGD_INDEX &&
109 index >= SWITCHER_PMD_INDEX) {
110 kill_guest(cpu, "attempt to access switcher pages");
111 index = 0;
112 }
113
114 /* You should never call this if the PGD entry wasn't valid */
115 BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
116 page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
117
118 return &page[index];
119}
120#endif
121
85/* This routine then takes the page directory entry returned above, which 122/* This routine then takes the page directory entry returned above, which
86 * contains the address of the page table entry (PTE) page. It then returns a 123 * contains the address of the page table entry (PTE) page. It then returns a
87 * pointer to the PTE entry for the given address. */ 124 * pointer to the PTE entry for the given address. */
88static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr) 125static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
89{ 126{
127#ifdef CONFIG_X86_PAE
128 pmd_t *pmd = spmd_addr(cpu, spgd, vaddr);
129 pte_t *page = __va(pmd_pfn(*pmd) << PAGE_SHIFT);
130
131 /* You should never call this if the PMD entry wasn't valid */
132 BUG_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT));
133#else
90 pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT); 134 pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
91 /* You should never call this if the PGD entry wasn't valid */ 135 /* You should never call this if the PGD entry wasn't valid */
92 BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT)); 136 BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
93 return &page[(vaddr >> PAGE_SHIFT) % PTRS_PER_PTE]; 137#endif
138
139 return &page[pte_index(vaddr)];
94} 140}
95 141
96/* These two functions just like the above two, except they access the Guest 142/* These two functions just like the above two, except they access the Guest
@@ -101,12 +147,32 @@ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
101 return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t); 147 return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
102} 148}
103 149
104static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr) 150#ifdef CONFIG_X86_PAE
151static unsigned long gpmd_addr(pgd_t gpgd, unsigned long vaddr)
152{
153 unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
154 BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
155 return gpage + pmd_index(vaddr) * sizeof(pmd_t);
156}
157
158static unsigned long gpte_addr(struct lg_cpu *cpu,
159 pmd_t gpmd, unsigned long vaddr)
160{
161 unsigned long gpage = pmd_pfn(gpmd) << PAGE_SHIFT;
162
163 BUG_ON(!(pmd_flags(gpmd) & _PAGE_PRESENT));
164 return gpage + pte_index(vaddr) * sizeof(pte_t);
165}
166#else
167static unsigned long gpte_addr(struct lg_cpu *cpu,
168 pgd_t gpgd, unsigned long vaddr)
105{ 169{
106 unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; 170 unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
171
107 BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); 172 BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
108 return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t); 173 return gpage + pte_index(vaddr) * sizeof(pte_t);
109} 174}
175#endif
110/*:*/ 176/*:*/
111 177
112/*M:014 get_pfn is slow: we could probably try to grab batches of pages here as 178/*M:014 get_pfn is slow: we could probably try to grab batches of pages here as
@@ -171,7 +237,7 @@ static void release_pte(pte_t pte)
171 /* Remember that get_user_pages_fast() took a reference to the page, in 237 /* Remember that get_user_pages_fast() took a reference to the page, in
172 * get_pfn()? We have to put it back now. */ 238 * get_pfn()? We have to put it back now. */
173 if (pte_flags(pte) & _PAGE_PRESENT) 239 if (pte_flags(pte) & _PAGE_PRESENT)
174 put_page(pfn_to_page(pte_pfn(pte))); 240 put_page(pte_page(pte));
175} 241}
176/*:*/ 242/*:*/
177 243
@@ -184,11 +250,20 @@ static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
184 250
185static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd) 251static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
186{ 252{
187 if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || 253 if ((pgd_flags(gpgd) & ~CHECK_GPGD_MASK) ||
188 (pgd_pfn(gpgd) >= cpu->lg->pfn_limit)) 254 (pgd_pfn(gpgd) >= cpu->lg->pfn_limit))
189 kill_guest(cpu, "bad page directory entry"); 255 kill_guest(cpu, "bad page directory entry");
190} 256}
191 257
258#ifdef CONFIG_X86_PAE
259static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
260{
261 if ((pmd_flags(gpmd) & ~_PAGE_TABLE) ||
262 (pmd_pfn(gpmd) >= cpu->lg->pfn_limit))
263 kill_guest(cpu, "bad page middle directory entry");
264}
265#endif
266
192/*H:330 267/*H:330
193 * (i) Looking up a page table entry when the Guest faults. 268 * (i) Looking up a page table entry when the Guest faults.
194 * 269 *
@@ -207,6 +282,11 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
207 pte_t gpte; 282 pte_t gpte;
208 pte_t *spte; 283 pte_t *spte;
209 284
285#ifdef CONFIG_X86_PAE
286 pmd_t *spmd;
287 pmd_t gpmd;
288#endif
289
210 /* First step: get the top-level Guest page table entry. */ 290 /* First step: get the top-level Guest page table entry. */
211 gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t); 291 gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
212 /* Toplevel not present? We can't map it in. */ 292 /* Toplevel not present? We can't map it in. */
@@ -228,12 +308,45 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
228 check_gpgd(cpu, gpgd); 308 check_gpgd(cpu, gpgd);
229 /* And we copy the flags to the shadow PGD entry. The page 309 /* And we copy the flags to the shadow PGD entry. The page
230 * number in the shadow PGD is the page we just allocated. */ 310 * number in the shadow PGD is the page we just allocated. */
231 *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd)); 311 set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd)));
232 } 312 }
233 313
314#ifdef CONFIG_X86_PAE
315 gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
316 /* middle level not present? We can't map it in. */
317 if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
318 return false;
319
320 /* Now look at the matching shadow entry. */
321 spmd = spmd_addr(cpu, *spgd, vaddr);
322
323 if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
324 /* No shadow entry: allocate a new shadow PTE page. */
325 unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
326
327 /* This is not really the Guest's fault, but killing it is
328 * simple for this corner case. */
329 if (!ptepage) {
330 kill_guest(cpu, "out of memory allocating pte page");
331 return false;
332 }
333
334 /* We check that the Guest pmd is OK. */
335 check_gpmd(cpu, gpmd);
336
337 /* And we copy the flags to the shadow PMD entry. The page
338 * number in the shadow PMD is the page we just allocated. */
339 native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
340 }
341
342 /* OK, now we look at the lower level in the Guest page table: keep its
343 * address, because we might update it later. */
344 gpte_ptr = gpte_addr(cpu, gpmd, vaddr);
345#else
234 /* OK, now we look at the lower level in the Guest page table: keep its 346 /* OK, now we look at the lower level in the Guest page table: keep its
235 * address, because we might update it later. */ 347 * address, because we might update it later. */
236 gpte_ptr = gpte_addr(gpgd, vaddr); 348 gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
349#endif
237 gpte = lgread(cpu, gpte_ptr, pte_t); 350 gpte = lgread(cpu, gpte_ptr, pte_t);
238 351
239 /* If this page isn't in the Guest page tables, we can't page it in. */ 352 /* If this page isn't in the Guest page tables, we can't page it in. */
@@ -259,7 +372,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
259 gpte = pte_mkdirty(gpte); 372 gpte = pte_mkdirty(gpte);
260 373
261 /* Get the pointer to the shadow PTE entry we're going to set. */ 374 /* Get the pointer to the shadow PTE entry we're going to set. */
262 spte = spte_addr(*spgd, vaddr); 375 spte = spte_addr(cpu, *spgd, vaddr);
263 /* If there was a valid shadow PTE entry here before, we release it. 376 /* If there was a valid shadow PTE entry here before, we release it.
264 * This can happen with a write to a previously read-only entry. */ 377 * This can happen with a write to a previously read-only entry. */
265 release_pte(*spte); 378 release_pte(*spte);
@@ -273,7 +386,7 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
273 * table entry, even if the Guest says it's writable. That way 386 * table entry, even if the Guest says it's writable. That way
274 * we will come back here when a write does actually occur, so 387 * we will come back here when a write does actually occur, so
275 * we can update the Guest's _PAGE_DIRTY flag. */ 388 * we can update the Guest's _PAGE_DIRTY flag. */
276 *spte = gpte_to_spte(cpu, pte_wrprotect(gpte), 0); 389 native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0));
277 390
278 /* Finally, we write the Guest PTE entry back: we've set the 391 /* Finally, we write the Guest PTE entry back: we've set the
279 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ 392 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
@@ -301,14 +414,23 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr)
301 pgd_t *spgd; 414 pgd_t *spgd;
302 unsigned long flags; 415 unsigned long flags;
303 416
417#ifdef CONFIG_X86_PAE
418 pmd_t *spmd;
419#endif
304 /* Look at the current top level entry: is it present? */ 420 /* Look at the current top level entry: is it present? */
305 spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); 421 spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
306 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) 422 if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
307 return false; 423 return false;
308 424
425#ifdef CONFIG_X86_PAE
426 spmd = spmd_addr(cpu, *spgd, vaddr);
427 if (!(pmd_flags(*spmd) & _PAGE_PRESENT))
428 return false;
429#endif
430
309 /* Check the flags on the pte entry itself: it must be present and 431 /* Check the flags on the pte entry itself: it must be present and
310 * writable. */ 432 * writable. */
311 flags = pte_flags(*(spte_addr(*spgd, vaddr))); 433 flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr)));
312 434
313 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); 435 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
314} 436}
@@ -322,8 +444,43 @@ void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
322 kill_guest(cpu, "bad stack page %#lx", vaddr); 444 kill_guest(cpu, "bad stack page %#lx", vaddr);
323} 445}
324 446
447#ifdef CONFIG_X86_PAE
448static void release_pmd(pmd_t *spmd)
449{
450 /* If the entry's not present, there's nothing to release. */
451 if (pmd_flags(*spmd) & _PAGE_PRESENT) {
452 unsigned int i;
453 pte_t *ptepage = __va(pmd_pfn(*spmd) << PAGE_SHIFT);
454 /* For each entry in the page, we might need to release it. */
455 for (i = 0; i < PTRS_PER_PTE; i++)
456 release_pte(ptepage[i]);
457 /* Now we can free the page of PTEs */
458 free_page((long)ptepage);
459 /* And zero out the PMD entry so we never release it twice. */
460 native_set_pmd(spmd, __pmd(0));
461 }
462}
463
464static void release_pgd(pgd_t *spgd)
465{
466 /* If the entry's not present, there's nothing to release. */
467 if (pgd_flags(*spgd) & _PAGE_PRESENT) {
468 unsigned int i;
469 pmd_t *pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
470
471 for (i = 0; i < PTRS_PER_PMD; i++)
472 release_pmd(&pmdpage[i]);
473
474 /* Now we can free the page of PMDs */
475 free_page((long)pmdpage);
476 /* And zero out the PGD entry so we never release it twice. */
477 set_pgd(spgd, __pgd(0));
478 }
479}
480
481#else /* !CONFIG_X86_PAE */
325/*H:450 If we chase down the release_pgd() code, it looks like this: */ 482/*H:450 If we chase down the release_pgd() code, it looks like this: */
326static void release_pgd(struct lguest *lg, pgd_t *spgd) 483static void release_pgd(pgd_t *spgd)
327{ 484{
328 /* If the entry's not present, there's nothing to release. */ 485 /* If the entry's not present, there's nothing to release. */
329 if (pgd_flags(*spgd) & _PAGE_PRESENT) { 486 if (pgd_flags(*spgd) & _PAGE_PRESENT) {
@@ -341,7 +498,7 @@ static void release_pgd(struct lguest *lg, pgd_t *spgd)
341 *spgd = __pgd(0); 498 *spgd = __pgd(0);
342 } 499 }
343} 500}
344 501#endif
345/*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings() 502/*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings()
346 * hypercall and once in new_pgdir() when we re-used a top-level pgdir page. 503 * hypercall and once in new_pgdir() when we re-used a top-level pgdir page.
347 * It simply releases every PTE page from 0 up to the Guest's kernel address. */ 504 * It simply releases every PTE page from 0 up to the Guest's kernel address. */
@@ -350,7 +507,7 @@ static void flush_user_mappings(struct lguest *lg, int idx)
350 unsigned int i; 507 unsigned int i;
351 /* Release every pgd entry up to the kernel's address. */ 508 /* Release every pgd entry up to the kernel's address. */
352 for (i = 0; i < pgd_index(lg->kernel_address); i++) 509 for (i = 0; i < pgd_index(lg->kernel_address); i++)
353 release_pgd(lg, lg->pgdirs[idx].pgdir + i); 510 release_pgd(lg->pgdirs[idx].pgdir + i);
354} 511}
355 512
356/*H:440 (v) Flushing (throwing away) page tables, 513/*H:440 (v) Flushing (throwing away) page tables,
@@ -369,7 +526,9 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
369{ 526{
370 pgd_t gpgd; 527 pgd_t gpgd;
371 pte_t gpte; 528 pte_t gpte;
372 529#ifdef CONFIG_X86_PAE
530 pmd_t gpmd;
531#endif
373 /* First step: get the top-level Guest page table entry. */ 532 /* First step: get the top-level Guest page table entry. */
374 gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t); 533 gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
375 /* Toplevel not present? We can't map it in. */ 534 /* Toplevel not present? We can't map it in. */
@@ -378,7 +537,14 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
378 return -1UL; 537 return -1UL;
379 } 538 }
380 539
381 gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t); 540#ifdef CONFIG_X86_PAE
541 gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
542 if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
543 kill_guest(cpu, "Bad address %#lx", vaddr);
544 gpte = lgread(cpu, gpte_addr(cpu, gpmd, vaddr), pte_t);
545#else
546 gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
547#endif
382 if (!(pte_flags(gpte) & _PAGE_PRESENT)) 548 if (!(pte_flags(gpte) & _PAGE_PRESENT))
383 kill_guest(cpu, "Bad address %#lx", vaddr); 549 kill_guest(cpu, "Bad address %#lx", vaddr);
384 550
@@ -405,6 +571,9 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
405 int *blank_pgdir) 571 int *blank_pgdir)
406{ 572{
407 unsigned int next; 573 unsigned int next;
574#ifdef CONFIG_X86_PAE
575 pmd_t *pmd_table;
576#endif
408 577
409 /* We pick one entry at random to throw out. Choosing the Least 578 /* We pick one entry at random to throw out. Choosing the Least
410 * Recently Used might be better, but this is easy. */ 579 * Recently Used might be better, but this is easy. */
@@ -416,10 +585,27 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
416 /* If the allocation fails, just keep using the one we have */ 585 /* If the allocation fails, just keep using the one we have */
417 if (!cpu->lg->pgdirs[next].pgdir) 586 if (!cpu->lg->pgdirs[next].pgdir)
418 next = cpu->cpu_pgd; 587 next = cpu->cpu_pgd;
419 else 588 else {
420 /* This is a blank page, so there are no kernel 589#ifdef CONFIG_X86_PAE
421 * mappings: caller must map the stack! */ 590 /* In PAE mode, allocate a pmd page and populate the
591 * last pgd entry. */
592 pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL);
593 if (!pmd_table) {
594 free_page((long)cpu->lg->pgdirs[next].pgdir);
595 set_pgd(cpu->lg->pgdirs[next].pgdir, __pgd(0));
596 next = cpu->cpu_pgd;
597 } else {
598 set_pgd(cpu->lg->pgdirs[next].pgdir +
599 SWITCHER_PGD_INDEX,
600 __pgd(__pa(pmd_table) | _PAGE_PRESENT));
601 /* This is a blank page, so there are no kernel
602 * mappings: caller must map the stack! */
603 *blank_pgdir = 1;
604 }
605#else
422 *blank_pgdir = 1; 606 *blank_pgdir = 1;
607#endif
608 }
423 } 609 }
424 /* Record which Guest toplevel this shadows. */ 610 /* Record which Guest toplevel this shadows. */
425 cpu->lg->pgdirs[next].gpgdir = gpgdir; 611 cpu->lg->pgdirs[next].gpgdir = gpgdir;
@@ -431,7 +617,7 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
431 617
432/*H:430 (iv) Switching page tables 618/*H:430 (iv) Switching page tables
433 * 619 *
434 * Now we've seen all the page table setting and manipulation, let's see what 620 * Now we've seen all the page table setting and manipulation, let's see
435 * what happens when the Guest changes page tables (ie. changes the top-level 621 * what happens when the Guest changes page tables (ie. changes the top-level
436 * pgdir). This occurs on almost every context switch. */ 622 * pgdir). This occurs on almost every context switch. */
437void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) 623void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
@@ -460,10 +646,25 @@ static void release_all_pagetables(struct lguest *lg)
460 646
461 /* Every shadow pagetable this Guest has */ 647 /* Every shadow pagetable this Guest has */
462 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 648 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
463 if (lg->pgdirs[i].pgdir) 649 if (lg->pgdirs[i].pgdir) {
650#ifdef CONFIG_X86_PAE
651 pgd_t *spgd;
652 pmd_t *pmdpage;
653 unsigned int k;
654
655 /* Get the last pmd page. */
656 spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX;
657 pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
658
659 /* And release the pmd entries of that pmd page,
660 * except for the switcher pmd. */
661 for (k = 0; k < SWITCHER_PMD_INDEX; k++)
662 release_pmd(&pmdpage[k]);
663#endif
464 /* Every PGD entry except the Switcher at the top */ 664 /* Every PGD entry except the Switcher at the top */
465 for (j = 0; j < SWITCHER_PGD_INDEX; j++) 665 for (j = 0; j < SWITCHER_PGD_INDEX; j++)
466 release_pgd(lg, lg->pgdirs[i].pgdir + j); 666 release_pgd(lg->pgdirs[i].pgdir + j);
667 }
467} 668}
468 669
469/* We also throw away everything when a Guest tells us it's changed a kernel 670/* We also throw away everything when a Guest tells us it's changed a kernel
@@ -504,24 +705,37 @@ static void do_set_pte(struct lg_cpu *cpu, int idx,
504{ 705{
505 /* Look up the matching shadow page directory entry. */ 706 /* Look up the matching shadow page directory entry. */
506 pgd_t *spgd = spgd_addr(cpu, idx, vaddr); 707 pgd_t *spgd = spgd_addr(cpu, idx, vaddr);
708#ifdef CONFIG_X86_PAE
709 pmd_t *spmd;
710#endif
507 711
508 /* If the top level isn't present, there's no entry to update. */ 712 /* If the top level isn't present, there's no entry to update. */
509 if (pgd_flags(*spgd) & _PAGE_PRESENT) { 713 if (pgd_flags(*spgd) & _PAGE_PRESENT) {
510 /* Otherwise, we start by releasing the existing entry. */ 714#ifdef CONFIG_X86_PAE
511 pte_t *spte = spte_addr(*spgd, vaddr); 715 spmd = spmd_addr(cpu, *spgd, vaddr);
512 release_pte(*spte); 716 if (pmd_flags(*spmd) & _PAGE_PRESENT) {
513 717#endif
514 /* If they're setting this entry as dirty or accessed, we might 718 /* Otherwise, we start by releasing
515 * as well put that entry they've given us in now. This shaves 719 * the existing entry. */
516 * 10% off a copy-on-write micro-benchmark. */ 720 pte_t *spte = spte_addr(cpu, *spgd, vaddr);
517 if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { 721 release_pte(*spte);
518 check_gpte(cpu, gpte); 722
519 *spte = gpte_to_spte(cpu, gpte, 723 /* If they're setting this entry as dirty or accessed,
520 pte_flags(gpte) & _PAGE_DIRTY); 724 * we might as well put that entry they've given us
521 } else 725 * in now. This shaves 10% off a
522 /* Otherwise kill it and we can demand_page() it in 726 * copy-on-write micro-benchmark. */
523 * later. */ 727 if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
524 *spte = __pte(0); 728 check_gpte(cpu, gpte);
729 native_set_pte(spte,
730 gpte_to_spte(cpu, gpte,
731 pte_flags(gpte) & _PAGE_DIRTY));
732 } else
733 /* Otherwise kill it and we can demand_page()
734 * it in later. */
735 native_set_pte(spte, __pte(0));
736#ifdef CONFIG_X86_PAE
737 }
738#endif
525 } 739 }
526} 740}
527 741
@@ -568,12 +782,10 @@ void guest_set_pte(struct lg_cpu *cpu,
568 * 782 *
569 * So with that in mind here's our code to to update a (top-level) PGD entry: 783 * So with that in mind here's our code to to update a (top-level) PGD entry:
570 */ 784 */
571void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx) 785void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
572{ 786{
573 int pgdir; 787 int pgdir;
574 788
575 /* The kernel seems to try to initialize this early on: we ignore its
576 * attempts to map over the Switcher. */
577 if (idx >= SWITCHER_PGD_INDEX) 789 if (idx >= SWITCHER_PGD_INDEX)
578 return; 790 return;
579 791
@@ -581,8 +793,14 @@ void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
581 pgdir = find_pgdir(lg, gpgdir); 793 pgdir = find_pgdir(lg, gpgdir);
582 if (pgdir < ARRAY_SIZE(lg->pgdirs)) 794 if (pgdir < ARRAY_SIZE(lg->pgdirs))
583 /* ... throw it away. */ 795 /* ... throw it away. */
584 release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx); 796 release_pgd(lg->pgdirs[pgdir].pgdir + idx);
585} 797}
798#ifdef CONFIG_X86_PAE
799void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
800{
801 guest_pagetable_clear_all(&lg->cpus[0]);
802}
803#endif
586 804
587/* Once we know how much memory we have we can construct simple identity 805/* Once we know how much memory we have we can construct simple identity
588 * (which set virtual == physical) and linear mappings 806 * (which set virtual == physical) and linear mappings
@@ -596,8 +814,16 @@ static unsigned long setup_pagetables(struct lguest *lg,
596{ 814{
597 pgd_t __user *pgdir; 815 pgd_t __user *pgdir;
598 pte_t __user *linear; 816 pte_t __user *linear;
599 unsigned int mapped_pages, i, linear_pages, phys_linear;
600 unsigned long mem_base = (unsigned long)lg->mem_base; 817 unsigned long mem_base = (unsigned long)lg->mem_base;
818 unsigned int mapped_pages, i, linear_pages;
819#ifdef CONFIG_X86_PAE
820 pmd_t __user *pmds;
821 unsigned int j;
822 pgd_t pgd;
823 pmd_t pmd;
824#else
825 unsigned int phys_linear;
826#endif
601 827
602 /* We have mapped_pages frames to map, so we need 828 /* We have mapped_pages frames to map, so we need
603 * linear_pages page tables to map them. */ 829 * linear_pages page tables to map them. */
@@ -610,6 +836,9 @@ static unsigned long setup_pagetables(struct lguest *lg,
610 /* Now we use the next linear_pages pages as pte pages */ 836 /* Now we use the next linear_pages pages as pte pages */
611 linear = (void *)pgdir - linear_pages * PAGE_SIZE; 837 linear = (void *)pgdir - linear_pages * PAGE_SIZE;
612 838
839#ifdef CONFIG_X86_PAE
840 pmds = (void *)linear - PAGE_SIZE;
841#endif
613 /* Linear mapping is easy: put every page's address into the 842 /* Linear mapping is easy: put every page's address into the
614 * mapping in order. */ 843 * mapping in order. */
615 for (i = 0; i < mapped_pages; i++) { 844 for (i = 0; i < mapped_pages; i++) {
@@ -621,6 +850,22 @@ static unsigned long setup_pagetables(struct lguest *lg,
621 850
622 /* The top level points to the linear page table pages above. 851 /* The top level points to the linear page table pages above.
623 * We setup the identity and linear mappings here. */ 852 * We setup the identity and linear mappings here. */
853#ifdef CONFIG_X86_PAE
854 for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
855 i += PTRS_PER_PTE, j++) {
856 native_set_pmd(&pmd, __pmd(((unsigned long)(linear + i)
857 - mem_base) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
858
859 if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
860 return -EFAULT;
861 }
862
863 set_pgd(&pgd, __pgd(((u32)pmds - mem_base) | _PAGE_PRESENT));
864 if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
865 return -EFAULT;
866 if (copy_to_user(&pgdir[3], &pgd, sizeof(pgd)) != 0)
867 return -EFAULT;
868#else
624 phys_linear = (unsigned long)linear - mem_base; 869 phys_linear = (unsigned long)linear - mem_base;
625 for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) { 870 for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
626 pgd_t pgd; 871 pgd_t pgd;
@@ -633,6 +878,7 @@ static unsigned long setup_pagetables(struct lguest *lg,
633 &pgd, sizeof(pgd))) 878 &pgd, sizeof(pgd)))
634 return -EFAULT; 879 return -EFAULT;
635 } 880 }
881#endif
636 882
637 /* We return the top level (guest-physical) address: remember where 883 /* We return the top level (guest-physical) address: remember where
638 * this is. */ 884 * this is. */
@@ -648,7 +894,10 @@ int init_guest_pagetable(struct lguest *lg)
648 u64 mem; 894 u64 mem;
649 u32 initrd_size; 895 u32 initrd_size;
650 struct boot_params __user *boot = (struct boot_params *)lg->mem_base; 896 struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
651 897#ifdef CONFIG_X86_PAE
898 pgd_t *pgd;
899 pmd_t *pmd_table;
900#endif
652 /* Get the Guest memory size and the ramdisk size from the boot header 901 /* Get the Guest memory size and the ramdisk size from the boot header
653 * located at lg->mem_base (Guest address 0). */ 902 * located at lg->mem_base (Guest address 0). */
654 if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem)) 903 if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
@@ -663,6 +912,15 @@ int init_guest_pagetable(struct lguest *lg)
663 lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); 912 lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
664 if (!lg->pgdirs[0].pgdir) 913 if (!lg->pgdirs[0].pgdir)
665 return -ENOMEM; 914 return -ENOMEM;
915#ifdef CONFIG_X86_PAE
916 pgd = lg->pgdirs[0].pgdir;
917 pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
918 if (!pmd_table)
919 return -ENOMEM;
920
921 set_pgd(pgd + SWITCHER_PGD_INDEX,
922 __pgd(__pa(pmd_table) | _PAGE_PRESENT));
923#endif
666 lg->cpus[0].cpu_pgd = 0; 924 lg->cpus[0].cpu_pgd = 0;
667 return 0; 925 return 0;
668} 926}
@@ -672,17 +930,24 @@ void page_table_guest_data_init(struct lg_cpu *cpu)
672{ 930{
673 /* We get the kernel address: above this is all kernel memory. */ 931 /* We get the kernel address: above this is all kernel memory. */
674 if (get_user(cpu->lg->kernel_address, 932 if (get_user(cpu->lg->kernel_address,
675 &cpu->lg->lguest_data->kernel_address) 933 &cpu->lg->lguest_data->kernel_address)
676 /* We tell the Guest that it can't use the top 4MB of virtual 934 /* We tell the Guest that it can't use the top 2 or 4 MB
677 * addresses used by the Switcher. */ 935 * of virtual addresses used by the Switcher. */
678 || put_user(4U*1024*1024, &cpu->lg->lguest_data->reserve_mem) 936 || put_user(RESERVE_MEM * 1024 * 1024,
679 || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir)) 937 &cpu->lg->lguest_data->reserve_mem)
938 || put_user(cpu->lg->pgdirs[0].gpgdir,
939 &cpu->lg->lguest_data->pgdir))
680 kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); 940 kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
681 941
682 /* In flush_user_mappings() we loop from 0 to 942 /* In flush_user_mappings() we loop from 0 to
683 * "pgd_index(lg->kernel_address)". This assumes it won't hit the 943 * "pgd_index(lg->kernel_address)". This assumes it won't hit the
684 * Switcher mappings, so check that now. */ 944 * Switcher mappings, so check that now. */
945#ifdef CONFIG_X86_PAE
946 if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX &&
947 pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX)
948#else
685 if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX) 949 if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX)
950#endif
686 kill_guest(cpu, "bad kernel address %#lx", 951 kill_guest(cpu, "bad kernel address %#lx",
687 cpu->lg->kernel_address); 952 cpu->lg->kernel_address);
688} 953}
@@ -708,16 +973,30 @@ void free_guest_pagetable(struct lguest *lg)
708void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) 973void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
709{ 974{
710 pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); 975 pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
711 pgd_t switcher_pgd;
712 pte_t regs_pte; 976 pte_t regs_pte;
713 unsigned long pfn; 977 unsigned long pfn;
714 978
979#ifdef CONFIG_X86_PAE
980 pmd_t switcher_pmd;
981 pmd_t *pmd_table;
982
983 native_set_pmd(&switcher_pmd, pfn_pmd(__pa(switcher_pte_page) >>
984 PAGE_SHIFT, PAGE_KERNEL_EXEC));
985
986 pmd_table = __va(pgd_pfn(cpu->lg->
987 pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
988 << PAGE_SHIFT);
989 native_set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
990#else
991 pgd_t switcher_pgd;
992
715 /* Make the last PGD entry for this Guest point to the Switcher's PTE 993 /* Make the last PGD entry for this Guest point to the Switcher's PTE
716 * page for this CPU (with appropriate flags). */ 994 * page for this CPU (with appropriate flags). */
717 switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL); 995 switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC);
718 996
719 cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; 997 cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
720 998
999#endif
721 /* We also change the Switcher PTE page. When we're running the Guest, 1000 /* We also change the Switcher PTE page. When we're running the Guest,
722 * we want the Guest's "regs" page to appear where the first Switcher 1001 * we want the Guest's "regs" page to appear where the first Switcher
723 * page for this CPU is. This is an optimization: when the Switcher 1002 * page for this CPU is. This is an optimization: when the Switcher
@@ -726,8 +1005,9 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
726 * page is already mapped there, we don't have to copy them out 1005 * page is already mapped there, we don't have to copy them out
727 * again. */ 1006 * again. */
728 pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; 1007 pfn = __pa(cpu->regs_page) >> PAGE_SHIFT;
729 regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL)); 1008 native_set_pte(&regs_pte, pfn_pte(pfn, PAGE_KERNEL));
730 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte; 1009 native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)],
1010 regs_pte);
731} 1011}
732/*:*/ 1012/*:*/
733 1013
@@ -752,21 +1032,21 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
752 1032
753 /* The first entries are easy: they map the Switcher code. */ 1033 /* The first entries are easy: they map the Switcher code. */
754 for (i = 0; i < pages; i++) { 1034 for (i = 0; i < pages; i++) {
755 pte[i] = mk_pte(switcher_page[i], 1035 native_set_pte(&pte[i], mk_pte(switcher_page[i],
756 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)); 1036 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
757 } 1037 }
758 1038
759 /* The only other thing we map is this CPU's pair of pages. */ 1039 /* The only other thing we map is this CPU's pair of pages. */
760 i = pages + cpu*2; 1040 i = pages + cpu*2;
761 1041
762 /* First page (Guest registers) is writable from the Guest */ 1042 /* First page (Guest registers) is writable from the Guest */
763 pte[i] = pfn_pte(page_to_pfn(switcher_page[i]), 1043 native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
764 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)); 1044 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));
765 1045
766 /* The second page contains the "struct lguest_ro_state", and is 1046 /* The second page contains the "struct lguest_ro_state", and is
767 * read-only. */ 1047 * read-only. */
768 pte[i+1] = pfn_pte(page_to_pfn(switcher_page[i+1]), 1048 native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
769 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)); 1049 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
770} 1050}
771 1051
772/* We've made it through the page table code. Perhaps our tired brains are 1052/* We've made it through the page table code. Perhaps our tired brains are
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index 7ede64ffeef9..482ed5a18750 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -150,7 +150,7 @@ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi)
150{ 150{
151 /* We assume the Guest has the same number of GDT entries as the 151 /* We assume the Guest has the same number of GDT entries as the
152 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */ 152 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
153 if (num > ARRAY_SIZE(cpu->arch.gdt)) 153 if (num >= ARRAY_SIZE(cpu->arch.gdt))
154 kill_guest(cpu, "too many gdt entries %i", num); 154 kill_guest(cpu, "too many gdt entries %i", num);
155 155
156 /* Set it up, then fix it. */ 156 /* Set it up, then fix it. */
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4d1d47953fc6..7fa620ddeb21 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -845,6 +845,10 @@ static int virtnet_probe(struct virtio_device *vdev)
845 int err; 845 int err;
846 struct net_device *dev; 846 struct net_device *dev;
847 struct virtnet_info *vi; 847 struct virtnet_info *vi;
848 struct virtqueue *vqs[3];
849 vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
850 const char *names[] = { "input", "output", "control" };
851 int nvqs;
848 852
849 /* Allocate ourselves a network device with room for our info */ 853 /* Allocate ourselves a network device with room for our info */
850 dev = alloc_etherdev(sizeof(struct virtnet_info)); 854 dev = alloc_etherdev(sizeof(struct virtnet_info));
@@ -905,25 +909,19 @@ static int virtnet_probe(struct virtio_device *vdev)
905 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) 909 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
906 vi->mergeable_rx_bufs = true; 910 vi->mergeable_rx_bufs = true;
907 911
908 /* We expect two virtqueues, receive then send. */ 912 /* We expect two virtqueues, receive then send,
909 vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); 913 * and optionally control. */
910 if (IS_ERR(vi->rvq)) { 914 nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
911 err = PTR_ERR(vi->rvq); 915
916 err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
917 if (err)
912 goto free; 918 goto free;
913 }
914 919
915 vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done); 920 vi->rvq = vqs[0];
916 if (IS_ERR(vi->svq)) { 921 vi->svq = vqs[1];
917 err = PTR_ERR(vi->svq);
918 goto free_recv;
919 }
920 922
921 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { 923 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
922 vi->cvq = vdev->config->find_vq(vdev, 2, NULL); 924 vi->cvq = vqs[2];
923 if (IS_ERR(vi->cvq)) {
924 err = PTR_ERR(vi->svq);
925 goto free_send;
926 }
927 925
928 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) 926 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
929 dev->features |= NETIF_F_HW_VLAN_FILTER; 927 dev->features |= NETIF_F_HW_VLAN_FILTER;
@@ -941,7 +939,7 @@ static int virtnet_probe(struct virtio_device *vdev)
941 err = register_netdev(dev); 939 err = register_netdev(dev);
942 if (err) { 940 if (err) {
943 pr_debug("virtio_net: registering device failed\n"); 941 pr_debug("virtio_net: registering device failed\n");
944 goto free_ctrl; 942 goto free_vqs;
945 } 943 }
946 944
947 /* Last of all, set up some receive buffers. */ 945 /* Last of all, set up some receive buffers. */
@@ -962,13 +960,8 @@ static int virtnet_probe(struct virtio_device *vdev)
962 960
963unregister: 961unregister:
964 unregister_netdev(dev); 962 unregister_netdev(dev);
965free_ctrl: 963free_vqs:
966 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) 964 vdev->config->del_vqs(vdev);
967 vdev->config->del_vq(vi->cvq);
968free_send:
969 vdev->config->del_vq(vi->svq);
970free_recv:
971 vdev->config->del_vq(vi->rvq);
972free: 965free:
973 free_netdev(dev); 966 free_netdev(dev);
974 return err; 967 return err;
@@ -994,12 +987,10 @@ static void virtnet_remove(struct virtio_device *vdev)
994 987
995 BUG_ON(vi->num != 0); 988 BUG_ON(vi->num != 0);
996 989
997 vdev->config->del_vq(vi->svq);
998 vdev->config->del_vq(vi->rvq);
999 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
1000 vdev->config->del_vq(vi->cvq);
1001 unregister_netdev(vi->dev); 990 unregister_netdev(vi->dev);
1002 991
992 vdev->config->del_vqs(vi->vdev);
993
1003 while (vi->pages) 994 while (vi->pages)
1004 __free_pages(get_a_page(vi, GFP_KERNEL), 0); 995 __free_pages(get_a_page(vi, GFP_KERNEL), 0);
1005 996
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index cbc8566fab70..e38e5d306faf 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -173,8 +173,9 @@ static void kvm_notify(struct virtqueue *vq)
173 * this device and sets it up. 173 * this device and sets it up.
174 */ 174 */
175static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, 175static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
176 unsigned index, 176 unsigned index,
177 void (*callback)(struct virtqueue *vq)) 177 void (*callback)(struct virtqueue *vq),
178 const char *name)
178{ 179{
179 struct kvm_device *kdev = to_kvmdev(vdev); 180 struct kvm_device *kdev = to_kvmdev(vdev);
180 struct kvm_vqconfig *config; 181 struct kvm_vqconfig *config;
@@ -194,7 +195,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
194 195
195 vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN, 196 vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
196 vdev, (void *) config->address, 197 vdev, (void *) config->address,
197 kvm_notify, callback); 198 kvm_notify, callback, name);
198 if (!vq) { 199 if (!vq) {
199 err = -ENOMEM; 200 err = -ENOMEM;
200 goto unmap; 201 goto unmap;
@@ -226,6 +227,38 @@ static void kvm_del_vq(struct virtqueue *vq)
226 KVM_S390_VIRTIO_RING_ALIGN)); 227 KVM_S390_VIRTIO_RING_ALIGN));
227} 228}
228 229
230static void kvm_del_vqs(struct virtio_device *vdev)
231{
232 struct virtqueue *vq, *n;
233
234 list_for_each_entry_safe(vq, n, &vdev->vqs, list)
235 kvm_del_vq(vq);
236}
237
238static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
239 struct virtqueue *vqs[],
240 vq_callback_t *callbacks[],
241 const char *names[])
242{
243 struct kvm_device *kdev = to_kvmdev(vdev);
244 int i;
245
246 /* We must have this many virtqueues. */
247 if (nvqs > kdev->desc->num_vq)
248 return -ENOENT;
249
250 for (i = 0; i < nvqs; ++i) {
251 vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]);
252 if (IS_ERR(vqs[i]))
253 goto error;
254 }
255 return 0;
256
257error:
258 kvm_del_vqs(vdev);
259 return PTR_ERR(vqs[i]);
260}
261
229/* 262/*
230 * The config ops structure as defined by virtio config 263 * The config ops structure as defined by virtio config
231 */ 264 */
@@ -237,8 +270,8 @@ static struct virtio_config_ops kvm_vq_configspace_ops = {
237 .get_status = kvm_get_status, 270 .get_status = kvm_get_status,
238 .set_status = kvm_set_status, 271 .set_status = kvm_set_status,
239 .reset = kvm_reset, 272 .reset = kvm_reset,
240 .find_vq = kvm_find_vq, 273 .find_vqs = kvm_find_vqs,
241 .del_vq = kvm_del_vq, 274 .del_vqs = kvm_del_vqs,
242}; 275};
243 276
244/* 277/*
diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index 35e8eb02b9e9..e4e4d433b007 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -354,7 +354,7 @@ static int default_crt_on __devinitdata = 0;
354static int default_lcd_on __devinitdata = 1; 354static int default_lcd_on __devinitdata = 1;
355 355
356#ifdef CONFIG_MTRR 356#ifdef CONFIG_MTRR
357static int mtrr = 1; 357static bool mtrr = true;
358#endif 358#endif
359 359
360#ifdef CONFIG_PMAC_BACKLIGHT 360#ifdef CONFIG_PMAC_BACKLIGHT
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 83c5cefc266c..da7c01b39be2 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -1736,10 +1736,8 @@ static int __init cyber2000fb_init(void)
1736 1736
1737#ifdef CONFIG_ARCH_SHARK 1737#ifdef CONFIG_ARCH_SHARK
1738 err = cyberpro_vl_probe(); 1738 err = cyberpro_vl_probe();
1739 if (!err) { 1739 if (!err)
1740 ret = 0; 1740 ret = 0;
1741 __module_get(THIS_MODULE);
1742 }
1743#endif 1741#endif
1744#ifdef CONFIG_PCI 1742#ifdef CONFIG_PCI
1745 err = pci_register_driver(&cyberpro_driver); 1743 err = pci_register_driver(&cyberpro_driver);
@@ -1749,14 +1747,15 @@ static int __init cyber2000fb_init(void)
1749 1747
1750 return ret ? err : 0; 1748 return ret ? err : 0;
1751} 1749}
1750module_init(cyber2000fb_init);
1752 1751
1752#ifndef CONFIG_ARCH_SHARK
1753static void __exit cyberpro_exit(void) 1753static void __exit cyberpro_exit(void)
1754{ 1754{
1755 pci_unregister_driver(&cyberpro_driver); 1755 pci_unregister_driver(&cyberpro_driver);
1756} 1756}
1757
1758module_init(cyber2000fb_init);
1759module_exit(cyberpro_exit); 1757module_exit(cyberpro_exit);
1758#endif
1760 1759
1761MODULE_AUTHOR("Russell King"); 1760MODULE_AUTHOR("Russell King");
1762MODULE_DESCRIPTION("CyberPro 2000, 2010 and 5000 framebuffer driver"); 1761MODULE_DESCRIPTION("CyberPro 2000, 2010 and 5000 framebuffer driver");
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 421770b5e6ab..ca5b4643a401 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -45,7 +45,7 @@ static struct fb_fix_screeninfo uvesafb_fix __devinitdata = {
45static int mtrr __devinitdata = 3; /* enable mtrr by default */ 45static int mtrr __devinitdata = 3; /* enable mtrr by default */
46static int blank = 1; /* enable blanking by default */ 46static int blank = 1; /* enable blanking by default */
47static int ypan = 1; /* 0: scroll, 1: ypan, 2: ywrap */ 47static int ypan = 1; /* 0: scroll, 1: ypan, 2: ywrap */
48static int pmi_setpal __devinitdata = 1; /* use PMI for palette changes */ 48static bool pmi_setpal __devinitdata = true; /* use PMI for palette changes */
49static int nocrtc __devinitdata; /* ignore CRTC settings */ 49static int nocrtc __devinitdata; /* ignore CRTC settings */
50static int noedid __devinitdata; /* don't try DDC transfers */ 50static int noedid __devinitdata; /* don't try DDC transfers */
51static int vram_remap __devinitdata; /* set amt. of memory to be used */ 51static int vram_remap __devinitdata; /* set amt. of memory to be used */
@@ -2002,11 +2002,7 @@ static void __devexit uvesafb_exit(void)
2002 2002
2003module_exit(uvesafb_exit); 2003module_exit(uvesafb_exit);
2004 2004
2005static int param_get_scroll(char *buffer, struct kernel_param *kp) 2005#define param_get_scroll NULL
2006{
2007 return 0;
2008}
2009
2010static int param_set_scroll(const char *val, struct kernel_param *kp) 2006static int param_set_scroll(const char *val, struct kernel_param *kp)
2011{ 2007{
2012 ypan = 0; 2008 ypan = 0;
@@ -2017,6 +2013,8 @@ static int param_set_scroll(const char *val, struct kernel_param *kp)
2017 ypan = 1; 2013 ypan = 1;
2018 else if (!strcmp(val, "ywrap")) 2014 else if (!strcmp(val, "ywrap"))
2019 ypan = 2; 2015 ypan = 2;
2016 else
2017 return -EINVAL;
2020 2018
2021 return 0; 2019 return 0;
2022} 2020}
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 018c070a357f..3a43ebf83a49 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -31,21 +31,37 @@ static ssize_t modalias_show(struct device *_d,
31 return sprintf(buf, "virtio:d%08Xv%08X\n", 31 return sprintf(buf, "virtio:d%08Xv%08X\n",
32 dev->id.device, dev->id.vendor); 32 dev->id.device, dev->id.vendor);
33} 33}
34static ssize_t features_show(struct device *_d,
35 struct device_attribute *attr, char *buf)
36{
37 struct virtio_device *dev = container_of(_d, struct virtio_device, dev);
38 unsigned int i;
39 ssize_t len = 0;
40
41 /* We actually represent this as a bitstring, as it could be
42 * arbitrary length in future. */
43 for (i = 0; i < ARRAY_SIZE(dev->features)*BITS_PER_LONG; i++)
44 len += sprintf(buf+len, "%c",
45 test_bit(i, dev->features) ? '1' : '0');
46 len += sprintf(buf+len, "\n");
47 return len;
48}
34static struct device_attribute virtio_dev_attrs[] = { 49static struct device_attribute virtio_dev_attrs[] = {
35 __ATTR_RO(device), 50 __ATTR_RO(device),
36 __ATTR_RO(vendor), 51 __ATTR_RO(vendor),
37 __ATTR_RO(status), 52 __ATTR_RO(status),
38 __ATTR_RO(modalias), 53 __ATTR_RO(modalias),
54 __ATTR_RO(features),
39 __ATTR_NULL 55 __ATTR_NULL
40}; 56};
41 57
42static inline int virtio_id_match(const struct virtio_device *dev, 58static inline int virtio_id_match(const struct virtio_device *dev,
43 const struct virtio_device_id *id) 59 const struct virtio_device_id *id)
44{ 60{
45 if (id->device != dev->id.device) 61 if (id->device != dev->id.device && id->device != VIRTIO_DEV_ANY_ID)
46 return 0; 62 return 0;
47 63
48 return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor != dev->id.vendor; 64 return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor;
49} 65}
50 66
51/* This looks through all the IDs a driver claims to support. If any of them 67/* This looks through all the IDs a driver claims to support. If any of them
@@ -118,13 +134,14 @@ static int virtio_dev_probe(struct device *_d)
118 if (device_features & (1 << i)) 134 if (device_features & (1 << i))
119 set_bit(i, dev->features); 135 set_bit(i, dev->features);
120 136
137 dev->config->finalize_features(dev);
138
121 err = drv->probe(dev); 139 err = drv->probe(dev);
122 if (err) 140 if (err)
123 add_status(dev, VIRTIO_CONFIG_S_FAILED); 141 add_status(dev, VIRTIO_CONFIG_S_FAILED);
124 else { 142 else
125 dev->config->finalize_features(dev);
126 add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); 143 add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
127 } 144
128 return err; 145 return err;
129} 146}
130 147
@@ -185,6 +202,8 @@ int register_virtio_device(struct virtio_device *dev)
185 /* Acknowledge that we've seen the device. */ 202 /* Acknowledge that we've seen the device. */
186 add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); 203 add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
187 204
205 INIT_LIST_HEAD(&dev->vqs);
206
188 /* device_register() causes the bus infrastructure to look for a 207 /* device_register() causes the bus infrastructure to look for a
189 * matching driver. */ 208 * matching driver. */
190 err = device_register(&dev->dev); 209 err = device_register(&dev->dev);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 9c76a061a04d..26b278264796 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -204,6 +204,9 @@ static int balloon(void *_vballoon)
204static int virtballoon_probe(struct virtio_device *vdev) 204static int virtballoon_probe(struct virtio_device *vdev)
205{ 205{
206 struct virtio_balloon *vb; 206 struct virtio_balloon *vb;
207 struct virtqueue *vqs[2];
208 vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
209 const char *names[] = { "inflate", "deflate" };
207 int err; 210 int err;
208 211
209 vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); 212 vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
@@ -218,22 +221,17 @@ static int virtballoon_probe(struct virtio_device *vdev)
218 vb->vdev = vdev; 221 vb->vdev = vdev;
219 222
220 /* We expect two virtqueues. */ 223 /* We expect two virtqueues. */
221 vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack); 224 err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
222 if (IS_ERR(vb->inflate_vq)) { 225 if (err)
223 err = PTR_ERR(vb->inflate_vq);
224 goto out_free_vb; 226 goto out_free_vb;
225 }
226 227
227 vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack); 228 vb->inflate_vq = vqs[0];
228 if (IS_ERR(vb->deflate_vq)) { 229 vb->deflate_vq = vqs[1];
229 err = PTR_ERR(vb->deflate_vq);
230 goto out_del_inflate_vq;
231 }
232 230
233 vb->thread = kthread_run(balloon, vb, "vballoon"); 231 vb->thread = kthread_run(balloon, vb, "vballoon");
234 if (IS_ERR(vb->thread)) { 232 if (IS_ERR(vb->thread)) {
235 err = PTR_ERR(vb->thread); 233 err = PTR_ERR(vb->thread);
236 goto out_del_deflate_vq; 234 goto out_del_vqs;
237 } 235 }
238 236
239 vb->tell_host_first 237 vb->tell_host_first
@@ -241,10 +239,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
241 239
242 return 0; 240 return 0;
243 241
244out_del_deflate_vq: 242out_del_vqs:
245 vdev->config->del_vq(vb->deflate_vq); 243 vdev->config->del_vqs(vdev);
246out_del_inflate_vq:
247 vdev->config->del_vq(vb->inflate_vq);
248out_free_vb: 244out_free_vb:
249 kfree(vb); 245 kfree(vb);
250out: 246out:
@@ -264,8 +260,7 @@ static void virtballoon_remove(struct virtio_device *vdev)
264 /* Now we reset the device so we can clean up the queues. */ 260 /* Now we reset the device so we can clean up the queues. */
265 vdev->config->reset(vdev); 261 vdev->config->reset(vdev);
266 262
267 vdev->config->del_vq(vb->deflate_vq); 263 vdev->config->del_vqs(vdev);
268 vdev->config->del_vq(vb->inflate_vq);
269 kfree(vb); 264 kfree(vb);
270} 265}
271 266
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 330aacbdec1f..193c8f0e5cc5 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -42,6 +42,26 @@ struct virtio_pci_device
42 /* a list of queues so we can dispatch IRQs */ 42 /* a list of queues so we can dispatch IRQs */
43 spinlock_t lock; 43 spinlock_t lock;
44 struct list_head virtqueues; 44 struct list_head virtqueues;
45
46 /* MSI-X support */
47 int msix_enabled;
48 int intx_enabled;
49 struct msix_entry *msix_entries;
50 /* Name strings for interrupts. This size should be enough,
51 * and I'm too lazy to allocate each name separately. */
52 char (*msix_names)[256];
53 /* Number of available vectors */
54 unsigned msix_vectors;
55 /* Vectors allocated */
56 unsigned msix_used_vectors;
57};
58
59/* Constants for MSI-X */
60/* Use first vector for configuration changes, second and the rest for
61 * virtqueues. Thus, we need at least 2 vectors for MSI. */
62enum {
63 VP_MSIX_CONFIG_VECTOR = 0,
64 VP_MSIX_VQ_VECTOR = 1,
45}; 65};
46 66
47struct virtio_pci_vq_info 67struct virtio_pci_vq_info
@@ -60,6 +80,9 @@ struct virtio_pci_vq_info
60 80
61 /* the list node for the virtqueues list */ 81 /* the list node for the virtqueues list */
62 struct list_head node; 82 struct list_head node;
83
84 /* MSI-X vector (or none) */
85 unsigned vector;
63}; 86};
64 87
65/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ 88/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
@@ -109,7 +132,8 @@ static void vp_get(struct virtio_device *vdev, unsigned offset,
109 void *buf, unsigned len) 132 void *buf, unsigned len)
110{ 133{
111 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 134 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
112 void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; 135 void __iomem *ioaddr = vp_dev->ioaddr +
136 VIRTIO_PCI_CONFIG(vp_dev) + offset;
113 u8 *ptr = buf; 137 u8 *ptr = buf;
114 int i; 138 int i;
115 139
@@ -123,7 +147,8 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
123 const void *buf, unsigned len) 147 const void *buf, unsigned len)
124{ 148{
125 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 149 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
126 void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; 150 void __iomem *ioaddr = vp_dev->ioaddr +
151 VIRTIO_PCI_CONFIG(vp_dev) + offset;
127 const u8 *ptr = buf; 152 const u8 *ptr = buf;
128 int i; 153 int i;
129 154
@@ -164,6 +189,37 @@ static void vp_notify(struct virtqueue *vq)
164 iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); 189 iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
165} 190}
166 191
192/* Handle a configuration change: Tell driver if it wants to know. */
193static irqreturn_t vp_config_changed(int irq, void *opaque)
194{
195 struct virtio_pci_device *vp_dev = opaque;
196 struct virtio_driver *drv;
197 drv = container_of(vp_dev->vdev.dev.driver,
198 struct virtio_driver, driver);
199
200 if (drv && drv->config_changed)
201 drv->config_changed(&vp_dev->vdev);
202 return IRQ_HANDLED;
203}
204
205/* Notify all virtqueues on an interrupt. */
206static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
207{
208 struct virtio_pci_device *vp_dev = opaque;
209 struct virtio_pci_vq_info *info;
210 irqreturn_t ret = IRQ_NONE;
211 unsigned long flags;
212
213 spin_lock_irqsave(&vp_dev->lock, flags);
214 list_for_each_entry(info, &vp_dev->virtqueues, node) {
215 if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
216 ret = IRQ_HANDLED;
217 }
218 spin_unlock_irqrestore(&vp_dev->lock, flags);
219
220 return ret;
221}
222
167/* A small wrapper to also acknowledge the interrupt when it's handled. 223/* A small wrapper to also acknowledge the interrupt when it's handled.
168 * I really need an EIO hook for the vring so I can ack the interrupt once we 224 * I really need an EIO hook for the vring so I can ack the interrupt once we
169 * know that we'll be handling the IRQ but before we invoke the callback since 225 * know that we'll be handling the IRQ but before we invoke the callback since
@@ -173,9 +229,6 @@ static void vp_notify(struct virtqueue *vq)
173static irqreturn_t vp_interrupt(int irq, void *opaque) 229static irqreturn_t vp_interrupt(int irq, void *opaque)
174{ 230{
175 struct virtio_pci_device *vp_dev = opaque; 231 struct virtio_pci_device *vp_dev = opaque;
176 struct virtio_pci_vq_info *info;
177 irqreturn_t ret = IRQ_NONE;
178 unsigned long flags;
179 u8 isr; 232 u8 isr;
180 233
181 /* reading the ISR has the effect of also clearing it so it's very 234 /* reading the ISR has the effect of also clearing it so it's very
@@ -187,34 +240,137 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
187 return IRQ_NONE; 240 return IRQ_NONE;
188 241
189 /* Configuration change? Tell driver if it wants to know. */ 242 /* Configuration change? Tell driver if it wants to know. */
190 if (isr & VIRTIO_PCI_ISR_CONFIG) { 243 if (isr & VIRTIO_PCI_ISR_CONFIG)
191 struct virtio_driver *drv; 244 vp_config_changed(irq, opaque);
192 drv = container_of(vp_dev->vdev.dev.driver,
193 struct virtio_driver, driver);
194 245
195 if (drv && drv->config_changed) 246 return vp_vring_interrupt(irq, opaque);
196 drv->config_changed(&vp_dev->vdev); 247}
248
249static void vp_free_vectors(struct virtio_device *vdev)
250{
251 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
252 int i;
253
254 if (vp_dev->intx_enabled) {
255 free_irq(vp_dev->pci_dev->irq, vp_dev);
256 vp_dev->intx_enabled = 0;
197 } 257 }
198 258
199 spin_lock_irqsave(&vp_dev->lock, flags); 259 for (i = 0; i < vp_dev->msix_used_vectors; ++i)
200 list_for_each_entry(info, &vp_dev->virtqueues, node) { 260 free_irq(vp_dev->msix_entries[i].vector, vp_dev);
201 if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) 261 vp_dev->msix_used_vectors = 0;
202 ret = IRQ_HANDLED; 262
263 if (vp_dev->msix_enabled) {
264 /* Disable the vector used for configuration */
265 iowrite16(VIRTIO_MSI_NO_VECTOR,
266 vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
267 /* Flush the write out to device */
268 ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
269
270 vp_dev->msix_enabled = 0;
271 pci_disable_msix(vp_dev->pci_dev);
203 } 272 }
204 spin_unlock_irqrestore(&vp_dev->lock, flags); 273}
205 274
206 return ret; 275static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
276 int *options, int noptions)
277{
278 int i;
279 for (i = 0; i < noptions; ++i)
280 if (!pci_enable_msix(dev, entries, options[i]))
281 return options[i];
282 return -EBUSY;
283}
284
285static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs)
286{
287 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
288 const char *name = dev_name(&vp_dev->vdev.dev);
289 unsigned i, v;
290 int err = -ENOMEM;
291 /* We want at most one vector per queue and one for config changes.
292 * Fallback to separate vectors for config and a shared for queues.
293 * Finally fall back to regular interrupts. */
294 int options[] = { max_vqs + 1, 2 };
295 int nvectors = max(options[0], options[1]);
296
297 vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
298 GFP_KERNEL);
299 if (!vp_dev->msix_entries)
300 goto error_entries;
301 vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
302 GFP_KERNEL);
303 if (!vp_dev->msix_names)
304 goto error_names;
305
306 for (i = 0; i < nvectors; ++i)
307 vp_dev->msix_entries[i].entry = i;
308
309 err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries,
310 options, ARRAY_SIZE(options));
311 if (err < 0) {
312 /* Can't allocate enough MSI-X vectors, use regular interrupt */
313 vp_dev->msix_vectors = 0;
314 err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
315 IRQF_SHARED, name, vp_dev);
316 if (err)
317 goto error_irq;
318 vp_dev->intx_enabled = 1;
319 } else {
320 vp_dev->msix_vectors = err;
321 vp_dev->msix_enabled = 1;
322
323 /* Set the vector used for configuration */
324 v = vp_dev->msix_used_vectors;
325 snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
326 "%s-config", name);
327 err = request_irq(vp_dev->msix_entries[v].vector,
328 vp_config_changed, 0, vp_dev->msix_names[v],
329 vp_dev);
330 if (err)
331 goto error_irq;
332 ++vp_dev->msix_used_vectors;
333
334 iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
335 /* Verify we had enough resources to assign the vector */
336 v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
337 if (v == VIRTIO_MSI_NO_VECTOR) {
338 err = -EBUSY;
339 goto error_irq;
340 }
341 }
342
343 if (vp_dev->msix_vectors && vp_dev->msix_vectors != max_vqs + 1) {
344 /* Shared vector for all VQs */
345 v = vp_dev->msix_used_vectors;
346 snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
347 "%s-virtqueues", name);
348 err = request_irq(vp_dev->msix_entries[v].vector,
349 vp_vring_interrupt, 0, vp_dev->msix_names[v],
350 vp_dev);
351 if (err)
352 goto error_irq;
353 ++vp_dev->msix_used_vectors;
354 }
355 return 0;
356error_irq:
357 vp_free_vectors(vdev);
358 kfree(vp_dev->msix_names);
359error_names:
360 kfree(vp_dev->msix_entries);
361error_entries:
362 return err;
207} 363}
208 364
209/* the config->find_vq() implementation */
210static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, 365static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
211 void (*callback)(struct virtqueue *vq)) 366 void (*callback)(struct virtqueue *vq),
367 const char *name)
212{ 368{
213 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 369 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
214 struct virtio_pci_vq_info *info; 370 struct virtio_pci_vq_info *info;
215 struct virtqueue *vq; 371 struct virtqueue *vq;
216 unsigned long flags, size; 372 unsigned long flags, size;
217 u16 num; 373 u16 num, vector;
218 int err; 374 int err;
219 375
220 /* Select the queue we're interested in */ 376 /* Select the queue we're interested in */
@@ -233,6 +389,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
233 389
234 info->queue_index = index; 390 info->queue_index = index;
235 info->num = num; 391 info->num = num;
392 info->vector = VIRTIO_MSI_NO_VECTOR;
236 393
237 size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); 394 size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
238 info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); 395 info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
@@ -247,7 +404,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
247 404
248 /* create the vring */ 405 /* create the vring */
249 vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, 406 vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
250 vdev, info->queue, vp_notify, callback); 407 vdev, info->queue, vp_notify, callback, name);
251 if (!vq) { 408 if (!vq) {
252 err = -ENOMEM; 409 err = -ENOMEM;
253 goto out_activate_queue; 410 goto out_activate_queue;
@@ -256,12 +413,43 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
256 vq->priv = info; 413 vq->priv = info;
257 info->vq = vq; 414 info->vq = vq;
258 415
416 /* allocate per-vq vector if available and necessary */
417 if (callback && vp_dev->msix_used_vectors < vp_dev->msix_vectors) {
418 vector = vp_dev->msix_used_vectors;
419 snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names,
420 "%s-%s", dev_name(&vp_dev->vdev.dev), name);
421 err = request_irq(vp_dev->msix_entries[vector].vector,
422 vring_interrupt, 0,
423 vp_dev->msix_names[vector], vq);
424 if (err)
425 goto out_request_irq;
426 info->vector = vector;
427 ++vp_dev->msix_used_vectors;
428 } else
429 vector = VP_MSIX_VQ_VECTOR;
430
431 if (callback && vp_dev->msix_enabled) {
432 iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
433 vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
434 if (vector == VIRTIO_MSI_NO_VECTOR) {
435 err = -EBUSY;
436 goto out_assign;
437 }
438 }
439
259 spin_lock_irqsave(&vp_dev->lock, flags); 440 spin_lock_irqsave(&vp_dev->lock, flags);
260 list_add(&info->node, &vp_dev->virtqueues); 441 list_add(&info->node, &vp_dev->virtqueues);
261 spin_unlock_irqrestore(&vp_dev->lock, flags); 442 spin_unlock_irqrestore(&vp_dev->lock, flags);
262 443
263 return vq; 444 return vq;
264 445
446out_assign:
447 if (info->vector != VIRTIO_MSI_NO_VECTOR) {
448 free_irq(vp_dev->msix_entries[info->vector].vector, vq);
449 --vp_dev->msix_used_vectors;
450 }
451out_request_irq:
452 vring_del_virtqueue(vq);
265out_activate_queue: 453out_activate_queue:
266 iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 454 iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
267 free_pages_exact(info->queue, size); 455 free_pages_exact(info->queue, size);
@@ -270,21 +458,27 @@ out_info:
270 return ERR_PTR(err); 458 return ERR_PTR(err);
271} 459}
272 460
273/* the config->del_vq() implementation */
274static void vp_del_vq(struct virtqueue *vq) 461static void vp_del_vq(struct virtqueue *vq)
275{ 462{
276 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 463 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
277 struct virtio_pci_vq_info *info = vq->priv; 464 struct virtio_pci_vq_info *info = vq->priv;
278 unsigned long flags, size; 465 unsigned long size;
279 466
280 spin_lock_irqsave(&vp_dev->lock, flags); 467 iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
281 list_del(&info->node); 468
282 spin_unlock_irqrestore(&vp_dev->lock, flags); 469 if (info->vector != VIRTIO_MSI_NO_VECTOR)
470 free_irq(vp_dev->msix_entries[info->vector].vector, vq);
471
472 if (vp_dev->msix_enabled) {
473 iowrite16(VIRTIO_MSI_NO_VECTOR,
474 vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
475 /* Flush the write out to device */
476 ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
477 }
283 478
284 vring_del_virtqueue(vq); 479 vring_del_virtqueue(vq);
285 480
286 /* Select and deactivate the queue */ 481 /* Select and deactivate the queue */
287 iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
288 iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 482 iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
289 483
290 size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); 484 size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
@@ -292,14 +486,57 @@ static void vp_del_vq(struct virtqueue *vq)
292 kfree(info); 486 kfree(info);
293} 487}
294 488
489/* the config->del_vqs() implementation */
490static void vp_del_vqs(struct virtio_device *vdev)
491{
492 struct virtqueue *vq, *n;
493
494 list_for_each_entry_safe(vq, n, &vdev->vqs, list)
495 vp_del_vq(vq);
496
497 vp_free_vectors(vdev);
498}
499
500/* the config->find_vqs() implementation */
501static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
502 struct virtqueue *vqs[],
503 vq_callback_t *callbacks[],
504 const char *names[])
505{
506 int vectors = 0;
507 int i, err;
508
509 /* How many vectors would we like? */
510 for (i = 0; i < nvqs; ++i)
511 if (callbacks[i])
512 ++vectors;
513
514 err = vp_request_vectors(vdev, vectors);
515 if (err)
516 goto error_request;
517
518 for (i = 0; i < nvqs; ++i) {
519 vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]);
520 if (IS_ERR(vqs[i]))
521 goto error_find;
522 }
523 return 0;
524
525error_find:
526 vp_del_vqs(vdev);
527
528error_request:
529 return PTR_ERR(vqs[i]);
530}
531
295static struct virtio_config_ops virtio_pci_config_ops = { 532static struct virtio_config_ops virtio_pci_config_ops = {
296 .get = vp_get, 533 .get = vp_get,
297 .set = vp_set, 534 .set = vp_set,
298 .get_status = vp_get_status, 535 .get_status = vp_get_status,
299 .set_status = vp_set_status, 536 .set_status = vp_set_status,
300 .reset = vp_reset, 537 .reset = vp_reset,
301 .find_vq = vp_find_vq, 538 .find_vqs = vp_find_vqs,
302 .del_vq = vp_del_vq, 539 .del_vqs = vp_del_vqs,
303 .get_features = vp_get_features, 540 .get_features = vp_get_features,
304 .finalize_features = vp_finalize_features, 541 .finalize_features = vp_finalize_features,
305}; 542};
@@ -310,7 +547,7 @@ static void virtio_pci_release_dev(struct device *_d)
310 struct virtio_pci_device *vp_dev = to_vp_device(dev); 547 struct virtio_pci_device *vp_dev = to_vp_device(dev);
311 struct pci_dev *pci_dev = vp_dev->pci_dev; 548 struct pci_dev *pci_dev = vp_dev->pci_dev;
312 549
313 free_irq(pci_dev->irq, vp_dev); 550 vp_del_vqs(dev);
314 pci_set_drvdata(pci_dev, NULL); 551 pci_set_drvdata(pci_dev, NULL);
315 pci_iounmap(pci_dev, vp_dev->ioaddr); 552 pci_iounmap(pci_dev, vp_dev->ioaddr);
316 pci_release_regions(pci_dev); 553 pci_release_regions(pci_dev);
@@ -369,21 +606,13 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
369 vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; 606 vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
370 vp_dev->vdev.id.device = pci_dev->subsystem_device; 607 vp_dev->vdev.id.device = pci_dev->subsystem_device;
371 608
372 /* register a handler for the queue with the PCI device's interrupt */
373 err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
374 dev_name(&vp_dev->vdev.dev), vp_dev);
375 if (err)
376 goto out_set_drvdata;
377
378 /* finally register the virtio device */ 609 /* finally register the virtio device */
379 err = register_virtio_device(&vp_dev->vdev); 610 err = register_virtio_device(&vp_dev->vdev);
380 if (err) 611 if (err)
381 goto out_req_irq; 612 goto out_set_drvdata;
382 613
383 return 0; 614 return 0;
384 615
385out_req_irq:
386 free_irq(pci_dev->irq, vp_dev);
387out_set_drvdata: 616out_set_drvdata:
388 pci_set_drvdata(pci_dev, NULL); 617 pci_set_drvdata(pci_dev, NULL);
389 pci_iounmap(pci_dev, vp_dev->ioaddr); 618 pci_iounmap(pci_dev, vp_dev->ioaddr);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5c52369ab9bb..a882f2606515 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -23,21 +23,30 @@
23 23
24#ifdef DEBUG 24#ifdef DEBUG
25/* For development, we want to crash whenever the ring is screwed. */ 25/* For development, we want to crash whenever the ring is screwed. */
26#define BAD_RING(_vq, fmt...) \ 26#define BAD_RING(_vq, fmt, args...) \
27 do { dev_err(&(_vq)->vq.vdev->dev, fmt); BUG(); } while(0) 27 do { \
28 dev_err(&(_vq)->vq.vdev->dev, \
29 "%s:"fmt, (_vq)->vq.name, ##args); \
30 BUG(); \
31 } while (0)
28/* Caller is supposed to guarantee no reentry. */ 32/* Caller is supposed to guarantee no reentry. */
29#define START_USE(_vq) \ 33#define START_USE(_vq) \
30 do { \ 34 do { \
31 if ((_vq)->in_use) \ 35 if ((_vq)->in_use) \
32 panic("in_use = %i\n", (_vq)->in_use); \ 36 panic("%s:in_use = %i\n", \
37 (_vq)->vq.name, (_vq)->in_use); \
33 (_vq)->in_use = __LINE__; \ 38 (_vq)->in_use = __LINE__; \
34 mb(); \ 39 mb(); \
35 } while(0) 40 } while (0)
36#define END_USE(_vq) \ 41#define END_USE(_vq) \
37 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0) 42 do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0)
38#else 43#else
39#define BAD_RING(_vq, fmt...) \ 44#define BAD_RING(_vq, fmt, args...) \
40 do { dev_err(&_vq->vq.vdev->dev, fmt); (_vq)->broken = true; } while(0) 45 do { \
46 dev_err(&_vq->vq.vdev->dev, \
47 "%s:"fmt, (_vq)->vq.name, ##args); \
48 (_vq)->broken = true; \
49 } while (0)
41#define START_USE(vq) 50#define START_USE(vq)
42#define END_USE(vq) 51#define END_USE(vq)
43#endif 52#endif
@@ -52,6 +61,9 @@ struct vring_virtqueue
52 /* Other side has made a mess, don't try any more. */ 61 /* Other side has made a mess, don't try any more. */
53 bool broken; 62 bool broken;
54 63
64 /* Host supports indirect buffers */
65 bool indirect;
66
55 /* Number of free buffers */ 67 /* Number of free buffers */
56 unsigned int num_free; 68 unsigned int num_free;
57 /* Head of free buffer list. */ 69 /* Head of free buffer list. */
@@ -76,6 +88,55 @@ struct vring_virtqueue
76 88
77#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 89#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
78 90
91/* Set up an indirect table of descriptors and add it to the queue. */
92static int vring_add_indirect(struct vring_virtqueue *vq,
93 struct scatterlist sg[],
94 unsigned int out,
95 unsigned int in)
96{
97 struct vring_desc *desc;
98 unsigned head;
99 int i;
100
101 desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC);
102 if (!desc)
103 return vq->vring.num;
104
105 /* Transfer entries from the sg list into the indirect page */
106 for (i = 0; i < out; i++) {
107 desc[i].flags = VRING_DESC_F_NEXT;
108 desc[i].addr = sg_phys(sg);
109 desc[i].len = sg->length;
110 desc[i].next = i+1;
111 sg++;
112 }
113 for (; i < (out + in); i++) {
114 desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
115 desc[i].addr = sg_phys(sg);
116 desc[i].len = sg->length;
117 desc[i].next = i+1;
118 sg++;
119 }
120
121 /* Last one doesn't continue. */
122 desc[i-1].flags &= ~VRING_DESC_F_NEXT;
123 desc[i-1].next = 0;
124
125 /* We're about to use a buffer */
126 vq->num_free--;
127
128 /* Use a single buffer which doesn't continue */
129 head = vq->free_head;
130 vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
131 vq->vring.desc[head].addr = virt_to_phys(desc);
132 vq->vring.desc[head].len = i * sizeof(struct vring_desc);
133
134 /* Update free pointer */
135 vq->free_head = vq->vring.desc[head].next;
136
137 return head;
138}
139
79static int vring_add_buf(struct virtqueue *_vq, 140static int vring_add_buf(struct virtqueue *_vq,
80 struct scatterlist sg[], 141 struct scatterlist sg[],
81 unsigned int out, 142 unsigned int out,
@@ -85,12 +146,21 @@ static int vring_add_buf(struct virtqueue *_vq,
85 struct vring_virtqueue *vq = to_vvq(_vq); 146 struct vring_virtqueue *vq = to_vvq(_vq);
86 unsigned int i, avail, head, uninitialized_var(prev); 147 unsigned int i, avail, head, uninitialized_var(prev);
87 148
149 START_USE(vq);
150
88 BUG_ON(data == NULL); 151 BUG_ON(data == NULL);
152
153 /* If the host supports indirect descriptor tables, and we have multiple
154 * buffers, then go indirect. FIXME: tune this threshold */
155 if (vq->indirect && (out + in) > 1 && vq->num_free) {
156 head = vring_add_indirect(vq, sg, out, in);
157 if (head != vq->vring.num)
158 goto add_head;
159 }
160
89 BUG_ON(out + in > vq->vring.num); 161 BUG_ON(out + in > vq->vring.num);
90 BUG_ON(out + in == 0); 162 BUG_ON(out + in == 0);
91 163
92 START_USE(vq);
93
94 if (vq->num_free < out + in) { 164 if (vq->num_free < out + in) {
95 pr_debug("Can't add buf len %i - avail = %i\n", 165 pr_debug("Can't add buf len %i - avail = %i\n",
96 out + in, vq->num_free); 166 out + in, vq->num_free);
@@ -127,6 +197,7 @@ static int vring_add_buf(struct virtqueue *_vq,
127 /* Update free pointer */ 197 /* Update free pointer */
128 vq->free_head = i; 198 vq->free_head = i;
129 199
200add_head:
130 /* Set token. */ 201 /* Set token. */
131 vq->data[head] = data; 202 vq->data[head] = data;
132 203
@@ -170,6 +241,11 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
170 241
171 /* Put back on free list: find end */ 242 /* Put back on free list: find end */
172 i = head; 243 i = head;
244
245 /* Free the indirect table */
246 if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
247 kfree(phys_to_virt(vq->vring.desc[i].addr));
248
173 while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { 249 while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
174 i = vq->vring.desc[i].next; 250 i = vq->vring.desc[i].next;
175 vq->num_free++; 251 vq->num_free++;
@@ -284,7 +360,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
284 struct virtio_device *vdev, 360 struct virtio_device *vdev,
285 void *pages, 361 void *pages,
286 void (*notify)(struct virtqueue *), 362 void (*notify)(struct virtqueue *),
287 void (*callback)(struct virtqueue *)) 363 void (*callback)(struct virtqueue *),
364 const char *name)
288{ 365{
289 struct vring_virtqueue *vq; 366 struct vring_virtqueue *vq;
290 unsigned int i; 367 unsigned int i;
@@ -303,14 +380,18 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
303 vq->vq.callback = callback; 380 vq->vq.callback = callback;
304 vq->vq.vdev = vdev; 381 vq->vq.vdev = vdev;
305 vq->vq.vq_ops = &vring_vq_ops; 382 vq->vq.vq_ops = &vring_vq_ops;
383 vq->vq.name = name;
306 vq->notify = notify; 384 vq->notify = notify;
307 vq->broken = false; 385 vq->broken = false;
308 vq->last_used_idx = 0; 386 vq->last_used_idx = 0;
309 vq->num_added = 0; 387 vq->num_added = 0;
388 list_add_tail(&vq->vq.list, &vdev->vqs);
310#ifdef DEBUG 389#ifdef DEBUG
311 vq->in_use = false; 390 vq->in_use = false;
312#endif 391#endif
313 392
393 vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
394
314 /* No callback? Tell other side not to bother us. */ 395 /* No callback? Tell other side not to bother us. */
315 if (!callback) 396 if (!callback)
316 vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 397 vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
@@ -327,6 +408,7 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue);
327 408
328void vring_del_virtqueue(struct virtqueue *vq) 409void vring_del_virtqueue(struct virtqueue *vq)
329{ 410{
411 list_del(&vq->list);
330 kfree(to_vvq(vq)); 412 kfree(to_vvq(vq));
331} 413}
332EXPORT_SYMBOL_GPL(vring_del_virtqueue); 414EXPORT_SYMBOL_GPL(vring_del_virtqueue);
@@ -338,6 +420,8 @@ void vring_transport_features(struct virtio_device *vdev)
338 420
339 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 421 for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
340 switch (i) { 422 switch (i) {
423 case VIRTIO_RING_F_INDIRECT_DESC:
424 break;
341 default: 425 default:
342 /* We don't understand this bit. */ 426 /* We don't understand this bit. */
343 clear_bit(i, vdev->features); 427 clear_bit(i, vdev->features);