Diffstat (limited to 'drivers')
-rw-r--r--  drivers/Kconfig | 2
-rw-r--r--  drivers/Makefile | 1
-rw-r--r--  drivers/ata/libata-core.c | 10
-rw-r--r--  drivers/ata/libata-scsi.c | 2
-rw-r--r--  drivers/base/memory.c | 9
-rw-r--r--  drivers/block/DAC960.c | 3
-rw-r--r--  drivers/block/Kconfig | 6
-rw-r--r--  drivers/block/Makefile | 2
-rw-r--r--  drivers/block/cciss.c | 4
-rw-r--r--  drivers/block/cpqarray.c | 4
-rw-r--r--  drivers/block/cryptoloop.c | 12
-rw-r--r--  drivers/block/lguest_blk.c | 421
-rw-r--r--  drivers/block/sunvdc.c | 1
-rw-r--r--  drivers/block/sx8.c | 2
-rw-r--r--  drivers/block/ub.c | 11
-rw-r--r--  drivers/block/viodasd.c | 2
-rw-r--r--  drivers/block/virtio_blk.c | 308
-rw-r--r--  drivers/bluetooth/Kconfig | 35
-rw-r--r--  drivers/bluetooth/Makefile | 4
-rw-r--r--  drivers/bluetooth/bluecard_cs.c | 5
-rw-r--r--  drivers/bluetooth/bpa10x.c | 624
-rw-r--r--  drivers/bluetooth/bt3c_cs.c | 5
-rw-r--r--  drivers/bluetooth/btsdio.c | 406
-rw-r--r--  drivers/bluetooth/btuart_cs.c | 5
-rw-r--r--  drivers/bluetooth/btusb.c | 564
-rw-r--r--  drivers/bluetooth/dtl1_cs.c | 5
-rw-r--r--  drivers/bluetooth/hci_bcsp.c | 3
-rw-r--r--  drivers/bluetooth/hci_ldisc.c | 8
-rw-r--r--  drivers/bluetooth/hci_ll.c | 531
-rw-r--r--  drivers/bluetooth/hci_uart.h | 8
-rw-r--r--  drivers/char/Kconfig | 4
-rw-r--r--  drivers/char/Makefile | 2
-rw-r--r--  drivers/char/cyclades.c | 2
-rw-r--r--  drivers/char/hvc_lguest.c | 177
-rw-r--r--  drivers/char/virtio_console.c | 225
-rw-r--r--  drivers/firewire/fw-ohci.c | 13
-rw-r--r--  drivers/ide/cris/ide-cris.c | 4
-rw-r--r--  drivers/ide/ide-probe.c | 5
-rw-r--r--  drivers/ide/ide-taskfile.c | 2
-rw-r--r--  drivers/ide/mips/au1xxx-ide.c | 6
-rw-r--r--  drivers/ieee1394/dma.c | 4
-rw-r--r--  drivers/ieee1394/sbp2.c | 2
-rw-r--r--  drivers/infiniband/core/cma.c | 160
-rw-r--r--  drivers/infiniband/core/umem.c | 11
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 8
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h | 1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_hca.c | 1
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c | 20
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mrmw.c | 63
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qp.c | 4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_dma.c | 4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mr.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 16
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c | 53
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_doorbell.h | 13
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c | 21
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c | 24
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 45
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c | 11
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 15
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 114
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 52
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 4
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c | 8
-rw-r--r--  drivers/input/keyboard/bf54x-keys.c | 1
-rw-r--r--  drivers/input/mouse/appletouch.c | 25
-rw-r--r--  drivers/input/serio/i8042.c | 4
-rw-r--r--  drivers/input/serio/i8042.h | 22
-rw-r--r--  drivers/input/touchscreen/Kconfig | 6
-rw-r--r--  drivers/input/touchscreen/usbtouchscreen.c | 36
-rw-r--r--  drivers/kvm/Kconfig | 4
-rw-r--r--  drivers/kvm/kvm_main.c | 37
-rw-r--r--  drivers/kvm/lapic.c | 38
-rw-r--r--  drivers/kvm/mmu.c | 3
-rw-r--r--  drivers/kvm/vmx.c | 16
-rw-r--r--  drivers/kvm/x86_emulate.c | 77
-rw-r--r--  drivers/lguest/Kconfig | 13
-rw-r--r--  drivers/lguest/Makefile | 10
-rw-r--r--  drivers/lguest/core.c | 568
-rw-r--r--  drivers/lguest/hypercalls.c | 177
-rw-r--r--  drivers/lguest/interrupts_and_traps.c | 125
-rw-r--r--  drivers/lguest/io.c | 626
-rw-r--r--  drivers/lguest/lg.h | 189
-rw-r--r--  drivers/lguest/lguest.c | 1108
-rw-r--r--  drivers/lguest/lguest_asm.S | 93
-rw-r--r--  drivers/lguest/lguest_bus.c | 218
-rw-r--r--  drivers/lguest/lguest_device.c | 373
-rw-r--r--  drivers/lguest/lguest_user.c | 138
-rw-r--r--  drivers/lguest/page_tables.c | 250
-rw-r--r--  drivers/lguest/segments.c | 28
-rw-r--r--  drivers/lguest/x86/core.c | 577
-rw-r--r--  drivers/lguest/x86/switcher_32.S (renamed from drivers/lguest/switcher.S) | 7
-rw-r--r--  drivers/md/bitmap.c | 2
-rw-r--r--  drivers/md/dm-crypt.c | 21
-rw-r--r--  drivers/md/raid5.c | 17
-rw-r--r--  drivers/media/common/ir-keymaps.c | 70
-rw-r--r--  drivers/media/common/saa7146_core.c | 3
-rw-r--r--  drivers/media/dvb/cinergyT2/cinergyT2.c | 42
-rw-r--r--  drivers/media/dvb/dvb-core/dvb_ca_en50221.c | 2
-rw-r--r--  drivers/media/dvb/dvb-usb/dib0700_devices.c | 2
-rw-r--r--  drivers/media/radio/miropcm20-radio.c | 1
-rw-r--r--  drivers/media/radio/radio-gemtek.c | 1
-rw-r--r--  drivers/media/video/arv.c | 1
-rw-r--r--  drivers/media/video/bt8xx/bttv-driver.c | 3
-rw-r--r--  drivers/media/video/bw-qcam.c | 1
-rw-r--r--  drivers/media/video/c-qcam.c | 1
-rw-r--r--  drivers/media/video/cpia.c | 5
-rw-r--r--  drivers/media/video/cpia2/cpia2_v4l.c | 5
-rw-r--r--  drivers/media/video/cx23885/cx23885-core.c | 6
-rw-r--r--  drivers/media/video/cx88/cx88-alsa.c | 86
-rw-r--r--  drivers/media/video/cx88/cx88-blackbird.c | 57
-rw-r--r--  drivers/media/video/cx88/cx88-dvb.c | 3
-rw-r--r--  drivers/media/video/cx88/cx88-mpeg.c | 133
-rw-r--r--  drivers/media/video/cx88/cx88-video.c | 1
-rw-r--r--  drivers/media/video/cx88/cx88-vp3054-i2c.c | 16
-rw-r--r--  drivers/media/video/cx88/cx88.h | 24
-rw-r--r--  drivers/media/video/em28xx/em28xx-core.c | 3
-rw-r--r--  drivers/media/video/em28xx/em28xx-video.c | 2
-rw-r--r--  drivers/media/video/et61x251/et61x251_core.c | 1
-rw-r--r--  drivers/media/video/ir-kbd-i2c.c | 1
-rw-r--r--  drivers/media/video/ivtv/ivtv-driver.c | 11
-rw-r--r--  drivers/media/video/ivtv/ivtv-driver.h | 1
-rw-r--r--  drivers/media/video/ivtv/ivtv-fileops.c | 8
-rw-r--r--  drivers/media/video/ivtv/ivtv-ioctl.c | 13
-rw-r--r--  drivers/media/video/ivtv/ivtv-streams.c | 116
-rw-r--r--  drivers/media/video/ivtv/ivtv-streams.h | 1
-rw-r--r--  drivers/media/video/ivtv/ivtv-udma.c | 4
-rw-r--r--  drivers/media/video/ivtv/ivtv-yuv.c | 160
-rw-r--r--  drivers/media/video/ivtv/ivtv-yuv.h | 1
-rw-r--r--  drivers/media/video/ivtv/ivtvfb.c | 92
-rw-r--r--  drivers/media/video/meye.c | 1
-rw-r--r--  drivers/media/video/ov511.c | 1
-rw-r--r--  drivers/media/video/planb.c | 1
-rw-r--r--  drivers/media/video/pms.c | 1
-rw-r--r--  drivers/media/video/pvrusb2/pvrusb2-encoder.c | 6
-rw-r--r--  drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h | 11
-rw-r--r--  drivers/media/video/pvrusb2/pvrusb2-hdw.c | 3
-rw-r--r--  drivers/media/video/pvrusb2/pvrusb2-v4l2.c | 3
-rw-r--r--  drivers/media/video/pwc/pwc-if.c | 1
-rw-r--r--  drivers/media/video/saa7134/saa6752hs.c | 111
-rw-r--r--  drivers/media/video/saa7134/saa7134-core.c | 44
-rw-r--r--  drivers/media/video/saa7134/saa7134-empress.c | 12
-rw-r--r--  drivers/media/video/saa7134/saa7134-input.c | 29
-rw-r--r--  drivers/media/video/saa7134/saa7134-tvaudio.c | 32
-rw-r--r--  drivers/media/video/saa7134/saa7134-video.c | 28
-rw-r--r--  drivers/media/video/saa7134/saa7134.h | 7
-rw-r--r--  drivers/media/video/se401.c | 1
-rw-r--r--  drivers/media/video/sn9c102/sn9c102_core.c | 1
-rw-r--r--  drivers/media/video/stradis.c | 1
-rw-r--r--  drivers/media/video/stv680.c | 1
-rw-r--r--  drivers/media/video/tuner-core.c | 2
-rw-r--r--  drivers/media/video/usbvideo/usbvideo.c | 1
-rw-r--r--  drivers/media/video/usbvideo/vicam.c | 1
-rw-r--r--  drivers/media/video/usbvision/usbvision-video.c | 3
-rw-r--r--  drivers/media/video/v4l2-common.c | 2
-rw-r--r--  drivers/media/video/videobuf-core.c | 2
-rw-r--r--  drivers/media/video/videobuf-dma-sg.c | 9
-rw-r--r--  drivers/media/video/videocodec.c | 4
-rw-r--r--  drivers/media/video/videodev.c | 42
-rw-r--r--  drivers/media/video/vivi.c | 1
-rw-r--r--  drivers/media/video/w9966.c | 1
-rw-r--r--  drivers/media/video/w9968cf.c | 1
-rw-r--r--  drivers/media/video/zc0301/zc0301_core.c | 1
-rw-r--r--  drivers/media/video/zoran_card.c | 10
-rw-r--r--  drivers/media/video/zoran_driver.c | 2
-rw-r--r--  drivers/mmc/card/queue.c | 15
-rw-r--r--  drivers/mmc/host/at91_mci.c | 8
-rw-r--r--  drivers/mmc/host/au1xmmc.c | 11
-rw-r--r--  drivers/mmc/host/imxmmc.c | 2
-rw-r--r--  drivers/mmc/host/mmc_spi.c | 8
-rw-r--r--  drivers/mmc/host/mmci.h | 2
-rw-r--r--  drivers/mmc/host/omap.c | 4
-rw-r--r--  drivers/mmc/host/sdhci.c | 3
-rw-r--r--  drivers/mmc/host/tifm_sd.c | 8
-rw-r--r--  drivers/mmc/host/wbsd.c | 6
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0001.c | 146
-rw-r--r--  drivers/mtd/nand/Kconfig | 2
-rw-r--r--  drivers/mtd/nand/diskonchip.c | 4
-rw-r--r--  drivers/mtd/nand/nand_base.c | 6
-rw-r--r--  drivers/mtd/nand/nand_ecc.c | 2
-rw-r--r--  drivers/mtd/nand/nandsim.c | 2
-rw-r--r--  drivers/mtd/nand/s3c2410.c | 14
-rw-r--r--  drivers/mtd/onenand/onenand_sim.c | 50
-rw-r--r--  drivers/net/Kconfig | 6
-rw-r--r--  drivers/net/Makefile | 2
-rw-r--r--  drivers/net/cpmac.c | 2
-rw-r--r--  drivers/net/fec.c | 24
-rw-r--r--  drivers/net/lguest_net.c | 555
-rw-r--r--  drivers/net/mlx4/fw.c | 2
-rw-r--r--  drivers/net/mlx4/icm.c | 14
-rw-r--r--  drivers/net/mv643xx_eth.c | 1
-rw-r--r--  drivers/net/niu.c | 34
-rw-r--r--  drivers/net/ppp_mppe.c | 6
-rw-r--r--  drivers/net/r8169.c | 406
-rw-r--r--  drivers/net/tg3.c | 95
-rw-r--r--  drivers/net/tg3.h | 11
-rw-r--r--  drivers/net/virtio_net.c | 435
-rw-r--r--  drivers/parisc/ccio-dma.c | 1
-rw-r--r--  drivers/parisc/lba_pci.c | 51
-rw-r--r--  drivers/parisc/pdc_stable.c | 11
-rw-r--r--  drivers/parisc/sba_iommu.c | 5
-rw-r--r--  drivers/parisc/superio.c | 4
-rw-r--r--  drivers/pci/Makefile | 3
-rw-r--r--  drivers/pci/dmar.c | 329
-rw-r--r--  drivers/pci/intel-iommu.c | 2271
-rw-r--r--  drivers/pci/intel-iommu.h | 325
-rw-r--r--  drivers/pci/iova.c | 394
-rw-r--r--  drivers/pci/iova.h | 63
-rw-r--r--  drivers/pci/pci.h | 1
-rw-r--r--  drivers/pci/probe.c | 14
-rw-r--r--  drivers/pci/search.c | 34
-rw-r--r--  drivers/power/apm_power.c | 141
-rw-r--r--  drivers/s390/char/raw3270.c | 26
-rw-r--r--  drivers/s390/char/tape_class.c | 19
-rw-r--r--  drivers/s390/char/tape_class.h | 4
-rw-r--r--  drivers/s390/char/vmlogrdr.c | 15
-rw-r--r--  drivers/s390/cio/chp.c | 12
-rw-r--r--  drivers/s390/cio/css.c | 9
-rw-r--r--  drivers/s390/scsi/zfcp_aux.c | 1
-rw-r--r--  drivers/s390/scsi/zfcp_def.h | 4
-rw-r--r--  drivers/s390/scsi/zfcp_erp.c | 10
-rw-r--r--  drivers/sbus/char/vfc_dev.c | 2
-rw-r--r--  drivers/scsi/3w-9xxx.c | 4
-rw-r--r--  drivers/scsi/3w-xxxx.c | 2
-rw-r--r--  drivers/scsi/NCR5380.c | 6
-rw-r--r--  drivers/scsi/NCR53C9x.c | 4
-rw-r--r--  drivers/scsi/NCR53c406a.c | 6
-rw-r--r--  drivers/scsi/aacraid/aachba.c | 2
-rw-r--r--  drivers/scsi/aha152x.c | 2
-rw-r--r--  drivers/scsi/aha1542.c | 8
-rw-r--r--  drivers/scsi/arcmsr/arcmsr_hba.c | 4
-rw-r--r--  drivers/scsi/atari_NCR5380.c | 6
-rw-r--r--  drivers/scsi/eata_pio.c | 4
-rw-r--r--  drivers/scsi/fd_mcs.c | 6
-rw-r--r--  drivers/scsi/fdomain.c | 7
-rw-r--r--  drivers/scsi/gdth.c | 6
-rw-r--r--  drivers/scsi/ibmmca.c | 2
-rw-r--r--  drivers/scsi/ide-scsi.c | 12
-rw-r--r--  drivers/scsi/imm.c | 8
-rw-r--r--  drivers/scsi/in2000.c | 4
-rw-r--r--  drivers/scsi/ipr.c | 19
-rw-r--r--  drivers/scsi/ips.c | 6
-rw-r--r--  drivers/scsi/iscsi_tcp.c | 15
-rw-r--r--  drivers/scsi/megaraid.c | 8
-rw-r--r--  drivers/scsi/megaraid/megaraid_mbox.c | 12
-rw-r--r--  drivers/scsi/oktagon_esp.c | 6
-rw-r--r--  drivers/scsi/osst.c | 32
-rw-r--r--  drivers/scsi/pcmcia/nsp_cs.h | 2
-rw-r--r--  drivers/scsi/pcmcia/sym53c500_cs.c | 6
-rw-r--r--  drivers/scsi/ppa.c | 7
-rw-r--r--  drivers/scsi/ps3rom.c | 6
-rw-r--r--  drivers/scsi/qlogicfas408.c | 2
-rw-r--r--  drivers/scsi/scsi_debug.c | 4
-rw-r--r--  drivers/scsi/scsi_lib.c | 13
-rw-r--r--  drivers/scsi/seagate.c | 8
-rw-r--r--  drivers/scsi/sg.c | 30
-rw-r--r--  drivers/scsi/st.c | 8
-rw-r--r--  drivers/scsi/sun3_NCR5380.c | 3
-rw-r--r--  drivers/scsi/sym53c416.c | 2
-rw-r--r--  drivers/scsi/tmscsim.c | 5
-rw-r--r--  drivers/scsi/ultrastor.c | 2
-rw-r--r--  drivers/scsi/wd33c93.c | 6
-rw-r--r--  drivers/scsi/wd7000.c | 2
-rw-r--r--  drivers/serial/Kconfig | 2
-rw-r--r--  drivers/serial/mcf.c | 653
-rw-r--r--  drivers/usb/core/message.c | 8
-rw-r--r--  drivers/usb/image/microtek.c | 5
-rw-r--r--  drivers/usb/misc/usbtest.c | 4
-rw-r--r--  drivers/usb/storage/protocol.c | 2
-rw-r--r--  drivers/virtio/Kconfig | 8
-rw-r--r--  drivers/virtio/Makefile | 2
-rw-r--r--  drivers/virtio/config.c | 13
-rw-r--r--  drivers/virtio/virtio.c | 189
-rw-r--r--  drivers/virtio/virtio_ring.c | 313
-rw-r--r--  drivers/watchdog/mpc5200_wdt.c | 3
275 files changed, 10799 insertions, 6497 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 34f40ea0ba..f4076d9e99 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -94,5 +94,5 @@ source "drivers/kvm/Kconfig"
 
 source "drivers/uio/Kconfig"
 
-source "drivers/lguest/Kconfig"
+source "drivers/virtio/Kconfig"
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index cfe38ffff2..560496b433 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -91,3 +91,4 @@ obj-$(CONFIG_HID) += hid/
 obj-$(CONFIG_PPC_PS3) += ps3/
 obj-$(CONFIG_OF) += of/
 obj-$(CONFIG_SSB) += ssb/
+obj-$(CONFIG_VIRTIO) += virtio/
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 629eadbd0e..69092bce1a 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4296,7 +4296,7 @@ void ata_sg_clean(struct ata_queued_cmd *qc)
 			sg_last(sg, qc->orig_n_elem)->length += qc->pad_len;
 		if (pad_buf) {
 			struct scatterlist *psg = &qc->pad_sgent;
-			void *addr = kmap_atomic(psg->page, KM_IRQ0);
+			void *addr = kmap_atomic(sg_page(psg), KM_IRQ0);
 			memcpy(addr + psg->offset, pad_buf, qc->pad_len);
 			kunmap_atomic(addr, KM_IRQ0);
 		}
@@ -4686,11 +4686,11 @@ static int ata_sg_setup(struct ata_queued_cmd *qc)
 		 * data in this function or read data in ata_sg_clean.
 		 */
 		offset = lsg->offset + lsg->length - qc->pad_len;
-		psg->page = nth_page(lsg->page, offset >> PAGE_SHIFT);
+		sg_set_page(psg, nth_page(sg_page(lsg), offset >> PAGE_SHIFT));
 		psg->offset = offset_in_page(offset);
 
 		if (qc->tf.flags & ATA_TFLAG_WRITE) {
-			void *addr = kmap_atomic(psg->page, KM_IRQ0);
+			void *addr = kmap_atomic(sg_page(psg), KM_IRQ0);
 			memcpy(pad_buf, addr + psg->offset, qc->pad_len);
 			kunmap_atomic(addr, KM_IRQ0);
 		}
@@ -4836,7 +4836,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 	if (qc->curbytes == qc->nbytes - qc->sect_size)
 		ap->hsm_task_state = HSM_ST_LAST;
 
-	page = qc->cursg->page;
+	page = sg_page(qc->cursg);
 	offset = qc->cursg->offset + qc->cursg_ofs;
 
 	/* get the current page and offset */
@@ -4988,7 +4988,7 @@ next_sg:
 
 	sg = qc->cursg;
 
-	page = sg->page;
+	page = sg_page(sg);
 	offset = sg->offset + qc->cursg_ofs;
 
 	/* get the current page and offset */
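Aside (not part of the patch): the libata hunks above replace direct `sg->page` access with the `sg_page()`/`sg_set_page()` accessors that came in with chained scatterlists. A minimal sketch of what those helpers roughly do under the 2.6.24-era `page_link` encoding; the `_sketch` names are ours, not kernel API:

#include <linux/scatterlist.h>

/* The low two bits of sg->page_link carry the chain/end markers, so the
 * page pointer must be masked out rather than dereferenced directly. */
static inline struct page *sg_page_sketch(struct scatterlist *sg)
{
	return (struct page *)(sg->page_link & ~0x3UL);
}

static inline void sg_set_page_sketch(struct scatterlist *sg,
				      struct page *page)
{
	unsigned long flags = sg->page_link & 0x3UL;

	/* Install the new page while preserving the marker bits. */
	sg->page_link = (unsigned long)page | flags;
}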
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 9fbb39cd0f..5b758b9ad0 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1544,7 +1544,7 @@ static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out)
 	struct scatterlist *sg = scsi_sglist(cmd);
 
 	if (sg) {
-		buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+		buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
 		buflen = sg->length;
 	} else {
 		buf = NULL;
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c41d0728ef..7868707c7e 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -137,7 +137,7 @@ static ssize_t show_mem_state(struct sys_device *dev, char *buf)
 	return len;
 }
 
-static inline int memory_notify(unsigned long val, void *v)
+int memory_notify(unsigned long val, void *v)
 {
 	return blocking_notifier_call_chain(&memory_chain, val, v);
 }
@@ -183,7 +183,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
 		break;
 	case MEM_OFFLINE:
 		mem->state = MEM_GOING_OFFLINE;
-		memory_notify(MEM_GOING_OFFLINE, NULL);
 		start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
 		ret = remove_memory(start_paddr,
 				PAGES_PER_SECTION << PAGE_SHIFT);
@@ -191,7 +190,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
 			mem->state = old_state;
 			break;
 		}
-		memory_notify(MEM_MAPPING_INVALID, NULL);
 		break;
 	default:
 		printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
@@ -199,11 +197,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
 		WARN_ON(1);
 		ret = -EINVAL;
 	}
-	/*
-	 * For now, only notify on successful memory operations
-	 */
-	if (!ret)
-		memory_notify(action, NULL);
 
 	return ret;
 }
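Aside (not part of the patch): un-inlining and exporting `memory_notify()` above moves the hotplug notifications out of `memory_block_action()` so the hotplug core can fire them at the right points. A hedged sketch of a consumer, using the `register_memory_notifier()` interface that already existed in this era (the callback and variable names are ours):

#include <linux/memory.h>
#include <linux/notifier.h>

static int sketch_memory_callback(struct notifier_block *self,
				  unsigned long action, void *arg)
{
	/* Events such as MEM_GOING_OFFLINE arrive through memory_chain. */
	switch (action) {
	case MEM_GOING_OFFLINE:
		/* Drop any references into the section going away. */
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block sketch_memory_nb = {
	.notifier_call = sketch_memory_callback,
};

/* Registered once at init time: register_memory_notifier(&sketch_memory_nb); */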
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 84d6aa500e..9030c373ce 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -44,6 +44,7 @@
 #include <linux/init.h>
 #include <linux/jiffies.h>
 #include <linux/random.h>
+#include <linux/scatterlist.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include "DAC960.h"
@@ -345,6 +346,7 @@ static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller)
 	  Command->V1.ScatterGatherList =
 		(DAC960_V1_ScatterGatherSegment_T *)ScatterGatherCPU;
 	  Command->V1.ScatterGatherListDMA = ScatterGatherDMA;
+	  sg_init_table(Command->cmd_sglist, DAC960_V1_ScatterGatherLimit);
 	} else {
 	  Command->cmd_sglist = Command->V2.ScatterList;
 	  Command->V2.ScatterGatherList =
@@ -353,6 +355,7 @@ static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller)
 	  Command->V2.RequestSense =
 		(DAC960_SCSI_RequestSense_T *)RequestSenseCPU;
 	  Command->V2.RequestSenseDMA = RequestSenseDMA;
+	  sg_init_table(Command->cmd_sglist, DAC960_V2_ScatterGatherLimit);
 	}
       }
       return true;
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index ce4b1e484e..4d0119ea9e 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -425,4 +425,10 @@ config XEN_BLKDEV_FRONTEND
 	  block device driver. It communicates with a back-end driver
 	  in another domain which drives the actual block device.
 
+config VIRTIO_BLK
+	tristate "Virtio block driver (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && VIRTIO
+	---help---
+	  This is the virtual block driver for lguest.  Say Y or M.
+
 endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 014e72121b..7691505a2e 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -25,10 +25,10 @@ obj-$(CONFIG_SUNVDC) += sunvdc.o
 obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
 obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
 obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
+obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
 
 obj-$(CONFIG_VIODASD) += viodasd.o
 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
 obj-$(CONFIG_BLK_DEV_UB) += ub.o
 
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
-obj-$(CONFIG_LGUEST_BLOCK) += lguest_blk.o
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 7c2cfde08f..5a6fe17fc6 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2610,7 +2610,7 @@ static void do_cciss_request(struct request_queue *q)
 	       (int)creq->nr_sectors);
 #endif /* CCISS_DEBUG */
 
-	memset(tmp_sg, 0, sizeof(tmp_sg));
+	sg_init_table(tmp_sg, MAXSGENTRIES);
 	seg = blk_rq_map_sg(q, creq, tmp_sg);
 
 	/* get the DMA records for the setup */
@@ -2621,7 +2621,7 @@ static void do_cciss_request(struct request_queue *q)
 
 	for (i = 0; i < seg; i++) {
 		c->SG[i].Len = tmp_sg[i].length;
-		temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page,
+		temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]),
 					  tmp_sg[i].offset,
 					  tmp_sg[i].length, dir);
 		c->SG[i].Addr.lower = temp64.val32.lower;
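Aside (not part of the patch): in the cciss hunk above, `sg_init_table()` replaces a plain `memset()` because chained scatterlists need an explicit end marker for `blk_rq_map_sg()` and the DMA layer to stop at. A rough approximation of the helper under 2.6.24 semantics (debug-build poisoning omitted; the `_sketch` name is ours):

#include <linux/scatterlist.h>
#include <linux/string.h>

/* Zero the entries, then tag the last one so list walkers know where
 * the table ends even when tables can chain into one another. */
static void sg_init_table_sketch(struct scatterlist *sgl, unsigned int nents)
{
	memset(sgl, 0, sizeof(*sgl) * nents);
	sg_mark_end(&sgl[nents - 1]);
}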
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 568603d304..c8132d9587 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -37,6 +37,7 @@
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
 #include <linux/genhd.h>
+#include <linux/scatterlist.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
@@ -918,6 +919,7 @@ queue_next:
 DBGPX(
 	printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors);
 );
+	sg_init_table(tmp_sg, SG_MAX);
 	seg = blk_rq_map_sg(q, creq, tmp_sg);
 
 	/* Now do all the DMA Mappings */
@@ -929,7 +931,7 @@ DBGPX(
 	{
 		c->req.sg[i].size = tmp_sg[i].length;
 		c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev,
-						 tmp_sg[i].page,
+						 sg_page(&tmp_sg[i]),
 						 tmp_sg[i].offset,
 						 tmp_sg[i].length, dir);
 	}
diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c
index 40535036e8..1b58b01079 100644
--- a/drivers/block/cryptoloop.c
+++ b/drivers/block/cryptoloop.c
@@ -26,6 +26,7 @@
 #include <linux/crypto.h>
 #include <linux/blkdev.h>
 #include <linux/loop.h>
+#include <linux/scatterlist.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
@@ -119,14 +120,17 @@ cryptoloop_transfer(struct loop_device *lo, int cmd,
 		.tfm = tfm,
 		.flags = CRYPTO_TFM_REQ_MAY_SLEEP,
 	};
-	struct scatterlist sg_out = { NULL, };
-	struct scatterlist sg_in = { NULL, };
+	struct scatterlist sg_out;
+	struct scatterlist sg_in;
 
 	encdec_cbc_t encdecfunc;
 	struct page *in_page, *out_page;
 	unsigned in_offs, out_offs;
 	int err;
 
+	sg_init_table(&sg_out, 1);
+	sg_init_table(&sg_in, 1);
+
 	if (cmd == READ) {
 		in_page = raw_page;
 		in_offs = raw_off;
@@ -146,11 +150,11 @@ cryptoloop_transfer(struct loop_device *lo, int cmd,
 		u32 iv[4] = { 0, };
 		iv[0] = cpu_to_le32(IV & 0xffffffff);
 
-		sg_in.page = in_page;
+		sg_set_page(&sg_in, in_page);
 		sg_in.offset = in_offs;
 		sg_in.length = sz;
 
-		sg_out.page = out_page;
+		sg_set_page(&sg_out, out_page);
 		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
diff --git a/drivers/block/lguest_blk.c b/drivers/block/lguest_blk.c
deleted file mode 100644
index fa8e42341b..0000000000
--- a/drivers/block/lguest_blk.c
+++ /dev/null
@@ -1,421 +0,0 @@
-/*D:400
- * The Guest block driver
- *
- * This is a simple block driver, which appears as /dev/lgba, lgbb, lgbc etc.
- * The mechanism is simple: we place the information about the request in the
- * device page, then use SEND_DMA (containing the data for a write, or an empty
- * "ping" DMA for a read).
- :*/
-/* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-//#define DEBUG
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/blkdev.h>
-#include <linux/interrupt.h>
-#include <linux/lguest_bus.h>
-
-static char next_block_index = 'a';
-
-/*D:420 Here is the structure which holds all the information we need about
- * each Guest block device.
- *
- * I'm sure at this stage, you're wondering "hey, where was the adventure I was
- * promised?" and thinking "Rusty sucks, I shall say nasty things about him on
- * my blog".  I think Real adventures have boring bits, too, and you're in the
- * middle of one.  But it gets better.  Just not quite yet. */
-struct blockdev
-{
-	/* The block queue infrastructure wants a spinlock: it is held while it
-	 * calls our block request function.  We grab it in our interrupt
-	 * handler so the responses don't mess with new requests. */
-	spinlock_t lock;
-
-	/* The disk structure registered with kernel. */
-	struct gendisk *disk;
-
-	/* The major device number for this disk, and the interrupt.  We only
-	 * really keep them here for completeness; we'd need them if we
-	 * supported device unplugging. */
-	int major;
-	int irq;
-
-	/* The physical address of this device's memory page */
-	unsigned long phys_addr;
-	/* The mapped memory page for convenient acces. */
-	struct lguest_block_page *lb_page;
-
-	/* We only have a single request outstanding at a time: this is it. */
-	struct lguest_dma dma;
-	struct request *req;
-};
-
-/*D:495 We originally used end_request() throughout the driver, but it turns
- * out that end_request() is deprecated, and doesn't actually end the request
- * (which seems like a good reason to deprecate it!).  It simply ends the first
- * bio.  So if we had 3 bios in a "struct request" we would do all 3,
- * end_request(), do 2, end_request(), do 1 and end_request(): twice as much
- * work as we needed to do.
- *
- * This reinforced to me that I do not understand the block layer.
- *
- * Nonetheless, Jens Axboe gave me this nice helper to end all chunks of a
- * request.  This improved disk speed by 130%. */
-static void end_entire_request(struct request *req, int uptodate)
-{
-	if (end_that_request_first(req, uptodate, req->hard_nr_sectors))
-		BUG();
-	add_disk_randomness(req->rq_disk);
-	blkdev_dequeue_request(req);
-	end_that_request_last(req, uptodate);
-}
-
-/* I'm told there are only two stories in the world worth telling: love and
- * hate.  So there used to be a love scene here like this:
- *
- *  Launcher: We could make beautiful I/O together, you and I.
- *  Guest: My, that's a big disk!
- *
- * Unfortunately, it was just too raunchy for our otherwise-gentle tale. */
-
-/*D:490 This is the interrupt handler, called when a block read or write has
- * been completed for us. */
-static irqreturn_t lgb_irq(int irq, void *_bd)
-{
-	/* We handed our "struct blockdev" as the argument to request_irq(), so
-	 * it is passed through to us here.  This tells us which device we're
-	 * dealing with in case we have more than one. */
-	struct blockdev *bd = _bd;
-	unsigned long flags;
-
-	/* We weren't doing anything?  Strange, but could happen if we shared
-	 * interrupts (we don't!). */
-	if (!bd->req) {
-		pr_debug("No work!\n");
-		return IRQ_NONE;
-	}
-
-	/* Not done yet?  That's equally strange. */
-	if (!bd->lb_page->result) {
-		pr_debug("No result!\n");
-		return IRQ_NONE;
-	}
-
-	/* We have to grab the lock before ending the request. */
-	spin_lock_irqsave(&bd->lock, flags);
-	/* "result" is 1 for success, 2 for failure: end_entire_request() wants
-	 * to know whether this succeeded or not. */
-	end_entire_request(bd->req, bd->lb_page->result == 1);
-	/* Clear out request, it's done. */
-	bd->req = NULL;
-	/* Reset incoming DMA for next time. */
-	bd->dma.used_len = 0;
-	/* Ready for more reads or writes */
-	blk_start_queue(bd->disk->queue);
-	spin_unlock_irqrestore(&bd->lock, flags);
-
-	/* The interrupt was for us, we dealt with it. */
-	return IRQ_HANDLED;
-}
-
-/*D:480 The block layer's "struct request" contains a number of "struct bio"s,
- * each of which contains "struct bio_vec"s, each of which contains a page, an
- * offset and a length.
- *
- * Fortunately there are iterators to help us walk through the "struct
- * request".  Even more fortunately, there were plenty of places to steal the
- * code from.  We pack the "struct request" into our "struct lguest_dma" and
- * return the total length. */
-static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma)
-{
-	unsigned int i = 0, len = 0;
-	struct req_iterator iter;
-	struct bio_vec *bvec;
-
-	rq_for_each_segment(bvec, req, iter) {
-		/* We told the block layer not to give us too many. */
-		BUG_ON(i == LGUEST_MAX_DMA_SECTIONS);
-		/* If we had a zero-length segment, it would look like
-		 * the end of the data referred to by the "struct
-		 * lguest_dma", so make sure that doesn't happen. */
-		BUG_ON(!bvec->bv_len);
-		/* Convert page & offset to a physical address */
-		dma->addr[i] = page_to_phys(bvec->bv_page)
-			+ bvec->bv_offset;
-		dma->len[i] = bvec->bv_len;
-		len += bvec->bv_len;
-		i++;
-	}
-	/* If the array isn't full, we mark the end with a 0 length */
-	if (i < LGUEST_MAX_DMA_SECTIONS)
-		dma->len[i] = 0;
-	return len;
-}
-
-/* This creates an empty DMA, useful for prodding the Host without sending data
- * (ie. when we want to do a read) */
-static void empty_dma(struct lguest_dma *dma)
-{
-	dma->len[0] = 0;
-}
-
-/*D:470 Setting up a request is fairly easy: */
-static void setup_req(struct blockdev *bd,
-		      int type, struct request *req, struct lguest_dma *dma)
-{
-	/* The type is 1 (write) or 0 (read). */
-	bd->lb_page->type = type;
-	/* The sector on disk where the read or write starts. */
-	bd->lb_page->sector = req->sector;
-	/* The result is initialized to 0 (unfinished). */
-	bd->lb_page->result = 0;
-	/* The current request (so we can end it in the interrupt handler). */
-	bd->req = req;
-	/* The number of bytes: returned as a side-effect of req_to_dma(),
-	 * which packs the block layer's "struct request" into our "struct
-	 * lguest_dma" */
-	bd->lb_page->bytes = req_to_dma(req, dma);
-}
-
-/*D:450 Write is pretty straightforward: we pack the request into a "struct
- * lguest_dma", then use SEND_DMA to send the request. */
-static void do_write(struct blockdev *bd, struct request *req)
-{
-	struct lguest_dma send;
-
-	pr_debug("lgb: WRITE sector %li\n", (long)req->sector);
-	setup_req(bd, 1, req, &send);
-
-	lguest_send_dma(bd->phys_addr, &send);
-}
-
-/* Read is similar to write, except we pack the request into our receive
- * "struct lguest_dma" and send through an empty DMA just to tell the Host that
- * there's a request pending. */
-static void do_read(struct blockdev *bd, struct request *req)
-{
-	struct lguest_dma ping;
-
-	pr_debug("lgb: READ sector %li\n", (long)req->sector);
-	setup_req(bd, 0, req, &bd->dma);
-
-	empty_dma(&ping);
-	lguest_send_dma(bd->phys_addr, &ping);
-}
-
-/*D:440 This where requests come in: we get handed the request queue and are
- * expected to pull a "struct request" off it until we've finished them or
- * we're waiting for a reply: */
-static void do_lgb_request(struct request_queue *q)
-{
-	struct blockdev *bd;
-	struct request *req;
-
-again:
-	/* This sometimes returns NULL even on the very first time around.  I
-	 * wonder if it's something to do with letting elves handle the request
-	 * queue... */
-	req = elv_next_request(q);
-	if (!req)
-		return;
-
-	/* We attached the struct blockdev to the disk: get it back */
-	bd = req->rq_disk->private_data;
-	/* Sometimes we get repeated requests after blk_stop_queue(), but we
-	 * can only handle one at a time. */
-	if (bd->req)
-		return;
-
-	/* We only do reads and writes: no tricky business! */
-	if (!blk_fs_request(req)) {
-		pr_debug("Got non-command 0x%08x\n", req->cmd_type);
-		req->errors++;
-		end_entire_request(req, 0);
-		goto again;
-	}
-
-	if (rq_data_dir(req) == WRITE)
-		do_write(bd, req);
-	else
-		do_read(bd, req);
-
-	/* We've put out the request, so stop any more coming in until we get
-	 * an interrupt, which takes us to lgb_irq() to re-enable the queue. */
-	blk_stop_queue(q);
-}
-
-/*D:430 This is the "struct block_device_operations" we attach to the disk at
- * the end of lguestblk_probe().  It doesn't seem to want much. */
-static struct block_device_operations lguestblk_fops = {
-	.owner = THIS_MODULE,
-};
-
-/*D:425 Setting up a disk device seems to involve a lot of code.  I'm not sure
- * quite why.  I do know that the IDE code sent two or three of the maintainers
- * insane, perhaps this is the fringe of the same disease?
- *
- * As in the console code, the probe function gets handed the generic
- * lguest_device from lguest_bus.c: */
-static int lguestblk_probe(struct lguest_device *lgdev)
-{
-	struct blockdev *bd;
-	int err;
-	int irqflags = IRQF_SHARED;
-
-	/* First we allocate our own "struct blockdev" and initialize the easy
-	 * fields. */
-	bd = kmalloc(sizeof(*bd), GFP_KERNEL);
-	if (!bd)
-		return -ENOMEM;
-
-	spin_lock_init(&bd->lock);
-	bd->irq = lgdev_irq(lgdev);
-	bd->req = NULL;
-	bd->dma.used_len = 0;
-	bd->dma.len[0] = 0;
-	/* The descriptor in the lguest_devices array provided by the Host
-	 * gives the Guest the physical page number of the device's page. */
-	bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT);
-
-	/* We use lguest_map() to get a pointer to the device page */
-	bd->lb_page = lguest_map(bd->phys_addr, 1);
-	if (!bd->lb_page) {
-		err = -ENOMEM;
-		goto out_free_bd;
-	}
-
-	/* We need a major device number: 0 means "assign one dynamically". */
-	bd->major = register_blkdev(0, "lguestblk");
-	if (bd->major < 0) {
-		err = bd->major;
-		goto out_unmap;
-	}
-
-	/* This allocates a "struct gendisk" where we pack all the information
-	 * about the disk which the rest of Linux sees.  The argument is the
-	 * number of minor devices desired: we need one minor for the main
-	 * disk, and one for each partition.  Of course, we can't possibly know
-	 * how many partitions are on the disk (add_disk does that).
-	 */
-	bd->disk = alloc_disk(16);
-	if (!bd->disk) {
-		err = -ENOMEM;
-		goto out_unregister_blkdev;
-	}
-
-	/* Every disk needs a queue for requests to come in: we set up the
-	 * queue with a callback function (the core of our driver) and the lock
-	 * to use. */
-	bd->disk->queue = blk_init_queue(do_lgb_request, &bd->lock);
-	if (!bd->disk->queue) {
-		err = -ENOMEM;
-		goto out_put_disk;
-	}
-
-	/* We can only handle a certain number of pointers in our SEND_DMA
-	 * call, so we set that with blk_queue_max_hw_segments().  This is not
-	 * to be confused with blk_queue_max_phys_segments() of course!  I
-	 * know, who could possibly confuse the two?
-	 *
-	 * Well, it's simple to tell them apart: this one seems to work and the
-	 * other one didn't. */
-	blk_queue_max_hw_segments(bd->disk->queue, LGUEST_MAX_DMA_SECTIONS);
-
-	/* Due to technical limitations of our Host (and simple coding) we
-	 * can't have a single buffer which crosses a page boundary.  Tell it
-	 * here.  This means that our maximum request size is 16
-	 * (LGUEST_MAX_DMA_SECTIONS) pages. */
-	blk_queue_segment_boundary(bd->disk->queue, PAGE_SIZE-1);
-
-	/* We name our disk: this becomes the device name when udev does its
-	 * magic thing and creates the device node, such as /dev/lgba.
-	 * next_block_index is a global which starts at 'a'.  Unfortunately
-	 * this simple increment logic means that the 27th disk will be called
-	 * "/dev/lgb{".  In that case, I recommend having at least 29 disks, so
-	 * your /dev directory will be balanced. */
-	sprintf(bd->disk->disk_name, "lgb%c", next_block_index++);
-
-	/* We look to the device descriptor again to see if this device's
-	 * interrupts are expected to be random.  If they are, we tell the irq
-	 * subsystem.  At the moment this bit is always set. */
-	if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS)
-		irqflags |= IRQF_SAMPLE_RANDOM;
-
-	/* Now we have the name and irqflags, we can request the interrupt; we
-	 * give it the "struct blockdev" we have set up to pass to lgb_irq()
-	 * when there is an interrupt. */
-	err = request_irq(bd->irq, lgb_irq, irqflags, bd->disk->disk_name, bd);
-	if (err)
-		goto out_cleanup_queue;
-
-	/* We bind our one-entry DMA pool to the key for this block device so
-	 * the Host can reply to our requests.  The key is equal to the
-	 * physical address of the device's page, which is conveniently
-	 * unique. */
-	err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq);
-	if (err)
-		goto out_free_irq;
-
-	/* We finish our disk initialization and add the disk to the system. */
-	bd->disk->major = bd->major;
-	bd->disk->first_minor = 0;
-	bd->disk->private_data = bd;
-	bd->disk->fops = &lguestblk_fops;
-	/* This is initialized to the disk size by the Launcher. */
-	set_capacity(bd->disk, bd->lb_page->num_sectors);
-	add_disk(bd->disk);
-
-	printk(KERN_INFO "%s: device %i at major %d\n",
-	       bd->disk->disk_name, lgdev->index, bd->major);
-
-	/* We don't need to keep the "struct blockdev" around, but if we ever
-	 * implemented device removal, we'd need this. */
-	lgdev->private = bd;
-	return 0;
-
-out_free_irq:
-	free_irq(bd->irq, bd);
-out_cleanup_queue:
-	blk_cleanup_queue(bd->disk->queue);
-out_put_disk:
-	put_disk(bd->disk);
-out_unregister_blkdev:
-	unregister_blkdev(bd->major, "lguestblk");
-out_unmap:
-	lguest_unmap(bd->lb_page);
-out_free_bd:
-	kfree(bd);
-	return err;
-}
-
-/*D:410 The boilerplate code for registering the lguest block driver is just
- * like the console: */
-static struct lguest_driver lguestblk_drv = {
-	.name = "lguestblk",
-	.owner = THIS_MODULE,
-	.device_type = LGUEST_DEVICE_T_BLOCK,
-	.probe = lguestblk_probe,
-};
-
-static __init int lguestblk_init(void)
-{
-	return register_lguest_driver(&lguestblk_drv);
-}
-module_init(lguestblk_init);
-
-MODULE_DESCRIPTION("Lguest block driver");
-MODULE_LICENSE("GPL");
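Aside (not part of the patch): for context on the deleted driver, its `req_to_dma()` walked every bio segment of a block request and packed physical address/length pairs into the hypercall's DMA descriptor, terminating with a zero length. A condensed sketch of that walk; the `_sketch` names and hard-coded 16 are ours (the original used LGUEST_MAX_DMA_SECTIONS):

#include <linux/blkdev.h>

struct dma_sketch {
	unsigned long addr[16];
	unsigned short len[16];
};

static unsigned int pack_request_sketch(struct request *req,
					struct dma_sketch *dma)
{
	struct req_iterator iter;
	struct bio_vec *bvec;
	unsigned int i = 0, total = 0;

	/* One physical-address/length pair per bio segment. */
	rq_for_each_segment(bvec, req, iter) {
		dma->addr[i] = page_to_phys(bvec->bv_page) + bvec->bv_offset;
		dma->len[i] = bvec->bv_len;
		total += bvec->bv_len;
		i++;
	}
	if (i < 16)
		dma->len[i] = 0;	/* a zero length terminates the list */
	return total;
}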
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 317a790c15..7276f7d207 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -388,6 +388,7 @@ static int __send_request(struct request *req)
 		op = VD_OP_BWRITE;
 	}
 
+	sg_init_table(sg, port->ring_cookies);
 	nsg = blk_rq_map_sg(req->q, req, sg);
 
 	len = 0;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 402209fec5..52dc5e1317 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -27,6 +27,7 @@
 #include <linux/hdreg.h>
 #include <linux/dma-mapping.h>
 #include <linux/completion.h>
+#include <linux/scatterlist.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 
@@ -522,6 +523,7 @@ static struct carm_request *carm_get_request(struct carm_host *host)
 		host->n_msgs++;
 
 		assert(host->n_msgs <= CARM_MAX_REQ);
+		sg_init_table(crq->sg, CARM_MAX_REQ_SG);
 		return crq;
 	}
 
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index c57dd2b3a0..14143f2c48 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -25,6 +25,7 @@
 #include <linux/usb_usual.h>
 #include <linux/blkdev.h>
 #include <linux/timer.h>
+#include <linux/scatterlist.h>
 #include <scsi/scsi.h>
 
 #define DRV_NAME "ub"
@@ -656,6 +657,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
 	if ((cmd = ub_get_cmd(lun)) == NULL)
 		return -1;
 	memset(cmd, 0, sizeof(struct ub_scsi_cmd));
+	sg_init_table(cmd->sgv, UB_MAX_REQ_SG);
 
 	blkdev_dequeue_request(rq);
 
@@ -1309,9 +1311,8 @@ static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 	else
 		pipe = sc->send_bulk_pipe;
 	sc->last_pipe = pipe;
-	usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe,
-	    page_address(sg->page) + sg->offset, sg->length,
-	    ub_urb_complete, sc);
+	usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, sg_virt(sg),
+	    sg->length, ub_urb_complete, sc);
 	sc->work_urb.actual_length = 0;
 	sc->work_urb.error_count = 0;
 	sc->work_urb.status = 0;
@@ -1427,7 +1428,7 @@ static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 	scmd->state = UB_CMDST_INIT;
 	scmd->nsg = 1;
 	sg = &scmd->sgv[0];
-	sg->page = virt_to_page(sc->top_sense);
+	sg_set_page(sg, virt_to_page(sc->top_sense));
 	sg->offset = (unsigned long)sc->top_sense & (PAGE_SIZE-1);
 	sg->length = UB_SENSE_SIZE;
 	scmd->len = UB_SENSE_SIZE;
@@ -1863,7 +1864,7 @@ static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun,
 	cmd->state = UB_CMDST_INIT;
 	cmd->nsg = 1;
 	sg = &cmd->sgv[0];
-	sg->page = virt_to_page(p);
+	sg_set_page(sg, virt_to_page(p));
 	sg->offset = (unsigned long)p & (PAGE_SIZE-1);
 	sg->length = 8;
 	cmd->len = 8;
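Aside (not part of the patch): `sg_virt()`, used in the ub.c bulk-URB hunk above, condenses the open-coded `page_address(sg->page) + sg->offset` it replaces. Approximately (the `_sketch` name is ours):

#include <linux/mm.h>
#include <linux/scatterlist.h>

/* Only meaningful for pages with a permanent kernel mapping (lowmem). */
static inline void *sg_virt_sketch(struct scatterlist *sg)
{
	return page_address(sg_page(sg)) + sg->offset;
}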
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index e824b672e0..ab5d404faa 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -41,6 +41,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/completion.h>
 #include <linux/device.h>
+#include <linux/scatterlist.h>
 
 #include <asm/uaccess.h>
 #include <asm/vio.h>
@@ -270,6 +271,7 @@ static int send_request(struct request *req)
 	d = req->rq_disk->private_data;
 
 	/* Now build the scatter-gather list */
+	sg_init_table(sg, VIOMAXBLOCKDMA);
 	nsg = blk_rq_map_sg(req->q, req, sg);
 	nsg = dma_map_sg(d->dev, sg, nsg, direction);
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
new file mode 100644
index 0000000000..a901eee64b
--- /dev/null
+++ b/drivers/block/virtio_blk.c
@@ -0,0 +1,308 @@
+//#define DEBUG
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <linux/virtio.h>
+#include <linux/virtio_blk.h>
+#include <linux/virtio_blk.h>
+
+static unsigned char virtblk_index = 'a';
+struct virtio_blk
+{
+	spinlock_t lock;
+
+	struct virtio_device *vdev;
+	struct virtqueue *vq;
+
+	/* The disk structure for the kernel. */
+	struct gendisk *disk;
+
+	/* Request tracking. */
+	struct list_head reqs;
+
+	mempool_t *pool;
+
+	/* Scatterlist: can be too big for stack. */
+	struct scatterlist sg[3+MAX_PHYS_SEGMENTS];
+};
+
+struct virtblk_req
+{
+	struct list_head list;
+	struct request *req;
+	struct virtio_blk_outhdr out_hdr;
+	struct virtio_blk_inhdr in_hdr;
+};
+
+static bool blk_done(struct virtqueue *vq)
+{
+	struct virtio_blk *vblk = vq->vdev->priv;
+	struct virtblk_req *vbr;
+	unsigned int len;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vblk->lock, flags);
+	while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
+		int uptodate;
+		switch (vbr->in_hdr.status) {
+		case VIRTIO_BLK_S_OK:
+			uptodate = 1;
+			break;
+		case VIRTIO_BLK_S_UNSUPP:
+			uptodate = -ENOTTY;
+			break;
+		default:
+			uptodate = 0;
+			break;
+		}
+
+		end_dequeued_request(vbr->req, uptodate);
+		list_del(&vbr->list);
+		mempool_free(vbr, vblk->pool);
+	}
+	/* In case queue is stopped waiting for more buffers. */
+	blk_start_queue(vblk->disk->queue);
+	spin_unlock_irqrestore(&vblk->lock, flags);
+	return true;
+}
+
+static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
+		   struct request *req)
+{
+	unsigned long num, out, in;
+	struct virtblk_req *vbr;
+
+	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
+	if (!vbr)
+		/* When another request finishes we'll try again. */
+		return false;
+
+	vbr->req = req;
+	if (blk_fs_request(vbr->req)) {
+		vbr->out_hdr.type = 0;
+		vbr->out_hdr.sector = vbr->req->sector;
+		vbr->out_hdr.ioprio = vbr->req->ioprio;
+	} else if (blk_pc_request(vbr->req)) {
+		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
+		vbr->out_hdr.sector = 0;
+		vbr->out_hdr.ioprio = vbr->req->ioprio;
+	} else {
+		/* We don't put anything else in the queue. */
+		BUG();
+	}
+
+	if (blk_barrier_rq(vbr->req))
+		vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;
+
+	/* We have to zero this, otherwise blk_rq_map_sg gets upset. */
+	memset(vblk->sg, 0, sizeof(vblk->sg));
+	sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
+	num = blk_rq_map_sg(q, vbr->req, vblk->sg+1);
+	sg_set_buf(&vblk->sg[num+1], &vbr->in_hdr, sizeof(vbr->in_hdr));
+
+	if (rq_data_dir(vbr->req) == WRITE) {
+		vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+		out = 1 + num;
+		in = 1;
+	} else {
+		vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+		out = 1;
+		in = 1 + num;
+	}
+
+	if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
+		mempool_free(vbr, vblk->pool);
+		return false;
+	}
+
+	list_add_tail(&vbr->list, &vblk->reqs);
+	return true;
+}
+
+static void do_virtblk_request(struct request_queue *q)
+{
+	struct virtio_blk *vblk = NULL;
+	struct request *req;
+	unsigned int issued = 0;
+
+	while ((req = elv_next_request(q)) != NULL) {
+		vblk = req->rq_disk->private_data;
+		BUG_ON(req->nr_phys_segments > ARRAY_SIZE(vblk->sg));
+
+		/* If this request fails, stop queue and wait for something to
+		   finish to restart it. */
+		if (!do_req(q, vblk, req)) {
+			blk_stop_queue(q);
+			break;
+		}
+		blkdev_dequeue_request(req);
+		issued++;
+	}
+
+	if (issued)
+		vblk->vq->vq_ops->kick(vblk->vq);
+}
+
+static int virtblk_ioctl(struct inode *inode, struct file *filp,
+			 unsigned cmd, unsigned long data)
+{
+	return scsi_cmd_ioctl(filp, inode->i_bdev->bd_disk->queue,
+			      inode->i_bdev->bd_disk, cmd,
+			      (void __user *)data);
+}
+
+static struct block_device_operations virtblk_fops = {
+	.ioctl = virtblk_ioctl,
+	.owner = THIS_MODULE,
+};
+
+static int virtblk_probe(struct virtio_device *vdev)
+{
+	struct virtio_blk *vblk;
+	int err, major;
+	void *token;
+	unsigned int len;
+	u64 cap;
+	u32 v;
+
+	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
+	if (!vblk) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&vblk->reqs);
+	spin_lock_init(&vblk->lock);
+	vblk->vdev = vdev;
+
+	/* We expect one virtqueue, for output. */
+	vblk->vq = vdev->config->find_vq(vdev, blk_done);
+	if (IS_ERR(vblk->vq)) {
+		err = PTR_ERR(vblk->vq);
+		goto out_free_vblk;
+	}
+
+	vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
+	if (!vblk->pool) {
+		err = -ENOMEM;
+		goto out_free_vq;
+	}
+
+	major = register_blkdev(0, "virtblk");
+	if (major < 0) {
+		err = major;
+		goto out_mempool;
+	}
+
+	/* FIXME: How many partitions?  How long is a piece of string? */
+	vblk->disk = alloc_disk(1 << 4);
+	if (!vblk->disk) {
+		err = -ENOMEM;
+		goto out_unregister_blkdev;
+	}
+
+	vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
+	if (!vblk->disk->queue) {
+		err = -ENOMEM;
+		goto out_put_disk;
+	}
+
+	sprintf(vblk->disk->disk_name, "vd%c", virtblk_index++);
+	vblk->disk->major = major;
+	vblk->disk->first_minor = 0;
+	vblk->disk->private_data = vblk;
+	vblk->disk->fops = &virtblk_fops;
+
+	/* If barriers are supported, tell block layer that queue is ordered */
+	token = vdev->config->find(vdev, VIRTIO_CONFIG_BLK_F, &len);
+	if (virtio_use_bit(vdev, token, len, VIRTIO_BLK_F_BARRIER))
+		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
+
+	err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_CAPACITY, &cap);
+	if (err) {
+		dev_err(&vdev->dev, "Bad/missing capacity in config\n");
+		goto out_put_disk;
+	}
+
+	/* If capacity is too big, truncate with warning. */
+	if ((sector_t)cap != cap) {
+		dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
+			 (unsigned long long)cap);
+		cap = (sector_t)-1;
+	}
+	set_capacity(vblk->disk, cap);
+
+	err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SIZE_MAX, &v);
+	if (!err)
+		blk_queue_max_segment_size(vblk->disk->queue, v);
+	else if (err != -ENOENT) {
+		dev_err(&vdev->dev, "Bad SIZE_MAX in config\n");
+		goto out_put_disk;
+	}
+
+	err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SEG_MAX, &v);
+	if (!err)
+		blk_queue_max_hw_segments(vblk->disk->queue, v);
+	else if (err != -ENOENT) {
+		dev_err(&vdev->dev, "Bad SEG_MAX in config\n");
+		goto out_put_disk;
+	}
+
+	add_disk(vblk->disk);
+	return 0;
+
+out_put_disk:
+	put_disk(vblk->disk);
+out_unregister_blkdev:
+	unregister_blkdev(major, "virtblk");
+out_mempool:
+	mempool_destroy(vblk->pool);
+out_free_vq:
+	vdev->config->del_vq(vblk->vq);
+out_free_vblk:
+	kfree(vblk);
+out:
+	return err;
+}
+
+static void virtblk_remove(struct virtio_device *vdev)
+{
+	struct virtio_blk *vblk = vdev->priv;
+	int major = vblk->disk->major;
+
+	BUG_ON(!list_empty(&vblk->reqs));
+	blk_cleanup_queue(vblk->disk->queue);
+	put_disk(vblk->disk);
+	unregister_blkdev(major, "virtblk");
+	mempool_destroy(vblk->pool);
+	kfree(vblk);
+}
+
+static struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+static struct virtio_driver virtio_blk = {
+	.driver.name = KBUILD_MODNAME,
+	.driver.owner = THIS_MODULE,
+	.id_table = id_table,
+	.probe = virtblk_probe,
+	.remove = __devexit_p(virtblk_remove),
+};
+
+static int __init init(void)
+{
+	return register_virtio_driver(&virtio_blk);
+}
+
+static void __exit fini(void)
+{
+	unregister_virtio_driver(&virtio_blk);
+}
+module_init(init);
+module_exit(fini);
+
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio block driver");
+MODULE_LICENSE("GPL");
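Aside (not part of the patch): in `do_req()` above, each request occupies `num + 2` virtqueue descriptors: a driver-readable `virtio_blk_outhdr`, the data segments from `blk_rq_map_sg()`, and a device-writable `virtio_blk_inhdr` carrying the status byte. The out/in split handed to `add_buf()` follows the data direction; a sketch of just that split (the function name is ours):

/* For a write the device only reads the data; for a read it writes it.
 * The header is always out (device-readable), the status always in. */
static void virtblk_split_sketch(unsigned long num, int is_write,
				 unsigned long *out, unsigned long *in)
{
	if (is_write) {
		*out = 1 + num;		/* out_hdr + data */
		*in = 1;		/* in_hdr (status) */
	} else {
		*out = 1;		/* out_hdr only */
		*in = 1 + num;		/* data + in_hdr */
	}
}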
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index b9fbe6e7f9..075598e1c5 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -22,6 +22,30 @@ config BT_HCIUSB_SCO
 
 	  Say Y here to compile support for SCO over HCI USB.
 
+config BT_HCIBTUSB
+	tristate "HCI USB driver (alternate version)"
+	depends on USB && EXPERIMENTAL && BT_HCIUSB=n
+	help
+	  Bluetooth HCI USB driver.
+	  This driver is required if you want to use Bluetooth devices with
+	  USB interface.
+
+	  This driver is still experimental and has no SCO support.
+
+	  Say Y here to compile support for Bluetooth USB devices into the
+	  kernel or say M to compile it as module (btusb).
+
+config BT_HCIBTSDIO
+	tristate "HCI SDIO driver"
+	depends on MMC
+	help
+	  Bluetooth HCI SDIO driver.
+	  This driver is required if you want to use Bluetooth device with
+	  SDIO interface.
+
+	  Say Y here to compile support for Bluetooth SDIO devices into the
+	  kernel or say M to compile it as module (btsdio).
+
 config BT_HCIUART
 	tristate "HCI UART driver"
 	help
@@ -55,6 +79,17 @@ config BT_HCIUART_BCSP
 
 	  Say Y here to compile support for HCI BCSP protocol.
 
+config BT_HCIUART_LL
+	bool "HCILL protocol support"
+	depends on BT_HCIUART
+	help
+	  HCILL (HCI Low Level) is a serial protocol for communication
+	  between Bluetooth device and host. This protocol is required for
+	  serial Bluetooth devices that are based on Texas Instruments'
+	  BRF chips.
+
+	  Say Y here to compile support for HCILL protocol.
+
 config BT_HCIBCM203X
 	tristate "HCI BCM203x USB driver"
 	depends on USB
diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile
index 08c10e178e..77444afbf1 100644
--- a/drivers/bluetooth/Makefile
+++ b/drivers/bluetooth/Makefile
@@ -13,7 +13,11 @@ obj-$(CONFIG_BT_HCIBT3C) += bt3c_cs.o
 obj-$(CONFIG_BT_HCIBLUECARD) += bluecard_cs.o
 obj-$(CONFIG_BT_HCIBTUART) += btuart_cs.o
 
+obj-$(CONFIG_BT_HCIBTUSB) += btusb.o
+obj-$(CONFIG_BT_HCIBTSDIO) += btsdio.o
+
 hci_uart-y := hci_ldisc.o
 hci_uart-$(CONFIG_BT_HCIUART_H4) += hci_h4.o
 hci_uart-$(CONFIG_BT_HCIUART_BCSP) += hci_bcsp.o
+hci_uart-$(CONFIG_BT_HCIUART_LL) += hci_ll.o
 hci_uart-objs := $(hci_uart-y)
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index 851de4d5b7..bcf57927b7 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -503,10 +503,7 @@ static irqreturn_t bluecard_interrupt(int irq, void *dev_inst)
503 unsigned int iobase; 503 unsigned int iobase;
504 unsigned char reg; 504 unsigned char reg;
505 505
506 if (!info || !info->hdev) { 506 BUG_ON(!info->hdev);
507 BT_ERR("Call of irq %d for unknown device", irq);
508 return IRQ_NONE;
509 }
510 507
511 508 if (!test_bit(CARD_READY, &(info->hw_state)))
512 509 return IRQ_HANDLED;
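The same simplification recurs in bt3c_cs, btuart_cs and dtl1_cs below: the interrupt is only requested once info->hdev has been set up, so a NULL pointer here can only be a driver bug, and the defensive IRQ_NONE return is replaced by a hard BUG_ON(). A minimal sketch of the resulting handler shape (the info type and CARD_READY test are stand-ins from this driver, not a general API):

static irqreturn_t sketch_interrupt(int irq, void *dev_inst)
{
	bluecard_info_t *info = dev_inst;

	/* The handler is registered only after info->hdev is valid,
	 * so a NULL hdev indicates internal corruption: trap it. */
	BUG_ON(!info->hdev);

	if (!test_bit(CARD_READY, &(info->hw_state)))
		return IRQ_HANDLED;

	/* ... service the card's tx/rx here ... */

	return IRQ_HANDLED;
}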
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index e8ebd5d3de..1375b5345a 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -2,7 +2,7 @@
2 2 *
3 3 * Digianswer Bluetooth USB driver
4 4 *
5 * Copyright (C) 2004-2005 Marcel Holtmann <marcel@holtmann.org>
5 * Copyright (C) 2004-2007 Marcel Holtmann <marcel@holtmann.org>
6 6 *
7 7 *
8 8 * This program is free software; you can redistribute it and/or modify
@@ -21,13 +21,14 @@
21 21 *
22 22 */
23 23
24#include <linux/module.h>
25
26 24 #include <linux/kernel.h>
25#include <linux/module.h>
27 26 #include <linux/init.h>
28 27 #include <linux/slab.h>
29 28 #include <linux/types.h>
29#include <linux/sched.h>
30 30 #include <linux/errno.h>
31#include <linux/skbuff.h>
31 32
32 33 #include <linux/usb.h>
33 34
@@ -39,7 +40,7 @@
39 40 #define BT_DBG(D...)
40 41 #endif
41 42
42 #define VERSION "0.8"
43 #define VERSION "0.9"
43 44
44 45 static int ignore = 0;
45 46
@@ -52,393 +53,285 @@ static struct usb_device_id bpa10x_table[] = {
52 53
53 54 MODULE_DEVICE_TABLE(usb, bpa10x_table);
54 55
55#define BPA10X_CMD_EP 0x00
56#define BPA10X_EVT_EP 0x81
57#define BPA10X_TX_EP 0x02
58#define BPA10X_RX_EP 0x82
59
60#define BPA10X_CMD_BUF_SIZE 252
61#define BPA10X_EVT_BUF_SIZE 16
62#define BPA10X_TX_BUF_SIZE 384
63#define BPA10X_RX_BUF_SIZE 384
64
65 56 struct bpa10x_data {
66 57 struct hci_dev *hdev;
67 58 struct usb_device *udev;
68 59
69 rwlock_t lock; 60 struct usb_anchor tx_anchor;
61 struct usb_anchor rx_anchor;
70 62
71 struct sk_buff_head cmd_queue; 63 struct sk_buff *rx_skb[2];
72 struct urb *cmd_urb;
73 struct urb *evt_urb;
74 struct sk_buff *evt_skb;
75 unsigned int evt_len;
76
77 struct sk_buff_head tx_queue;
78 struct urb *tx_urb;
79 struct urb *rx_urb;
80}; 64};
81 65
82 66 #define HCI_VENDOR_HDR_SIZE 5
83 67
84 68 struct hci_vendor_hdr {
85 69 __u8 type;
86 70 __le16 snum;
87 71 __le16 dlen;
88 72 } __attribute__ ((packed));
89 73
90static void bpa10x_recv_bulk(struct bpa10x_data *data, unsigned char *buf, int count) 74static int bpa10x_recv(struct hci_dev *hdev, int queue, void *buf, int count)
91{ 75{
92 struct hci_acl_hdr *ah; 76 struct bpa10x_data *data = hdev->driver_data;
93 struct hci_sco_hdr *sh; 77
94 struct hci_vendor_hdr *vh; 78 BT_DBG("%s queue %d buffer %p count %d", hdev->name,
95 struct sk_buff *skb; 79 queue, buf, count);
96 int len; 80
81 if (queue < 0 || queue > 1)
82 return -EILSEQ;
83
84 hdev->stat.byte_rx += count;
97 85
98 while (count) { 86 while (count) {
99 switch (*buf++) { 87 struct sk_buff *skb = data->rx_skb[queue];
100 case HCI_ACLDATA_PKT: 88 struct { __u8 type; int expect; } *scb;
101 ah = (struct hci_acl_hdr *) buf; 89 int type, len = 0;
102 len = HCI_ACL_HDR_SIZE + __le16_to_cpu(ah->dlen);
103 skb = bt_skb_alloc(len, GFP_ATOMIC);
104 if (skb) {
105 memcpy(skb_put(skb, len), buf, len);
106 skb->dev = (void *) data->hdev;
107 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
108 hci_recv_frame(skb);
109 }
110 break;
111 90
112 case HCI_SCODATA_PKT: 91 if (!skb) {
113 sh = (struct hci_sco_hdr *) buf; 92 /* Start of the frame */
114 len = HCI_SCO_HDR_SIZE + sh->dlen; 93
115 skb = bt_skb_alloc(len, GFP_ATOMIC); 94 type = *((__u8 *) buf);
116 if (skb) { 95 count--; buf++;
117 memcpy(skb_put(skb, len), buf, len); 96
118 skb->dev = (void *) data->hdev; 97 switch (type) {
119 bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; 98 case HCI_EVENT_PKT:
120 hci_recv_frame(skb); 99 if (count >= HCI_EVENT_HDR_SIZE) {
100 struct hci_event_hdr *h = buf;
101 len = HCI_EVENT_HDR_SIZE + h->plen;
102 } else
103 return -EILSEQ;
104 break;
105
106 case HCI_ACLDATA_PKT:
107 if (count >= HCI_ACL_HDR_SIZE) {
108 struct hci_acl_hdr *h = buf;
109 len = HCI_ACL_HDR_SIZE +
110 __le16_to_cpu(h->dlen);
111 } else
112 return -EILSEQ;
113 break;
114
115 case HCI_SCODATA_PKT:
116 if (count >= HCI_SCO_HDR_SIZE) {
117 struct hci_sco_hdr *h = buf;
118 len = HCI_SCO_HDR_SIZE + h->dlen;
119 } else
120 return -EILSEQ;
121 break;
122
123 case HCI_VENDOR_PKT:
124 if (count >= HCI_VENDOR_HDR_SIZE) {
125 struct hci_vendor_hdr *h = buf;
126 len = HCI_VENDOR_HDR_SIZE +
127 __le16_to_cpu(h->dlen);
128 } else
129 return -EILSEQ;
130 break;
121 } 131 }
122 break;
123 132
124 case HCI_VENDOR_PKT:
125 vh = (struct hci_vendor_hdr *) buf;
126 len = HCI_VENDOR_HDR_SIZE + __le16_to_cpu(vh->dlen);
127 skb = bt_skb_alloc(len, GFP_ATOMIC); 133 skb = bt_skb_alloc(len, GFP_ATOMIC);
128 if (skb) { 134 if (!skb) {
129 memcpy(skb_put(skb, len), buf, len); 135 BT_ERR("%s no memory for packet", hdev->name);
130 skb->dev = (void *) data->hdev; 136 return -ENOMEM;
131 bt_cb(skb)->pkt_type = HCI_VENDOR_PKT;
132 hci_recv_frame(skb);
133 } 137 }
134 break;
135
136 default:
137 len = count - 1;
138 break;
139 }
140 138
141 buf += len; 139 skb->dev = (void *) hdev;
142 count -= (len + 1);
143 }
144}
145
146static int bpa10x_recv_event(struct bpa10x_data *data, unsigned char *buf, int size)
147{
148 BT_DBG("data %p buf %p size %d", data, buf, size);
149 140
150 if (data->evt_skb) { 141 data->rx_skb[queue] = skb;
151 struct sk_buff *skb = data->evt_skb;
152 142
153 memcpy(skb_put(skb, size), buf, size); 143 scb = (void *) skb->cb;
144 scb->type = type;
145 scb->expect = len;
146 } else {
147 /* Continuation */
154 148
155 if (skb->len == data->evt_len) { 149 scb = (void *) skb->cb;
156 data->evt_skb = NULL; 150 len = scb->expect;
157 data->evt_len = 0;
158 hci_recv_frame(skb);
159 }
160 } else {
161 struct sk_buff *skb;
162 struct hci_event_hdr *hdr;
163 unsigned char pkt_type;
164 int pkt_len = 0;
165
166 if (size < HCI_EVENT_HDR_SIZE + 1) {
167 BT_ERR("%s event packet block with size %d is too short",
168 data->hdev->name, size);
169 return -EILSEQ;
170 } 151 }
171 152
172 pkt_type = *buf++; 153 len = min(len, count);
173 size--;
174
175 if (pkt_type != HCI_EVENT_PKT) {
176 BT_ERR("%s unexpected event packet start byte 0x%02x",
177 data->hdev->name, pkt_type);
178 return -EPROTO;
179 }
180 154
181 hdr = (struct hci_event_hdr *) buf; 155 memcpy(skb_put(skb, len), buf, len);
182 pkt_len = HCI_EVENT_HDR_SIZE + hdr->plen;
183 156
184 skb = bt_skb_alloc(pkt_len, GFP_ATOMIC); 157 scb->expect -= len;
185 if (!skb) {
186 BT_ERR("%s no memory for new event packet",
187 data->hdev->name);
188 return -ENOMEM;
189 }
190 158
191 skb->dev = (void *) data->hdev; 159 if (scb->expect == 0) {
192 bt_cb(skb)->pkt_type = pkt_type; 160 /* Complete frame */
193 161
194 memcpy(skb_put(skb, size), buf, size); 162 data->rx_skb[queue] = NULL;
195 163
196 if (pkt_len == size) { 164 bt_cb(skb)->pkt_type = scb->type;
197 hci_recv_frame(skb); 165 hci_recv_frame(skb);
198 } else {
199 data->evt_skb = skb;
200 data->evt_len = pkt_len;
201 } 166 }
167
168 count -= len; buf += len;
202 } 169 }
203 170
204 return 0; 171 return 0;
205} 172}
206 173
207static void bpa10x_wakeup(struct bpa10x_data *data) 174static void bpa10x_tx_complete(struct urb *urb)
208{ 175{
209 struct urb *urb; 176 struct sk_buff *skb = urb->context;
210 struct sk_buff *skb; 177 struct hci_dev *hdev = (struct hci_dev *) skb->dev;
211 int err;
212 178
213 BT_DBG("data %p", data); 179 BT_DBG("%s urb %p status %d count %d", hdev->name,
180 urb, urb->status, urb->actual_length);
214 181
215 urb = data->cmd_urb; 182 if (!test_bit(HCI_RUNNING, &hdev->flags))
216 if (urb->status == -EINPROGRESS) 183 goto done;
217 skb = NULL; 184
185 if (!urb->status)
186 hdev->stat.byte_tx += urb->transfer_buffer_length;
218 else 187 else
219 skb = skb_dequeue(&data->cmd_queue); 188 hdev->stat.err_tx++;
220 189
221 if (skb) { 190done:
222 struct usb_ctrlrequest *cr; 191 kfree(urb->setup_packet);
223 192
224 if (skb->len > BPA10X_CMD_BUF_SIZE) { 193 kfree_skb(skb);
225 BT_ERR("%s command packet with size %d is too big", 194}
226 data->hdev->name, skb->len); 195
227 kfree_skb(skb); 196static void bpa10x_rx_complete(struct urb *urb)
228 return; 197{
229 } 198 struct hci_dev *hdev = urb->context;
199 struct bpa10x_data *data = hdev->driver_data;
200 int err;
230 201
231 cr = (struct usb_ctrlrequest *) urb->setup_packet; 202 BT_DBG("%s urb %p status %d count %d", hdev->name,
232 cr->wLength = __cpu_to_le16(skb->len); 203 urb, urb->status, urb->actual_length);
233 204
234 skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len); 205 if (!test_bit(HCI_RUNNING, &hdev->flags))
235 urb->transfer_buffer_length = skb->len; 206 return;
236 207
237 err = usb_submit_urb(urb, GFP_ATOMIC); 208 if (urb->status == 0) {
238 if (err < 0 && err != -ENODEV) { 209 if (bpa10x_recv(hdev, usb_pipebulk(urb->pipe),
239 BT_ERR("%s submit failed for command urb %p with error %d", 210 urb->transfer_buffer,
240 data->hdev->name, urb, err); 211 urb->actual_length) < 0) {
241 skb_queue_head(&data->cmd_queue, skb); 212 BT_ERR("%s corrupted event packet", hdev->name);
242 } else 213 hdev->stat.err_rx++;
243 kfree_skb(skb); 214 }
244 } 215 }
245 216
246 urb = data->tx_urb; 217 usb_anchor_urb(urb, &data->rx_anchor);
247 if (urb->status == -EINPROGRESS) 218
248 skb = NULL; 219 err = usb_submit_urb(urb, GFP_ATOMIC);
249 else 220 if (err < 0) {
250 skb = skb_dequeue(&data->tx_queue); 221 BT_ERR("%s urb %p failed to resubmit (%d)",
251 222 hdev->name, urb, -err);
252 if (skb) { 223 usb_unanchor_urb(urb);
253 skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len);
254 urb->transfer_buffer_length = skb->len;
255
256 err = usb_submit_urb(urb, GFP_ATOMIC);
257 if (err < 0 && err != -ENODEV) {
258 BT_ERR("%s submit failed for command urb %p with error %d",
259 data->hdev->name, urb, err);
260 skb_queue_head(&data->tx_queue, skb);
261 } else
262 kfree_skb(skb);
263 } 224 }
264} 225}
265 226
266static void bpa10x_complete(struct urb *urb) 227static inline int bpa10x_submit_intr_urb(struct hci_dev *hdev)
267{ 228{
268 struct bpa10x_data *data = urb->context; 229 struct bpa10x_data *data = hdev->driver_data;
269 unsigned char *buf = urb->transfer_buffer; 230 struct urb *urb;
270 int err, count = urb->actual_length; 231 unsigned char *buf;
232 unsigned int pipe;
233 int err, size = 16;
271 234
272 BT_DBG("data %p urb %p buf %p count %d", data, urb, buf, count); 235 BT_DBG("%s", hdev->name);
273 236
274 read_lock(&data->lock); 237 urb = usb_alloc_urb(0, GFP_KERNEL);
238 if (!urb)
239 return -ENOMEM;
275 240
276 if (!test_bit(HCI_RUNNING, &data->hdev->flags)) 241 buf = kmalloc(size, GFP_KERNEL);
277 goto unlock; 242 if (!buf) {
243 usb_free_urb(urb);
244 return -ENOMEM;
245 }
278 246
279 if (urb->status < 0 || !count) 247 pipe = usb_rcvintpipe(data->udev, 0x81);
280 goto resubmit;
281 248
282 if (usb_pipein(urb->pipe)) { 249 usb_fill_int_urb(urb, data->udev, pipe, buf, size,
283 data->hdev->stat.byte_rx += count; 250 bpa10x_rx_complete, hdev, 1);
284 251
285 if (usb_pipetype(urb->pipe) == PIPE_INTERRUPT) 252 urb->transfer_flags |= URB_FREE_BUFFER;
286 bpa10x_recv_event(data, buf, count);
287 253
288 if (usb_pipetype(urb->pipe) == PIPE_BULK) 254 usb_anchor_urb(urb, &data->rx_anchor);
289 bpa10x_recv_bulk(data, buf, count);
290 } else {
291 data->hdev->stat.byte_tx += count;
292 255
293 bpa10x_wakeup(data); 256 err = usb_submit_urb(urb, GFP_KERNEL);
257 if (err < 0) {
258 BT_ERR("%s urb %p submission failed (%d)",
259 hdev->name, urb, -err);
260 usb_unanchor_urb(urb);
261 kfree(buf);
294 } 262 }
295 263
296resubmit: 264 usb_free_urb(urb);
297 if (usb_pipein(urb->pipe)) {
298 err = usb_submit_urb(urb, GFP_ATOMIC);
299 if (err < 0 && err != -ENODEV) {
300 BT_ERR("%s urb %p type %d resubmit status %d",
301 data->hdev->name, urb, usb_pipetype(urb->pipe), err);
302 }
303 }
304 265
305unlock: 266 return err;
306 read_unlock(&data->lock);
307} 267}
308 268
309static inline struct urb *bpa10x_alloc_urb(struct usb_device *udev, unsigned int pipe, 269static inline int bpa10x_submit_bulk_urb(struct hci_dev *hdev)
310 size_t size, gfp_t flags, void *data)
311{ 270{
271 struct bpa10x_data *data = hdev->driver_data;
312 struct urb *urb; 272 struct urb *urb;
313 struct usb_ctrlrequest *cr;
314 unsigned char *buf; 273 unsigned char *buf;
274 unsigned int pipe;
275 int err, size = 64;
315 276
316 BT_DBG("udev %p data %p", udev, data); 277 BT_DBG("%s", hdev->name);
317 278
318 urb = usb_alloc_urb(0, flags); 279 urb = usb_alloc_urb(0, GFP_KERNEL);
319 if (!urb) 280 if (!urb)
320 return NULL; 281 return -ENOMEM;
321 282
322 buf = kmalloc(size, flags); 283 buf = kmalloc(size, GFP_KERNEL);
323 if (!buf) { 284 if (!buf) {
324 usb_free_urb(urb); 285 usb_free_urb(urb);
325 return NULL; 286 return -ENOMEM;
326 } 287 }
327 288
328 switch (usb_pipetype(pipe)) { 289 pipe = usb_rcvbulkpipe(data->udev, 0x82);
329 case PIPE_CONTROL:
330 cr = kmalloc(sizeof(*cr), flags);
331 if (!cr) {
332 kfree(buf);
333 usb_free_urb(urb);
334 return NULL;
335 }
336 290
337 cr->bRequestType = USB_TYPE_VENDOR; 291 usb_fill_bulk_urb(urb, data->udev, pipe,
338 cr->bRequest = 0; 292 buf, size, bpa10x_rx_complete, hdev);
339 cr->wIndex = 0;
340 cr->wValue = 0;
341 cr->wLength = __cpu_to_le16(0);
342 293
343 usb_fill_control_urb(urb, udev, pipe, (void *) cr, buf, 0, bpa10x_complete, data); 294 urb->transfer_flags |= URB_FREE_BUFFER;
344 break;
345 295
346 case PIPE_INTERRUPT: 296 usb_anchor_urb(urb, &data->rx_anchor);
347 usb_fill_int_urb(urb, udev, pipe, buf, size, bpa10x_complete, data, 1);
348 break;
349 297
350 case PIPE_BULK: 298 err = usb_submit_urb(urb, GFP_KERNEL);
351 usb_fill_bulk_urb(urb, udev, pipe, buf, size, bpa10x_complete, data); 299 if (err < 0) {
352 break; 300 BT_ERR("%s urb %p submission failed (%d)",
353 301 hdev->name, urb, -err);
354 default: 302 usb_unanchor_urb(urb);
355 kfree(buf); 303 kfree(buf);
356 usb_free_urb(urb);
357 return NULL;
358 } 304 }
359 305
360 return urb;
361}
362
363static inline void bpa10x_free_urb(struct urb *urb)
364{
365 BT_DBG("urb %p", urb);
366
367 if (!urb)
368 return;
369
370 kfree(urb->setup_packet);
371 kfree(urb->transfer_buffer);
372
373 usb_free_urb(urb); 306 usb_free_urb(urb);
307
308 return err;
374} 309}
375 310
376static int bpa10x_open(struct hci_dev *hdev) 311static int bpa10x_open(struct hci_dev *hdev)
377{ 312{
378 struct bpa10x_data *data = hdev->driver_data; 313 struct bpa10x_data *data = hdev->driver_data;
379 struct usb_device *udev = data->udev;
380 unsigned long flags;
381 int err; 314 int err;
382 315
383 BT_DBG("hdev %p data %p", hdev, data); 316 BT_DBG("%s", hdev->name);
384 317
385 if (test_and_set_bit(HCI_RUNNING, &hdev->flags)) 318 if (test_and_set_bit(HCI_RUNNING, &hdev->flags))
386 return 0; 319 return 0;
387 320
388 data->cmd_urb = bpa10x_alloc_urb(udev, usb_sndctrlpipe(udev, BPA10X_CMD_EP), 321 err = bpa10x_submit_intr_urb(hdev);
389 BPA10X_CMD_BUF_SIZE, GFP_KERNEL, data); 322 if (err < 0)
390 if (!data->cmd_urb) { 323 goto error;
391 err = -ENOMEM;
392 goto done;
393 }
394
395 data->evt_urb = bpa10x_alloc_urb(udev, usb_rcvintpipe(udev, BPA10X_EVT_EP),
396 BPA10X_EVT_BUF_SIZE, GFP_KERNEL, data);
397 if (!data->evt_urb) {
398 bpa10x_free_urb(data->cmd_urb);
399 err = -ENOMEM;
400 goto done;
401 }
402
403 data->rx_urb = bpa10x_alloc_urb(udev, usb_rcvbulkpipe(udev, BPA10X_RX_EP),
404 BPA10X_RX_BUF_SIZE, GFP_KERNEL, data);
405 if (!data->rx_urb) {
406 bpa10x_free_urb(data->evt_urb);
407 bpa10x_free_urb(data->cmd_urb);
408 err = -ENOMEM;
409 goto done;
410 }
411
412 data->tx_urb = bpa10x_alloc_urb(udev, usb_sndbulkpipe(udev, BPA10X_TX_EP),
413 BPA10X_TX_BUF_SIZE, GFP_KERNEL, data);
414 if (!data->rx_urb) {
415 bpa10x_free_urb(data->rx_urb);
416 bpa10x_free_urb(data->evt_urb);
417 bpa10x_free_urb(data->cmd_urb);
418 err = -ENOMEM;
419 goto done;
420 }
421 324
422 write_lock_irqsave(&data->lock, flags); 325 err = bpa10x_submit_bulk_urb(hdev);
326 if (err < 0)
327 goto error;
423 328
424 err = usb_submit_urb(data->evt_urb, GFP_ATOMIC); 329 return 0;
425 if (err < 0) {
426 BT_ERR("%s submit failed for event urb %p with error %d",
427 data->hdev->name, data->evt_urb, err);
428 } else {
429 err = usb_submit_urb(data->rx_urb, GFP_ATOMIC);
430 if (err < 0) {
431 BT_ERR("%s submit failed for rx urb %p with error %d",
432 data->hdev->name, data->evt_urb, err);
433 usb_kill_urb(data->evt_urb);
434 }
435 }
436 330
437 write_unlock_irqrestore(&data->lock, flags); 331error:
332 usb_kill_anchored_urbs(&data->rx_anchor);
438 333
439done: 334 clear_bit(HCI_RUNNING, &hdev->flags);
440 if (err < 0)
441 clear_bit(HCI_RUNNING, &hdev->flags);
442 335
443 return err; 336 return err;
444} 337}
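The new bpa10x_recv() above is the heart of the rewrite: frames may arrive split across several URB completions, so the packet type and the number of bytes still missing are parked in the skb control buffer (skb->cb) between calls. A condensed sketch of that bookkeeping, using the same anonymous scratch struct the function declares:

/* Scratch state kept in skb->cb between URB completions (a sketch;
 * bpa10x_recv() declares this struct anonymously on the stack). */
struct reasm_scb {
	__u8 type;	/* HCI packet type byte */
	int expect;	/* bytes still missing for a full frame */
};

/* Append one received chunk to an in-progress frame and report how
 * many bytes were consumed; the caller hands the skb to
 * hci_recv_frame() once expect reaches zero. */
static int reasm_feed(struct sk_buff *skb, const void *buf, int count)
{
	struct reasm_scb *scb = (void *) skb->cb;
	int len = min(scb->expect, count);

	memcpy(skb_put(skb, len), buf, len);
	scb->expect -= len;

	return len;
}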
@@ -446,27 +339,13 @@ done:
446static int bpa10x_close(struct hci_dev *hdev) 339static int bpa10x_close(struct hci_dev *hdev)
447{ 340{
448 struct bpa10x_data *data = hdev->driver_data; 341 struct bpa10x_data *data = hdev->driver_data;
449 unsigned long flags;
450 342
451 BT_DBG("hdev %p data %p", hdev, data); 343 BT_DBG("%s", hdev->name);
452 344
453 if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags)) 345 if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags))
454 return 0; 346 return 0;
455 347
456 write_lock_irqsave(&data->lock, flags); 348 usb_kill_anchored_urbs(&data->rx_anchor);
457
458 skb_queue_purge(&data->cmd_queue);
459 usb_kill_urb(data->cmd_urb);
460 usb_kill_urb(data->evt_urb);
461 usb_kill_urb(data->rx_urb);
462 usb_kill_urb(data->tx_urb);
463
464 write_unlock_irqrestore(&data->lock, flags);
465
466 bpa10x_free_urb(data->cmd_urb);
467 bpa10x_free_urb(data->evt_urb);
468 bpa10x_free_urb(data->rx_urb);
469 bpa10x_free_urb(data->tx_urb);
470 349
471 return 0; 350 return 0;
472} 351}
@@ -475,9 +354,9 @@ static int bpa10x_flush(struct hci_dev *hdev)
475{ 354{
476 struct bpa10x_data *data = hdev->driver_data; 355 struct bpa10x_data *data = hdev->driver_data;
477 356
478 BT_DBG("hdev %p data %p", hdev, data); 357 BT_DBG("%s", hdev->name);
479 358
480 skb_queue_purge(&data->cmd_queue); 359 usb_kill_anchored_urbs(&data->tx_anchor);
481 360
482 return 0; 361 return 0;
483} 362}
@@ -485,45 +364,78 @@ static int bpa10x_flush(struct hci_dev *hdev)
485static int bpa10x_send_frame(struct sk_buff *skb) 364static int bpa10x_send_frame(struct sk_buff *skb)
486{ 365{
487 struct hci_dev *hdev = (struct hci_dev *) skb->dev; 366 struct hci_dev *hdev = (struct hci_dev *) skb->dev;
488 struct bpa10x_data *data; 367 struct bpa10x_data *data = hdev->driver_data;
489 368 struct usb_ctrlrequest *dr;
490 BT_DBG("hdev %p skb %p type %d len %d", hdev, skb, bt_cb(skb)->pkt_type, skb->len); 369 struct urb *urb;
370 unsigned int pipe;
371 int err;
491 372
492 if (!hdev) { 373 BT_DBG("%s", hdev->name);
493 BT_ERR("Frame for unknown HCI device");
494 return -ENODEV;
495 }
496 374
497 if (!test_bit(HCI_RUNNING, &hdev->flags)) 375 if (!test_bit(HCI_RUNNING, &hdev->flags))
498 return -EBUSY; 376 return -EBUSY;
499 377
500 data = hdev->driver_data; 378 urb = usb_alloc_urb(0, GFP_ATOMIC);
379 if (!urb)
380 return -ENOMEM;
501 381
502 /* Prepend skb with frame type */ 382 /* Prepend skb with frame type */
503 memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); 383 *skb_push(skb, 1) = bt_cb(skb)->pkt_type;
504 384
505 switch (bt_cb(skb)->pkt_type) { 385 switch (bt_cb(skb)->pkt_type) {
506 case HCI_COMMAND_PKT: 386 case HCI_COMMAND_PKT:
387 dr = kmalloc(sizeof(*dr), GFP_ATOMIC);
388 if (!dr) {
389 usb_free_urb(urb);
390 return -ENOMEM;
391 }
392
393 dr->bRequestType = USB_TYPE_VENDOR;
394 dr->bRequest = 0;
395 dr->wIndex = 0;
396 dr->wValue = 0;
397 dr->wLength = __cpu_to_le16(skb->len);
398
399 pipe = usb_sndctrlpipe(data->udev, 0x00);
400
401 usb_fill_control_urb(urb, data->udev, pipe, (void *) dr,
402 skb->data, skb->len, bpa10x_tx_complete, skb);
403
507 hdev->stat.cmd_tx++; 404 hdev->stat.cmd_tx++;
508 skb_queue_tail(&data->cmd_queue, skb);
509 break; 405 break;
510 406
511 case HCI_ACLDATA_PKT: 407 case HCI_ACLDATA_PKT:
408 pipe = usb_sndbulkpipe(data->udev, 0x02);
409
410 usb_fill_bulk_urb(urb, data->udev, pipe,
411 skb->data, skb->len, bpa10x_tx_complete, skb);
412
512 hdev->stat.acl_tx++; 413 hdev->stat.acl_tx++;
513 skb_queue_tail(&data->tx_queue, skb);
514 break; 414 break;
515 415
516 case HCI_SCODATA_PKT: 416 case HCI_SCODATA_PKT:
417 pipe = usb_sndbulkpipe(data->udev, 0x02);
418
419 usb_fill_bulk_urb(urb, data->udev, pipe,
420 skb->data, skb->len, bpa10x_tx_complete, skb);
421
517 hdev->stat.sco_tx++; 422 hdev->stat.sco_tx++;
518 skb_queue_tail(&data->tx_queue, skb);
519 break; 423 break;
520 };
521 424
522 read_lock(&data->lock); 425 default:
426 return -EILSEQ;
427 }
428
429 usb_anchor_urb(urb, &data->tx_anchor);
523 430
524 bpa10x_wakeup(data); 431 err = usb_submit_urb(urb, GFP_ATOMIC);
432 if (err < 0) {
433 BT_ERR("%s urb %p submission failed", hdev->name, urb);
434 kfree(urb->setup_packet);
435 usb_unanchor_urb(urb);
436 }
525 437
526 read_unlock(&data->lock); 438 usb_free_urb(urb);
527 439
528 return 0; 440 return 0;
529} 441}
@@ -532,16 +444,17 @@ static void bpa10x_destruct(struct hci_dev *hdev)
532{ 444{
533 struct bpa10x_data *data = hdev->driver_data; 445 struct bpa10x_data *data = hdev->driver_data;
534 446
535 BT_DBG("hdev %p data %p", hdev, data); 447 BT_DBG("%s", hdev->name);
536 448
449 kfree(data->rx_skb[0]);
450 kfree(data->rx_skb[1]);
537 kfree(data); 451 kfree(data);
538} 452}
539 453
540static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id *id) 454static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id *id)
541{ 455{
542 struct usb_device *udev = interface_to_usbdev(intf);
543 struct hci_dev *hdev;
544 struct bpa10x_data *data; 456 struct bpa10x_data *data;
457 struct hci_dev *hdev;
545 int err; 458 int err;
546 459
547 BT_DBG("intf %p id %p", intf, id); 460 BT_DBG("intf %p id %p", intf, id);
@@ -549,48 +462,43 @@ static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id *
549 if (ignore) 462 if (ignore)
550 return -ENODEV; 463 return -ENODEV;
551 464
552 if (intf->cur_altsetting->desc.bInterfaceNumber > 0) 465 if (intf->cur_altsetting->desc.bInterfaceNumber != 0)
553 return -ENODEV; 466 return -ENODEV;
554 467
555 data = kzalloc(sizeof(*data), GFP_KERNEL); 468 data = kzalloc(sizeof(*data), GFP_KERNEL);
556 if (!data) { 469 if (!data)
557 BT_ERR("Can't allocate data structure");
558 return -ENOMEM; 470 return -ENOMEM;
559 }
560
561 data->udev = udev;
562 471
563 rwlock_init(&data->lock); 472 data->udev = interface_to_usbdev(intf);
564 473
565 skb_queue_head_init(&data->cmd_queue); 474 init_usb_anchor(&data->tx_anchor);
566 skb_queue_head_init(&data->tx_queue); 475 init_usb_anchor(&data->rx_anchor);
567 476
568 hdev = hci_alloc_dev(); 477 hdev = hci_alloc_dev();
569 if (!hdev) { 478 if (!hdev) {
570 BT_ERR("Can't allocate HCI device");
571 kfree(data); 479 kfree(data);
572 return -ENOMEM; 480 return -ENOMEM;
573 } 481 }
574 482
575 data->hdev = hdev;
576
577 hdev->type = HCI_USB; 483 hdev->type = HCI_USB;
578 hdev->driver_data = data; 484 hdev->driver_data = data;
485
486 data->hdev = hdev;
487
579 SET_HCIDEV_DEV(hdev, &intf->dev); 488 SET_HCIDEV_DEV(hdev, &intf->dev);
580 489
581 hdev->open = bpa10x_open; 490 hdev->open = bpa10x_open;
582 hdev->close = bpa10x_close; 491 hdev->close = bpa10x_close;
583 hdev->flush = bpa10x_flush; 492 hdev->flush = bpa10x_flush;
584 hdev->send = bpa10x_send_frame; 493 hdev->send = bpa10x_send_frame;
585 hdev->destruct = bpa10x_destruct; 494 hdev->destruct = bpa10x_destruct;
586 495
587 hdev->owner = THIS_MODULE; 496 hdev->owner = THIS_MODULE;
588 497
589 err = hci_register_dev(hdev); 498 err = hci_register_dev(hdev);
590 if (err < 0) { 499 if (err < 0) {
591 BT_ERR("Can't register HCI device");
592 kfree(data);
593 hci_free_dev(hdev); 500 hci_free_dev(hdev);
501 kfree(data);
594 return err; 502 return err;
595 } 503 }
596 504
@@ -602,19 +510,17 @@ static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id *
602static void bpa10x_disconnect(struct usb_interface *intf) 510static void bpa10x_disconnect(struct usb_interface *intf)
603{ 511{
604 struct bpa10x_data *data = usb_get_intfdata(intf); 512 struct bpa10x_data *data = usb_get_intfdata(intf);
605 struct hci_dev *hdev = data->hdev;
606 513
607 BT_DBG("intf %p", intf); 514 BT_DBG("intf %p", intf);
608 515
609 if (!hdev) 516 if (!data)
610 return; 517 return;
611 518
612 usb_set_intfdata(intf, NULL); 519 usb_set_intfdata(intf, NULL);
613 520
614 if (hci_unregister_dev(hdev) < 0) 521 hci_unregister_dev(data->hdev);
615 BT_ERR("Can't unregister HCI device %s", hdev->name);
616 522
617 hci_free_dev(hdev); 523 hci_free_dev(data->hdev);
618} 524}
619 525
620static struct usb_driver bpa10x_driver = { 526static struct usb_driver bpa10x_driver = {
@@ -626,15 +532,9 @@ static struct usb_driver bpa10x_driver = {
626 532
627static int __init bpa10x_init(void) 533static int __init bpa10x_init(void)
628{ 534{
629 int err;
630
631 BT_INFO("Digianswer Bluetooth USB driver ver %s", VERSION); 535 BT_INFO("Digianswer Bluetooth USB driver ver %s", VERSION);
632 536
633 err = usb_register(&bpa10x_driver); 537 return usb_register(&bpa10x_driver);
634 if (err < 0)
635 BT_ERR("Failed to register USB driver");
636
637 return err;
638} 538}
639 539
640static void __exit bpa10x_exit(void) 540static void __exit bpa10x_exit(void)
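Taken together, the bpa10x rewrite trades the four pre-allocated URBs, two skb queues and the rwlock for USB anchors: every in-flight URB is tied to an anchor at submission time, so flush and close collapse into a single usb_kill_anchored_urbs() call. A minimal sketch of the pattern on the 2.6.23-era API:

#include <linux/usb.h>

/* Tie a freshly filled URB to an anchor before submission so that
 * teardown can cancel every outstanding transfer in one call. */
static int submit_anchored(struct urb *urb, struct usb_anchor *anchor)
{
	int err;

	usb_anchor_urb(urb, anchor);

	err = usb_submit_urb(urb, GFP_ATOMIC);
	if (err < 0)
		usb_unanchor_urb(urb);	/* back out on failure */

	/* Drop our reference; the USB core holds its own until the
	 * URB completes, at which point the anchor releases it too. */
	usb_free_urb(urb);

	return err;
}

/* On close/flush: usb_kill_anchored_urbs(anchor); */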
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 3951607463..a18f9b8c9e 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -344,10 +344,7 @@ static irqreturn_t bt3c_interrupt(int irq, void *dev_inst)
344 344 unsigned int iobase;
345 345 int iir;
346 346
347 if (!info || !info->hdev) {
348 BT_ERR("Call of irq %d for unknown device", irq);
349 return IRQ_NONE;
350 }
347 BUG_ON(!info->hdev);
351 348
352 349 iobase = info->p_dev->io.BasePort1;
353 350
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
new file mode 100644
index 0000000000..b786f61879
--- /dev/null
+++ b/drivers/bluetooth/btsdio.c
@@ -0,0 +1,406 @@
1/*
2 *
3 * Generic Bluetooth SDIO driver
4 *
5 * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
6 * Copyright (C) 2007 Marcel Holtmann <marcel@holtmann.org>
7 *
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 */
24
25#include <linux/kernel.h>
26#include <linux/module.h>
27#include <linux/init.h>
28#include <linux/slab.h>
29#include <linux/types.h>
30#include <linux/sched.h>
31#include <linux/errno.h>
32#include <linux/skbuff.h>
33
34#include <linux/mmc/sdio_ids.h>
35#include <linux/mmc/sdio_func.h>
36
37#include <net/bluetooth/bluetooth.h>
38#include <net/bluetooth/hci_core.h>
39
40#ifndef CONFIG_BT_HCIBTSDIO_DEBUG
41#undef BT_DBG
42#define BT_DBG(D...)
43#endif
44
45#define VERSION "0.1"
46
47static const struct sdio_device_id btsdio_table[] = {
48 /* Generic Bluetooth Type-A SDIO device */
49 { SDIO_DEVICE_CLASS(SDIO_CLASS_BT_A) },
50
51 /* Generic Bluetooth Type-B SDIO device */
52 { SDIO_DEVICE_CLASS(SDIO_CLASS_BT_B) },
53
54 { } /* Terminating entry */
55};
56
57MODULE_DEVICE_TABLE(sdio, btsdio_table);
58
59struct btsdio_data {
60 struct hci_dev *hdev;
61 struct sdio_func *func;
62
63 struct work_struct work;
64
65 struct sk_buff_head txq;
66};
67
68#define REG_RDAT 0x00 /* Receiver Data */
69#define REG_TDAT 0x00 /* Transmitter Data */
70#define REG_PC_RRT 0x10 /* Read Packet Control */
71#define REG_PC_WRT 0x11 /* Write Packet Control */
72#define REG_RTC_STAT 0x12 /* Retry Control Status */
73#define REG_RTC_SET 0x12 /* Retry Control Set */
74#define REG_INTRD 0x13 /* Interrupt Indication */
75#define REG_CL_INTRD 0x13 /* Interrupt Clear */
76#define REG_EN_INTRD 0x14 /* Interrupt Enable */
77#define REG_MD_STAT 0x20 /* Bluetooth Mode Status */
78
79static int btsdio_tx_packet(struct btsdio_data *data, struct sk_buff *skb)
80{
81 int err;
82
83 BT_DBG("%s", data->hdev->name);
84
85 /* Prepend Type-A header */
86 skb_push(skb, 4);
87 skb->data[0] = (skb->len & 0x0000ff);
88 skb->data[1] = (skb->len & 0x00ff00) >> 8;
89 skb->data[2] = (skb->len & 0xff0000) >> 16;
90 skb->data[3] = bt_cb(skb)->pkt_type;
91
92 err = sdio_writesb(data->func, REG_TDAT, skb->data, skb->len);
93 if (err < 0) {
94 sdio_writeb(data->func, 0x01, REG_PC_WRT, NULL);
95 return err;
96 }
97
98 data->hdev->stat.byte_tx += skb->len;
99
100 kfree_skb(skb);
101
102 return 0;
103}
104
105static void btsdio_work(struct work_struct *work)
106{
107 struct btsdio_data *data = container_of(work, struct btsdio_data, work);
108 struct sk_buff *skb;
109 int err;
110
111 BT_DBG("%s", data->hdev->name);
112
113 sdio_claim_host(data->func);
114
115 while ((skb = skb_dequeue(&data->txq))) {
116 err = btsdio_tx_packet(data, skb);
117 if (err < 0) {
118 data->hdev->stat.err_tx++;
119 skb_queue_head(&data->txq, skb);
120 break;
121 }
122 }
123
124 sdio_release_host(data->func);
125}
126
127static int btsdio_rx_packet(struct btsdio_data *data)
128{
129 u8 hdr[4] __attribute__ ((aligned(4)));
130 struct sk_buff *skb;
131 int err, len;
132
133 BT_DBG("%s", data->hdev->name);
134
135 err = sdio_readsb(data->func, hdr, REG_RDAT, 4);
136 if (err < 0)
137 return err;
138
139 len = hdr[0] | (hdr[1] << 8) | (hdr[2] << 16);
140 if (len < 4 || len > 65543)
141 return -EILSEQ;
142
143 skb = bt_skb_alloc(len - 4, GFP_KERNEL);
144 if (!skb) {
145 /* Out of memory. Prepare a read retry and just
146 * return with the expectation that the next time
147 * we're called we'll have more memory. */
148 return -ENOMEM;
149 }
150
151 skb_put(skb, len - 4);
152
153 err = sdio_readsb(data->func, skb->data, REG_RDAT, len - 4);
154 if (err < 0) {
155 kfree_skb(skb);
156 return err;
157 }
158
159 data->hdev->stat.byte_rx += len;
160
161 skb->dev = (void *) data->hdev;
162 bt_cb(skb)->pkt_type = hdr[3];
163
164 err = hci_recv_frame(skb);
165 if (err < 0) {
166 kfree_skb(skb);
167 return err;
168 }
169
170 sdio_writeb(data->func, 0x00, REG_PC_RRT, NULL);
171
172 return 0;
173}
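The receive side reads the same header back and bounds-checks it: anything below 4 cannot even hold the header, and 65543 is the largest legal frame (4 Type-A header bytes + 4 ACL header bytes + 65535 bytes of ACL payload). A matching standalone sketch of the validation:

#include <stdint.h>

/* Validate and decode a Type-A header; returns the remaining HCI
 * payload length, or -1 so the caller can trigger a read retry
 * (as btsdio does via REG_PC_RRT). */
static int typea_decode(const uint8_t hdr[4], uint8_t *pkt_type)
{
	int len = hdr[0] | (hdr[1] << 8) | (hdr[2] << 16);

	if (len < 4 || len > 65543)
		return -1;	/* malformed frame */

	*pkt_type = hdr[3];

	return len - 4;
}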
174
175static void btsdio_interrupt(struct sdio_func *func)
176{
177 struct btsdio_data *data = sdio_get_drvdata(func);
178 int intrd;
179
180 BT_DBG("%s", data->hdev->name);
181
182 intrd = sdio_readb(func, REG_INTRD, NULL);
183 if (intrd & 0x01) {
184 sdio_writeb(func, 0x01, REG_CL_INTRD, NULL);
185
186 if (btsdio_rx_packet(data) < 0) {
187 data->hdev->stat.err_rx++;
188 sdio_writeb(data->func, 0x01, REG_PC_RRT, NULL);
189 }
190 }
191}
192
193static int btsdio_open(struct hci_dev *hdev)
194{
195 struct btsdio_data *data = hdev->driver_data;
196 int err;
197
198 BT_DBG("%s", hdev->name);
199
200 if (test_and_set_bit(HCI_RUNNING, &hdev->flags))
201 return 0;
202
203 sdio_claim_host(data->func);
204
205 err = sdio_enable_func(data->func);
206 if (err < 0) {
207 clear_bit(HCI_RUNNING, &hdev->flags);
208 goto release;
209 }
210
211 err = sdio_claim_irq(data->func, btsdio_interrupt);
212 if (err < 0) {
213 sdio_disable_func(data->func);
214 clear_bit(HCI_RUNNING, &hdev->flags);
215 goto release;
216 }
217
218 if (data->func->class == SDIO_CLASS_BT_B)
219 sdio_writeb(data->func, 0x00, REG_MD_STAT, NULL);
220
221 sdio_writeb(data->func, 0x01, REG_EN_INTRD, NULL);
222
223release:
224 sdio_release_host(data->func);
225
226 return err;
227}
228
229static int btsdio_close(struct hci_dev *hdev)
230{
231 struct btsdio_data *data = hdev->driver_data;
232
233 BT_DBG("%s", hdev->name);
234
235 if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags))
236 return 0;
237
238 sdio_claim_host(data->func);
239
240 sdio_writeb(data->func, 0x00, REG_EN_INTRD, NULL);
241
242 sdio_release_irq(data->func);
243 sdio_disable_func(data->func);
244
245 sdio_release_host(data->func);
246
247 return 0;
248}
249
250static int btsdio_flush(struct hci_dev *hdev)
251{
252 struct btsdio_data *data = hdev->driver_data;
253
254 BT_DBG("%s", hdev->name);
255
256 skb_queue_purge(&data->txq);
257
258 return 0;
259}
260
261static int btsdio_send_frame(struct sk_buff *skb)
262{
263 struct hci_dev *hdev = (struct hci_dev *) skb->dev;
264 struct btsdio_data *data = hdev->driver_data;
265
266 BT_DBG("%s", hdev->name);
267
268 if (!test_bit(HCI_RUNNING, &hdev->flags))
269 return -EBUSY;
270
271 switch (bt_cb(skb)->pkt_type) {
272 case HCI_COMMAND_PKT:
273 hdev->stat.cmd_tx++;
274 break;
275
276 case HCI_ACLDATA_PKT:
277 hdev->stat.acl_tx++;
278 break;
279
280 case HCI_SCODATA_PKT:
281 hdev->stat.sco_tx++;
282 break;
283
284 default:
285 return -EILSEQ;
286 }
287
288 skb_queue_tail(&data->txq, skb);
289
290 schedule_work(&data->work);
291
292 return 0;
293}
294
295static void btsdio_destruct(struct hci_dev *hdev)
296{
297 struct btsdio_data *data = hdev->driver_data;
298
299 BT_DBG("%s", hdev->name);
300
301 kfree(data);
302}
303
304static int btsdio_probe(struct sdio_func *func,
305 const struct sdio_device_id *id)
306{
307 struct btsdio_data *data;
308 struct hci_dev *hdev;
309 struct sdio_func_tuple *tuple = func->tuples;
310 int err;
311
312 BT_DBG("func %p id %p class 0x%04x", func, id, func->class);
313
314 while (tuple) {
315 BT_DBG("code 0x%x size %d", tuple->code, tuple->size);
316 tuple = tuple->next;
317 }
318
319 data = kzalloc(sizeof(*data), GFP_KERNEL);
320 if (!data)
321 return -ENOMEM;
322
323 data->func = func;
324
325 INIT_WORK(&data->work, btsdio_work);
326
327 skb_queue_head_init(&data->txq);
328
329 hdev = hci_alloc_dev();
330 if (!hdev) {
331 kfree(data);
332 return -ENOMEM;
333 }
334
335 hdev->type = HCI_SDIO;
336 hdev->driver_data = data;
337
338 data->hdev = hdev;
339
340 SET_HCIDEV_DEV(hdev, &func->dev);
341
342 hdev->open = btsdio_open;
343 hdev->close = btsdio_close;
344 hdev->flush = btsdio_flush;
345 hdev->send = btsdio_send_frame;
346 hdev->destruct = btsdio_destruct;
347
348 hdev->owner = THIS_MODULE;
349
350 err = hci_register_dev(hdev);
351 if (err < 0) {
352 hci_free_dev(hdev);
353 kfree(data);
354 return err;
355 }
356
357 sdio_set_drvdata(func, data);
358
359 return 0;
360}
361
362static void btsdio_remove(struct sdio_func *func)
363{
364 struct btsdio_data *data = sdio_get_drvdata(func);
365 struct hci_dev *hdev;
366
367 BT_DBG("func %p", func);
368
369 if (!data)
370 return;
371
372 hdev = data->hdev;
373
374 sdio_set_drvdata(func, NULL);
375
376 hci_unregister_dev(hdev);
377
378 hci_free_dev(hdev);
379}
380
381static struct sdio_driver btsdio_driver = {
382 .name = "btsdio",
383 .probe = btsdio_probe,
384 .remove = btsdio_remove,
385 .id_table = btsdio_table,
386};
387
388static int __init btsdio_init(void)
389{
390 BT_INFO("Generic Bluetooth SDIO driver ver %s", VERSION);
391
392 return sdio_register_driver(&btsdio_driver);
393}
394
395static void __exit btsdio_exit(void)
396{
397 sdio_unregister_driver(&btsdio_driver);
398}
399
400module_init(btsdio_init);
401module_exit(btsdio_exit);
402
403MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
404MODULE_DESCRIPTION("Generic Bluetooth SDIO driver ver " VERSION);
405MODULE_VERSION(VERSION);
406MODULE_LICENSE("GPL");
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index d7d2ea0d86..08f48d577a 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -294,10 +294,7 @@ static irqreturn_t btuart_interrupt(int irq, void *dev_inst)
294 294 int boguscount = 0;
295 295 int iir, lsr;
296 296
297 if (!info || !info->hdev) {
298 BT_ERR("Call of irq %d for unknown device", irq);
299 return IRQ_NONE;
300 }
297 BUG_ON(!info->hdev);
301 298
302 299 iobase = info->p_dev->io.BasePort1;
303 300
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
new file mode 100644
index 0000000000..12e108914f
--- /dev/null
+++ b/drivers/bluetooth/btusb.c
@@ -0,0 +1,564 @@
1/*
2 *
3 * Generic Bluetooth USB driver
4 *
5 * Copyright (C) 2005-2007 Marcel Holtmann <marcel@holtmann.org>
6 *
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 */
23
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/init.h>
27#include <linux/slab.h>
28#include <linux/types.h>
29#include <linux/sched.h>
30#include <linux/errno.h>
31#include <linux/skbuff.h>
32
33#include <linux/usb.h>
34
35#include <net/bluetooth/bluetooth.h>
36#include <net/bluetooth/hci_core.h>
37
38//#define CONFIG_BT_HCIBTUSB_DEBUG
39#ifndef CONFIG_BT_HCIBTUSB_DEBUG
40#undef BT_DBG
41#define BT_DBG(D...)
42#endif
43
44#define VERSION "0.1"
45
46static struct usb_device_id btusb_table[] = {
47 /* Generic Bluetooth USB device */
48 { USB_DEVICE_INFO(0xe0, 0x01, 0x01) },
49
50 { } /* Terminating entry */
51};
52
53MODULE_DEVICE_TABLE(usb, btusb_table);
54
55static struct usb_device_id blacklist_table[] = {
56 { } /* Terminating entry */
57};
58
59#define BTUSB_INTR_RUNNING 0
60#define BTUSB_BULK_RUNNING 1
61
62struct btusb_data {
63 struct hci_dev *hdev;
64 struct usb_device *udev;
65
66 spinlock_t lock;
67
68 unsigned long flags;
69
70 struct work_struct work;
71
72 struct usb_anchor tx_anchor;
73 struct usb_anchor intr_anchor;
74 struct usb_anchor bulk_anchor;
75
76 struct usb_endpoint_descriptor *intr_ep;
77 struct usb_endpoint_descriptor *bulk_tx_ep;
78 struct usb_endpoint_descriptor *bulk_rx_ep;
79};
80
81static void btusb_intr_complete(struct urb *urb)
82{
83 struct hci_dev *hdev = urb->context;
84 struct btusb_data *data = hdev->driver_data;
85 int err;
86
87 BT_DBG("%s urb %p status %d count %d", hdev->name,
88 urb, urb->status, urb->actual_length);
89
90 if (!test_bit(HCI_RUNNING, &hdev->flags))
91 return;
92
93 if (urb->status == 0) {
94 if (hci_recv_fragment(hdev, HCI_EVENT_PKT,
95 urb->transfer_buffer,
96 urb->actual_length) < 0) {
97 BT_ERR("%s corrupted event packet", hdev->name);
98 hdev->stat.err_rx++;
99 }
100 }
101
102 if (!test_bit(BTUSB_INTR_RUNNING, &data->flags))
103 return;
104
105 usb_anchor_urb(urb, &data->intr_anchor);
106
107 err = usb_submit_urb(urb, GFP_ATOMIC);
108 if (err < 0) {
109 BT_ERR("%s urb %p failed to resubmit (%d)",
110 hdev->name, urb, -err);
111 usb_unanchor_urb(urb);
112 }
113}
114
115static inline int btusb_submit_intr_urb(struct hci_dev *hdev)
116{
117 struct btusb_data *data = hdev->driver_data;
118 struct urb *urb;
119 unsigned char *buf;
120 unsigned int pipe;
121 int err, size;
122
123 BT_DBG("%s", hdev->name);
124
125 urb = usb_alloc_urb(0, GFP_ATOMIC);
126 if (!urb)
127 return -ENOMEM;
128
129 size = le16_to_cpu(data->intr_ep->wMaxPacketSize);
130
131 buf = kmalloc(size, GFP_ATOMIC);
132 if (!buf) {
133 usb_free_urb(urb);
134 return -ENOMEM;
135 }
136
137 pipe = usb_rcvintpipe(data->udev, data->intr_ep->bEndpointAddress);
138
139 usb_fill_int_urb(urb, data->udev, pipe, buf, size,
140 btusb_intr_complete, hdev,
141 data->intr_ep->bInterval);
142
143 urb->transfer_flags |= URB_FREE_BUFFER;
144
145 usb_anchor_urb(urb, &data->intr_anchor);
146
147 err = usb_submit_urb(urb, GFP_ATOMIC);
148 if (err < 0) {
149 BT_ERR("%s urb %p submission failed (%d)",
150 hdev->name, urb, -err);
151 usb_unanchor_urb(urb);
152 kfree(buf);
153 }
154
155 usb_free_urb(urb);
156
157 return err;
158}
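Note the lifetime handling in this helper: usb_free_urb() at the end only drops the local reference, the USB core keeps its own while the URB is in flight, and URB_FREE_BUFFER makes the core kfree() the transfer buffer when the URB is finally destroyed. Because the flag is already set, the extra kfree(buf) on the failure path looks redundant and risks a double free once usb_free_urb() runs; the sketch below lets the flag do all the freeing.

#include <linux/usb.h>
#include <linux/slab.h>

/* URB lifetime sketch: with URB_FREE_BUFFER set, the core owns the
 * buffer, so no explicit kfree() is needed on any path. */
static int submit_int_sketch(struct usb_device *udev,
			     struct usb_endpoint_descriptor *ep,
			     usb_complete_t complete, void *ctx)
{
	struct urb *urb = usb_alloc_urb(0, GFP_KERNEL);
	unsigned char *buf;
	int err;

	if (!urb)
		return -ENOMEM;

	buf = kmalloc(le16_to_cpu(ep->wMaxPacketSize), GFP_KERNEL);
	if (!buf) {
		usb_free_urb(urb);
		return -ENOMEM;
	}

	usb_fill_int_urb(urb, udev,
			 usb_rcvintpipe(udev, ep->bEndpointAddress),
			 buf, le16_to_cpu(ep->wMaxPacketSize),
			 complete, ctx, ep->bInterval);

	urb->transfer_flags |= URB_FREE_BUFFER;

	err = usb_submit_urb(urb, GFP_KERNEL);

	/* Success or failure, dropping the last reference frees the
	 * buffer via URB_FREE_BUFFER once the URB is destroyed. */
	usb_free_urb(urb);

	return err;
}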
159
160static void btusb_bulk_complete(struct urb *urb)
161{
162 struct hci_dev *hdev = urb->context;
163 struct btusb_data *data = hdev->driver_data;
164 int err;
165
166 BT_DBG("%s urb %p status %d count %d", hdev->name,
167 urb, urb->status, urb->actual_length);
168
169 if (!test_bit(HCI_RUNNING, &hdev->flags))
170 return;
171
172 if (urb->status == 0) {
173 if (hci_recv_fragment(hdev, HCI_ACLDATA_PKT,
174 urb->transfer_buffer,
175 urb->actual_length) < 0) {
176 BT_ERR("%s corrupted ACL packet", hdev->name);
177 hdev->stat.err_rx++;
178 }
179 }
180
181 if (!test_bit(BTUSB_BULK_RUNNING, &data->flags))
182 return;
183
184 usb_anchor_urb(urb, &data->bulk_anchor);
185
186 err = usb_submit_urb(urb, GFP_ATOMIC);
187 if (err < 0) {
188 BT_ERR("%s urb %p failed to resubmit (%d)",
189 hdev->name, urb, -err);
190 usb_unanchor_urb(urb);
191 }
192}
193
194static inline int btusb_submit_bulk_urb(struct hci_dev *hdev)
195{
196 struct btusb_data *data = hdev->driver_data;
197 struct urb *urb;
198 unsigned char *buf;
199 unsigned int pipe;
200 int err, size;
201
202 BT_DBG("%s", hdev->name);
203
204 urb = usb_alloc_urb(0, GFP_KERNEL);
205 if (!urb)
206 return -ENOMEM;
207
208 size = le16_to_cpu(data->bulk_rx_ep->wMaxPacketSize);
209
210 buf = kmalloc(size, GFP_KERNEL);
211 if (!buf) {
212 usb_free_urb(urb);
213 return -ENOMEM;
214 }
215
216 pipe = usb_rcvbulkpipe(data->udev, data->bulk_rx_ep->bEndpointAddress);
217
218 usb_fill_bulk_urb(urb, data->udev, pipe,
219 buf, size, btusb_bulk_complete, hdev);
220
221 urb->transfer_flags |= URB_FREE_BUFFER;
222
223 usb_anchor_urb(urb, &data->bulk_anchor);
224
225 err = usb_submit_urb(urb, GFP_KERNEL);
226 if (err < 0) {
227 BT_ERR("%s urb %p submission failed (%d)",
228 hdev->name, urb, -err);
229 usb_unanchor_urb(urb);
230 kfree(buf);
231 }
232
233 usb_free_urb(urb);
234
235 return err;
236}
237
238static void btusb_tx_complete(struct urb *urb)
239{
240 struct sk_buff *skb = urb->context;
241 struct hci_dev *hdev = (struct hci_dev *) skb->dev;
242
243 BT_DBG("%s urb %p status %d count %d", hdev->name,
244 urb, urb->status, urb->actual_length);
245
246 if (!test_bit(HCI_RUNNING, &hdev->flags))
247 goto done;
248
249 if (!urb->status)
250 hdev->stat.byte_tx += urb->transfer_buffer_length;
251 else
252 hdev->stat.err_tx++;
253
254done:
255 kfree(urb->setup_packet);
256
257 kfree_skb(skb);
258}
259
260static int btusb_open(struct hci_dev *hdev)
261{
262 struct btusb_data *data = hdev->driver_data;
263 int err;
264
265 BT_DBG("%s", hdev->name);
266
267 if (test_and_set_bit(HCI_RUNNING, &hdev->flags))
268 return 0;
269
270 if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags))
271 return 0;
272
273 err = btusb_submit_intr_urb(hdev);
274 if (err < 0) {
275 clear_bit(BTUSB_INTR_RUNNING, &data->flags);
276 clear_bit(HCI_RUNNING, &hdev->flags);
277 }
278
279 return err;
280}
281
282static int btusb_close(struct hci_dev *hdev)
283{
284 struct btusb_data *data = hdev->driver_data;
285
286 BT_DBG("%s", hdev->name);
287
288 if (!test_and_clear_bit(HCI_RUNNING, &hdev->flags))
289 return 0;
290
291 clear_bit(BTUSB_BULK_RUNNING, &data->flags);
292 usb_kill_anchored_urbs(&data->bulk_anchor);
293
294 clear_bit(BTUSB_INTR_RUNNING, &data->flags);
295 usb_kill_anchored_urbs(&data->intr_anchor);
296
297 return 0;
298}
299
300static int btusb_flush(struct hci_dev *hdev)
301{
302 struct btusb_data *data = hdev->driver_data;
303
304 BT_DBG("%s", hdev->name);
305
306 usb_kill_anchored_urbs(&data->tx_anchor);
307
308 return 0;
309}
310
311static int btusb_send_frame(struct sk_buff *skb)
312{
313 struct hci_dev *hdev = (struct hci_dev *) skb->dev;
314 struct btusb_data *data = hdev->driver_data;
315 struct usb_ctrlrequest *dr;
316 struct urb *urb;
317 unsigned int pipe;
318 int err;
319
320 BT_DBG("%s", hdev->name);
321
322 if (!test_bit(HCI_RUNNING, &hdev->flags))
323 return -EBUSY;
324
325 switch (bt_cb(skb)->pkt_type) {
326 case HCI_COMMAND_PKT:
327 urb = usb_alloc_urb(0, GFP_ATOMIC);
328 if (!urb)
329 return -ENOMEM;
330
331 dr = kmalloc(sizeof(*dr), GFP_ATOMIC);
332 if (!dr) {
333 usb_free_urb(urb);
334 return -ENOMEM;
335 }
336
337 dr->bRequestType = USB_TYPE_CLASS;
338 dr->bRequest = 0;
339 dr->wIndex = 0;
340 dr->wValue = 0;
341 dr->wLength = __cpu_to_le16(skb->len);
342
343 pipe = usb_sndctrlpipe(data->udev, 0x00);
344
345 usb_fill_control_urb(urb, data->udev, pipe, (void *) dr,
346 skb->data, skb->len, btusb_tx_complete, skb);
347
348 hdev->stat.cmd_tx++;
349 break;
350
351 case HCI_ACLDATA_PKT:
352 urb = usb_alloc_urb(0, GFP_ATOMIC);
353 if (!urb)
354 return -ENOMEM;
355
356 pipe = usb_sndbulkpipe(data->udev,
357 data->bulk_tx_ep->bEndpointAddress);
358
359 usb_fill_bulk_urb(urb, data->udev, pipe,
360 skb->data, skb->len, btusb_tx_complete, skb);
361
362 hdev->stat.acl_tx++;
363 break;
364
365 case HCI_SCODATA_PKT:
366 hdev->stat.sco_tx++;
367 kfree_skb(skb);
368 return 0;
369
370 default:
371 return -EILSEQ;
372 }
373
374 usb_anchor_urb(urb, &data->tx_anchor);
375
376 err = usb_submit_urb(urb, GFP_ATOMIC);
377 if (err < 0) {
378 BT_ERR("%s urb %p submission failed", hdev->name, urb);
379 kfree(urb->setup_packet);
380 usb_unanchor_urb(urb);
381 }
382
383 usb_free_urb(urb);
384
385 return err;
386}
387
388static void btusb_destruct(struct hci_dev *hdev)
389{
390 struct btusb_data *data = hdev->driver_data;
391
392 BT_DBG("%s", hdev->name);
393
394 kfree(data);
395}
396
397static void btusb_notify(struct hci_dev *hdev, unsigned int evt)
398{
399 struct btusb_data *data = hdev->driver_data;
400
401 BT_DBG("%s evt %d", hdev->name, evt);
402
403 if (evt == HCI_NOTIFY_CONN_ADD || evt == HCI_NOTIFY_CONN_DEL)
404 schedule_work(&data->work);
405}
406
407static void btusb_work(struct work_struct *work)
408{
409 struct btusb_data *data = container_of(work, struct btusb_data, work);
410 struct hci_dev *hdev = data->hdev;
411
412 if (hdev->conn_hash.acl_num == 0) {
413 clear_bit(BTUSB_BULK_RUNNING, &data->flags);
414 usb_kill_anchored_urbs(&data->bulk_anchor);
415 return;
416 }
417
418 if (!test_and_set_bit(BTUSB_BULK_RUNNING, &data->flags)) {
419 if (btusb_submit_bulk_urb(hdev) < 0)
420 clear_bit(BTUSB_BULK_RUNNING, &data->flags);
421 else
422 btusb_submit_bulk_urb(hdev);
423 }
424}
425
426static int btusb_probe(struct usb_interface *intf,
427 const struct usb_device_id *id)
428{
429 struct usb_endpoint_descriptor *ep_desc;
430 struct btusb_data *data;
431 struct hci_dev *hdev;
432 int i, err;
433
434 BT_DBG("intf %p id %p", intf, id);
435
436 if (intf->cur_altsetting->desc.bInterfaceNumber != 0)
437 return -ENODEV;
438
439 if (!id->driver_info) {
440 const struct usb_device_id *match;
441 match = usb_match_id(intf, blacklist_table);
442 if (match)
443 id = match;
444 }
445
446 data = kzalloc(sizeof(*data), GFP_KERNEL);
447 if (!data)
448 return -ENOMEM;
449
450 for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) {
451 ep_desc = &intf->cur_altsetting->endpoint[i].desc;
452
453 if (!data->intr_ep && usb_endpoint_is_int_in(ep_desc)) {
454 data->intr_ep = ep_desc;
455 continue;
456 }
457
458 if (!data->bulk_tx_ep && usb_endpoint_is_bulk_out(ep_desc)) {
459 data->bulk_tx_ep = ep_desc;
460 continue;
461 }
462
463 if (!data->bulk_rx_ep && usb_endpoint_is_bulk_in(ep_desc)) {
464 data->bulk_rx_ep = ep_desc;
465 continue;
466 }
467 }
468
469 if (!data->intr_ep || !data->bulk_tx_ep || !data->bulk_rx_ep) {
470 kfree(data);
471 return -ENODEV;
472 }
473
474 data->udev = interface_to_usbdev(intf);
475
476 spin_lock_init(&data->lock);
477
478 INIT_WORK(&data->work, btusb_work);
479
480 init_usb_anchor(&data->tx_anchor);
481 init_usb_anchor(&data->intr_anchor);
482 init_usb_anchor(&data->bulk_anchor);
483
484 hdev = hci_alloc_dev();
485 if (!hdev) {
486 kfree(data);
487 return -ENOMEM;
488 }
489
490 hdev->type = HCI_USB;
491 hdev->driver_data = data;
492
493 data->hdev = hdev;
494
495 SET_HCIDEV_DEV(hdev, &intf->dev);
496
497 hdev->open = btusb_open;
498 hdev->close = btusb_close;
499 hdev->flush = btusb_flush;
500 hdev->send = btusb_send_frame;
501 hdev->destruct = btusb_destruct;
502 hdev->notify = btusb_notify;
503
504 hdev->owner = THIS_MODULE;
505
506 set_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks);
507
508 err = hci_register_dev(hdev);
509 if (err < 0) {
510 hci_free_dev(hdev);
511 kfree(data);
512 return err;
513 }
514
515 usb_set_intfdata(intf, data);
516
517 return 0;
518}
519
520static void btusb_disconnect(struct usb_interface *intf)
521{
522 struct btusb_data *data = usb_get_intfdata(intf);
523 struct hci_dev *hdev;
524
525 BT_DBG("intf %p", intf);
526
527 if (!data)
528 return;
529
530 hdev = data->hdev;
531
532 usb_set_intfdata(intf, NULL);
533
534 hci_unregister_dev(hdev);
535
536 hci_free_dev(hdev);
537}
538
539static struct usb_driver btusb_driver = {
540 .name = "btusb",
541 .probe = btusb_probe,
542 .disconnect = btusb_disconnect,
543 .id_table = btusb_table,
544};
545
546static int __init btusb_init(void)
547{
548 BT_INFO("Generic Bluetooth USB driver ver %s", VERSION);
549
550 return usb_register(&btusb_driver);
551}
552
553static void __exit btusb_exit(void)
554{
555 usb_deregister(&btusb_driver);
556}
557
558module_init(btusb_init);
559module_exit(btusb_exit);
560
561MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
562MODULE_DESCRIPTION("Generic Bluetooth USB driver ver " VERSION);
563MODULE_VERSION(VERSION);
564MODULE_LICENSE("GPL");
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index 7f9c54b996..dae45cdf02 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -298,10 +298,7 @@ static irqreturn_t dtl1_interrupt(int irq, void *dev_inst)
298 298 int boguscount = 0;
299 299 int iir, lsr;
300 300
301 if (!info || !info->hdev) {
302 BT_ERR("Call of irq %d for unknown device", irq);
303 return IRQ_NONE;
304 }
301 BUG_ON(!info->hdev);
305 302
306 303 iobase = info->p_dev->io.BasePort1;
307 304
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index d66064ccb3..696f7528f0 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -237,7 +237,8 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data,
237 237 if (hciextn && chan == 5) {
238 238 struct hci_command_hdr *hdr = (struct hci_command_hdr *) data;
239 239
240 if (hci_opcode_ogf(__le16_to_cpu(hdr->opcode)) == OGF_VENDOR_CMD) {
240 /* Vendor specific commands */
241 if (hci_opcode_ogf(__le16_to_cpu(hdr->opcode)) == 0x3f) {
241 242 u8 desc = *(data + HCI_COMMAND_HDR_SIZE);
242 243 if ((desc & 0xf0) == 0xc0) {
243 244 data += HCI_COMMAND_HDR_SIZE + 1;
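The substitution of 0x3f for OGF_VENDOR_CMD relies on how HCI opcodes are laid out: a 16-bit opcode packs a 10-bit OCF in the low bits and a 6-bit OGF in the high bits, and 0x3f is the vendor-specific group. The helpers, as defined in net/bluetooth/hci.h:

/* From net/bluetooth/hci.h: opcode = (OGF << 10) | OCF. An OGF of
 * 0x3f (all six bits set) marks vendor-specific commands. */
#define hci_opcode_pack(ogf, ocf)	(__u16)((ocf & 0x03ff) | (ogf << 10))
#define hci_opcode_ogf(op)		(op >> 10)
#define hci_opcode_ocf(op)		(op & 0x03ff)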
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 6055b9c0ac..e68821d074 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -549,7 +549,10 @@ static int __init hci_uart_init(void)
549 549 #ifdef CONFIG_BT_HCIUART_BCSP
550 550 bcsp_init();
551 551 #endif
552
552 #ifdef CONFIG_BT_HCIUART_LL
553 ll_init();
554#endif
555
553 556 return 0;
554 557 }
555 558
@@ -563,6 +566,9 @@ static void __exit hci_uart_exit(void)
563 566 #ifdef CONFIG_BT_HCIUART_BCSP
564 567 bcsp_deinit();
565 568 #endif
569#ifdef CONFIG_BT_HCIUART_LL
570 ll_deinit();
571#endif
566 572
567 573 /* Release tty registration of line discipline */
568 574 if ((err = tty_unregister_ldisc(N_HCI)))
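ll_init() and ll_deinit() follow the same convention as h4_init()/bcsp_init(): they register an hci_uart_proto descriptor with the line-discipline core, keyed by the HCI_UART_LL id added to hci_uart.h. A sketch of the expected shape (the callback names mirror the hci_ll.c functions in the new file below; ll_recv and ll_dequeue sit past the excerpt shown):

static struct hci_uart_proto llp = {
	.id	 = HCI_UART_LL,
	.open	 = ll_open,
	.close	 = ll_close,
	.recv	 = ll_recv,	/* defined later in hci_ll.c */
	.enqueue = ll_enqueue,
	.dequeue = ll_dequeue,	/* defined later in hci_ll.c */
	.flush	 = ll_flush,
};

int ll_init(void)
{
	int err = hci_uart_register_proto(&llp);

	if (err < 0)
		BT_ERR("HCILL protocol registration failed");

	return err;
}

int ll_deinit(void)
{
	return hci_uart_unregister_proto(&llp);
}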
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
new file mode 100644
index 0000000000..8c3e62a17b
--- /dev/null
+++ b/drivers/bluetooth/hci_ll.c
@@ -0,0 +1,531 @@
1/*
2 * Texas Instruments' Bluetooth HCILL UART protocol
3 *
4 * HCILL (HCI Low Level) is a Texas Instruments' power management
5 * protocol extension to H4.
6 *
7 * Copyright (C) 2007 Texas Instruments, Inc.
8 *
9 * Written by Ohad Ben-Cohen <ohad@bencohen.org>
10 *
11 * Acknowledgements:
12 * This file is based on hci_h4.c, which was written
13 * by Maxim Krasnyansky and Marcel Holtmann.
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License version 2
17 * as published by the Free Software Foundation
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 *
28 */
29
30#include <linux/module.h>
31#include <linux/kernel.h>
32
33#include <linux/init.h>
34#include <linux/sched.h>
35#include <linux/types.h>
36#include <linux/fcntl.h>
37#include <linux/interrupt.h>
38#include <linux/ptrace.h>
39#include <linux/poll.h>
40
41#include <linux/slab.h>
42#include <linux/tty.h>
43#include <linux/errno.h>
44#include <linux/string.h>
45#include <linux/signal.h>
46#include <linux/ioctl.h>
47#include <linux/skbuff.h>
48
49#include <net/bluetooth/bluetooth.h>
50#include <net/bluetooth/hci_core.h>
51
52#include "hci_uart.h"
53
54/* HCILL commands */
55#define HCILL_GO_TO_SLEEP_IND 0x30
56#define HCILL_GO_TO_SLEEP_ACK 0x31
57#define HCILL_WAKE_UP_IND 0x32
58#define HCILL_WAKE_UP_ACK 0x33
59
60/* HCILL receiver States */
61#define HCILL_W4_PACKET_TYPE 0
62#define HCILL_W4_EVENT_HDR 1
63#define HCILL_W4_ACL_HDR 2
64#define HCILL_W4_SCO_HDR 3
65#define HCILL_W4_DATA 4
66
67/* HCILL states */
68enum hcill_states_e {
69 HCILL_ASLEEP,
70 HCILL_ASLEEP_TO_AWAKE,
71 HCILL_AWAKE,
72 HCILL_AWAKE_TO_ASLEEP
73};
74
75struct hcill_cmd {
76 u8 cmd;
77} __attribute__((packed));
78
79struct ll_struct {
80 unsigned long rx_state;
81 unsigned long rx_count;
82 struct sk_buff *rx_skb;
83 struct sk_buff_head txq;
84 spinlock_t hcill_lock; /* HCILL state lock */
85 unsigned long hcill_state; /* HCILL power state */
86 struct sk_buff_head tx_wait_q; /* HCILL wait queue */
87};
88
89/*
90 * Builds and sends an HCILL command packet.
91 * These are very simple packets with only 1 cmd byte
92 */
93static int send_hcill_cmd(u8 cmd, struct hci_uart *hu)
94{
95 int err = 0;
96 struct sk_buff *skb = NULL;
97 struct ll_struct *ll = hu->priv;
98 struct hcill_cmd *hcill_packet;
99
100 BT_DBG("hu %p cmd 0x%x", hu, cmd);
101
102 /* allocate packet */
103 skb = bt_skb_alloc(1, GFP_ATOMIC);
104 if (!skb) {
105 BT_ERR("cannot allocate memory for HCILL packet");
106 err = -ENOMEM;
107 goto out;
108 }
109
110 /* prepare packet */
111 hcill_packet = (struct hcill_cmd *) skb_put(skb, 1);
112 hcill_packet->cmd = cmd;
113 skb->dev = (void *) hu->hdev;
114
115 /* send packet */
116 skb_queue_tail(&ll->txq, skb);
117out:
118 return err;
119}
120
121/* Initialize protocol */
122static int ll_open(struct hci_uart *hu)
123{
124 struct ll_struct *ll;
125
126 BT_DBG("hu %p", hu);
127
128 ll = kzalloc(sizeof(*ll), GFP_ATOMIC);
129 if (!ll)
130 return -ENOMEM;
131
132 skb_queue_head_init(&ll->txq);
133 skb_queue_head_init(&ll->tx_wait_q);
134 spin_lock_init(&ll->hcill_lock);
135
136 ll->hcill_state = HCILL_AWAKE;
137
138 hu->priv = ll;
139
140 return 0;
141}
142
143/* Flush protocol data */
144static int ll_flush(struct hci_uart *hu)
145{
146 struct ll_struct *ll = hu->priv;
147
148 BT_DBG("hu %p", hu);
149
150 skb_queue_purge(&ll->tx_wait_q);
151 skb_queue_purge(&ll->txq);
152
153 return 0;
154}
155
156/* Close protocol */
157static int ll_close(struct hci_uart *hu)
158{
159 struct ll_struct *ll = hu->priv;
160
161 BT_DBG("hu %p", hu);
162
163 skb_queue_purge(&ll->tx_wait_q);
164 skb_queue_purge(&ll->txq);
165
166 if (ll->rx_skb)
167 kfree_skb(ll->rx_skb);
168
169 hu->priv = NULL;
170
171 kfree(ll);
172
173 return 0;
174}
175
176/*
177 * Internal function that does the common work of the device wake-up process:
178 * 1. moves all pending packets (waiting in the tx_wait_q list) to the txq list.
179 * 2. changes the internal state to HCILL_AWAKE.
180 * Note: assumes that the hcill_lock spinlock is held;
181 * must not be called otherwise!
182 */
183static void __ll_do_awake(struct ll_struct *ll)
184{
185 struct sk_buff *skb = NULL;
186
187 while ((skb = skb_dequeue(&ll->tx_wait_q)))
188 skb_queue_tail(&ll->txq, skb);
189
190 ll->hcill_state = HCILL_AWAKE;
191}
192
193/*
194 * Called upon a wake-up-indication from the device
195 */
196static void ll_device_want_to_wakeup(struct hci_uart *hu)
197{
198 unsigned long flags;
199 struct ll_struct *ll = hu->priv;
200
201 BT_DBG("hu %p", hu);
202
203 /* lock hcill state */
204 spin_lock_irqsave(&ll->hcill_lock, flags);
205
206 switch (ll->hcill_state) {
207 case HCILL_ASLEEP:
208 /* acknowledge device wake up */
209 if (send_hcill_cmd(HCILL_WAKE_UP_ACK, hu) < 0) {
210 BT_ERR("cannot acknowledge device wake up");
211 goto out;
212 }
213 break;
214 case HCILL_ASLEEP_TO_AWAKE:
215 /*
216 * this state means that a wake-up-indication
217 * is already on its way to the device,
218 * and will serve as the required wake-up-ack
219 */
220 BT_DBG("dual wake-up-indication");
221 break;
222 default:
223 /* any other state is illegal */
224 BT_ERR("received HCILL_WAKE_UP_IND in state %ld", ll->hcill_state);
225 break;
226 }
227
228 /* send pending packets and change state to HCILL_AWAKE */
229 __ll_do_awake(ll);
230
231out:
232 spin_unlock_irqrestore(&ll->hcill_lock, flags);
233
234 /* actually send the packets */
235 hci_uart_tx_wakeup(hu);
236}
237
238/*
239 * Called upon a sleep-indication from the device
240 */
241static void ll_device_want_to_sleep(struct hci_uart *hu)
242{
243 unsigned long flags;
244 struct ll_struct *ll = hu->priv;
245
246 BT_DBG("hu %p", hu);
247
248 /* lock hcill state */
249 spin_lock_irqsave(&ll->hcill_lock, flags);
250
251 /* sanity check */
252 if (ll->hcill_state != HCILL_AWAKE)
253 BT_ERR("ERR: HCILL_GO_TO_SLEEP_IND in state %ld", ll->hcill_state);
254
255 /* acknowledge device sleep */
256 if (send_hcill_cmd(HCILL_GO_TO_SLEEP_ACK, hu) < 0) {
257 BT_ERR("cannot acknowledge device sleep");
258 goto out;
259 }
260
261 /* update state */
262 ll->hcill_state = HCILL_ASLEEP;
263
264out:
265 spin_unlock_irqrestore(&ll->hcill_lock, flags);
266
267 /* actually send the sleep ack packet */
268 hci_uart_tx_wakeup(hu);
269}
270
271/*
272 * Called upon wake-up-acknowledgement from the device
273 */
274static void ll_device_woke_up(struct hci_uart *hu)
275{
276 unsigned long flags;
277 struct ll_struct *ll = hu->priv;
278
279 BT_DBG("hu %p", hu);
280
281 /* lock hcill state */
282 spin_lock_irqsave(&ll->hcill_lock, flags);
283
284 /* sanity check */
285 if (ll->hcill_state != HCILL_ASLEEP_TO_AWAKE)
286 BT_ERR("received HCILL_WAKE_UP_ACK in state %ld", ll->hcill_state);
287
288 /* send pending packets and change state to HCILL_AWAKE */
289 __ll_do_awake(ll);
290
291 spin_unlock_irqrestore(&ll->hcill_lock, flags);
292
293 /* actually send the packets */
294 hci_uart_tx_wakeup(hu);
295}
296
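Taken together, the three handlers above and ll_enqueue() just below implement a small handshake state machine. The following condensed sketch (illustration only; next_state() is a hypothetical helper, not part of the driver) summarizes the receive-side transitions:

/* Illustration: HCILL power-state transitions on received signals. */
static unsigned long next_state(unsigned long state, u8 signal)
{
	switch (state) {
	case HCILL_AWAKE:
		if (signal == HCILL_GO_TO_SLEEP_IND)
			return HCILL_ASLEEP;		/* we reply SLEEP_ACK */
		break;
	case HCILL_ASLEEP:
		if (signal == HCILL_WAKE_UP_IND)
			return HCILL_AWAKE;		/* we reply WAKE_UP_ACK */
		break;
	case HCILL_ASLEEP_TO_AWAKE:	/* our own WAKE_UP_IND is in flight */
		if (signal == HCILL_WAKE_UP_IND ||	/* dual indication */
		    signal == HCILL_WAKE_UP_ACK)
			return HCILL_AWAKE;
		break;
	}
	return state;	/* anything else is logged and ignored */
}

The transmit side is what enters HCILL_ASLEEP_TO_AWAKE: queueing a frame while HCILL_ASLEEP sends HCILL_WAKE_UP_IND (see ll_enqueue() below). HCILL_AWAKE_TO_ASLEEP is declared for completeness but never entered in this file.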
297/* Enqueue frame for transmission (padding, crc, etc) */
298/* may be called from two simultaneous tasklets */
299static int ll_enqueue(struct hci_uart *hu, struct sk_buff *skb)
300{
301 unsigned long flags = 0;
302 struct ll_struct *ll = hu->priv;
303
304 BT_DBG("hu %p skb %p", hu, skb);
305
306 /* Prepend skb with frame type */
307 memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
308
309 /* lock hcill state */
310 spin_lock_irqsave(&ll->hcill_lock, flags);
311
312 /* act according to current state */
313 switch (ll->hcill_state) {
314 case HCILL_AWAKE:
315 BT_DBG("device awake, sending normally");
316 skb_queue_tail(&ll->txq, skb);
317 break;
318 case HCILL_ASLEEP:
319 BT_DBG("device asleep, waking up and queueing packet");
320 /* save packet for later */
321 skb_queue_tail(&ll->tx_wait_q, skb);
322 /* awake device */
323 if (send_hcill_cmd(HCILL_WAKE_UP_IND, hu) < 0) {
324 BT_ERR("cannot wake up device");
325 break;
326 }
327 ll->hcill_state = HCILL_ASLEEP_TO_AWAKE;
328 break;
329 case HCILL_ASLEEP_TO_AWAKE:
330 BT_DBG("device waking up, queueing packet");
331 /* transient state; just keep packet for later */
332 skb_queue_tail(&ll->tx_wait_q, skb);
333 break;
334 default:
335 BT_ERR("illegal hcill state: %ld (losing packet)", ll->hcill_state);
336 kfree_skb(skb);
337 break;
338 }
339
340 spin_unlock_irqrestore(&ll->hcill_lock, flags);
341
342 return 0;
343}
344
345static inline int ll_check_data_len(struct ll_struct *ll, int len)
346{
347 register int room = skb_tailroom(ll->rx_skb);
348
349 BT_DBG("len %d room %d", len, room);
350
351 if (!len) {
352 hci_recv_frame(ll->rx_skb);
353 } else if (len > room) {
354 BT_ERR("Data length is too large");
355 kfree_skb(ll->rx_skb);
356 } else {
357 ll->rx_state = HCILL_W4_DATA;
358 ll->rx_count = len;
359 return len;
360 }
361
362 ll->rx_state = HCILL_W4_PACKET_TYPE;
363 ll->rx_skb = NULL;
364 ll->rx_count = 0;
365
366 return 0;
367}
368
369/* Recv data */
370static int ll_recv(struct hci_uart *hu, void *data, int count)
371{
372 struct ll_struct *ll = hu->priv;
373 register char *ptr;
374 struct hci_event_hdr *eh;
375 struct hci_acl_hdr *ah;
376 struct hci_sco_hdr *sh;
377 register int len, type, dlen;
378
379 BT_DBG("hu %p count %d rx_state %ld rx_count %ld", hu, count, ll->rx_state, ll->rx_count);
380
381 ptr = data;
382 while (count) {
383 if (ll->rx_count) {
384 len = min_t(unsigned int, ll->rx_count, count);
385 memcpy(skb_put(ll->rx_skb, len), ptr, len);
386 ll->rx_count -= len; count -= len; ptr += len;
387
388 if (ll->rx_count)
389 continue;
390
391 switch (ll->rx_state) {
392 case HCILL_W4_DATA:
393 BT_DBG("Complete data");
394 hci_recv_frame(ll->rx_skb);
395
396 ll->rx_state = HCILL_W4_PACKET_TYPE;
397 ll->rx_skb = NULL;
398 continue;
399
400 case HCILL_W4_EVENT_HDR:
401 eh = (struct hci_event_hdr *) ll->rx_skb->data;
402
403 BT_DBG("Event header: evt 0x%2.2x plen %d", eh->evt, eh->plen);
404
405 ll_check_data_len(ll, eh->plen);
406 continue;
407
408 case HCILL_W4_ACL_HDR:
409 ah = (struct hci_acl_hdr *) ll->rx_skb->data;
410 dlen = __le16_to_cpu(ah->dlen);
411
412 BT_DBG("ACL header: dlen %d", dlen);
413
414 ll_check_data_len(ll, dlen);
415 continue;
416
417 case HCILL_W4_SCO_HDR:
418 sh = (struct hci_sco_hdr *) ll->rx_skb->data;
419
420 BT_DBG("SCO header: dlen %d", sh->dlen);
421
422 ll_check_data_len(ll, sh->dlen);
423 continue;
424 }
425 }
426
427 /* HCILL_W4_PACKET_TYPE */
428 switch (*ptr) {
429 case HCI_EVENT_PKT:
430 BT_DBG("Event packet");
431 ll->rx_state = HCILL_W4_EVENT_HDR;
432 ll->rx_count = HCI_EVENT_HDR_SIZE;
433 type = HCI_EVENT_PKT;
434 break;
435
436 case HCI_ACLDATA_PKT:
437 BT_DBG("ACL packet");
438 ll->rx_state = HCILL_W4_ACL_HDR;
439 ll->rx_count = HCI_ACL_HDR_SIZE;
440 type = HCI_ACLDATA_PKT;
441 break;
442
443 case HCI_SCODATA_PKT:
444 BT_DBG("SCO packet");
445 ll->rx_state = HCILL_W4_SCO_HDR;
446 ll->rx_count = HCI_SCO_HDR_SIZE;
447 type = HCI_SCODATA_PKT;
448 break;
449
450 /* HCILL signals */
451 case HCILL_GO_TO_SLEEP_IND:
452 BT_DBG("HCILL_GO_TO_SLEEP_IND packet");
453 ll_device_want_to_sleep(hu);
454 ptr++; count--;
455 continue;
456
457 case HCILL_GO_TO_SLEEP_ACK:
458 /* shouldn't happen */
459 BT_ERR("received HCILL_GO_TO_SLEEP_ACK (in state %ld)", ll->hcill_state);
460 ptr++; count--;
461 continue;
462
463 case HCILL_WAKE_UP_IND:
464 BT_DBG("HCILL_WAKE_UP_IND packet");
465 ll_device_want_to_wakeup(hu);
466 ptr++; count--;
467 continue;
468
469 case HCILL_WAKE_UP_ACK:
470 BT_DBG("HCILL_WAKE_UP_ACK packet");
471 ll_device_woke_up(hu);
472 ptr++; count--;
473 continue;
474
475 default:
476 BT_ERR("Unknown HCI packet type %2.2x", (__u8)*ptr);
477 hu->hdev->stat.err_rx++;
478 ptr++; count--;
479 continue;
480 }
481
482 ptr++; count--;
483
484 /* Allocate packet */
485 ll->rx_skb = bt_skb_alloc(HCI_MAX_FRAME_SIZE, GFP_ATOMIC);
486 if (!ll->rx_skb) {
487 BT_ERR("Can't allocate mem for new packet");
488 ll->rx_state = HCILL_W4_PACKET_TYPE;
489 ll->rx_count = 0;
490 return 0;
491 }
492
493 ll->rx_skb->dev = (void *) hu->hdev;
494 bt_cb(ll->rx_skb)->pkt_type = type;
495 }
496
497 return count;
498}
499
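A worked trace may help here (hypothetical input bytes, not taken from the source):

/* Feeding ll_recv() a minimal Command Complete event:
 *
 *   0x04              packet type -> HCILL_W4_EVENT_HDR, rx_count = 2
 *   0x0e 0x03         event header (evt, plen = 3) -> HCILL_W4_DATA, rx_count = 3
 *   0x01 0x03 0x0c    payload complete -> hci_recv_frame(),
 *                     back to HCILL_W4_PACKET_TYPE
 *
 * A lone HCILL signal byte (0x30..0x33) between packets is consumed
 * immediately by the handlers above and never allocates an skb.
 */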
500static struct sk_buff *ll_dequeue(struct hci_uart *hu)
501{
502 struct ll_struct *ll = hu->priv;
503 return skb_dequeue(&ll->txq);
504}
505
506static struct hci_uart_proto llp = {
507 .id = HCI_UART_LL,
508 .open = ll_open,
509 .close = ll_close,
510 .recv = ll_recv,
511 .enqueue = ll_enqueue,
512 .dequeue = ll_dequeue,
513 .flush = ll_flush,
514};
515
516int ll_init(void)
517{
518 int err = hci_uart_register_proto(&llp);
519
520 if (!err)
521 BT_INFO("HCILL protocol initialized");
522 else
523 BT_ERR("HCILL protocol registration failed");
524
525 return err;
526}
527
528int ll_deinit(void)
529{
530 return hci_uart_unregister_proto(&llp);
531}
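For context: the diffstat shows hci_ldisc.c gaining a few lines in the same series; presumably it wires the new protocol up next to the existing ones, roughly along these lines (a sketch, not the verbatim hunk):

	/* in hci_uart_init(), alongside h4_init()/bcsp_init() */
#ifdef CONFIG_BT_HCIUART_LL
	ll_init();
#endif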
diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
index 1097ce7239..50113db06b 100644
--- a/drivers/bluetooth/hci_uart.h
+++ b/drivers/bluetooth/hci_uart.h
@@ -33,12 +33,13 @@
 #define HCIUARTGETDEVICE	_IOR('U', 202, int)
 
 /* UART protocols */
-#define HCI_UART_MAX_PROTO	4
+#define HCI_UART_MAX_PROTO	5
 
 #define HCI_UART_H4	0
 #define HCI_UART_BCSP	1
 #define HCI_UART_3WIRE	2
 #define HCI_UART_H4DS	3
+#define HCI_UART_LL	4
 
 struct hci_uart;
 
@@ -85,3 +86,8 @@ int h4_deinit(void);
 int bcsp_init(void);
 int bcsp_deinit(void);
 #endif
+
+#ifdef CONFIG_BT_HCIUART_LL
+int ll_init(void);
+int ll_deinit(void);
+#endif
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 65491103e0..bf18d757b8 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -613,6 +613,10 @@ config HVC_XEN
 	help
 	  Xen virtual console device driver
 
+config VIRTIO_CONSOLE
+	bool
+	select HVC_DRIVER
+
 config HVCS
 	tristate "IBM Hypervisor Virtual Console Server support"
 	depends on PPC_PSERIES
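VIRTIO_CONSOLE is a silent bool (no prompt string), so nothing can enable it from menuconfig; a platform that provides a virtio console must pull it in with select. A hypothetical consumer stanza, for illustration only:

config LGUEST_GUEST
	bool "Lguest guest support"
	select VIRTIO_CONSOLE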
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index c78ff26647..07304d50e0 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -42,7 +42,6 @@ obj-$(CONFIG_SYNCLINK_GT) += synclink_gt.o
 obj-$(CONFIG_N_HDLC)		+= n_hdlc.o
 obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o
 obj-$(CONFIG_SX)		+= sx.o generic_serial.o
-obj-$(CONFIG_LGUEST_GUEST)	+= hvc_lguest.o
 obj-$(CONFIG_RIO)		+= rio/ generic_serial.o
 obj-$(CONFIG_HVC_CONSOLE)	+= hvc_vio.o hvsi.o
 obj-$(CONFIG_HVC_ISERIES)	+= hvc_iseries.o
@@ -50,6 +49,7 @@ obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o
 obj-$(CONFIG_HVC_BEAT)		+= hvc_beat.o
 obj-$(CONFIG_HVC_DRIVER)	+= hvc_console.o
 obj-$(CONFIG_HVC_XEN)		+= hvc_xen.o
+obj-$(CONFIG_VIRTIO_CONSOLE)	+= virtio_console.o
 obj-$(CONFIG_RAW_DRIVER)	+= raw.o
 obj-$(CONFIG_SGI_SNSC)		+= snsc.o snsc_event.o
 obj-$(CONFIG_MSPEC)		+= mspec.o
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index d1bd0f08a3..e4f579c3e2 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -1602,8 +1602,8 @@ static void cyz_handle_tx(struct cyclades_port *info,
 			info->icount.tx++;
 		}
 #endif
-ztxdone:
 		tty_wakeup(tty);
+ztxdone:
 		/* Update tx_put */
 		cy_writel(&buf_ctrl->tx_put, tx_put);
 	}
diff --git a/drivers/char/hvc_lguest.c b/drivers/char/hvc_lguest.c
deleted file mode 100644
index efccb21558..0000000000
--- a/drivers/char/hvc_lguest.c
+++ /dev/null
@@ -1,177 +0,0 @@
1/*D:300
2 * The Guest console driver
3 *
4 * This is a trivial console driver: we use lguest's DMA mechanism to send
5 * bytes out, and register a DMA buffer to receive bytes in. It is assumed to
6 * be present and available from the very beginning of boot.
7 *
8 * Writing console drivers is one of the few remaining Dark Arts in Linux.
9 * Fortunately for us, the path of virtual consoles has been well-trodden by
10 * the PowerPC folks, who wrote "hvc_console.c" to generically support any
11 * virtual console. We use that infrastructure which only requires us to write
12 * the basic put_chars and get_chars functions and call the right register
13 * functions.
14 :*/
15
16/*M:002 The console can be flooded: while the Guest is processing input the
17 * Host can send more. Buffering in the Host could alleviate this, but it is a
18 * difficult problem in general. :*/
19/* Copyright (C) 2006 Rusty Russell, IBM Corporation
20 *
21 * This program is free software; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
25 *
26 * This program is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with this program; if not, write to the Free Software
33 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
34 */
35#include <linux/err.h>
36#include <linux/init.h>
37#include <linux/lguest_bus.h>
38#include <asm/paravirt.h>
39#include "hvc_console.h"
40
41/*D:340 This is our single console input buffer, with associated "struct
42 * lguest_dma" referring to it. Note the 0-terminated length array, and the
43 * use of physical address for the buffer itself. */
44static char inbuf[256];
45static struct lguest_dma cons_input = { .used_len = 0,
46 .addr[0] = __pa(inbuf),
47 .len[0] = sizeof(inbuf),
48 .len[1] = 0 };
49
50/*D:310 The put_chars() callback is pretty straightforward.
51 *
52 * First we put the pointer and length in a "struct lguest_dma": we only have
53 * one pointer, so we set the second length to 0. Then we use SEND_DMA to send
54 * the data to (Host) buffers attached to the console key. Usually a device's
55 * key is a physical address within the device's memory, but because the
56 * console device doesn't have any associated physical memory, we use the
57 * LGUEST_CONSOLE_DMA_KEY constant (aka 0). */
58static int put_chars(u32 vtermno, const char *buf, int count)
59{
60 struct lguest_dma dma;
61
62 /* FIXME: DMA buffers in a "struct lguest_dma" are not allowed
63 * to go over page boundaries. This never seems to happen,
64 * but if it did we'd need to fix this code. */
65 dma.len[0] = count;
66 dma.len[1] = 0;
67 dma.addr[0] = __pa(buf);
68
69 lguest_send_dma(LGUEST_CONSOLE_DMA_KEY, &dma);
70 /* We're expected to return the amount of data we wrote: all of it. */
71 return count;
72}
73
74/*D:350 get_chars() is the callback from the hvc_console infrastructure when
75 * an interrupt is received.
76 *
77 * Firstly we see if our buffer has been filled: if not, we return. The rest
78 * of the code deals with the fact that the hvc_console() infrastructure only
79 * asks us for 16 bytes at a time. We keep a "cons_offset" variable for
80 * partially-read buffers. */
81static int get_chars(u32 vtermno, char *buf, int count)
82{
83 static int cons_offset;
84
85 /* Nothing left to see here... */
86 if (!cons_input.used_len)
87 return 0;
88
89 /* You want more than we have to give? Well, try wanting less! */
90 if (cons_input.used_len - cons_offset < count)
91 count = cons_input.used_len - cons_offset;
92
93 /* Copy across to their buffer and increment offset. */
94 memcpy(buf, inbuf + cons_offset, count);
95 cons_offset += count;
96
97 /* Finished? Zero offset, and reset cons_input so Host will use it
98 * again. */
99 if (cons_offset == cons_input.used_len) {
100 cons_offset = 0;
101 cons_input.used_len = 0;
102 }
103 return count;
104}
105/*:*/
106
107static struct hv_ops lguest_cons = {
108 .get_chars = get_chars,
109 .put_chars = put_chars,
110};
111
112/*D:320 Console drivers are initialized very early so boot messages can go
113 * out. At this stage, the console is output-only. Our driver checks we're a
114 * Guest, and if so hands hvc_instantiate() the console number (0), priority
115 * (0), and the struct hv_ops containing the put_chars() function. */
116static int __init cons_init(void)
117{
118 if (strcmp(pv_info.name, "lguest") != 0)
119 return 0;
120
121 return hvc_instantiate(0, 0, &lguest_cons);
122}
123console_initcall(cons_init);
124
125/*D:370 To set up and manage our virtual console, we call hvc_alloc() and
126 * stash the result in the private pointer of the "struct lguest_device".
127 * Since we never remove the console device we never need this pointer again,
128 * but using ->private is considered good form, and you never know who's going
129 * to copy your driver.
130 *
131 * Once the console is set up, we bind our input buffer ready for input. */
132static int lguestcons_probe(struct lguest_device *lgdev)
133{
134 int err;
135
136 /* The first argument of hvc_alloc() is the virtual console number, so
137 * we use zero. The second argument is the interrupt number.
138 *
139 * The third argument is a "struct hv_ops" containing the put_chars()
140 * and get_chars() pointers. The final argument is the output buffer
141 * size: we use 256 and expect the Host to have room for us to send
142 * that much. */
143 lgdev->private = hvc_alloc(0, lgdev_irq(lgdev), &lguest_cons, 256);
144 if (IS_ERR(lgdev->private))
145 return PTR_ERR(lgdev->private);
146
147 /* We bind a single DMA buffer at key LGUEST_CONSOLE_DMA_KEY.
148 * "cons_input" is that statically-initialized global DMA buffer we saw
149 * above, and we also give the interrupt we want. */
150 err = lguest_bind_dma(LGUEST_CONSOLE_DMA_KEY, &cons_input, 1,
151 lgdev_irq(lgdev));
152 if (err)
153 printk("lguest console: failed to bind buffer.\n");
154 return err;
155}
156/* Note the use of lgdev_irq() for the interrupt number. We tell hvc_alloc()
157 * to expect input when this interrupt is triggered, and then tell
158 * lguest_bind_dma() that is the interrupt to send us when input comes in. */
159
160/*D:360 From now on the console driver follows standard Guest driver form:
161 * register_lguest_driver() registers the device type and probe function, and
162 * the probe function sets up the device.
163 *
164 * The standard "struct lguest_driver": */
165static struct lguest_driver lguestcons_drv = {
166 .name = "lguestcons",
167 .owner = THIS_MODULE,
168 .device_type = LGUEST_DEVICE_T_CONSOLE,
169 .probe = lguestcons_probe,
170};
171
172/* The standard init function */
173static int __init hvc_lguest_init(void)
174{
175 return register_lguest_driver(&lguestcons_drv);
176}
177module_init(hvc_lguest_init);
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
new file mode 100644
index 0000000000..100e8a201e
--- /dev/null
+++ b/drivers/char/virtio_console.c
@@ -0,0 +1,225 @@
1/*D:300
2 * The Guest console driver
3 *
4 * Writing console drivers is one of the few remaining Dark Arts in Linux.
5 * Fortunately for us, the path of virtual consoles has been well-trodden by
6 * the PowerPC folks, who wrote "hvc_console.c" to generically support any
7 * virtual console. We use that infrastructure which only requires us to write
8 * the basic put_chars and get_chars functions and call the right register
9 * functions.
10 :*/
11
12/*M:002 The console can be flooded: while the Guest is processing input the
13 * Host can send more. Buffering in the Host could alleviate this, but it is a
14 * difficult problem in general. :*/
15/* Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 */
31#include <linux/err.h>
32#include <linux/init.h>
33#include <linux/virtio.h>
34#include <linux/virtio_console.h>
35#include "hvc_console.h"
36
37/*D:340 These represent our input and output console queues, and the virtio
38 * operations for them. */
39static struct virtqueue *in_vq, *out_vq;
40static struct virtio_device *vdev;
41
42/* This is our input buffer, and how much data is left in it. */
43static unsigned int in_len;
44static char *in, *inbuf;
45
46/* The operations for our console. */
47static struct hv_ops virtio_cons;
48
49/*D:310 The put_chars() callback is pretty straightforward.
50 *
51 * We turn the characters into a scatter-gather list, add it to the output
52 * queue and then kick the Host. Then we sit here waiting for it to finish:
53 * inefficient in theory, but in practice implementations will do it
54 * immediately (lguest's Launcher does). */
55static int put_chars(u32 vtermno, const char *buf, int count)
56{
57 struct scatterlist sg[1];
58 unsigned int len;
59
60 /* This is a convenient routine to initialize a single-element sg list */
61 sg_init_one(sg, buf, count);
62
63 /* add_buf wants a token to identify this buffer: we hand it any
64 * non-NULL pointer, since there's only ever one buffer. */
65 if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) == 0) {
66 /* Tell Host to go! */
67 out_vq->vq_ops->kick(out_vq);
68 /* Chill out until it's done with the buffer. */
69 while (!out_vq->vq_ops->get_buf(out_vq, &len))
70 cpu_relax();
71 }
72
73 /* We're expected to return the amount of data we wrote: all of it. */
74 return count;
75}
76
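The add_buf/kick/get_buf sequence above is the generic virtio producer pattern; put_chars() just instantiates it for the output queue. A self-contained sketch of the same contract (illustrative; send_and_wait() is a hypothetical helper using this kernel's vq_ops interface):

static void send_and_wait(struct virtqueue *vq, void *data, unsigned int size)
{
	struct scatterlist sg[1];
	unsigned int len;

	sg_init_one(sg, data, size);

	/* one out entry, no in entries; any non-NULL token will do */
	if (vq->vq_ops->add_buf(vq, sg, 1, 0, data) == 0) {
		vq->vq_ops->kick(vq);		/* tell the host to look */
		while (!vq->vq_ops->get_buf(vq, &len))
			cpu_relax();		/* spin until the host is done */
	}
}

Busy-waiting is tolerable for a console that must work before interrupts do; a real data-path driver would sleep on a callback instead.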
77/* Create a scatter-gather list representing our input buffer and put it in the
78 * queue. */
79static void add_inbuf(void)
80{
81 struct scatterlist sg[1];
82 sg_init_one(sg, inbuf, PAGE_SIZE);
83
84 /* We should always be able to add one buffer to an empty queue. */
85 if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) != 0)
86 BUG();
87 in_vq->vq_ops->kick(in_vq);
88}
89
90/*D:350 get_chars() is the callback from the hvc_console infrastructure when
91 * an interrupt is received.
92 *
93 * Most of the code deals with the fact that the hvc_console() infrastructure
94 * only asks us for 16 bytes at a time. We keep in_offset and in_used fields
95 * for partially-filled buffers. */
96static int get_chars(u32 vtermno, char *buf, int count)
97{
98 /* If we don't have an input queue yet, we can't get input. */
99 BUG_ON(!in_vq);
100
101 /* No buffer? Try to get one. */
102 if (!in_len) {
103 in = in_vq->vq_ops->get_buf(in_vq, &in_len);
104 if (!in)
105 return 0;
106 }
107
108 /* You want more than we have to give? Well, try wanting less! */
109 if (in_len < count)
110 count = in_len;
111
112 /* Copy across to their buffer and increment offset. */
113 memcpy(buf, in, count);
114 in += count;
115 in_len -= count;
116
117 /* Finished? Re-register buffer so Host will use it again. */
118 if (in_len == 0)
119 add_inbuf();
120
121 return count;
122}
123/*:*/
124
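A worked example of the partial-read bookkeeping (hypothetical sizes):

/* The host fills the page with 40 bytes, so get_buf() sets in_len = 40.
 * hvc_console then polls in 16-byte requests:
 *
 *   get_chars(..., 16) -> copies 16, in_len = 24
 *   get_chars(..., 16) -> copies 16, in_len = 8
 *   get_chars(..., 16) -> copies 8,  in_len = 0 -> add_inbuf() re-arms
 *
 * The page is only handed back to the host once fully drained, so
 * pending input is never overwritten mid-read.
 */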
125/*D:320 Console drivers are initialized very early so boot messages can go out,
126 * so we do things slightly differently from the generic virtio initialization
127 * of the net and block drivers.
128 *
129 * At this stage, the console is output-only. It's too early to set up a
130 * virtqueue, so we let the drivers do some boutique early-output thing. */
131int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
132{
133 virtio_cons.put_chars = put_chars;
134 return hvc_instantiate(0, 0, &virtio_cons);
135}
136
137/*D:370 Once we're further in boot, we get probed like any other virtio device.
138 * At this stage we set up the output virtqueue.
139 *
140 * To set up and manage our virtual console, we call hvc_alloc(). Since we
141 * never remove the console device we never need this pointer again.
142 *
143 * Finally we put our input buffer in the input queue, ready to receive. */
144static int virtcons_probe(struct virtio_device *dev)
145{
146 int err;
147 struct hvc_struct *hvc;
148
149 vdev = dev;
150
151 /* This is the scratch page we use to receive console input */
152 inbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
153 if (!inbuf) {
154 err = -ENOMEM;
155 goto fail;
156 }
157
158 /* Find the input queue. */
159 /* FIXME: This is why we want to wean off hvc: we do nothing
160 * when input comes in. */
161 in_vq = vdev->config->find_vq(vdev, NULL);
162 if (IS_ERR(in_vq)) {
163 err = PTR_ERR(in_vq);
164 goto free;
165 }
166
167 out_vq = vdev->config->find_vq(vdev, NULL);
168 if (IS_ERR(out_vq)) {
169 err = PTR_ERR(out_vq);
170 goto free_in_vq;
171 }
172
173 /* Start using the new console output. */
174 virtio_cons.get_chars = get_chars;
175 virtio_cons.put_chars = put_chars;
176
177 /* The first argument of hvc_alloc() is the virtual console number, so
178 * we use zero. The second argument is the interrupt number; we
179 * currently leave this as zero: it would be better not to use the
180 * hvc mechanism and fix this (FIXME!).
181 *
182 * The third argument is a "struct hv_ops" containing the put_chars()
183 * and get_chars() pointers. The final argument is the output buffer
184 * size: we can do any size, so we put PAGE_SIZE here. */
185 hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
186 if (IS_ERR(hvc)) {
187 err = PTR_ERR(hvc);
188 goto free_out_vq;
189 }
190
191 /* Register the input buffer the first time. */
192 add_inbuf();
193 return 0;
194
195free_out_vq:
196 vdev->config->del_vq(out_vq);
197free_in_vq:
198 vdev->config->del_vq(in_vq);
199free:
200 kfree(inbuf);
201fail:
202 return err;
203}
204
205static struct virtio_device_id id_table[] = {
206 { VIRTIO_ID_CONSOLE, VIRTIO_DEV_ANY_ID },
207 { 0 },
208};
209
210static struct virtio_driver virtio_console = {
211 .driver.name = KBUILD_MODNAME,
212 .driver.owner = THIS_MODULE,
213 .id_table = id_table,
214 .probe = virtcons_probe,
215};
216
217static int __init init(void)
218{
219 return register_virtio_driver(&virtio_console);
220}
221module_init(init);
222
223MODULE_DEVICE_TABLE(virtio, id_table);
224MODULE_DESCRIPTION("Virtio console driver");
225MODULE_LICENSE("GPL");
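One note on how virtio_cons_early_init() is meant to be consumed: a guest platform passes in whatever boutique output path it has before virtqueues exist. A sketch (early_put_chars() and setup_early_console() are hypothetical names):

static int early_put_chars(u32 vtermno, const char *buf, int count)
{
	/* e.g. a hypercall or I/O port write, byte by byte */
	return count;	/* report everything as written */
}

static void __init setup_early_console(void)
{
	/* hooks put_chars into hvc so early printk output works */
	virtio_cons_early_init(early_put_chars);
}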
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c
index 2f307c4df3..67588326ae 100644
--- a/drivers/firewire/fw-ohci.c
+++ b/drivers/firewire/fw-ohci.c
@@ -606,7 +606,7 @@ static int
 at_context_queue_packet(struct context *ctx, struct fw_packet *packet)
 {
 	struct fw_ohci *ohci = ctx->ohci;
-	dma_addr_t d_bus, payload_bus;
+	dma_addr_t d_bus, uninitialized_var(payload_bus);
 	struct driver_data *driver_data;
 	struct descriptor *d, *last;
 	__le32 *header;
@@ -1459,7 +1459,7 @@ ohci_allocate_iso_context(struct fw_card *card, int type, size_t header_size)
 	/* FIXME: We need a fallback for pre 1.1 OHCI. */
 	if (callback == handle_ir_dualbuffer_packet &&
 	    ohci->version < OHCI_VERSION_1_1)
-		return ERR_PTR(-EINVAL);
+		return ERR_PTR(-ENOSYS);
 
 	spin_lock_irqsave(&ohci->lock, flags);
 	index = ffs(*mask) - 1;
@@ -1778,7 +1778,7 @@ ohci_queue_iso(struct fw_iso_context *base,
 					 buffer, payload);
 	else
 		/* FIXME: Implement fallback for OHCI 1.0 controllers. */
-		return -EINVAL;
+		return -ENOSYS;
 }
 
 static const struct fw_card_driver ohci_driver = {
@@ -1898,7 +1898,12 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
 	ohci->version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff;
 	fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n",
 		  dev->dev.bus_id, ohci->version >> 16, ohci->version & 0xff);
-
+	if (ohci->version < OHCI_VERSION_1_1) {
+		fw_notify(" Isochronous I/O is not yet implemented for "
+			  "OHCI 1.0 chips.\n");
+		fw_notify(" Cameras, audio devices etc. won't work on "
+			  "this controller with this driver version.\n");
+	}
 	return 0;
 
  fail_self_id:
diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c
index ff20377b4c..e196aefa20 100644
--- a/drivers/ide/cris/ide-cris.c
+++ b/drivers/ide/cris/ide-cris.c
@@ -935,11 +935,11 @@ static int cris_ide_build_dmatable (ide_drive_t *drive)
 	 * than two possibly non-adjacent physical 4kB pages.
 	 */
 	/* group sequential buffers into one large buffer */
-	addr = page_to_phys(sg->page) + sg->offset;
+	addr = sg_phys(sg);
 	size = sg_dma_len(sg);
 	while (--i) {
 		sg = sg_next(sg);
-		if ((addr + size) != page_to_phys(sg->page) + sg->offset)
+		if ((addr + size) != sg_phys(sg))
 			break;
 		size += sg_dma_len(sg);
 	}
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index d5146c57e5..6a6f2e066b 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -47,6 +47,7 @@
 #include <linux/spinlock.h>
 #include <linux/kmod.h>
 #include <linux/pci.h>
+#include <linux/scatterlist.h>
 
 #include <asm/byteorder.h>
 #include <asm/irq.h>
@@ -1317,12 +1318,14 @@ static int hwif_init(ide_hwif_t *hwif)
 	if (!hwif->sg_max_nents)
 		hwif->sg_max_nents = PRD_ENTRIES;
 
-	hwif->sg_table = kzalloc(sizeof(struct scatterlist)*hwif->sg_max_nents,
+	hwif->sg_table = kmalloc(sizeof(struct scatterlist)*hwif->sg_max_nents,
 				 GFP_KERNEL);
 	if (!hwif->sg_table) {
 		printk(KERN_ERR "%s: unable to allocate SG table.\n", hwif->name);
 		goto out;
 	}
+
+	sg_init_table(hwif->sg_table, hwif->sg_max_nents);
 
 	if (init_irq(hwif) == 0)
 		goto done;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 73ef6bf5fb..d066546f28 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -261,7 +261,7 @@ static void ide_pio_sector(ide_drive_t *drive, unsigned int write)
 		hwif->cursg = sg;
 	}
 
-	page = cursg->page;
+	page = sg_page(cursg);
 	offset = cursg->offset + hwif->cursg_ofs * SECTOR_SIZE;
 
 	/* get the current page and offset */
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c
index 1de58566e5..a4ce3ba15d 100644
--- a/drivers/ide/mips/au1xxx-ide.c
+++ b/drivers/ide/mips/au1xxx-ide.c
@@ -276,8 +276,7 @@ static int auide_build_dmatable(ide_drive_t *drive)
 
 		if (iswrite) {
 			if(!put_source_flags(ahwif->tx_chan,
-					     (void*)(page_address(sg->page)
-						     + sg->offset),
+					     (void*) sg_virt(sg),
 					     tc, flags)) {
 				printk(KERN_ERR "%s failed %d\n",
 				       __FUNCTION__, __LINE__);
@@ -285,8 +284,7 @@ static int auide_build_dmatable(ide_drive_t *drive)
 		} else
 		{
 			if(!put_dest_flags(ahwif->rx_chan,
-					   (void*)(page_address(sg->page)
-						   + sg->offset),
+					   (void*) sg_virt(sg),
 					   tc, flags)) {
 				printk(KERN_ERR "%s failed %d\n",
 				       __FUNCTION__, __LINE__);
diff --git a/drivers/ieee1394/dma.c b/drivers/ieee1394/dma.c
index 45d6055819..3051e312fd 100644
--- a/drivers/ieee1394/dma.c
+++ b/drivers/ieee1394/dma.c
@@ -12,7 +12,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 
 #include "dma.h"
 
@@ -111,7 +111,7 @@ int dma_region_alloc(struct dma_region *dma, unsigned long n_bytes,
 		unsigned long va =
 		    (unsigned long)dma->kvirt + (i << PAGE_SHIFT);
 
-		dma->sglist[i].page = vmalloc_to_page((void *)va);
+		sg_set_page(&dma->sglist[i], vmalloc_to_page((void *)va));
 		dma->sglist[i].length = PAGE_SIZE;
 	}
 
diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index 1b353b964b..d5dfe11aa5 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -1466,7 +1466,7 @@ static void sbp2_prep_command_orb_sg(struct sbp2_command_orb *orb,
 		cmd->dma_size = sgpnt[0].length;
 		cmd->dma_type = CMD_DMA_PAGE;
 		cmd->cmd_dma = dma_map_page(hi->host->device.parent,
-					    sgpnt[0].page, sgpnt[0].offset,
+					    sg_page(&sgpnt[0]), sgpnt[0].offset,
 					    cmd->dma_size, cmd->dma_dir);
 
 		orb->data_descriptor_lo = cmd->cmd_dma;
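The ide and ieee1394 hunks above (and the infiniband/core umem.c hunks further down) are all the same mechanical conversion for the chained-scatterlist work: an sg entry may now be a chain pointer rather than a plain page, so code must stop touching sg->page directly and go through the accessors. Schematically (an illustration, not another hunk):

/* before: direct field access */
sg->page = page;
addr = page_to_phys(sg->page) + sg->offset;

/* after: accessor based, chain safe */
sg_set_page(sg, page);	/* two-argument form of this kernel version */
addr = sg_phys(sg);	/* == page_to_phys(sg_page(sg)) + sg->offset */
kva  = sg_virt(sg);	/* == page_address(sg_page(sg)) + sg->offset */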
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d08fb30768..0751697ef9 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -114,13 +114,16 @@ struct rdma_id_private {
 
 	struct rdma_bind_list	*bind_list;
 	struct hlist_node	node;
-	struct list_head	list;
-	struct list_head	listen_list;
+	struct list_head	list; /* listen_any_list or cma_device.list */
+	struct list_head	listen_list; /* per device listens */
 	struct cma_device	*cma_dev;
 	struct list_head	mc_list;
 
+	int			internal_id;
 	enum cma_state		state;
 	spinlock_t		lock;
+	struct mutex		qp_mutex;
+
 	struct completion	comp;
 	atomic_t		refcount;
 	wait_queue_head_t	wait_remove;
@@ -389,6 +392,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
 	spin_lock_init(&id_priv->lock);
+	mutex_init(&id_priv->qp_mutex);
 	init_completion(&id_priv->comp);
 	atomic_set(&id_priv->refcount, 1);
 	init_waitqueue_head(&id_priv->wait_remove);
@@ -474,61 +478,86 @@ EXPORT_SYMBOL(rdma_create_qp);
 
 void rdma_destroy_qp(struct rdma_cm_id *id)
 {
-	ib_destroy_qp(id->qp);
+	struct rdma_id_private *id_priv;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	mutex_lock(&id_priv->qp_mutex);
+	ib_destroy_qp(id_priv->id.qp);
+	id_priv->id.qp = NULL;
+	mutex_unlock(&id_priv->qp_mutex);
 }
 EXPORT_SYMBOL(rdma_destroy_qp);
 
-static int cma_modify_qp_rtr(struct rdma_cm_id *id)
+static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	/* Need to update QP attributes from default values. */
 	qp_attr.qp_state = IB_QPS_INIT;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
 	qp_attr.qp_state = IB_QPS_RTR;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
-static int cma_modify_qp_rts(struct rdma_cm_id *id)
+static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	qp_attr.qp_state = IB_QPS_RTS;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
-static int cma_modify_qp_err(struct rdma_cm_id *id)
+static int cma_modify_qp_err(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
+	int ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	qp_attr.qp_state = IB_QPS_ERR;
-	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
@@ -717,50 +746,27 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
 	}
 }
 
-static inline int cma_internal_listen(struct rdma_id_private *id_priv)
-{
-	return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
-	       cma_any_addr(&id_priv->id.route.addr.src_addr);
-}
-
-static void cma_destroy_listen(struct rdma_id_private *id_priv)
-{
-	cma_exch(id_priv, CMA_DESTROYING);
-
-	if (id_priv->cma_dev) {
-		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
-		case RDMA_TRANSPORT_IB:
-			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
-				ib_destroy_cm_id(id_priv->cm_id.ib);
-			break;
-		case RDMA_TRANSPORT_IWARP:
-			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
-				iw_destroy_cm_id(id_priv->cm_id.iw);
-			break;
-		default:
-			break;
-		}
-		cma_detach_from_dev(id_priv);
-	}
-	list_del(&id_priv->listen_list);
-
-	cma_deref_id(id_priv);
-	wait_for_completion(&id_priv->comp);
-
-	kfree(id_priv);
-}
-
 static void cma_cancel_listens(struct rdma_id_private *id_priv)
 {
 	struct rdma_id_private *dev_id_priv;
 
+	/*
+	 * Remove from listen_any_list to prevent added devices from spawning
+	 * additional listen requests.
+	 */
 	mutex_lock(&lock);
 	list_del(&id_priv->list);
 
 	while (!list_empty(&id_priv->listen_list)) {
 		dev_id_priv = list_entry(id_priv->listen_list.next,
 					 struct rdma_id_private, listen_list);
-		cma_destroy_listen(dev_id_priv);
+		/* sync with device removal to avoid duplicate destruction */
+		list_del_init(&dev_id_priv->list);
+		list_del(&dev_id_priv->listen_list);
+		mutex_unlock(&lock);
+
+		rdma_destroy_id(&dev_id_priv->id);
+		mutex_lock(&lock);
 	}
 	mutex_unlock(&lock);
 }
@@ -848,6 +854,9 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	cma_deref_id(id_priv);
 	wait_for_completion(&id_priv->comp);
 
+	if (id_priv->internal_id)
+		cma_deref_id(id_priv->id.context);
+
 	kfree(id_priv->id.route.path_rec);
 	kfree(id_priv);
 }
@@ -857,11 +866,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 {
 	int ret;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		goto reject;
 
-	ret = cma_modify_qp_rts(&id_priv->id);
+	ret = cma_modify_qp_rts(id_priv);
 	if (ret)
 		goto reject;
 
@@ -871,7 +880,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 
 	return 0;
 reject:
-	cma_modify_qp_err(&id_priv->id);
+	cma_modify_qp_err(id_priv);
 	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
 		       NULL, 0, NULL, 0);
 	return ret;
@@ -947,7 +956,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		/* ignore event */
 		goto out;
 	case IB_CM_REJ_RECEIVED:
-		cma_modify_qp_err(&id_priv->id);
+		cma_modify_qp_err(id_priv);
 		event.status = ib_event->param.rej_rcvd.reason;
 		event.event = RDMA_CM_EVENT_REJECTED;
 		event.param.conn.private_data = ib_event->private_data;
@@ -1404,14 +1413,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
 	cma_attach_to_dev(dev_id_priv, cma_dev);
 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+	atomic_inc(&id_priv->refcount);
+	dev_id_priv->internal_id = 1;
 
 	ret = rdma_listen(id, id_priv->backlog);
 	if (ret)
-		goto err;
-
-	return;
-err:
-	cma_destroy_listen(dev_id_priv);
+		printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
+		       "listening on device %s", ret, cma_dev->device->name);
 }
 
 static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -2264,7 +2272,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
 	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
 	cm_id->remote_addr = *sin;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		goto out;
 
@@ -2331,7 +2339,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
 	int qp_attr_mask, ret;
 
 	if (id_priv->id.qp) {
-		ret = cma_modify_qp_rtr(&id_priv->id);
+		ret = cma_modify_qp_rtr(id_priv);
 		if (ret)
 			goto out;
 
@@ -2370,7 +2378,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
 	struct iw_cm_conn_param iw_param;
 	int ret;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		return ret;
 
@@ -2442,7 +2450,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 
 	return 0;
 reject:
-	cma_modify_qp_err(id);
+	cma_modify_qp_err(id_priv);
 	rdma_reject(id, NULL, 0);
 	return ret;
 }
@@ -2512,7 +2520,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		ret = cma_modify_qp_err(id);
+		ret = cma_modify_qp_err(id_priv);
 		if (ret)
 			goto out;
 		/* Initiate or respond to a disconnect. */
@@ -2543,9 +2551,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	    cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
 		return 0;
 
+	mutex_lock(&id_priv->qp_mutex);
 	if (!status && id_priv->id.qp)
 		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
 					 multicast->rec.mlid);
+	mutex_unlock(&id_priv->qp_mutex);
 
 	memset(&event, 0, sizeof event);
 	event.status = status;
@@ -2757,16 +2767,12 @@ static void cma_process_remove(struct cma_device *cma_dev)
 		id_priv = list_entry(cma_dev->id_list.next,
 				     struct rdma_id_private, list);
 
-		if (cma_internal_listen(id_priv)) {
-			cma_destroy_listen(id_priv);
-			continue;
-		}
-
+		list_del(&id_priv->listen_list);
 		list_del_init(&id_priv->list);
 		atomic_inc(&id_priv->refcount);
 		mutex_unlock(&lock);
 
-		ret = cma_remove_id_dev(id_priv);
+		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
 		cma_deref_id(id_priv);
 		if (ret)
 			rdma_destroy_id(&id_priv->id);
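All of the cma QP paths above now share one idiom: take qp_mutex, re-check id.qp under the lock (rdma_destroy_qp() may have cleared it), and funnel every exit through out:. Condensed (illustration only; modify_qp_locked() is a hypothetical distillation of the repeated pattern, not a function in the patch):

static int modify_qp_locked(struct rdma_id_private *id_priv,
			    struct ib_qp_attr *attr, int mask)
{
	int ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;	/* QP already destroyed: nothing to do */
		goto out;
	}
	ret = ib_modify_qp(id_priv->id.qp, attr, mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}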
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 2f54e29dc7..14159ff294 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -55,9 +55,11 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 		ib_dma_unmap_sg(dev, chunk->page_list,
 				chunk->nents, DMA_BIDIRECTIONAL);
 		for (i = 0; i < chunk->nents; ++i) {
+			struct page *page = sg_page(&chunk->page_list[i]);
+
 			if (umem->writable && dirty)
-				set_page_dirty_lock(chunk->page_list[i].page);
-			put_page(chunk->page_list[i].page);
+				set_page_dirty_lock(page);
+			put_page(page);
 		}
 
 		kfree(chunk);
@@ -164,11 +166,12 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	}
 
 		chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
+		sg_init_table(chunk->page_list, chunk->nents);
 		for (i = 0; i < chunk->nents; ++i) {
 			if (vma_list &&
 			    !is_vm_hugetlb_page(vma_list[i + off]))
 				umem->hugetlb = 0;
-			chunk->page_list[i].page   = page_list[i + off];
+			sg_set_page(&chunk->page_list[i], page_list[i + off]);
 			chunk->page_list[i].offset = 0;
 			chunk->page_list[i].length = PAGE_SIZE;
 		}
@@ -179,7 +182,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 						DMA_BIDIRECTIONAL);
 		if (chunk->nmap <= 0) {
 			for (i = 0; i < chunk->nents; ++i)
-				put_page(chunk->page_list[i].page);
+				put_page(sg_page(&chunk->page_list[i]));
 			kfree(chunk);
 
 			ret = -ENOMEM;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 01d70084ae..495c803fb1 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -147,8 +147,12 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 
 	spin_lock(&ib_uverbs_idr_lock);
 	uobj = idr_find(idr, id);
-	if (uobj)
-		kref_get(&uobj->ref);
+	if (uobj) {
+		if (uobj->context == context)
+			kref_get(&uobj->ref);
+		else
+			uobj = NULL;
+	}
 	spin_unlock(&ib_uverbs_idr_lock);
 
 	return uobj;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 3f2d68cff7..2d660ae189 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -323,7 +323,6 @@ extern int ehca_static_rate;
 extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
 extern int ehca_scaling_code;
-extern int ehca_mr_largepage;
 
 struct ipzu_queue_resp {
 	u32 qe_size;      /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 4aa3ffa6a1..15806d1404 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -77,6 +77,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
 	}
 
 	memset(props, 0, sizeof(struct ib_device_attr));
+	props->page_size_cap   = shca->hca_cap_mr_pgsize;
 	props->fw_ver          = rblock->hw_ver;
 	props->max_mr_size     = rblock->max_mr_size;
 	props->vendor_id       = rblock->vendor_id >> 8;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 7a7dab890f..c6cd38c532 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -65,7 +65,7 @@ int ehca_port_act_time = 30;
 int ehca_poll_all_eqs  = 1;
 int ehca_static_rate   = -1;
 int ehca_scaling_code  = 0;
-int ehca_mr_largepage  = 0;
+int ehca_mr_largepage  = 1;
 
 module_param_named(open_aqp1,   ehca_open_aqp1,   int, S_IRUGO);
 module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
@@ -260,13 +260,20 @@ static struct cap_descr {
 	{ HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
 };
 
-int ehca_sense_attributes(struct ehca_shca *shca)
+static int ehca_sense_attributes(struct ehca_shca *shca)
 {
 	int i, ret = 0;
 	u64 h_ret;
 	struct hipz_query_hca *rblock;
 	struct hipz_query_port *port;
 
+	static const u32 pgsize_map[] = {
+		HCA_CAP_MR_PGSIZE_4K,  0x1000,
+		HCA_CAP_MR_PGSIZE_64K, 0x10000,
+		HCA_CAP_MR_PGSIZE_1M,  0x100000,
+		HCA_CAP_MR_PGSIZE_16M, 0x1000000,
+	};
+
 	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
 		ehca_gen_err("Cannot allocate rblock memory.");
@@ -329,8 +336,15 @@ int ehca_sense_attributes(struct ehca_shca *shca)
 		if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
 			ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
 
-	shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported;
+	/* translate supported MR page sizes; always support 4K */
+	shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
+	if (ehca_mr_largepage) { /* support extra sizes only if enabled */
+		for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
+			if (rblock->memory_page_size_supported & pgsize_map[i])
+				shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+	}
 
+	/* query max MTU from first port -- it's the same for all ports */
 	port = (struct hipz_query_port *)rblock;
 	h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
 	if (h_ret != H_SUCCESS) {
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index da88738265..e239bbf54d 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -72,24 +72,14 @@ enum ehca_mr_pgsize {
 
 static u32 ehca_encode_hwpage_size(u32 pgsize)
 {
-	u32 idx = 0;
-	pgsize >>= 12;
-	/*
-	 * map mr page size into hw code:
-	 * 0, 1, 2, 3 for 4K, 64K, 1M, 64M
-	 */
-	while (!(pgsize & 1)) {
-		idx++;
-		pgsize >>= 4;
-	}
-	return idx;
+	int log = ilog2(pgsize);
+	WARN_ON(log < 12 || log > 24 || log & 3);
+	return (log - 12) / 4;
 }
 
 static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
 {
-	if (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)
-		return EHCA_MR_PGSIZE16M;
-	return EHCA_MR_PGSIZE4K;
+	return 1UL << ilog2(shca->hca_cap_mr_pgsize);
 }
 
 static struct ehca_mr *ehca_mr_new(void)
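The closed-form encoding works because the four supported hardware page sizes sit exactly four powers of two apart. A quick check of (log - 12) / 4 against the old table (my arithmetic, from the hunk above):

/*   4K  -> ilog2 = 12 -> (12 - 12) / 4 = 0
 *   64K -> ilog2 = 16 -> (16 - 12) / 4 = 1
 *   1M  -> ilog2 = 20 -> (20 - 12) / 4 = 2
 *   16M -> ilog2 = 24 -> (24 - 12) / 4 = 3
 *
 * WARN_ON(log < 12 || log > 24 || log & 3) rejects anything between
 * these steps (e.g. 8K: log = 13, and 13 & 3 != 0).
 */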
@@ -259,7 +249,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
 	pginfo.u.phy.num_phys_buf = num_phys_buf;
 	pginfo.u.phy.phys_buf_array = phys_buf_array;
 	pginfo.next_hwpage =
-		((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
+		((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
 
 	ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
 			  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
@@ -296,7 +286,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
296 container_of(pd->device, struct ehca_shca, ib_device); 286 container_of(pd->device, struct ehca_shca, ib_device);
297 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); 287 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
298 struct ehca_mr_pginfo pginfo; 288 struct ehca_mr_pginfo pginfo;
299 int ret; 289 int ret, page_shift;
300 u32 num_kpages; 290 u32 num_kpages;
301 u32 num_hwpages; 291 u32 num_hwpages;
302 u64 hwpage_size; 292 u64 hwpage_size;
@@ -351,19 +341,20 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
351 /* determine number of MR pages */ 341 /* determine number of MR pages */
352 num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); 342 num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
353 /* select proper hw_pgsize */ 343 /* select proper hw_pgsize */
354 if (ehca_mr_largepage && 344 page_shift = PAGE_SHIFT;
355 (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) { 345 if (e_mr->umem->hugetlb) {
356 int page_shift = PAGE_SHIFT; 346 /* determine page_shift, clamp between 4K and 16M */
357 if (e_mr->umem->hugetlb) { 347 page_shift = (fls64(length - 1) + 3) & ~3;
358 /* determine page_shift, clamp between 4K and 16M */ 348 page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
359 page_shift = (fls64(length - 1) + 3) & ~3; 349 EHCA_MR_PGSHIFT16M);
360 page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), 350 }
361 EHCA_MR_PGSHIFT16M); 351 hwpage_size = 1UL << page_shift;
362 } 352
363 hwpage_size = 1UL << page_shift; 353 /* now that we have the desired page size, shift until it's
364 } else 354 * supported, too. 4K is always supported, so this terminates.
365 hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 only supports 4k */ 355 */
366 ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size); 356 while (!(hwpage_size & shca->hca_cap_mr_pgsize))
357 hwpage_size >>= 4;
367 358
368reg_user_mr_fallback: 359reg_user_mr_fallback:
369 num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); 360 num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
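[Note: the selection logic above first computes the desired shift — for hugetlb mappings, fls64(length - 1) rounded up to a multiple of 4 and clamped to the 4K..16M range — then walks the size down in 16x steps until the capability mask confirms support; since the 4K bit is always set, the loop terminates. A sketch of the fallback walk, with made-up capability bits:]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t cap_pgsize = 0x1000 | 0x10000;	/* HCA supports 4K + 64K */
	uint64_t hwpage_size = 0x1000000;	/* hugetlb mapping wants 16M */

	/* each step drops one supported size: 16M -> 1M -> 64K -> 4K */
	while (!(hwpage_size & cap_pgsize))
		hwpage_size >>= 4;

	printf("selected %#llx\n", (unsigned long long)hwpage_size); /* 0x10000 */
	return 0;
}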
@@ -547,7 +538,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
547 pginfo.u.phy.num_phys_buf = num_phys_buf; 538 pginfo.u.phy.num_phys_buf = num_phys_buf;
548 pginfo.u.phy.phys_buf_array = phys_buf_array; 539 pginfo.u.phy.phys_buf_array = phys_buf_array;
549 pginfo.next_hwpage = 540 pginfo.next_hwpage =
550 ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize; 541 ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
551 } 542 }
552 if (mr_rereg_mask & IB_MR_REREG_ACCESS) 543 if (mr_rereg_mask & IB_MR_REREG_ACCESS)
553 new_acl = mr_access_flags; 544 new_acl = mr_access_flags;
@@ -809,8 +800,9 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
809 ib_fmr = ERR_PTR(-EINVAL); 800 ib_fmr = ERR_PTR(-EINVAL);
810 goto alloc_fmr_exit0; 801 goto alloc_fmr_exit0;
811 } 802 }
812 hw_pgsize = ehca_get_max_hwpage_size(shca); 803
813 if ((1 << fmr_attr->page_shift) != hw_pgsize) { 804 hw_pgsize = 1 << fmr_attr->page_shift;
805 if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
814 ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", 806 ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
815 fmr_attr->page_shift); 807 fmr_attr->page_shift);
816 ib_fmr = ERR_PTR(-EINVAL); 808 ib_fmr = ERR_PTR(-EINVAL);
@@ -826,6 +818,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
826 818
827 /* register MR on HCA */ 819 /* register MR on HCA */
828 memset(&pginfo, 0, sizeof(pginfo)); 820 memset(&pginfo, 0, sizeof(pginfo));
821 pginfo.hwpage_size = hw_pgsize;
829 /* 822 /*
830 * pginfo.num_hwpages==0, ie register_rpages() will not be called 823 * pginfo.num_hwpages==0, ie register_rpages() will not be called
831 * but deferred to map_phys_fmr() 824 * but deferred to map_phys_fmr()
@@ -1776,7 +1769,7 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
1776 list_for_each_entry_continue( 1769 list_for_each_entry_continue(
1777 chunk, (&(pginfo->u.usr.region->chunk_list)), list) { 1770 chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
1778 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { 1771 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
1779 pgaddr = page_to_pfn(chunk->page_list[i].page) 1772 pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
1780 << PAGE_SHIFT ; 1773 << PAGE_SHIFT ;
1781 *kpage = phys_to_abs(pgaddr + 1774 *kpage = phys_to_abs(pgaddr +
1782 (pginfo->next_hwpage * 1775 (pginfo->next_hwpage *
@@ -1832,7 +1825,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
1832{ 1825{
1833 int t; 1826 int t;
1834 for (t = start_idx; t <= end_idx; t++) { 1827 for (t = start_idx; t <= end_idx; t++) {
1835 u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT; 1828 u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
1836 ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr, 1829 ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
1837 *(u64 *)abs_to_virt(phys_to_abs(pgaddr))); 1830 *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
1838 if (pgaddr - PAGE_SIZE != *prev_pgaddr) { 1831 if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
@@ -1867,7 +1860,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
1867 chunk, (&(pginfo->u.usr.region->chunk_list)), list) { 1860 chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
1868 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { 1861 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
1869 if (nr_kpages == kpages_per_hwpage) { 1862 if (nr_kpages == kpages_per_hwpage) {
1870 pgaddr = ( page_to_pfn(chunk->page_list[i].page) 1863 pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
1871 << PAGE_SHIFT ); 1864 << PAGE_SHIFT );
1872 *kpage = phys_to_abs(pgaddr); 1865 *kpage = phys_to_abs(pgaddr);
1873 if ( !(*kpage) ) { 1866 if ( !(*kpage) ) {
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index e2bd62be11..de182648b2 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -451,7 +451,6 @@ static struct ehca_qp *internal_create_qp(
451 has_srq = 1; 451 has_srq = 1;
452 parms.ext_type = EQPT_SRQBASE; 452 parms.ext_type = EQPT_SRQBASE;
453 parms.srq_qpn = my_srq->real_qp_num; 453 parms.srq_qpn = my_srq->real_qp_num;
454 parms.srq_token = my_srq->token;
455 } 454 }
456 455
457 if (is_llqp && has_srq) { 456 if (is_llqp && has_srq) {
@@ -583,6 +582,9 @@ static struct ehca_qp *internal_create_qp(
583 goto create_qp_exit1; 582 goto create_qp_exit1;
584 } 583 }
585 584
585 if (has_srq)
586 parms.srq_token = my_qp->token;
587
586 parms.servicetype = ibqptype2servicetype(qp_type); 588 parms.servicetype = ibqptype2servicetype(qp_type);
587 if (parms.servicetype < 0) { 589 if (parms.servicetype < 0) {
588 ret = -EINVAL; 590 ret = -EINVAL;
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index 22709a4f8f..e90a0ea538 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -108,7 +108,7 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
108 BUG_ON(!valid_dma_direction(direction)); 108 BUG_ON(!valid_dma_direction(direction));
109 109
110 for_each_sg(sgl, sg, nents, i) { 110 for_each_sg(sgl, sg, nents, i) {
111 addr = (u64) page_address(sg->page); 111 addr = (u64) page_address(sg_page(sg));
112 /* TODO: handle highmem pages */ 112 /* TODO: handle highmem pages */
113 if (!addr) { 113 if (!addr) {
114 ret = 0; 114 ret = 0;
@@ -127,7 +127,7 @@ static void ipath_unmap_sg(struct ib_device *dev,
127 127
128static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) 128static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
129{ 129{
130 u64 addr = (u64) page_address(sg->page); 130 u64 addr = (u64) page_address(sg_page(sg));
131 131
132 if (addr) 132 if (addr)
133 addr += sg->offset; 133 addr += sg->offset;
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index e442470a23..db4ba92f79 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -225,7 +225,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
225 for (i = 0; i < chunk->nents; i++) { 225 for (i = 0; i < chunk->nents; i++) {
226 void *vaddr; 226 void *vaddr;
227 227
228 vaddr = page_address(chunk->page_list[i].page); 228 vaddr = page_address(sg_page(&chunk->page_list[i]));
229 if (!vaddr) { 229 if (!vaddr) {
230 ret = ERR_PTR(-EINVAL); 230 ret = ERR_PTR(-EINVAL);
231 goto bail; 231 goto bail;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 31a480e5b0..6b3322486b 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -63,6 +63,10 @@ struct mlx4_ib_sqp {
63 u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; 63 u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
64}; 64};
65 65
66enum {
67 MLX4_IB_MIN_SQ_STRIDE = 6
68};
69
66static const __be32 mlx4_ib_opcode[] = { 70static const __be32 mlx4_ib_opcode[] = {
67 [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND), 71 [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
68 [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM), 72 [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
@@ -285,9 +289,17 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
285 return 0; 289 return 0;
286} 290}
287 291
288static int set_user_sq_size(struct mlx4_ib_qp *qp, 292static int set_user_sq_size(struct mlx4_ib_dev *dev,
293 struct mlx4_ib_qp *qp,
289 struct mlx4_ib_create_qp *ucmd) 294 struct mlx4_ib_create_qp *ucmd)
290{ 295{
296 /* Sanity check SQ size before proceeding */
297 if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
298 ucmd->log_sq_stride >
299 ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
300 ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
301 return -EINVAL;
302
291 qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; 303 qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
292 qp->sq.wqe_shift = ucmd->log_sq_stride; 304 qp->sq.wqe_shift = ucmd->log_sq_stride;
293 305
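[Note: set_user_sq_size() now validates the userspace-supplied log_sq_bb_count and log_sq_stride before they size the send queue; an unchecked ucmd could otherwise drive an arbitrary shift or an oversized allocation. A userspace sketch of the same bounds-check pattern — slightly stricter than the ilog2(roundup_pow_of_two()) form above, and with invented cap values:]

#include <stdint.h>
#include <stdio.h>

#define MIN_SQ_STRIDE 6	/* smallest legal WQE stride, 2^6 = 64 bytes */

static int check_sq_size(uint32_t log_bb_count, uint32_t log_stride,
			 uint32_t max_wqes, uint32_t max_desc_sz)
{
	/* reject shifts that would overflow before comparing sizes */
	if (log_bb_count >= 32 || (1U << log_bb_count) > max_wqes)
		return -1;
	if (log_stride < MIN_SQ_STRIDE ||
	    log_stride >= 32 || (1U << log_stride) > max_desc_sz)
		return -1;
	return 0;
}

int main(void)
{
	printf("%d\n", check_sq_size(10, 6, 16384, 1008));	/* 0: fits */
	printf("%d\n", check_sq_size(31, 6, 16384, 1008));	/* -1: rejected */
	return 0;
}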
@@ -330,7 +342,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
330 342
331 qp->sq_no_prefetch = ucmd.sq_no_prefetch; 343 qp->sq_no_prefetch = ucmd.sq_no_prefetch;
332 344
333 err = set_user_sq_size(qp, &ucmd); 345 err = set_user_sq_size(dev, qp, &ucmd);
334 if (err) 346 if (err)
335 goto err; 347 goto err;
336 348
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index be6e1e03bd..6bd9f13933 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -204,16 +204,11 @@ static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
204static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, 204static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
205 int incr) 205 int incr)
206{ 206{
207 __be32 doorbell[2];
208
209 if (mthca_is_memfree(dev)) { 207 if (mthca_is_memfree(dev)) {
210 *cq->set_ci_db = cpu_to_be32(cq->cons_index); 208 *cq->set_ci_db = cpu_to_be32(cq->cons_index);
211 wmb(); 209 wmb();
212 } else { 210 } else {
213 doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn); 211 mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1,
214 doorbell[1] = cpu_to_be32(incr - 1);
215
216 mthca_write64(doorbell,
217 dev->kar + MTHCA_CQ_DOORBELL, 212 dev->kar + MTHCA_CQ_DOORBELL,
218 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 213 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
219 /* 214 /*
@@ -731,17 +726,12 @@ repoll:
731 726
732int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) 727int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
733{ 728{
734 __be32 doorbell[2]; 729 u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
730 MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
731 MTHCA_TAVOR_CQ_DB_REQ_NOT) |
732 to_mcq(cq)->cqn;
735 733
736 doorbell[0] = cpu_to_be32(((flags & IB_CQ_SOLICITED_MASK) == 734 mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
737 IB_CQ_SOLICITED ?
738 MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
739 MTHCA_TAVOR_CQ_DB_REQ_NOT) |
740 to_mcq(cq)->cqn);
741 doorbell[1] = (__force __be32) 0xffffffff;
742
743 mthca_write64(doorbell,
744 to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
745 MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock)); 735 MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
746 736
747 return 0; 737 return 0;
@@ -750,19 +740,16 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
750int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) 740int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
751{ 741{
752 struct mthca_cq *cq = to_mcq(ibcq); 742 struct mthca_cq *cq = to_mcq(ibcq);
753 __be32 doorbell[2]; 743 __be32 db_rec[2];
754 u32 sn; 744 u32 dbhi;
755 __be32 ci; 745 u32 sn = cq->arm_sn & 3;
756
757 sn = cq->arm_sn & 3;
758 ci = cpu_to_be32(cq->cons_index);
759 746
760 doorbell[0] = ci; 747 db_rec[0] = cpu_to_be32(cq->cons_index);
761 doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) | 748 db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
762 ((flags & IB_CQ_SOLICITED_MASK) == 749 ((flags & IB_CQ_SOLICITED_MASK) ==
763 IB_CQ_SOLICITED ? 1 : 2)); 750 IB_CQ_SOLICITED ? 1 : 2));
764 751
765 mthca_write_db_rec(doorbell, cq->arm_db); 752 mthca_write_db_rec(db_rec, cq->arm_db);
766 753
767 /* 754 /*
768 * Make sure that the doorbell record in host memory is 755 * Make sure that the doorbell record in host memory is
@@ -770,14 +757,12 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
770 */ 757 */
771 wmb(); 758 wmb();
772 759
773 doorbell[0] = cpu_to_be32((sn << 28) | 760 dbhi = (sn << 28) |
774 ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 761 ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
775 MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : 762 MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
776 MTHCA_ARBEL_CQ_DB_REQ_NOT) | 763 MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn;
777 cq->cqn);
778 doorbell[1] = ci;
779 764
780 mthca_write64(doorbell, 765 mthca_write64(dbhi, cq->cons_index,
781 to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL, 766 to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
782 MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock)); 767 MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
783 768
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
index dd9a44d170..b374dc395b 100644
--- a/drivers/infiniband/hw/mthca/mthca_doorbell.h
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -58,10 +58,10 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
58 __raw_writeq((__force u64) val, dest); 58 __raw_writeq((__force u64) val, dest);
59} 59}
60 60
61static inline void mthca_write64(__be32 val[2], void __iomem *dest, 61static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
62 spinlock_t *doorbell_lock) 62 spinlock_t *doorbell_lock)
63{ 63{
64 __raw_writeq(*(u64 *) val, dest); 64 __raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest);
65} 65}
66 66
67static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) 67static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
@@ -87,14 +87,17 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
87 __raw_writel(((__force u32 *) &val)[1], dest + 4); 87 __raw_writel(((__force u32 *) &val)[1], dest + 4);
88} 88}
89 89
90static inline void mthca_write64(__be32 val[2], void __iomem *dest, 90static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
91 spinlock_t *doorbell_lock) 91 spinlock_t *doorbell_lock)
92{ 92{
93 unsigned long flags; 93 unsigned long flags;
94 94
95 hi = (__force u32) cpu_to_be32(hi);
96 lo = (__force u32) cpu_to_be32(lo);
97
95 spin_lock_irqsave(doorbell_lock, flags); 98 spin_lock_irqsave(doorbell_lock, flags);
96 __raw_writel((__force u32) val[0], dest); 99 __raw_writel(hi, dest);
97 __raw_writel((__force u32) val[1], dest + 4); 100 __raw_writel(lo, dest + 4);
98 spin_unlock_irqrestore(doorbell_lock, flags); 101 spin_unlock_irqrestore(doorbell_lock, flags);
99} 102}
100 103
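[Note: the new mthca_write64() signature takes the two doorbell words as host-endian u32s and does the byte swap itself — one cpu_to_be64() store on 64-bit systems, or two cpu_to_be32() writes under the doorbell lock on 32-bit ones — so callers no longer assemble a __be32[2] scratch array. A portable sketch of the 64-bit path; the bswap stand-in assumes a little-endian host:]

#include <stdint.h>
#include <stdio.h>

/* cpu_to_be64() stand-in; assumes a little-endian host */
static inline uint64_t be64(uint64_t x)
{
	return __builtin_bswap64(x);
}

/* the 64-bit kernel path: one byte-swapped 64-bit store, no lock needed */
static void write64_sketch(uint32_t hi, uint32_t lo, volatile uint64_t *dest)
{
	*dest = be64(((uint64_t)hi << 32) | lo);	/* __raw_writeq() */
}

int main(void)
{
	uint64_t fake_doorbell;

	write64_sketch(0x12345678, 0x9abcdef0, &fake_doorbell);
	printf("%016llx\n", (unsigned long long)fake_doorbell);
	return 0;
}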
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8592b26dc4..b29de51b7f 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -173,11 +173,6 @@ static inline u64 async_mask(struct mthca_dev *dev)
173 173
174static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) 174static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
175{ 175{
176 __be32 doorbell[2];
177
178 doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn);
179 doorbell[1] = cpu_to_be32(ci & (eq->nent - 1));
180
181 /* 176 /*
182 * This barrier makes sure that all updates to ownership bits 177 * This barrier makes sure that all updates to ownership bits
183 * done by set_eqe_hw() hit memory before the consumer index 178 * done by set_eqe_hw() hit memory before the consumer index
@@ -187,7 +182,7 @@ static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u
187 * having set_eqe_hw() overwrite the owner field. 182 * having set_eqe_hw() overwrite the owner field.
188 */ 183 */
189 wmb(); 184 wmb();
190 mthca_write64(doorbell, 185 mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1),
191 dev->kar + MTHCA_EQ_DOORBELL, 186 dev->kar + MTHCA_EQ_DOORBELL,
192 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 187 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
193} 188}
@@ -212,12 +207,7 @@ static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
212 207
213static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) 208static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
214{ 209{
215 __be32 doorbell[2]; 210 mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0,
216
217 doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn);
218 doorbell[1] = 0;
219
220 mthca_write64(doorbell,
221 dev->kar + MTHCA_EQ_DOORBELL, 211 dev->kar + MTHCA_EQ_DOORBELL,
222 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 212 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
223} 213}
@@ -230,12 +220,7 @@ static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
230static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) 220static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
231{ 221{
232 if (!mthca_is_memfree(dev)) { 222 if (!mthca_is_memfree(dev)) {
233 __be32 doorbell[2]; 223 mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn,
234
235 doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn);
236 doorbell[1] = cpu_to_be32(cqn);
237
238 mthca_write64(doorbell,
239 dev->kar + MTHCA_EQ_DOORBELL, 224 dev->kar + MTHCA_EQ_DOORBELL,
240 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 225 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
241 } 226 }
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index e61f3e6269..007b38157f 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -71,7 +71,7 @@ static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *
71 PCI_DMA_BIDIRECTIONAL); 71 PCI_DMA_BIDIRECTIONAL);
72 72
73 for (i = 0; i < chunk->npages; ++i) 73 for (i = 0; i < chunk->npages; ++i)
74 __free_pages(chunk->mem[i].page, 74 __free_pages(sg_page(&chunk->mem[i]),
75 get_order(chunk->mem[i].length)); 75 get_order(chunk->mem[i].length));
76} 76}
77 77
@@ -81,7 +81,7 @@ static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chun
81 81
82 for (i = 0; i < chunk->npages; ++i) { 82 for (i = 0; i < chunk->npages; ++i) {
83 dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, 83 dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
84 lowmem_page_address(chunk->mem[i].page), 84 lowmem_page_address(sg_page(&chunk->mem[i])),
85 sg_dma_address(&chunk->mem[i])); 85 sg_dma_address(&chunk->mem[i]));
86 } 86 }
87} 87}
@@ -107,10 +107,13 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)
107 107
108static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask) 108static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
109{ 109{
110 mem->page = alloc_pages(gfp_mask, order); 110 struct page *page;
111 if (!mem->page) 111
112 page = alloc_pages(gfp_mask, order);
113 if (!page)
112 return -ENOMEM; 114 return -ENOMEM;
113 115
116 sg_set_page(mem, page);
114 mem->length = PAGE_SIZE << order; 117 mem->length = PAGE_SIZE << order;
115 mem->offset = 0; 118 mem->offset = 0;
116 return 0; 119 return 0;
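[Note: these hunks are part of the tree-wide sg_page()/sg_set_page() conversion: struct scatterlist no longer exposes a bare page pointer, because the low bits of page_link are reserved for scatterlist chaining, which is also why entries must first be initialized with sg_init_table(). A kernel-style sketch against this 2.6.24-era API — note that sg_set_page() here takes only (sg, page); later kernels fold length and offset into the call:]

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>

static int alloc_into_sg(struct scatterlist *sg, int order, gfp_t gfp_mask)
{
	struct page *page = alloc_pages(gfp_mask, order);

	if (!page)
		return -ENOMEM;

	sg_set_page(sg, page);		/* never poke sg->page directly */
	sg->length = PAGE_SIZE << order;
	sg->offset = 0;
	return 0;
}

static void free_from_sg(struct scatterlist *sg, int order)
{
	__free_pages(sg_page(sg), order);
}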
@@ -157,6 +160,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
157 if (!chunk) 160 if (!chunk)
158 goto fail; 161 goto fail;
159 162
163 sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN);
160 chunk->npages = 0; 164 chunk->npages = 0;
161 chunk->nsg = 0; 165 chunk->nsg = 0;
162 list_add_tail(&chunk->list, &icm->chunk_list); 166 list_add_tail(&chunk->list, &icm->chunk_list);
@@ -304,7 +308,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_h
304 * so if we found the page, dma_handle has already 308 * so if we found the page, dma_handle has already
305 * been assigned to. */ 309 * been assigned to. */
306 if (chunk->mem[i].length > offset) { 310 if (chunk->mem[i].length > offset) {
307 page = chunk->mem[i].page; 311 page = sg_page(&chunk->mem[i]);
308 goto out; 312 goto out;
309 } 313 }
310 offset -= chunk->mem[i].length; 314 offset -= chunk->mem[i].length;
@@ -445,6 +449,7 @@ static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int pag
445int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, 449int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
446 struct mthca_user_db_table *db_tab, int index, u64 uaddr) 450 struct mthca_user_db_table *db_tab, int index, u64 uaddr)
447{ 451{
452 struct page *pages[1];
448 int ret = 0; 453 int ret = 0;
449 u8 status; 454 u8 status;
450 int i; 455 int i;
@@ -472,16 +477,17 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
472 } 477 }
473 478
474 ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, 479 ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
475 &db_tab->page[i].mem.page, NULL); 480 pages, NULL);
476 if (ret < 0) 481 if (ret < 0)
477 goto out; 482 goto out;
478 483
484 sg_set_page(&db_tab->page[i].mem, pages[0]);
479 db_tab->page[i].mem.length = MTHCA_ICM_PAGE_SIZE; 485 db_tab->page[i].mem.length = MTHCA_ICM_PAGE_SIZE;
480 db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK; 486 db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
481 487
482 ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); 488 ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
483 if (ret < 0) { 489 if (ret < 0) {
484 put_page(db_tab->page[i].mem.page); 490 put_page(pages[0]);
485 goto out; 491 goto out;
486 } 492 }
487 493
@@ -491,7 +497,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
491 ret = -EINVAL; 497 ret = -EINVAL;
492 if (ret) { 498 if (ret) {
493 pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); 499 pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
494 put_page(db_tab->page[i].mem.page); 500 put_page(sg_page(&db_tab->page[i].mem));
495 goto out; 501 goto out;
496 } 502 }
497 503
@@ -557,7 +563,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
557 if (db_tab->page[i].uvirt) { 563 if (db_tab->page[i].uvirt) {
558 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); 564 mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
559 pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); 565 pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
560 put_page(db_tab->page[i].mem.page); 566 put_page(sg_page(&db_tab->page[i].mem));
561 } 567 }
562 } 568 }
563 569
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index df01b2026a..0e5461c657 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1799,15 +1799,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1799 1799
1800out: 1800out:
1801 if (likely(nreq)) { 1801 if (likely(nreq)) {
1802 __be32 doorbell[2];
1803
1804 doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
1805 qp->send_wqe_offset) | f0 | op0);
1806 doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
1807
1808 wmb(); 1802 wmb();
1809 1803
1810 mthca_write64(doorbell, 1804 mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
1805 qp->send_wqe_offset) | f0 | op0,
1806 (qp->qpn << 8) | size0,
1811 dev->kar + MTHCA_SEND_DOORBELL, 1807 dev->kar + MTHCA_SEND_DOORBELL,
1812 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 1808 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1813 /* 1809 /*
@@ -1829,7 +1825,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1829{ 1825{
1830 struct mthca_dev *dev = to_mdev(ibqp->device); 1826 struct mthca_dev *dev = to_mdev(ibqp->device);
1831 struct mthca_qp *qp = to_mqp(ibqp); 1827 struct mthca_qp *qp = to_mqp(ibqp);
1832 __be32 doorbell[2];
1833 unsigned long flags; 1828 unsigned long flags;
1834 int err = 0; 1829 int err = 0;
1835 int nreq; 1830 int nreq;
@@ -1907,13 +1902,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1907 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { 1902 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
1908 nreq = 0; 1903 nreq = 0;
1909 1904
1910 doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
1911 doorbell[1] = cpu_to_be32(qp->qpn << 8);
1912
1913 wmb(); 1905 wmb();
1914 1906
1915 mthca_write64(doorbell, 1907 mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
1916 dev->kar + MTHCA_RECEIVE_DOORBELL, 1908 qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL,
1917 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 1909 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1918 1910
1919 qp->rq.next_ind = ind; 1911 qp->rq.next_ind = ind;
@@ -1923,13 +1915,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1923 1915
1924out: 1916out:
1925 if (likely(nreq)) { 1917 if (likely(nreq)) {
1926 doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
1927 doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
1928
1929 wmb(); 1918 wmb();
1930 1919
1931 mthca_write64(doorbell, 1920 mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
1932 dev->kar + MTHCA_RECEIVE_DOORBELL, 1921 qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL,
1933 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 1922 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1934 } 1923 }
1935 1924
@@ -1951,7 +1940,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1951{ 1940{
1952 struct mthca_dev *dev = to_mdev(ibqp->device); 1941 struct mthca_dev *dev = to_mdev(ibqp->device);
1953 struct mthca_qp *qp = to_mqp(ibqp); 1942 struct mthca_qp *qp = to_mqp(ibqp);
1954 __be32 doorbell[2]; 1943 u32 dbhi;
1955 void *wqe; 1944 void *wqe;
1956 void *prev_wqe; 1945 void *prev_wqe;
1957 unsigned long flags; 1946 unsigned long flags;
@@ -1981,10 +1970,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1981 if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) { 1970 if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
1982 nreq = 0; 1971 nreq = 0;
1983 1972
1984 doorbell[0] = cpu_to_be32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) | 1973 dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
1985 ((qp->sq.head & 0xffff) << 8) | 1974 ((qp->sq.head & 0xffff) << 8) | f0 | op0;
1986 f0 | op0);
1987 doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
1988 1975
1989 qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; 1976 qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
1990 1977
@@ -2000,7 +1987,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2000 * write MMIO send doorbell. 1987 * write MMIO send doorbell.
2001 */ 1988 */
2002 wmb(); 1989 wmb();
2003 mthca_write64(doorbell, 1990
1991 mthca_write64(dbhi, (qp->qpn << 8) | size0,
2004 dev->kar + MTHCA_SEND_DOORBELL, 1992 dev->kar + MTHCA_SEND_DOORBELL,
2005 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 1993 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
2006 } 1994 }
@@ -2154,10 +2142,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2154 2142
2155out: 2143out:
2156 if (likely(nreq)) { 2144 if (likely(nreq)) {
2157 doorbell[0] = cpu_to_be32((nreq << 24) | 2145 dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;
2158 ((qp->sq.head & 0xffff) << 8) |
2159 f0 | op0);
2160 doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
2161 2146
2162 qp->sq.head += nreq; 2147 qp->sq.head += nreq;
2163 2148
@@ -2173,8 +2158,8 @@ out:
2173 * write MMIO send doorbell. 2158 * write MMIO send doorbell.
2174 */ 2159 */
2175 wmb(); 2160 wmb();
2176 mthca_write64(doorbell, 2161
2177 dev->kar + MTHCA_SEND_DOORBELL, 2162 mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL,
2178 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 2163 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
2179 } 2164 }
2180 2165
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 3f58c11a62..553d681f68 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -491,7 +491,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
491{ 491{
492 struct mthca_dev *dev = to_mdev(ibsrq->device); 492 struct mthca_dev *dev = to_mdev(ibsrq->device);
493 struct mthca_srq *srq = to_msrq(ibsrq); 493 struct mthca_srq *srq = to_msrq(ibsrq);
494 __be32 doorbell[2];
495 unsigned long flags; 494 unsigned long flags;
496 int err = 0; 495 int err = 0;
497 int first_ind; 496 int first_ind;
@@ -563,16 +562,13 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
563 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { 562 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
564 nreq = 0; 563 nreq = 0;
565 564
566 doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
567 doorbell[1] = cpu_to_be32(srq->srqn << 8);
568
569 /* 565 /*
570 * Make sure that descriptors are written 566 * Make sure that descriptors are written
571 * before doorbell is rung. 567 * before doorbell is rung.
572 */ 568 */
573 wmb(); 569 wmb();
574 570
575 mthca_write64(doorbell, 571 mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8,
576 dev->kar + MTHCA_RECEIVE_DOORBELL, 572 dev->kar + MTHCA_RECEIVE_DOORBELL,
577 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 573 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
578 574
@@ -581,16 +577,13 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
581 } 577 }
582 578
583 if (likely(nreq)) { 579 if (likely(nreq)) {
584 doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
585 doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq);
586
587 /* 580 /*
588 * Make sure that descriptors are written before 581 * Make sure that descriptors are written before
589 * doorbell is rung. 582 * doorbell is rung.
590 */ 583 */
591 wmb(); 584 wmb();
592 585
593 mthca_write64(doorbell, 586 mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq,
594 dev->kar + MTHCA_RECEIVE_DOORBELL, 587 dev->kar + MTHCA_RECEIVE_DOORBELL,
595 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); 588 MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
596 } 589 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 1b3327ad6b..eb7edab0e8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -84,9 +84,8 @@ enum {
84 IPOIB_MCAST_RUN = 6, 84 IPOIB_MCAST_RUN = 6,
85 IPOIB_STOP_REAPER = 7, 85 IPOIB_STOP_REAPER = 7,
86 IPOIB_MCAST_STARTED = 8, 86 IPOIB_MCAST_STARTED = 8,
87 IPOIB_FLAG_NETIF_STOPPED = 9, 87 IPOIB_FLAG_ADMIN_CM = 9,
88 IPOIB_FLAG_ADMIN_CM = 10, 88 IPOIB_FLAG_UMCAST = 10,
89 IPOIB_FLAG_UMCAST = 11,
90 89
91 IPOIB_MAX_BACKOFF_SECONDS = 16, 90 IPOIB_MAX_BACKOFF_SECONDS = 16,
92 91
@@ -98,9 +97,9 @@ enum {
98 97
99#define IPOIB_OP_RECV (1ul << 31) 98#define IPOIB_OP_RECV (1ul << 31)
100#ifdef CONFIG_INFINIBAND_IPOIB_CM 99#ifdef CONFIG_INFINIBAND_IPOIB_CM
101#define IPOIB_CM_OP_SRQ (1ul << 30) 100#define IPOIB_OP_CM (1ul << 30)
102#else 101#else
103#define IPOIB_CM_OP_SRQ (0) 102#define IPOIB_OP_CM (0)
104#endif 103#endif
105 104
106/* structs */ 105/* structs */
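[Note: renaming IPOIB_CM_OP_SRQ to IPOIB_OP_CM turns the two high bits of wr_id into an orthogonal tag — bit 31 says receive vs. send, bit 30 says connected vs. datagram mode — while the low bits still index the ring. That is what lets a single CQ carry completions for every QP, as the demux changes below show. A userspace sketch of the encode/demux:]

#include <stdint.h>
#include <stdio.h>

#define OP_RECV (1ull << 31)	/* mirrors IPOIB_OP_RECV */
#define OP_CM   (1ull << 30)	/* mirrors IPOIB_OP_CM */

int main(void)
{
	/* post path: tag the ring index with both flags */
	uint64_t wr_id = 42 | OP_RECV | OP_CM;

	/* completion path: demux on the flags, then strip them */
	if (wr_id & OP_RECV)
		printf("%s RX, slot %llu\n",
		       (wr_id & OP_CM) ? "connected-mode" : "datagram",
		       (unsigned long long)(wr_id & ~(OP_RECV | OP_CM)));
	else
		printf("TX completion\n");
	return 0;
}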
@@ -197,7 +196,6 @@ struct ipoib_cm_rx {
197 196
198struct ipoib_cm_tx { 197struct ipoib_cm_tx {
199 struct ib_cm_id *id; 198 struct ib_cm_id *id;
200 struct ib_cq *cq;
201 struct ib_qp *qp; 199 struct ib_qp *qp;
202 struct list_head list; 200 struct list_head list;
203 struct net_device *dev; 201 struct net_device *dev;
@@ -294,6 +292,7 @@ struct ipoib_dev_priv {
294 unsigned tx_tail; 292 unsigned tx_tail;
295 struct ib_sge tx_sge; 293 struct ib_sge tx_sge;
296 struct ib_send_wr tx_wr; 294 struct ib_send_wr tx_wr;
295 unsigned tx_outstanding;
297 296
298 struct ib_wc ibwc[IPOIB_NUM_WC]; 297 struct ib_wc ibwc[IPOIB_NUM_WC];
299 298
@@ -504,6 +503,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
504void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, 503void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
505 unsigned int mtu); 504 unsigned int mtu);
506void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc); 505void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
506void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
507#else 507#else
508 508
509struct ipoib_cm_tx; 509struct ipoib_cm_tx;
@@ -592,6 +592,9 @@ static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *w
592{ 592{
593} 593}
594 594
595static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
596{
597}
595#endif 598#endif
596 599
597#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 600#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0a0dcb8fdf..87610772a9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -87,7 +87,7 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
87 struct ib_recv_wr *bad_wr; 87 struct ib_recv_wr *bad_wr;
88 int i, ret; 88 int i, ret;
89 89
90 priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ; 90 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
91 91
92 for (i = 0; i < IPOIB_CM_RX_SG; ++i) 92 for (i = 0; i < IPOIB_CM_RX_SG; ++i)
93 priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; 93 priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
@@ -401,7 +401,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
401void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) 401void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
402{ 402{
403 struct ipoib_dev_priv *priv = netdev_priv(dev); 403 struct ipoib_dev_priv *priv = netdev_priv(dev);
404 unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ; 404 unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
405 struct sk_buff *skb, *newskb; 405 struct sk_buff *skb, *newskb;
406 struct ipoib_cm_rx *p; 406 struct ipoib_cm_rx *p;
407 unsigned long flags; 407 unsigned long flags;
@@ -412,7 +412,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
412 wr_id, wc->status); 412 wr_id, wc->status);
413 413
414 if (unlikely(wr_id >= ipoib_recvq_size)) { 414 if (unlikely(wr_id >= ipoib_recvq_size)) {
415 if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) { 415 if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
416 spin_lock_irqsave(&priv->lock, flags); 416 spin_lock_irqsave(&priv->lock, flags);
417 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); 417 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
418 ipoib_cm_start_rx_drain(priv); 418 ipoib_cm_start_rx_drain(priv);
@@ -434,7 +434,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
434 goto repost; 434 goto repost;
435 } 435 }
436 436
437 if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) { 437 if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
438 p = wc->qp->qp_context; 438 p = wc->qp->qp_context;
439 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 439 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
440 spin_lock_irqsave(&priv->lock, flags); 440 spin_lock_irqsave(&priv->lock, flags);
@@ -498,7 +498,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
498 priv->tx_sge.addr = addr; 498 priv->tx_sge.addr = addr;
499 priv->tx_sge.length = len; 499 priv->tx_sge.length = len;
500 500
501 priv->tx_wr.wr_id = wr_id; 501 priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
502 502
503 return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); 503 return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
504} 504}
@@ -549,20 +549,19 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
549 dev->trans_start = jiffies; 549 dev->trans_start = jiffies;
550 ++tx->tx_head; 550 ++tx->tx_head;
551 551
552 if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) { 552 if (++priv->tx_outstanding == ipoib_sendq_size) {
553 ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", 553 ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
554 tx->qp->qp_num); 554 tx->qp->qp_num);
555 netif_stop_queue(dev); 555 netif_stop_queue(dev);
556 set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
557 } 556 }
558 } 557 }
559} 558}
560 559
561static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx, 560void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
562 struct ib_wc *wc)
563{ 561{
564 struct ipoib_dev_priv *priv = netdev_priv(dev); 562 struct ipoib_dev_priv *priv = netdev_priv(dev);
565 unsigned int wr_id = wc->wr_id; 563 struct ipoib_cm_tx *tx = wc->qp->qp_context;
564 unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
566 struct ipoib_tx_buf *tx_req; 565 struct ipoib_tx_buf *tx_req;
567 unsigned long flags; 566 unsigned long flags;
568 567
@@ -587,11 +586,10 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
587 586
588 spin_lock_irqsave(&priv->tx_lock, flags); 587 spin_lock_irqsave(&priv->tx_lock, flags);
589 ++tx->tx_tail; 588 ++tx->tx_tail;
590 if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) && 589 if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
591 tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) { 590 netif_queue_stopped(dev) &&
592 clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags); 591 test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
593 netif_wake_queue(dev); 592 netif_wake_queue(dev);
594 }
595 593
596 if (wc->status != IB_WC_SUCCESS && 594 if (wc->status != IB_WC_SUCCESS &&
597 wc->status != IB_WC_WR_FLUSH_ERR) { 595 wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -614,11 +612,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
614 tx->neigh = NULL; 612 tx->neigh = NULL;
615 } 613 }
616 614
617 /* queue would be re-started anyway when TX is destroyed,
618 * but it makes sense to do it ASAP here. */
619 if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags))
620 netif_wake_queue(dev);
621
622 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 615 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
623 list_move(&tx->list, &priv->cm.reap_list); 616 list_move(&tx->list, &priv->cm.reap_list);
624 queue_work(ipoib_workqueue, &priv->cm.reap_task); 617 queue_work(ipoib_workqueue, &priv->cm.reap_task);
@@ -632,19 +625,6 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
632 spin_unlock_irqrestore(&priv->tx_lock, flags); 625 spin_unlock_irqrestore(&priv->tx_lock, flags);
633} 626}
634 627
635static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
636{
637 struct ipoib_cm_tx *tx = tx_ptr;
638 int n, i;
639
640 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
641 do {
642 n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
643 for (i = 0; i < n; ++i)
644 ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
645 } while (n == IPOIB_NUM_WC);
646}
647
648int ipoib_cm_dev_open(struct net_device *dev) 628int ipoib_cm_dev_open(struct net_device *dev)
649{ 629{
650 struct ipoib_dev_priv *priv = netdev_priv(dev); 630 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -807,17 +787,18 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
807 return 0; 787 return 0;
808} 788}
809 789
810static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq) 790static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
811{ 791{
812 struct ipoib_dev_priv *priv = netdev_priv(dev); 792 struct ipoib_dev_priv *priv = netdev_priv(dev);
813 struct ib_qp_init_attr attr = { 793 struct ib_qp_init_attr attr = {
814 .send_cq = cq, 794 .send_cq = priv->cq,
815 .recv_cq = priv->cq, 795 .recv_cq = priv->cq,
816 .srq = priv->cm.srq, 796 .srq = priv->cm.srq,
817 .cap.max_send_wr = ipoib_sendq_size, 797 .cap.max_send_wr = ipoib_sendq_size,
818 .cap.max_send_sge = 1, 798 .cap.max_send_sge = 1,
819 .sq_sig_type = IB_SIGNAL_ALL_WR, 799 .sq_sig_type = IB_SIGNAL_ALL_WR,
820 .qp_type = IB_QPT_RC, 800 .qp_type = IB_QPT_RC,
801 .qp_context = tx
821 }; 802 };
822 803
823 return ib_create_qp(priv->pd, &attr); 804 return ib_create_qp(priv->pd, &attr);
@@ -899,21 +880,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
899 goto err_tx; 880 goto err_tx;
900 } 881 }
901 882
902 p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p, 883 p->qp = ipoib_cm_create_tx_qp(p->dev, p);
903 ipoib_sendq_size + 1, 0);
904 if (IS_ERR(p->cq)) {
905 ret = PTR_ERR(p->cq);
906 ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret);
907 goto err_cq;
908 }
909
910 ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP);
911 if (ret) {
912 ipoib_warn(priv, "failed to request completion notification: %d\n", ret);
913 goto err_req_notify;
914 }
915
916 p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq);
917 if (IS_ERR(p->qp)) { 884 if (IS_ERR(p->qp)) {
918 ret = PTR_ERR(p->qp); 885 ret = PTR_ERR(p->qp);
919 ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret); 886 ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
@@ -950,12 +917,8 @@ err_modify:
950err_id: 917err_id:
951 p->id = NULL; 918 p->id = NULL;
952 ib_destroy_qp(p->qp); 919 ib_destroy_qp(p->qp);
953err_req_notify:
954err_qp: 920err_qp:
955 p->qp = NULL; 921 p->qp = NULL;
956 ib_destroy_cq(p->cq);
957err_cq:
958 p->cq = NULL;
959err_tx: 922err_tx:
960 return ret; 923 return ret;
961} 924}
@@ -964,6 +927,8 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
964{ 927{
965 struct ipoib_dev_priv *priv = netdev_priv(p->dev); 928 struct ipoib_dev_priv *priv = netdev_priv(p->dev);
966 struct ipoib_tx_buf *tx_req; 929 struct ipoib_tx_buf *tx_req;
930 unsigned long flags;
931 unsigned long begin;
967 932
968 ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", 933 ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
969 p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail); 934 p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
@@ -971,27 +936,40 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
971 if (p->id) 936 if (p->id)
972 ib_destroy_cm_id(p->id); 937 ib_destroy_cm_id(p->id);
973 938
974 if (p->qp)
975 ib_destroy_qp(p->qp);
976
977 if (p->cq)
978 ib_destroy_cq(p->cq);
979
980 if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags))
981 netif_wake_queue(p->dev);
982
983 if (p->tx_ring) { 939 if (p->tx_ring) {
940 /* Wait for all sends to complete */
941 begin = jiffies;
984 while ((int) p->tx_tail - (int) p->tx_head < 0) { 942 while ((int) p->tx_tail - (int) p->tx_head < 0) {
985 tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; 943 if (time_after(jiffies, begin + 5 * HZ)) {
986 ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, 944 ipoib_warn(priv, "timing out; %d sends not completed\n",
987 DMA_TO_DEVICE); 945 p->tx_head - p->tx_tail);
988 dev_kfree_skb_any(tx_req->skb); 946 goto timeout;
989 ++p->tx_tail; 947 }
948
949 msleep(1);
990 } 950 }
951 }
991 952
992 kfree(p->tx_ring); 953timeout:
954
955 while ((int) p->tx_tail - (int) p->tx_head < 0) {
956 tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
957 ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
958 DMA_TO_DEVICE);
959 dev_kfree_skb_any(tx_req->skb);
960 ++p->tx_tail;
961 spin_lock_irqsave(&priv->tx_lock, flags);
962 if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
963 netif_queue_stopped(p->dev) &&
964 test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
965 netif_wake_queue(p->dev);
966 spin_unlock_irqrestore(&priv->tx_lock, flags);
993 } 967 }
994 968
969 if (p->qp)
970 ib_destroy_qp(p->qp);
971
972 kfree(p->tx_ring);
995 kfree(p); 973 kfree(p);
996} 974}
997 975
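[Note: ipoib_cm_tx_destroy() now drains in two phases — wait up to five seconds for outstanding sends to complete normally, then force-reclaim whatever is left, unmapping and freeing each skb and decrementing tx_outstanding so a stopped queue can still be woken — ensuring a wedged QP cannot block destroy forever. A userspace sketch of the bounded-wait skeleton:]

#include <stdio.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	unsigned int tx_head = 8, tx_tail = 5;	/* 3 sends still in flight */
	time_t begin = time(NULL);

	while ((int)(tx_tail - tx_head) < 0) {
		if (time(NULL) > begin + 5) {
			printf("timing out; %u sends not completed\n",
			       tx_head - tx_tail);
			break;
		}
		usleep(1000);	/* msleep(1) stand-in */
		/* the CQ handler would bump tx_tail here as WCs arrive */
	}
	/* phase two would now unmap and free the remaining ring entries */
	return 0;
}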
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 1a77e79f6b..5063dd509a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -267,11 +267,10 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
267 267
268 spin_lock_irqsave(&priv->tx_lock, flags); 268 spin_lock_irqsave(&priv->tx_lock, flags);
269 ++priv->tx_tail; 269 ++priv->tx_tail;
270 if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) && 270 if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
271 priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) { 271 netif_queue_stopped(dev) &&
272 clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags); 272 test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
273 netif_wake_queue(dev); 273 netif_wake_queue(dev);
274 }
275 spin_unlock_irqrestore(&priv->tx_lock, flags); 274 spin_unlock_irqrestore(&priv->tx_lock, flags);
276 275
277 if (wc->status != IB_WC_SUCCESS && 276 if (wc->status != IB_WC_SUCCESS &&
@@ -301,14 +300,18 @@ poll_more:
301 for (i = 0; i < n; i++) { 300 for (i = 0; i < n; i++) {
302 struct ib_wc *wc = priv->ibwc + i; 301 struct ib_wc *wc = priv->ibwc + i;
303 302
304 if (wc->wr_id & IPOIB_CM_OP_SRQ) { 303 if (wc->wr_id & IPOIB_OP_RECV) {
305 ++done;
306 ipoib_cm_handle_rx_wc(dev, wc);
307 } else if (wc->wr_id & IPOIB_OP_RECV) {
308 ++done; 304 ++done;
309 ipoib_ib_handle_rx_wc(dev, wc); 305 if (wc->wr_id & IPOIB_OP_CM)
310 } else 306 ipoib_cm_handle_rx_wc(dev, wc);
311 ipoib_ib_handle_tx_wc(dev, wc); 307 else
308 ipoib_ib_handle_rx_wc(dev, wc);
309 } else {
310 if (wc->wr_id & IPOIB_OP_CM)
311 ipoib_cm_handle_tx_wc(dev, wc);
312 else
313 ipoib_ib_handle_tx_wc(dev, wc);
314 }
312 } 315 }
313 316
314 if (n != t) 317 if (n != t)
@@ -401,10 +404,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
401 address->last_send = priv->tx_head; 404 address->last_send = priv->tx_head;
402 ++priv->tx_head; 405 ++priv->tx_head;
403 406
404 if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) { 407 if (++priv->tx_outstanding == ipoib_sendq_size) {
405 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); 408 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
406 netif_stop_queue(dev); 409 netif_stop_queue(dev);
407 set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
408 } 410 }
409 } 411 }
410} 412}
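[Note: with the per-tx IPOIB_FLAG_NETIF_STOPPED flag gone, flow control hangs off the single priv->tx_outstanding counter shared by the UD QP and all CM QPs: stop the net queue when the counter reaches the send-queue size, wake it once completions drain it back to half. A self-contained sketch of the watermark logic:]

#include <stdbool.h>
#include <stdio.h>

#define SENDQ_SIZE 64

static unsigned int tx_outstanding;
static bool queue_stopped;

static void send_one(void)
{
	if (++tx_outstanding == SENDQ_SIZE) {
		queue_stopped = true;		/* netif_stop_queue() */
		printf("TX ring full, stopping queue\n");
	}
}

static void complete_one(void)
{
	if (--tx_outstanding == (SENDQ_SIZE >> 1) && queue_stopped) {
		queue_stopped = false;		/* netif_wake_queue() */
		printf("queue woken at half-full\n");
	}
}

int main(void)
{
	int i;

	for (i = 0; i < SENDQ_SIZE; i++)
		send_one();
	while (queue_stopped)
		complete_one();
	return 0;
}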
@@ -436,7 +438,8 @@ void ipoib_reap_ah(struct work_struct *work)
436 __ipoib_reap_ah(dev); 438 __ipoib_reap_ah(dev);
437 439
438 if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) 440 if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
439 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); 441 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
442 round_jiffies_relative(HZ));
440} 443}
441 444
442int ipoib_ib_dev_open(struct net_device *dev) 445int ipoib_ib_dev_open(struct net_device *dev)
@@ -472,7 +475,8 @@ int ipoib_ib_dev_open(struct net_device *dev)
472 } 475 }
473 476
474 clear_bit(IPOIB_STOP_REAPER, &priv->flags); 477 clear_bit(IPOIB_STOP_REAPER, &priv->flags);
475 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); 478 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
479 round_jiffies_relative(HZ));
476 480
477 set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); 481 set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
478 482
@@ -561,12 +565,17 @@ void ipoib_drain_cq(struct net_device *dev)
561 if (priv->ibwc[i].status == IB_WC_SUCCESS) 565 if (priv->ibwc[i].status == IB_WC_SUCCESS)
562 priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR; 566 priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
563 567
564 if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ) 568 if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) {
565 ipoib_cm_handle_rx_wc(dev, priv->ibwc + i); 569 if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
566 else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) 570 ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
567 ipoib_ib_handle_rx_wc(dev, priv->ibwc + i); 571 else
568 else 572 ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
569 ipoib_ib_handle_tx_wc(dev, priv->ibwc + i); 573 } else {
574 if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
575 ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
576 else
577 ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
578 }
570 } 579 }
571 } while (n == IPOIB_NUM_WC); 580 } while (n == IPOIB_NUM_WC);
572} 581}
@@ -612,6 +621,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
612 DMA_TO_DEVICE); 621 DMA_TO_DEVICE);
613 dev_kfree_skb_any(tx_req->skb); 622 dev_kfree_skb_any(tx_req->skb);
614 ++priv->tx_tail; 623 ++priv->tx_tail;
624 --priv->tx_outstanding;
615 } 625 }
616 626
617 for (i = 0; i < ipoib_recvq_size; ++i) { 627 for (i = 0; i < ipoib_recvq_size; ++i) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 362610d870..a03a65ebcf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -148,8 +148,6 @@ static int ipoib_stop(struct net_device *dev)
148 148
149 netif_stop_queue(dev); 149 netif_stop_queue(dev);
150 150
151 clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
152
153 /* 151 /*
154 * Now flush workqueue to make sure a scheduled task doesn't 152 * Now flush workqueue to make sure a scheduled task doesn't
155 * bring our internal state back up. 153 * bring our internal state back up.
@@ -902,7 +900,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
902 goto out_rx_ring_cleanup; 900 goto out_rx_ring_cleanup;
903 } 901 }
904 902
905 /* priv->tx_head & tx_tail are already 0 */ 903 /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
906 904
907 if (ipoib_ib_dev_init(dev, ca, port)) 905 if (ipoib_ib_dev_init(dev, ca, port))
908 goto out_tx_ring_cleanup; 906 goto out_tx_ring_cleanup;
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index f3529b6f0a..d687980617 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -131,7 +131,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
131 131
132 p = mem; 132 p = mem;
133 for_each_sg(sgl, sg, data->size, i) { 133 for_each_sg(sgl, sg, data->size, i) {
134 from = kmap_atomic(sg->page, KM_USER0); 134 from = kmap_atomic(sg_page(sg), KM_USER0);
135 memcpy(p, 135 memcpy(p,
136 from + sg->offset, 136 from + sg->offset,
137 sg->length); 137 sg->length);
@@ -191,7 +191,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
191 191
192 p = mem; 192 p = mem;
193 for_each_sg(sgl, sg, sg_size, i) { 193 for_each_sg(sgl, sg, sg_size, i) {
194 to = kmap_atomic(sg->page, KM_SOFTIRQ0); 194 to = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
195 memcpy(to + sg->offset, 195 memcpy(to + sg->offset,
196 p, 196 p,
197 sg->length); 197 sg->length);
@@ -300,7 +300,7 @@ static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
300 for_each_sg(sgl, sg, data->dma_nents, i) { 300 for_each_sg(sgl, sg, data->dma_nents, i) {
301 /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX " 301 /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
302 "offset: %ld sz: %ld\n", i, 302 "offset: %ld sz: %ld\n", i,
303 (unsigned long)page_to_phys(sg->page), 303 (unsigned long)sg_phys(sg),
304 (unsigned long)sg->offset, 304 (unsigned long)sg->offset,
305 (unsigned long)sg->length); */ 305 (unsigned long)sg->length); */
306 end_addr = ib_sg_dma_address(ibdev, sg) + 306 end_addr = ib_sg_dma_address(ibdev, sg) +
@@ -336,7 +336,7 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
336 iser_err("sg[%d] dma_addr:0x%lX page:0x%p " 336 iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
337 "off:0x%x sz:0x%x dma_len:0x%x\n", 337 "off:0x%x sz:0x%x dma_len:0x%x\n",
338 i, (unsigned long)ib_sg_dma_address(ibdev, sg), 338 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
339 sg->page, sg->offset, 339 sg_page(sg), sg->offset,
340 sg->length, ib_sg_dma_len(ibdev, sg)); 340 sg->length, ib_sg_dma_len(ibdev, sg));
341} 341}
342 342
diff --git a/drivers/input/keyboard/bf54x-keys.c b/drivers/input/keyboard/bf54x-keys.c
index a67b29b089..e5f4da9283 100644
--- a/drivers/input/keyboard/bf54x-keys.c
+++ b/drivers/input/keyboard/bf54x-keys.c
@@ -256,7 +256,6 @@ static int __devinit bfin_kpad_probe(struct platform_device *pdev)
256 printk(KERN_ERR DRV_NAME 256 printk(KERN_ERR DRV_NAME
257 ": unable to claim irq %d; error %d\n", 257 ": unable to claim irq %d; error %d\n",
258 bf54x_kpad->irq, error); 258 bf54x_kpad->irq, error);
259 error = -EBUSY;
260 goto out2; 259 goto out2;
261 } 260 }
262 261
diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c
index 0117817bf5..f132702d13 100644
--- a/drivers/input/mouse/appletouch.c
+++ b/drivers/input/mouse/appletouch.c
@@ -504,25 +504,22 @@ static void atp_complete(struct urb* urb)
504 memset(dev->xy_acc, 0, sizeof(dev->xy_acc)); 504 memset(dev->xy_acc, 0, sizeof(dev->xy_acc));
505 } 505 }
506 506
507 /* Geyser 3 will continue to send packets continually after 507 input_report_key(dev->input, BTN_LEFT, key);
508 input_sync(dev->input);
509
510 /* Many Geysers will continue to send packets continually after
508 the first touch unless reinitialised. Do so if it's been 511 the first touch unless reinitialised. Do so if it's been
509 idle for a while in order to avoid waking the kernel up 512 idle for a while in order to avoid waking the kernel up
510 several hundred times a second */ 513 several hundred times a second */
511 514
512 if (atp_is_geyser_3(dev)) { 515 if (!x && !y && !key) {
513 if (!x && !y && !key) { 516 dev->idlecount++;
514 dev->idlecount++; 517 if (dev->idlecount == 10) {
515 if (dev->idlecount == 10) { 518 dev->valid = 0;
516 dev->valid = 0; 519 schedule_work(&dev->work);
517 schedule_work(&dev->work);
518 }
519 } 520 }
520 else 521 } else
521 dev->idlecount = 0; 522 dev->idlecount = 0;
522 }
523
524 input_report_key(dev->input, BTN_LEFT, key);
525 input_sync(dev->input);
526 523
527exit: 524exit:
528 retval = usb_submit_urb(dev->urb, GFP_ATOMIC); 525 retval = usb_submit_urb(dev->urb, GFP_ATOMIC);
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 11dafc0ee9..1a0cea3c52 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -20,6 +20,7 @@
20#include <linux/err.h> 20#include <linux/err.h>
21#include <linux/rcupdate.h> 21#include <linux/rcupdate.h>
22#include <linux/platform_device.h> 22#include <linux/platform_device.h>
23#include <linux/i8042.h>
23 24
24#include <asm/io.h> 25#include <asm/io.h>
25 26
@@ -208,7 +209,7 @@ static int __i8042_command(unsigned char *param, int command)
208 return 0; 209 return 0;
209} 210}
210 211
211static int i8042_command(unsigned char *param, int command) 212int i8042_command(unsigned char *param, int command)
212{ 213{
213 unsigned long flags; 214 unsigned long flags;
214 int retval; 215 int retval;
@@ -219,6 +220,7 @@ static int i8042_command(unsigned char *param, int command)
219 220
220 return retval; 221 return retval;
221} 222}
223EXPORT_SYMBOL(i8042_command);
222 224
223/* 225/*
224 * i8042_kbd_write() sends a byte out through the keyboard interface. 226 * i8042_kbd_write() sends a byte out through the keyboard interface.
diff --git a/drivers/input/serio/i8042.h b/drivers/input/serio/i8042.h
index b3eb7a72d9..dd22d91f8b 100644
--- a/drivers/input/serio/i8042.h
+++ b/drivers/input/serio/i8042.h
@@ -61,28 +61,6 @@
61#define I8042_CTR_XLATE 0x40 61#define I8042_CTR_XLATE 0x40
62 62
63/* 63/*
64 * Commands.
65 */
66
67#define I8042_CMD_CTL_RCTR 0x0120
68#define I8042_CMD_CTL_WCTR 0x1060
69#define I8042_CMD_CTL_TEST 0x01aa
70
71#define I8042_CMD_KBD_DISABLE 0x00ad
72#define I8042_CMD_KBD_ENABLE 0x00ae
73#define I8042_CMD_KBD_TEST 0x01ab
74#define I8042_CMD_KBD_LOOP 0x11d2
75
76#define I8042_CMD_AUX_DISABLE 0x00a7
77#define I8042_CMD_AUX_ENABLE 0x00a8
78#define I8042_CMD_AUX_TEST 0x01a9
79#define I8042_CMD_AUX_SEND 0x10d4
80#define I8042_CMD_AUX_LOOP 0x11d3
81
82#define I8042_CMD_MUX_PFX 0x0090
83#define I8042_CMD_MUX_SEND 0x1090
84
85/*
86 * Return codes. 64 * Return codes.
87 */ 65 */
88 66
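Exporting i8042_command() and moving the I8042_CMD_* constants out of the private header lets code outside the serio core issue keyboard-controller commands through the lock-protected entry point. A hedged sketch of an external caller, assuming the constants are now reachable via <linux/i8042.h> as the new #include suggests:

#include <linux/i8042.h>

/* Illustrative only: fetch the controller's command byte through the
 * newly exported i8042_command(). */
static int read_controller_byte(unsigned char *ctr)
{
	return i8042_command(ctr, I8042_CMD_CTL_RCTR);
}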
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index e3e0baa1a1..fa8442b624 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -202,6 +202,7 @@ config TOUCHSCREEN_USB_COMPOSITE
202 - DMC TSC-10/25 202 - DMC TSC-10/25
203 - IRTOUCHSYSTEMS/UNITOP 203 - IRTOUCHSYSTEMS/UNITOP
204 - IdealTEK URTC1000 204 - IdealTEK URTC1000
205 - GoTop Super_Q2/GogoPen/PenPower tablets
205 206
206 Have a look at <http://linux.chapter7.ch/touchkit/> for 207 Have a look at <http://linux.chapter7.ch/touchkit/> for
207 a usage description and the required user-space stuff. 208 a usage description and the required user-space stuff.
@@ -259,4 +260,9 @@ config TOUCHSCREEN_USB_GENERAL_TOUCH
259 bool "GeneralTouch Touchscreen device support" if EMBEDDED 260 bool "GeneralTouch Touchscreen device support" if EMBEDDED
260 depends on TOUCHSCREEN_USB_COMPOSITE 261 depends on TOUCHSCREEN_USB_COMPOSITE
261 262
263config TOUCHSCREEN_USB_GOTOP
264 default y
265 bool "GoTop Super_Q2/GogoPen/PenPower tablet device support" if EMBEDDED
266 depends on TOUCHSCREEN_USB_COMPOSITE
267
262endif 268endif
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 5f34b78d5d..19055e7381 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -11,8 +11,9 @@
11 * - DMC TSC-10/25 11 * - DMC TSC-10/25
12 * - IRTOUCHSYSTEMS/UNITOP 12 * - IRTOUCHSYSTEMS/UNITOP
13 * - IdealTEK URTC1000 13 * - IdealTEK URTC1000
14 * - GoTop Super_Q2/GogoPen/PenPower tablets
14 * 15 *
15 * Copyright (C) 2004-2006 by Daniel Ritz <daniel.ritz@gmx.ch> 16 * Copyright (C) 2004-2007 by Daniel Ritz <daniel.ritz@gmx.ch>
16 * Copyright (C) by Todd E. Johnson (mtouchusb.c) 17 * Copyright (C) by Todd E. Johnson (mtouchusb.c)
17 * 18 *
18 * This program is free software; you can redistribute it and/or 19 * This program is free software; you can redistribute it and/or
@@ -115,6 +116,7 @@ enum {
115 DEVTYPE_IRTOUCH, 116 DEVTYPE_IRTOUCH,
116 DEVTYPE_IDEALTEK, 117 DEVTYPE_IDEALTEK,
117 DEVTYPE_GENERAL_TOUCH, 118 DEVTYPE_GENERAL_TOUCH,
119 DEVTYPE_GOTOP,
118}; 120};
119 121
120static struct usb_device_id usbtouch_devices[] = { 122static struct usb_device_id usbtouch_devices[] = {
@@ -168,6 +170,12 @@ static struct usb_device_id usbtouch_devices[] = {
168 {USB_DEVICE(0x0dfc, 0x0001), .driver_info = DEVTYPE_GENERAL_TOUCH}, 170 {USB_DEVICE(0x0dfc, 0x0001), .driver_info = DEVTYPE_GENERAL_TOUCH},
169#endif 171#endif
170 172
173#ifdef CONFIG_TOUCHSCREEN_USB_GOTOP
174 {USB_DEVICE(0x08f2, 0x007f), .driver_info = DEVTYPE_GOTOP},
175 {USB_DEVICE(0x08f2, 0x00ce), .driver_info = DEVTYPE_GOTOP},
176 {USB_DEVICE(0x08f2, 0x00f4), .driver_info = DEVTYPE_GOTOP},
177#endif
178
171 {} 179 {}
172}; 180};
173 181
@@ -501,6 +509,20 @@ static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
501#endif 509#endif
502 510
503/***************************************************************************** 511/*****************************************************************************
512 * GoTop Part
513 */
514#ifdef CONFIG_TOUCHSCREEN_USB_GOTOP
515static int gotop_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
516{
517 dev->x = ((pkt[1] & 0x38) << 4) | pkt[2];
518 dev->y = ((pkt[1] & 0x07) << 7) | pkt[3];
519 dev->touch = pkt[0] & 0x01;
520 return 1;
521}
522#endif
523
524
525/*****************************************************************************
504 * the different device descriptors 526 * the different device descriptors
505 */ 527 */
506static struct usbtouch_device_info usbtouch_dev_info[] = { 528static struct usbtouch_device_info usbtouch_dev_info[] = {
@@ -623,9 +645,19 @@ static struct usbtouch_device_info usbtouch_dev_info[] = {
623 .max_yc = 0x0500, 645 .max_yc = 0x0500,
624 .rept_size = 7, 646 .rept_size = 7,
625 .read_data = general_touch_read_data, 647 .read_data = general_touch_read_data,
626 } 648 },
627#endif 649#endif
628 650
651#ifdef CONFIG_TOUCHSCREEN_USB_GOTOP
652 [DEVTYPE_GOTOP] = {
653 .min_xc = 0x0,
654 .max_xc = 0x03ff,
655 .min_yc = 0x0,
656 .max_yc = 0x03ff,
657 .rept_size = 4,
658 .read_data = gotop_read_data,
659 },
660#endif
629}; 661};
630 662
631 663
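gotop_read_data() above unpacks a 4-byte report: bit 0 of byte 0 is the touch state, bits 5:3 and 2:0 of byte 1 hold the top three bits of X and Y, and bytes 2 and 3 carry the low seven bits of each axis, giving the 10-bit 0..0x3ff range declared in usbtouch_dev_info. A standalone worked example of the same decode:

#include <stdio.h>

int main(void)
{
	unsigned char pkt[4] = { 0x01, 0x2a, 0x55, 0x33 }; /* sample report */
	unsigned int x = ((pkt[1] & 0x38) << 4) | pkt[2];
	unsigned int y = ((pkt[1] & 0x07) << 7) | pkt[3];

	/* prints: touch=1 x=725 y=307 */
	printf("touch=%u x=%u y=%u\n", pkt[0] & 0x01, x, y);
	return 0;
}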
diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig
index 8749fa4ffc..656920636c 100644
--- a/drivers/kvm/Kconfig
+++ b/drivers/kvm/Kconfig
@@ -47,4 +47,8 @@ config KVM_AMD
47 Provides support for KVM on AMD processors equipped with the AMD-V 47 Provides support for KVM on AMD processors equipped with the AMD-V
48 (SVM) extensions. 48 (SVM) extensions.
49 49
50# OK, it's a little counter-intuitive to do this, but it puts it neatly under
51# the virtualization menu.
52source drivers/lguest/Kconfig
53
50endif # VIRTUALIZATION 54endif # VIRTUALIZATION
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index af2d288c88..07ae280e8f 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -198,21 +198,15 @@ static void vcpu_put(struct kvm_vcpu *vcpu)
198 198
199static void ack_flush(void *_completed) 199static void ack_flush(void *_completed)
200{ 200{
201 atomic_t *completed = _completed;
202
203 atomic_inc(completed);
204} 201}
205 202
206void kvm_flush_remote_tlbs(struct kvm *kvm) 203void kvm_flush_remote_tlbs(struct kvm *kvm)
207{ 204{
208 int i, cpu, needed; 205 int i, cpu;
209 cpumask_t cpus; 206 cpumask_t cpus;
210 struct kvm_vcpu *vcpu; 207 struct kvm_vcpu *vcpu;
211 atomic_t completed;
212 208
213 atomic_set(&completed, 0);
214 cpus_clear(cpus); 209 cpus_clear(cpus);
215 needed = 0;
216 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 210 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
217 vcpu = kvm->vcpus[i]; 211 vcpu = kvm->vcpus[i];
218 if (!vcpu) 212 if (!vcpu)
@@ -221,23 +215,9 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
221 continue; 215 continue;
222 cpu = vcpu->cpu; 216 cpu = vcpu->cpu;
223 if (cpu != -1 && cpu != raw_smp_processor_id()) 217 if (cpu != -1 && cpu != raw_smp_processor_id())
224 if (!cpu_isset(cpu, cpus)) { 218 cpu_set(cpu, cpus);
225 cpu_set(cpu, cpus);
226 ++needed;
227 }
228 }
229
230 /*
231 * We really want smp_call_function_mask() here. But that's not
232 * available, so ipi all cpus in parallel and wait for them
233 * to complete.
234 */
235 for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus))
236 smp_call_function_single(cpu, ack_flush, &completed, 1, 0);
237 while (atomic_read(&completed) != needed) {
238 cpu_relax();
239 barrier();
240 } 219 }
220 smp_call_function_mask(cpus, ack_flush, NULL, 1);
241} 221}
242 222
243int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 223int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
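The rewrite of kvm_flush_remote_tlbs() leans on smp_call_function_mask() with wait = 1, which both sends the IPIs and spins until every target CPU has run the handler; the hand-rolled completion counter disappears and ack_flush() can be empty, since the IPI only exists to kick each vCPU out of guest mode. The pattern in isolation (illustrative names):

/* Sketch: with wait == 1 the call below returns only once each CPU in
 * "cpus" has executed the (empty) handler, which is all this path
 * needs. */
static void noop_ipi(void *unused)
{
}

static void kick_and_wait(cpumask_t cpus)
{
	smp_call_function_mask(cpus, noop_ipi, NULL, 1);
}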
@@ -2054,12 +2034,21 @@ again:
2054 2034
2055 kvm_x86_ops->run(vcpu, kvm_run); 2035 kvm_x86_ops->run(vcpu, kvm_run);
2056 2036
2057 kvm_guest_exit();
2058 vcpu->guest_mode = 0; 2037 vcpu->guest_mode = 0;
2059 local_irq_enable(); 2038 local_irq_enable();
2060 2039
2061 ++vcpu->stat.exits; 2040 ++vcpu->stat.exits;
2062 2041
2042 /*
2043 * We must have an instruction between local_irq_enable() and
2044 * kvm_guest_exit(), so the timer interrupt isn't delayed by
2045 * the interrupt shadow. The stat.exits increment will do nicely.
2046 * But we need to prevent reordering, hence this barrier():
2047 */
2048 barrier();
2049
2050 kvm_guest_exit();
2051
2063 preempt_enable(); 2052 preempt_enable();
2064 2053
2065 /* 2054 /*
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index a190587cf6..238fcad3ce 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -494,12 +494,19 @@ static void apic_send_ipi(struct kvm_lapic *apic)
494 494
495static u32 apic_get_tmcct(struct kvm_lapic *apic) 495static u32 apic_get_tmcct(struct kvm_lapic *apic)
496{ 496{
497 u32 counter_passed; 497 u64 counter_passed;
498 ktime_t passed, now = apic->timer.dev.base->get_time(); 498 ktime_t passed, now;
499 u32 tmcct = apic_get_reg(apic, APIC_TMICT); 499 u32 tmcct;
500 500
501 ASSERT(apic != NULL); 501 ASSERT(apic != NULL);
502 502
503 now = apic->timer.dev.base->get_time();
504 tmcct = apic_get_reg(apic, APIC_TMICT);
505
506 /* if initial count is 0, current count should also be 0 */
507 if (tmcct == 0)
508 return 0;
509
503 if (unlikely(ktime_to_ns(now) <= 510 if (unlikely(ktime_to_ns(now) <=
504 ktime_to_ns(apic->timer.last_update))) { 511 ktime_to_ns(apic->timer.last_update))) {
505 /* Wrap around */ 512 /* Wrap around */
@@ -514,15 +521,24 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
514 521
515 counter_passed = div64_64(ktime_to_ns(passed), 522 counter_passed = div64_64(ktime_to_ns(passed),
516 (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); 523 (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
517 tmcct -= counter_passed;
518 524
519 if (tmcct <= 0) { 525 if (counter_passed > tmcct) {
520 if (unlikely(!apic_lvtt_period(apic))) 526 if (unlikely(!apic_lvtt_period(apic))) {
527 /* one-shot timers stick at 0 until reset */
521 tmcct = 0; 528 tmcct = 0;
522 else 529 } else {
523 do { 530 /*
524 tmcct += apic_get_reg(apic, APIC_TMICT); 531 * periodic timers reset to APIC_TMICT when they
525 } while (tmcct <= 0); 532 * hit 0. The while loop simulates this happening N
533 * times. (counter_passed %= tmcct) would also work,
534 * but might be slower or not work on 32-bit??
535 */
536 while (counter_passed > tmcct)
537 counter_passed -= tmcct;
538 tmcct -= counter_passed;
539 }
540 } else {
541 tmcct -= counter_passed;
526 } 542 }
527 543
528 return tmcct; 544 return tmcct;
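A worked example of the periodic branch above: with APIC_TMICT = 100 and counter_passed = 250 bus ticks, the counter has wrapped twice, so the loop reduces counter_passed to 50 and the current count comes out as 100 - 50 = 50, exactly what counter_passed % tmcct would give:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t counter_passed = 250; /* bus cycles since last update */
	uint32_t tmcct = 100;          /* initial count (APIC_TMICT) */

	while (counter_passed > tmcct)
		counter_passed -= tmcct;
	tmcct -= counter_passed;

	printf("tmcct = %u\n", tmcct); /* prints: tmcct = 50 */
	return 0;
}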
@@ -853,7 +869,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
853 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 869 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
854 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 870 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
855 } 871 }
856 apic->timer.divide_count = 0; 872 update_divide_count(apic);
857 atomic_set(&apic->timer.pending, 0); 873 atomic_set(&apic->timer.pending, 0);
858 if (vcpu->vcpu_id == 0) 874 if (vcpu->vcpu_id == 0)
859 vcpu->apic_base |= MSR_IA32_APICBASE_BSP; 875 vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 6d84d30f5e..feb5ac986c 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1049,6 +1049,7 @@ int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
1049 destroy_kvm_mmu(vcpu); 1049 destroy_kvm_mmu(vcpu);
1050 return init_kvm_mmu(vcpu); 1050 return init_kvm_mmu(vcpu);
1051} 1051}
1052EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
1052 1053
1053int kvm_mmu_load(struct kvm_vcpu *vcpu) 1054int kvm_mmu_load(struct kvm_vcpu *vcpu)
1054{ 1055{
@@ -1088,7 +1089,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
1088 mmu_page_remove_parent_pte(child, spte); 1089 mmu_page_remove_parent_pte(child, spte);
1089 } 1090 }
1090 } 1091 }
1091 *spte = 0; 1092 set_shadow_pte(spte, 0);
1092 kvm_flush_remote_tlbs(vcpu->kvm); 1093 kvm_flush_remote_tlbs(vcpu->kvm);
1093} 1094}
1094 1095
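The *spte = 0 to set_shadow_pte() change matters on 32-bit hosts with PAE paging, where a shadow PTE is 64 bits wide and a plain store compiles into two 32-bit writes that the hardware page walker could observe half-done. A rough model of the requirement; the kernel's own helper is assumed to use an atomic 64-bit store along the lines of set_64bit():

#include <stdint.h>

/* Illustrative only: the store must be one 64-bit transaction,
 * never two 32-bit halves. */
static void shadow_pte_clear(volatile uint64_t *sptep)
{
	__atomic_store_n(sptep, 0, __ATOMIC_RELAXED);
}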
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 4f115a8e45..bb56ae3f89 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -523,6 +523,8 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
523 523
524static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 524static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
525{ 525{
526 if (vcpu->rmode.active)
527 rflags |= IOPL_MASK | X86_EFLAGS_VM;
526 vmcs_writel(GUEST_RFLAGS, rflags); 528 vmcs_writel(GUEST_RFLAGS, rflags);
527} 529}
528 530
@@ -1128,6 +1130,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1128 fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); 1130 fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs);
1129 fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); 1131 fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs);
1130 1132
1133 kvm_mmu_reset_context(vcpu);
1131 init_rmode_tss(vcpu->kvm); 1134 init_rmode_tss(vcpu->kvm);
1132} 1135}
1133 1136
@@ -1760,10 +1763,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1760 set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); 1763 set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
1761 } 1764 }
1762 1765
1763 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ 1766 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
1764 asm ("int $2"); 1767 return 1; /* already handled by vmx_vcpu_run() */
1765 return 1;
1766 }
1767 1768
1768 if (is_no_device(intr_info)) { 1769 if (is_no_device(intr_info)) {
1769 vmx_fpu_activate(vcpu); 1770 vmx_fpu_activate(vcpu);
@@ -2196,6 +2197,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
2196static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2197static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2197{ 2198{
2198 struct vcpu_vmx *vmx = to_vmx(vcpu); 2199 struct vcpu_vmx *vmx = to_vmx(vcpu);
2200 u32 intr_info;
2199 2201
2200 /* 2202 /*
2201 * Loading guest fpu may have cleared host cr0.ts 2203 * Loading guest fpu may have cleared host cr0.ts
@@ -2322,6 +2324,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2322 2324
2323 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 2325 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
2324 vmx->launched = 1; 2326 vmx->launched = 1;
2327
2328 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
2329
2330 /* We need to handle NMIs before interrupts are enabled */
2331 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
2332 asm("int $2");
2325} 2333}
2326 2334
2327static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, 2335static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 9737c3b2f4..a6ace302e0 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -212,7 +212,8 @@ static u16 twobyte_table[256] = {
212 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, 212 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
213 DstReg | SrcMem16 | ModRM | Mov, 213 DstReg | SrcMem16 | ModRM | Mov,
214 /* 0xC0 - 0xCF */ 214 /* 0xC0 - 0xCF */
215 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 0, 215 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
216 0, 0, 0, 0, 0, 0, 0, 0,
216 /* 0xD0 - 0xDF */ 217 /* 0xD0 - 0xDF */
217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 218 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
218 /* 0xE0 - 0xEF */ 219 /* 0xE0 - 0xEF */
@@ -596,11 +597,10 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
596 case 0xf0: /* LOCK */ 597 case 0xf0: /* LOCK */
597 lock_prefix = 1; 598 lock_prefix = 1;
598 break; 599 break;
600 case 0xf2: /* REPNE/REPNZ */
599 case 0xf3: /* REP/REPE/REPZ */ 601 case 0xf3: /* REP/REPE/REPZ */
600 rep_prefix = 1; 602 rep_prefix = 1;
601 break; 603 break;
602 case 0xf2: /* REPNE/REPNZ */
603 break;
604 default: 604 default:
605 goto done_prefixes; 605 goto done_prefixes;
606 } 606 }
@@ -825,6 +825,14 @@ done_prefixes:
825 if (twobyte && b == 0x01 && modrm_reg == 7) 825 if (twobyte && b == 0x01 && modrm_reg == 7)
826 break; 826 break;
827 srcmem_common: 827 srcmem_common:
828 /*
829 * For instructions with a ModR/M byte, switch to register
830 * access if Mod = 3.
831 */
832 if ((d & ModRM) && modrm_mod == 3) {
833 src.type = OP_REG;
834 break;
835 }
828 src.type = OP_MEM; 836 src.type = OP_MEM;
829 src.ptr = (unsigned long *)cr2; 837 src.ptr = (unsigned long *)cr2;
830 src.val = 0; 838 src.val = 0;
@@ -893,6 +901,14 @@ done_prefixes:
893 dst.ptr = (unsigned long *)cr2; 901 dst.ptr = (unsigned long *)cr2;
894 dst.bytes = (d & ByteOp) ? 1 : op_bytes; 902 dst.bytes = (d & ByteOp) ? 1 : op_bytes;
895 dst.val = 0; 903 dst.val = 0;
904 /*
905 * For instructions with a ModR/M byte, switch to register
906 * access if Mod = 3.
907 */
908 if ((d & ModRM) && modrm_mod == 3) {
909 dst.type = OP_REG;
910 break;
911 }
896 if (d & BitOp) { 912 if (d & BitOp) {
897 unsigned long mask = ~(dst.bytes * 8 - 1); 913 unsigned long mask = ~(dst.bytes * 8 - 1);
898 914
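Both "Mod = 3" checks decode the same field: a ModR/M byte splits into mod (bits 7:6), reg (bits 5:3) and rm (bits 2:0), and mod == 3 means the rm operand names a register rather than memory. A tiny decode example:

#include <stdio.h>

int main(void)
{
	unsigned char modrm = 0xc8; /* binary 11 001 000 */

	/* prints: mod=3 reg=1 rm=0, i.e. a register-direct operand */
	printf("mod=%u reg=%u rm=%u\n",
	       modrm >> 6, (modrm >> 3) & 7, modrm & 7);
	return 0;
}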
@@ -1083,31 +1099,6 @@ push:
1083 case 0xd2 ... 0xd3: /* Grp2 */ 1099 case 0xd2 ... 0xd3: /* Grp2 */
1084 src.val = _regs[VCPU_REGS_RCX]; 1100 src.val = _regs[VCPU_REGS_RCX];
1085 goto grp2; 1101 goto grp2;
1086 case 0xe8: /* call (near) */ {
1087 long int rel;
1088 switch (op_bytes) {
1089 case 2:
1090 rel = insn_fetch(s16, 2, _eip);
1091 break;
1092 case 4:
1093 rel = insn_fetch(s32, 4, _eip);
1094 break;
1095 case 8:
1096 rel = insn_fetch(s64, 8, _eip);
1097 break;
1098 default:
1099 DPRINTF("Call: Invalid op_bytes\n");
1100 goto cannot_emulate;
1101 }
1102 src.val = (unsigned long) _eip;
1103 JMP_REL(rel);
1104 goto push;
1105 }
1106 case 0xe9: /* jmp rel */
1107 case 0xeb: /* jmp rel short */
1108 JMP_REL(src.val);
1109 no_wb = 1; /* Disable writeback. */
1110 break;
1111 case 0xf6 ... 0xf7: /* Grp3 */ 1102 case 0xf6 ... 0xf7: /* Grp3 */
1112 switch (modrm_reg) { 1103 switch (modrm_reg) {
1113 case 0 ... 1: /* test */ 1104 case 0 ... 1: /* test */
@@ -1350,6 +1341,32 @@ special_insn:
1350 case 0xae ... 0xaf: /* scas */ 1341 case 0xae ... 0xaf: /* scas */
1351 DPRINTF("Urk! I don't handle SCAS.\n"); 1342 DPRINTF("Urk! I don't handle SCAS.\n");
1352 goto cannot_emulate; 1343 goto cannot_emulate;
1344 case 0xe8: /* call (near) */ {
1345 long int rel;
1346 switch (op_bytes) {
1347 case 2:
1348 rel = insn_fetch(s16, 2, _eip);
1349 break;
1350 case 4:
1351 rel = insn_fetch(s32, 4, _eip);
1352 break;
1353 case 8:
1354 rel = insn_fetch(s64, 8, _eip);
1355 break;
1356 default:
1357 DPRINTF("Call: Invalid op_bytes\n");
1358 goto cannot_emulate;
1359 }
1360 src.val = (unsigned long) _eip;
1361 JMP_REL(rel);
1362 goto push;
1363 }
1364 case 0xe9: /* jmp rel */
1365 case 0xeb: /* jmp rel short */
1366 JMP_REL(src.val);
1367 no_wb = 1; /* Disable writeback. */
1368 break;
1369
1353 1370
1354 } 1371 }
1355 goto writeback; 1372 goto writeback;
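The relocated near-call case computes two values from the already-advanced instruction pointer: the return address it pushes is the EIP just past the call instruction, and the target is that EIP plus the fetched signed displacement. In miniature:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t eip = 0x1005;    /* EIP after fetching e8 + rel32 */
	int32_t rel = -0x20;      /* signed displacement from the insn */
	uint32_t ret_addr = eip;  /* what "goto push" pushes */

	eip += rel;               /* what JMP_REL() does */
	printf("push %#x, jump to %#x\n", ret_addr, eip);
	return 0;
}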
@@ -1501,6 +1518,10 @@ twobyte_insn:
1501 dst.bytes = op_bytes; 1518 dst.bytes = op_bytes;
1502 dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val; 1519 dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val;
1503 break; 1520 break;
1521 case 0xc3: /* movnti */
1522 dst.bytes = op_bytes;
1523 dst.val = (op_bytes == 4) ? (u32) src.val : (u64) src.val;
1524 break;
1504 } 1525 }
1505 goto writeback; 1526 goto writeback;
1506 1527
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index 41e2250613..7eb9ecff8f 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,7 +1,6 @@
1config LGUEST 1config LGUEST
2 tristate "Linux hypervisor example code" 2 tristate "Linux hypervisor example code"
3 depends on X86 && PARAVIRT && EXPERIMENTAL && !X86_PAE && FUTEX 3 depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX && !(X86_VISWS || X86_VOYAGER)
4 select LGUEST_GUEST
5 select HVC_DRIVER 4 select HVC_DRIVER
6 ---help--- 5 ---help---
7 This is a very simple module which allows you to run 6 This is a very simple module which allows you to run
@@ -18,13 +17,3 @@ config LGUEST_GUEST
18 The guest needs code built-in, even if the host has lguest 17 The guest needs code built-in, even if the host has lguest
19 support as a module. The drivers are tiny, so we build them 18 support as a module. The drivers are tiny, so we build them
20 in too. 19 in too.
21
22config LGUEST_NET
23 tristate
24 default y
25 depends on LGUEST_GUEST && NET
26
27config LGUEST_BLOCK
28 tristate
29 default y
30 depends on LGUEST_GUEST && BLOCK
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile
index e5047471c3..5e8272d296 100644
--- a/drivers/lguest/Makefile
+++ b/drivers/lguest/Makefile
@@ -1,10 +1,12 @@
1# Guest requires the paravirt_ops replacement and the bus driver. 1# Guest requires the device configuration and probing code.
2obj-$(CONFIG_LGUEST_GUEST) += lguest.o lguest_asm.o lguest_bus.o 2obj-$(CONFIG_LGUEST_GUEST) += lguest_device.o
3 3
4# Host requires the other files, which can be a module. 4# Host requires the other files, which can be a module.
5obj-$(CONFIG_LGUEST) += lg.o 5obj-$(CONFIG_LGUEST) += lg.o
6lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \ 6lg-y = core.o hypercalls.o page_tables.o interrupts_and_traps.o \
7 segments.o io.o lguest_user.o switcher.o 7 segments.o lguest_user.o
8
9lg-$(CONFIG_X86_32) += x86/switcher_32.o x86/core.o
8 10
9Preparation Preparation!: PREFIX=P 11Preparation Preparation!: PREFIX=P
10Guest: PREFIX=G 12Guest: PREFIX=G
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index a0788c12b3..35d19ae58d 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -11,58 +11,20 @@
11#include <linux/vmalloc.h> 11#include <linux/vmalloc.h>
12#include <linux/cpu.h> 12#include <linux/cpu.h>
13#include <linux/freezer.h> 13#include <linux/freezer.h>
14#include <linux/highmem.h>
14#include <asm/paravirt.h> 15#include <asm/paravirt.h>
15#include <asm/desc.h>
16#include <asm/pgtable.h> 16#include <asm/pgtable.h>
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include <asm/poll.h> 18#include <asm/poll.h>
19#include <asm/highmem.h>
20#include <asm/asm-offsets.h> 19#include <asm/asm-offsets.h>
21#include <asm/i387.h>
22#include "lg.h" 20#include "lg.h"
23 21
24/* Found in switcher.S */
25extern char start_switcher_text[], end_switcher_text[], switch_to_guest[];
26extern unsigned long default_idt_entries[];
27
28/* Every guest maps the core switcher code. */
29#define SHARED_SWITCHER_PAGES \
30 DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE)
31/* Pages for switcher itself, then two pages per cpu */
32#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS)
33
34/* We map at -4M for ease of mapping into the guest (one PTE page). */
35#define SWITCHER_ADDR 0xFFC00000
36 22
37static struct vm_struct *switcher_vma; 23static struct vm_struct *switcher_vma;
38static struct page **switcher_page; 24static struct page **switcher_page;
39 25
40static int cpu_had_pge;
41static struct {
42 unsigned long offset;
43 unsigned short segment;
44} lguest_entry;
45
46/* This One Big lock protects all inter-guest data structures. */ 26/* This One Big lock protects all inter-guest data structures. */
47DEFINE_MUTEX(lguest_lock); 27DEFINE_MUTEX(lguest_lock);
48static DEFINE_PER_CPU(struct lguest *, last_guest);
49
50/* FIXME: Make dynamic. */
51#define MAX_LGUEST_GUESTS 16
52struct lguest lguests[MAX_LGUEST_GUESTS];
53
54/* Offset from where switcher.S was compiled to where we've copied it */
55static unsigned long switcher_offset(void)
56{
57 return SWITCHER_ADDR - (unsigned long)start_switcher_text;
58}
59
60/* This cpu's struct lguest_pages. */
61static struct lguest_pages *lguest_pages(unsigned int cpu)
62{
63 return &(((struct lguest_pages *)
64 (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
65}
66 28
67/*H:010 We need to set up the Switcher at a high virtual address. Remember the 29/*H:010 We need to set up the Switcher at a high virtual address. Remember the
68 * Switcher is a few hundred bytes of assembler code which actually changes the 30 * Switcher is a few hundred bytes of assembler code which actually changes the
@@ -73,9 +35,7 @@ static struct lguest_pages *lguest_pages(unsigned int cpu)
73 * Host since it will be running as the switchover occurs. 35 * Host since it will be running as the switchover occurs.
74 * 36 *
75 * Trying to map memory at a particular address is an unusual thing to do, so 37 * Trying to map memory at a particular address is an unusual thing to do, so
76 * it's not a simple one-liner. We also set up the per-cpu parts of the 38 * it's not a simple one-liner. */
77 * Switcher here.
78 */
79static __init int map_switcher(void) 39static __init int map_switcher(void)
80{ 40{
81 int i, err; 41 int i, err;
@@ -132,90 +92,11 @@ static __init int map_switcher(void)
132 goto free_vma; 92 goto free_vma;
133 } 93 }
134 94
135 /* Now the switcher is mapped at the right address, we can't fail! 95 /* Now the Switcher is mapped at the right address, we can't fail!
136 * Copy in the compiled-in Switcher code (from switcher.S). */ 96 * Copy in the compiled-in Switcher code (from <arch>_switcher.S). */
137 memcpy(switcher_vma->addr, start_switcher_text, 97 memcpy(switcher_vma->addr, start_switcher_text,
138 end_switcher_text - start_switcher_text); 98 end_switcher_text - start_switcher_text);
139 99
140 /* Most of the switcher.S doesn't care that it's been moved; on Intel,
141 * jumps are relative, and it doesn't access any references to external
142 * code or data.
143 *
144 * The only exception is the interrupt handlers in switcher.S: their
145 * addresses are placed in a table (default_idt_entries), so we need to
146 * update the table with the new addresses. switcher_offset() is a
147 * convenience function which returns the distance between the builtin
148 * switcher code and the high-mapped copy we just made. */
149 for (i = 0; i < IDT_ENTRIES; i++)
150 default_idt_entries[i] += switcher_offset();
151
152 /*
153 * Set up the Switcher's per-cpu areas.
154 *
155 * Each CPU gets two pages of its own within the high-mapped region
156 * (aka. "struct lguest_pages"). Much of this can be initialized now,
157 * but some depends on what Guest we are running (which is set up in
158 * copy_in_guest_info()).
159 */
160 for_each_possible_cpu(i) {
161 /* lguest_pages() returns this CPU's two pages. */
162 struct lguest_pages *pages = lguest_pages(i);
163 /* This is a convenience pointer to make the code fit one
164 * statement to a line. */
165 struct lguest_ro_state *state = &pages->state;
166
167 /* The Global Descriptor Table: the Host has a different one
168 * for each CPU. We keep a descriptor for the GDT which says
169 * where it is and how big it is (the size is actually the last
170 * byte, not the size, hence the "-1"). */
171 state->host_gdt_desc.size = GDT_SIZE-1;
172 state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
173
174 /* All CPUs on the Host use the same Interrupt Descriptor
175 * Table, so we just use store_idt(), which gets this CPU's IDT
176 * descriptor. */
177 store_idt(&state->host_idt_desc);
178
179 /* The descriptors for the Guest's GDT and IDT can be filled
180 * out now, too. We copy the GDT & IDT into ->guest_gdt and
181 * ->guest_idt before actually running the Guest. */
182 state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
183 state->guest_idt_desc.address = (long)&state->guest_idt;
184 state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
185 state->guest_gdt_desc.address = (long)&state->guest_gdt;
186
187 /* We know where we want the stack to be when the Guest enters
188 * the switcher: in pages->regs. The stack grows upwards, so
189 * we start it at the end of that structure. */
190 state->guest_tss.esp0 = (long)(&pages->regs + 1);
191 /* And this is the GDT entry to use for the stack: we keep a
192 * couple of special LGUEST entries. */
193 state->guest_tss.ss0 = LGUEST_DS;
194
195 /* x86 can have a finegrained bitmap which indicates what I/O
196 * ports the process can use. We set it to the end of our
197 * structure, meaning "none". */
198 state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);
199
200 /* Some GDT entries are the same across all Guests, so we can
201 * set them up now. */
202 setup_default_gdt_entries(state);
203 /* Most IDT entries are the same for all Guests, too.*/
204 setup_default_idt_entries(state, default_idt_entries);
205
206 /* The Host needs to be able to use the LGUEST segments on this
207 * CPU, too, so put them in the Host GDT. */
208 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
209 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
210 }
211
212 /* In the Switcher, we want the %cs segment register to use the
213 * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
214 * it will be undisturbed when we switch. To change %cs and jump we
215 * need this structure to feed to Intel's "lcall" instruction. */
216 lguest_entry.offset = (long)switch_to_guest + switcher_offset();
217 lguest_entry.segment = LGUEST_CS;
218
219 printk(KERN_INFO "lguest: mapped switcher at %p\n", 100 printk(KERN_INFO "lguest: mapped switcher at %p\n",
220 switcher_vma->addr); 101 switcher_vma->addr);
221 /* And we succeeded... */ 102 /* And we succeeded... */
@@ -247,86 +128,12 @@ static void unmap_switcher(void)
247 __free_pages(switcher_page[i], 0); 128 __free_pages(switcher_page[i], 0);
248} 129}
249 130
250/*H:130 Our Guest is usually so well behaved; it never tries to do things it
251 * isn't allowed to. Unfortunately, Linux's paravirtual infrastructure isn't
252 * quite complete, because it doesn't contain replacements for the Intel I/O
253 * instructions. As a result, the Guest sometimes fumbles across one during
254 * the boot process as it probes for various things which are usually attached
255 * to a PC.
256 *
257 * When the Guest uses one of these instructions, we get trap #13 (General
258 * Protection Fault) and come here. We see if it's one of those troublesome
259 * instructions and skip over it. We return true if we did. */
260static int emulate_insn(struct lguest *lg)
261{
262 u8 insn;
263 unsigned int insnlen = 0, in = 0, shift = 0;
264 /* The eip contains the *virtual* address of the Guest's instruction:
265 * guest_pa just subtracts the Guest's page_offset. */
266 unsigned long physaddr = guest_pa(lg, lg->regs->eip);
267
268 /* The guest_pa() function only works for Guest kernel addresses, but
269 * that's all we're trying to do anyway. */
270 if (lg->regs->eip < lg->page_offset)
271 return 0;
272
273 /* Decoding x86 instructions is icky. */
274 lgread(lg, &insn, physaddr, 1);
275
276 /* 0x66 is an "operand prefix". It means it's using the upper 16 bits
277 of the eax register. */
278 if (insn == 0x66) {
279 shift = 16;
280 /* The instruction is 1 byte so far, read the next byte. */
281 insnlen = 1;
282 lgread(lg, &insn, physaddr + insnlen, 1);
283 }
284
285 /* We can ignore the lower bit for the moment and decode the 4 opcodes
286 * we need to emulate. */
287 switch (insn & 0xFE) {
288 case 0xE4: /* in <next byte>,%al */
289 insnlen += 2;
290 in = 1;
291 break;
292 case 0xEC: /* in (%dx),%al */
293 insnlen += 1;
294 in = 1;
295 break;
296 case 0xE6: /* out %al,<next byte> */
297 insnlen += 2;
298 break;
299 case 0xEE: /* out %al,(%dx) */
300 insnlen += 1;
301 break;
302 default:
303 /* OK, we don't know what this is, can't emulate. */
304 return 0;
305 }
306
307 /* If it was an "IN" instruction, they expect the result to be read
308 * into %eax, so we change %eax. We always return all-ones, which
309 * traditionally means "there's nothing there". */
310 if (in) {
311 /* Lower bit tells us whether it's a 16 or 32 bit access */
312 if (insn & 0x1)
313 lg->regs->eax = 0xFFFFFFFF;
314 else
315 lg->regs->eax |= (0xFFFF << shift);
316 }
317 /* Finally, we've "done" the instruction, so move past it. */
318 lg->regs->eip += insnlen;
319 /* Success! */
320 return 1;
321}
322/*:*/
323
324/*L:305 131/*L:305
325 * Dealing With Guest Memory. 132 * Dealing With Guest Memory.
326 * 133 *
327 * When the Guest gives us (what it thinks is) a physical address, we can use 134 * When the Guest gives us (what it thinks is) a physical address, we can use
328 * the normal copy_from_user() & copy_to_user() on that address: remember, 135 * the normal copy_from_user() & copy_to_user() on the corresponding place in
329 * Guest physical == Launcher virtual. 136 * the memory region allocated by the Launcher.
330 * 137 *
331 * But we can't trust the Guest: it might be trying to access the Launcher 138 * But we can't trust the Guest: it might be trying to access the Launcher
332 * code. We have to check that the range is below the pfn_limit the Launcher 139 * code. We have to check that the range is below the pfn_limit the Launcher
@@ -338,148 +145,27 @@ int lguest_address_ok(const struct lguest *lg,
338 return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); 145 return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr);
339} 146}
340 147
341/* This is a convenient routine to get a 32-bit value from the Guest (a very 148/* This routine copies memory from the Guest. Here we can see how useful the
342 * common operation). Here we can see how useful the kill_lguest() routine we 149 * kill_lguest() routine we met in the Launcher can be: we return a random
343 * met in the Launcher can be: we return a random value (0) instead of needing 150 * value (all zeroes) instead of needing to return an error. */
344 * to return an error. */ 151void __lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
345u32 lgread_u32(struct lguest *lg, unsigned long addr)
346{
347 u32 val = 0;
348
349 /* Don't let them access lguest binary. */
350 if (!lguest_address_ok(lg, addr, sizeof(val))
351 || get_user(val, (u32 __user *)addr) != 0)
352 kill_guest(lg, "bad read address %#lx", addr);
353 return val;
354}
355
356/* Same thing for writing a value. */
357void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val)
358{
359 if (!lguest_address_ok(lg, addr, sizeof(val))
360 || put_user(val, (u32 __user *)addr) != 0)
361 kill_guest(lg, "bad write address %#lx", addr);
362}
363
364/* This routine is more generic, and copies a range of Guest bytes into a
365 * buffer. If the copy_from_user() fails, we fill the buffer with zeroes, so
366 * the caller doesn't end up using uninitialized kernel memory. */
367void lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
368{ 152{
369 if (!lguest_address_ok(lg, addr, bytes) 153 if (!lguest_address_ok(lg, addr, bytes)
370 || copy_from_user(b, (void __user *)addr, bytes) != 0) { 154 || copy_from_user(b, lg->mem_base + addr, bytes) != 0) {
371 /* copy_from_user should do this, but as we rely on it... */ 155 /* copy_from_user should do this, but as we rely on it... */
372 memset(b, 0, bytes); 156 memset(b, 0, bytes);
373 kill_guest(lg, "bad read address %#lx len %u", addr, bytes); 157 kill_guest(lg, "bad read address %#lx len %u", addr, bytes);
374 } 158 }
375} 159}
376 160
377/* Similarly, our generic routine to copy into a range of Guest bytes. */ 161/* This is the write (copy into guest) version. */
378void lgwrite(struct lguest *lg, unsigned long addr, const void *b, 162void __lgwrite(struct lguest *lg, unsigned long addr, const void *b,
379 unsigned bytes) 163 unsigned bytes)
380{ 164{
381 if (!lguest_address_ok(lg, addr, bytes) 165 if (!lguest_address_ok(lg, addr, bytes)
382 || copy_to_user((void __user *)addr, b, bytes) != 0) 166 || copy_to_user(lg->mem_base + addr, b, bytes) != 0)
383 kill_guest(lg, "bad write address %#lx len %u", addr, bytes); 167 kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
384} 168}
385/* (end of memory access helper routines) :*/
386
387static void set_ts(void)
388{
389 u32 cr0;
390
391 cr0 = read_cr0();
392 if (!(cr0 & 8))
393 write_cr0(cr0|8);
394}
395
396/*S:010
397 * We are getting close to the Switcher.
398 *
399 * Remember that each CPU has two pages which are visible to the Guest when it
400 * runs on that CPU. This has to contain the state for that Guest: we copy the
401 * state in just before we run the Guest.
402 *
403 * Each Guest has "changed" flags which indicate what has changed in the Guest
404 * since it last ran. We saw this set in interrupts_and_traps.c and
405 * segments.c.
406 */
407static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
408{
409 /* Copying all this data can be quite expensive. We usually run the
410 * same Guest we ran last time (and that Guest hasn't run anywhere else
411 * meanwhile). If that's not the case, we pretend everything in the
412 * Guest has changed. */
413 if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {
414 __get_cpu_var(last_guest) = lg;
415 lg->last_pages = pages;
416 lg->changed = CHANGED_ALL;
417 }
418
419 /* These copies are pretty cheap, so we do them unconditionally: */
420 /* Save the current Host top-level page directory. */
421 pages->state.host_cr3 = __pa(current->mm->pgd);
422 /* Set up the Guest's page tables to see this CPU's pages (and no
423 * other CPU's pages). */
424 map_switcher_in_guest(lg, pages);
425 /* Set up the two "TSS" members which tell the CPU what stack to use
426 * for traps which go directly into the Guest (ie. traps at privilege
427 * level 1). */
428 pages->state.guest_tss.esp1 = lg->esp1;
429 pages->state.guest_tss.ss1 = lg->ss1;
430
431 /* Copy direct-to-Guest trap entries. */
432 if (lg->changed & CHANGED_IDT)
433 copy_traps(lg, pages->state.guest_idt, default_idt_entries);
434
435 /* Copy all GDT entries which the Guest can change. */
436 if (lg->changed & CHANGED_GDT)
437 copy_gdt(lg, pages->state.guest_gdt);
438 /* If only the TLS entries have changed, copy them. */
439 else if (lg->changed & CHANGED_GDT_TLS)
440 copy_gdt_tls(lg, pages->state.guest_gdt);
441
442 /* Mark the Guest as unchanged for next time. */
443 lg->changed = 0;
444}
445
446/* Finally: the code to actually call into the Switcher to run the Guest. */
447static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
448{
449 /* This is a dummy value we need for GCC's sake. */
450 unsigned int clobber;
451
452 /* Copy the guest-specific information into this CPU's "struct
453 * lguest_pages". */
454 copy_in_guest_info(lg, pages);
455
456 /* Set the trap number to 256 (impossible value). If we fault while
457 * switching to the Guest (bad segment registers or bug), this will
458 * cause us to abort the Guest. */
459 lg->regs->trapnum = 256;
460
461 /* Now: we push the "eflags" register on the stack, then do an "lcall".
462 * This is how we change from using the kernel code segment to using
463 * the dedicated lguest code segment, as well as jumping into the
464 * Switcher.
465 *
466 * The lcall also pushes the old code segment (KERNEL_CS) onto the
467 * stack, then the address of this call. This stack layout happens to
468 * exactly match the stack of an interrupt... */
469 asm volatile("pushf; lcall *lguest_entry"
470 /* This is how we tell GCC that %eax ("a") and %ebx ("b")
471 * are changed by this routine. The "=" means output. */
472 : "=a"(clobber), "=b"(clobber)
473 /* %eax contains the pages pointer. ("0" refers to the
474 * 0-th argument above, ie "a"). %ebx contains the
475 * physical address of the Guest's top-level page
476 * directory. */
477 : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
478 /* We tell gcc that all these registers could change,
479 * which means we don't have to save and restore them in
480 * the Switcher. */
481 : "memory", "%edx", "%ecx", "%edi", "%esi");
482}
483/*:*/ 169/*:*/
484 170
485/*H:030 Let's jump straight to the main loop which runs the Guest. 171/*H:030 Let's jump straight to the main loop which runs the Guest.
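lguest_address_ok(), kept as context above, guards every one of these copies, and its second clause is the interesting one: on 32-bit lguest a Guest could pass addr = 0xfffffff0 with len = 0x20 so that addr + len wraps to 0x10 and sails under pfn_limit; the (addr + len >= addr) test catches the wrap. Modelled standalone with 32-bit arithmetic:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

static int address_ok(uint32_t pfn_limit, uint32_t addr, uint32_t len)
{
	return (addr + len) / PAGE_SIZE < pfn_limit && (addr + len >= addr);
}

int main(void)
{
	/* prints: 0 1, so the wrapped range is rejected */
	printf("%d %d\n",
	       address_ok(0x8000, 0xfffffff0u, 0x20),
	       address_ok(0x8000, 0x00100000u, 0x20));
	return 0;
}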
@@ -489,22 +175,16 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
489{ 175{
490 /* We stop running once the Guest is dead. */ 176 /* We stop running once the Guest is dead. */
491 while (!lg->dead) { 177 while (!lg->dead) {
492 /* We need to initialize this, otherwise gcc complains. It's 178 /* First we run any hypercalls the Guest wants done. */
493 * not (yet) clever enough to see that it's initialized when we 179 if (lg->hcall)
494 * need it. */ 180 do_hypercalls(lg);
495 unsigned int cr2 = 0; /* Damn gcc */ 181
496 182 /* It's possible the Guest did a NOTIFY hypercall to the
497 /* First we run any hypercalls the Guest wants done: either in
498 * the hypercall ring in "struct lguest_data", or directly by
499 * using int 31 (LGUEST_TRAP_ENTRY). */
500 do_hypercalls(lg);
501 /* It's possible the Guest did a SEND_DMA hypercall to the
502 * Launcher, in which case we return from the read() now. */ 183 * Launcher, in which case we return from the read() now. */
503 if (lg->dma_is_pending) { 184 if (lg->pending_notify) {
504 if (put_user(lg->pending_dma, user) || 185 if (put_user(lg->pending_notify, user))
505 put_user(lg->pending_key, user+1))
506 return -EFAULT; 186 return -EFAULT;
507 return sizeof(unsigned long)*2; 187 return sizeof(lg->pending_notify);
508 } 188 }
509 189
510 /* Check for signals */ 190 /* Check for signals */
@@ -542,144 +222,20 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
542 * the "Do Not Disturb" sign: */ 222 * the "Do Not Disturb" sign: */
543 local_irq_disable(); 223 local_irq_disable();
544 224
545 /* Remember the awfully-named TS bit? If the Guest has asked 225 /* Actually run the Guest until something happens. */
546 * to set it we set it now, so we can trap and pass that trap 226 lguest_arch_run_guest(lg);
547 * to the Guest if it uses the FPU. */
548 if (lg->ts)
549 set_ts();
550
551 /* SYSENTER is an optimized way of doing system calls. We
552 * can't allow it because it always jumps to privilege level 0.
553 * A normal Guest won't try it because we don't advertise it in
554 * CPUID, but a malicious Guest (or malicious Guest userspace
555 * program) could, so we tell the CPU to disable it before
556 * running the Guest. */
557 if (boot_cpu_has(X86_FEATURE_SEP))
558 wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
559
560 /* Now we actually run the Guest. It will pop back out when
561 * something interesting happens, and we can examine its
562 * registers to see what it was doing. */
563 run_guest_once(lg, lguest_pages(raw_smp_processor_id()));
564
565 /* The "regs" pointer contains two extra entries which are not
566 * really registers: a trap number which says what interrupt or
567 * trap made the switcher code come back, and an error code
568 * which some traps set. */
569
570 /* If the Guest page faulted, then the cr2 register will tell
571 * us the bad virtual address. We have to grab this now,
572 * because once we re-enable interrupts an interrupt could
573 * fault and thus overwrite cr2, or we could even move off to a
574 * different CPU. */
575 if (lg->regs->trapnum == 14)
576 cr2 = read_cr2();
577 /* Similarly, if we took a trap because the Guest used the FPU,
578 * we have to restore the FPU it expects to see. */
579 else if (lg->regs->trapnum == 7)
580 math_state_restore();
581
582 /* Restore SYSENTER if it's supposed to be on. */
583 if (boot_cpu_has(X86_FEATURE_SEP))
584 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
585 227
586 /* Now we're ready to be interrupted or moved to other CPUs */ 228 /* Now we're ready to be interrupted or moved to other CPUs */
587 local_irq_enable(); 229 local_irq_enable();
588 230
589 /* OK, so what happened? */ 231 /* Now we deal with whatever happened to the Guest. */
590 switch (lg->regs->trapnum) { 232 lguest_arch_handle_trap(lg);
591 case 13: /* We've intercepted a GPF. */
592 /* Check if this was one of those annoying IN or OUT
593 * instructions which we need to emulate. If so, we
594 * just go back into the Guest after we've done it. */
595 if (lg->regs->errcode == 0) {
596 if (emulate_insn(lg))
597 continue;
598 }
599 break;
600 case 14: /* We've intercepted a page fault. */
601 /* The Guest accessed a virtual address that wasn't
602 * mapped. This happens a lot: we don't actually set
603 * up most of the page tables for the Guest at all when
604 * we start: as it runs it asks for more and more, and
605 * we set them up as required. In this case, we don't
606 * even tell the Guest that the fault happened.
607 *
608 * The errcode tells whether this was a read or a
609 * write, and whether kernel or userspace code. */
610 if (demand_page(lg, cr2, lg->regs->errcode))
611 continue;
612
613 /* OK, it's really not there (or not OK): the Guest
614 * needs to know. We write out the cr2 value so it
615 * knows where the fault occurred.
616 *
617 * Note that if the Guest were really messed up, this
618 * could happen before it's done the INITIALIZE
619 * hypercall, so lg->lguest_data will be NULL, so
620 * &lg->lguest_data->cr2 will be address 8. Writing
621 * into that address won't hurt the Host at all,
622 * though. */
623 if (put_user(cr2, &lg->lguest_data->cr2))
624 kill_guest(lg, "Writing cr2");
625 break;
626 case 7: /* We've intercepted a Device Not Available fault. */
627 /* If the Guest doesn't want to know, we already
628 * restored the Floating Point Unit, so we just
629 * continue without telling it. */
630 if (!lg->ts)
631 continue;
632 break;
633 case 32 ... 255:
634 /* These values mean a real interrupt occurred, in
635 * which case the Host handler has already been run.
636 * We just do a friendly check if another process
637 * should now be run, then fall through to loop
638 * around: */
639 cond_resched();
640 case LGUEST_TRAP_ENTRY: /* Handled at top of loop */
641 continue;
642 }
643
644 /* If we get here, it's a trap the Guest wants to know
645 * about. */
646 if (deliver_trap(lg, lg->regs->trapnum))
647 continue;
648
649 /* If the Guest doesn't have a handler (either it hasn't
650 * registered any yet, or it's one of the faults we don't let
651 * it handle), it dies with a cryptic error message. */
652 kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
653 lg->regs->trapnum, lg->regs->eip,
654 lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode);
655 } 233 }
234
656 /* The Guest is dead => "No such file or directory" */ 235 /* The Guest is dead => "No such file or directory" */
657 return -ENOENT; 236 return -ENOENT;
658} 237}
659 238
660/* Now we can look at each of the routines this calls, in increasing order of
661 * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
662 * deliver_trap() and demand_page(). After all those, we'll be ready to
663 * examine the Switcher, and our philosophical understanding of the Host/Guest
664 * duality will be complete. :*/
665
666int find_free_guest(void)
667{
668 unsigned int i;
669 for (i = 0; i < MAX_LGUEST_GUESTS; i++)
670 if (!lguests[i].tsk)
671 return i;
672 return -1;
673}
674
675static void adjust_pge(void *on)
676{
677 if (on)
678 write_cr4(read_cr4() | X86_CR4_PGE);
679 else
680 write_cr4(read_cr4() & ~X86_CR4_PGE);
681}
682
683/*H:000 239/*H:000
684 * Welcome to the Host! 240 * Welcome to the Host!
685 * 241 *
@@ -701,72 +257,50 @@ static int __init init(void)
701 /* First we put the Switcher up in very high virtual memory. */ 257 /* First we put the Switcher up in very high virtual memory. */
702 err = map_switcher(); 258 err = map_switcher();
703 if (err) 259 if (err)
704 return err; 260 goto out;
705 261
706 /* Now we set up the pagetable implementation for the Guests. */ 262 /* Now we set up the pagetable implementation for the Guests. */
707 err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES); 263 err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES);
708 if (err) { 264 if (err)
709 unmap_switcher(); 265 goto unmap;
710 return err;
711 }
712 266
713 /* The I/O subsystem needs some things initialized. */ 267 /* We might need to reserve an interrupt vector. */
714 lguest_io_init(); 268 err = init_interrupts();
269 if (err)
270 goto free_pgtables;
715 271
716 /* /dev/lguest needs to be registered. */ 272 /* /dev/lguest needs to be registered. */
717 err = lguest_device_init(); 273 err = lguest_device_init();
718 if (err) { 274 if (err)
719 free_pagetables(); 275 goto free_interrupts;
720 unmap_switcher();
721 return err;
722 }
723 276
724 /* Finally, we need to turn off "Page Global Enable". PGE is an 277 /* Finally we do some architecture-specific setup. */
725 * optimization where page table entries are specially marked to show 278 lguest_arch_host_init();
726 * they never change. The Host kernel marks all the kernel pages this
727 * way because it's always present, even when userspace is running.
728 *
729 * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
730 * switch to the Guest kernel. If you don't disable this on all CPUs,
731 * you'll get really weird bugs that you'll chase for two days.
732 *
733 * I used to turn PGE off every time we switched to the Guest and back
734 on when we return, but that slowed the Switcher down noticeably. */
735
736 /* We don't need the complexity of CPUs coming and going while we're
737 * doing this. */
738 lock_cpu_hotplug();
739 if (cpu_has_pge) { /* We have a broader idea of "global". */
740 /* Remember that this was originally set (for cleanup). */
741 cpu_had_pge = 1;
742 /* adjust_pge is a helper function which sets or unsets the PGE
743 * bit on its CPU, depending on the argument (0 == unset). */
744 on_each_cpu(adjust_pge, (void *)0, 0, 1);
745 /* Turn off the feature in the global feature set. */
746 clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
747 }
748 unlock_cpu_hotplug();
749 279
750 /* All good! */ 280 /* All good! */
751 return 0; 281 return 0;
282
283free_interrupts:
284 free_interrupts();
285free_pgtables:
286 free_pagetables();
287unmap:
288 unmap_switcher();
289out:
290 return err;
752} 291}
753 292
754/* Cleaning up is just the same code, backwards. With a little French. */ 293/* Cleaning up is just the same code, backwards. With a little French. */
755static void __exit fini(void) 294static void __exit fini(void)
756{ 295{
757 lguest_device_remove(); 296 lguest_device_remove();
297 free_interrupts();
758 free_pagetables(); 298 free_pagetables();
759 unmap_switcher(); 299 unmap_switcher();
760 300
761 /* If we had PGE before we started, turn it back on now. */ 301 lguest_arch_host_fini();
762 lock_cpu_hotplug();
763 if (cpu_had_pge) {
764 set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
765 /* adjust_pge's argument "1" means set PGE. */
766 on_each_cpu(adjust_pge, (void *)1, 0, 1);
767 }
768 unlock_cpu_hotplug();
769} 302}
303/*:*/
770 304
771/* The Host side of lguest can be a module. This is a nice way for people to 305/* The Host side of lguest can be a module. This is a nice way for people to
772 * play with it. */ 306 * play with it. */
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index db6caace3b..9d5184c7c1 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -25,17 +25,13 @@
25#include <linux/mm.h> 25#include <linux/mm.h>
26#include <asm/page.h> 26#include <asm/page.h>
27#include <asm/pgtable.h> 27#include <asm/pgtable.h>
28#include <irq_vectors.h>
29#include "lg.h" 28#include "lg.h"
30 29
31/*H:120 This is the core hypercall routine: where the Guest gets what it 30/*H:120 This is the core hypercall routine: where the Guest gets what it wants.
32 * wants. Or gets killed. Or, in the case of LHCALL_CRASH, both. 31 * Or gets killed. Or, in the case of LHCALL_CRASH, both. */
33 * 32static void do_hcall(struct lguest *lg, struct hcall_args *args)
34 * Remember from the Guest: %eax == which call to make, and the arguments are
35 * packed into %edx, %ebx and %ecx if needed. */
36static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
37{ 33{
38 switch (regs->eax) { 34 switch (args->arg0) {
39 case LHCALL_FLUSH_ASYNC: 35 case LHCALL_FLUSH_ASYNC:
40 /* This call does nothing, except by breaking out of the Guest 36 /* This call does nothing, except by breaking out of the Guest
41 * it makes us process all the asynchronous hypercalls. */ 37 * it makes us process all the asynchronous hypercalls. */
@@ -51,7 +47,7 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
51 char msg[128]; 47 char msg[128];
52 /* If the lgread fails, it will call kill_guest() itself; the 48 /* If the lgread fails, it will call kill_guest() itself; the
53 * kill_guest() with the message will be ignored. */ 49 * kill_guest() with the message will be ignored. */
54 lgread(lg, msg, regs->edx, sizeof(msg)); 50 __lgread(lg, msg, args->arg1, sizeof(msg));
55 msg[sizeof(msg)-1] = '\0'; 51 msg[sizeof(msg)-1] = '\0';
56 kill_guest(lg, "CRASH: %s", msg); 52 kill_guest(lg, "CRASH: %s", msg);
57 break; 53 break;
@@ -59,67 +55,49 @@ static void do_hcall(struct lguest *lg, struct lguest_regs *regs)
59 case LHCALL_FLUSH_TLB: 55 case LHCALL_FLUSH_TLB:
60 /* FLUSH_TLB comes in two flavors, depending on the 56 /* FLUSH_TLB comes in two flavors, depending on the
61 * argument: */ 57 * argument: */
62 if (regs->edx) 58 if (args->arg1)
63 guest_pagetable_clear_all(lg); 59 guest_pagetable_clear_all(lg);
64 else 60 else
65 guest_pagetable_flush_user(lg); 61 guest_pagetable_flush_user(lg);
66 break; 62 break;
67 case LHCALL_BIND_DMA:
68 /* BIND_DMA really wants four arguments, but it's the only call
69 * which does. So the Guest packs the number of buffers and
70 * the interrupt number into the final argument, and we decode
71 * it here. This can legitimately fail, since we currently
72 * place a limit on the number of DMA pools a Guest can have.
73 * So we return true or false from this call. */
74 regs->eax = bind_dma(lg, regs->edx, regs->ebx,
75 regs->ecx >> 8, regs->ecx & 0xFF);
76 break;
77 63
78 /* All these calls simply pass the arguments through to the right 64 /* All these calls simply pass the arguments through to the right
79 * routines. */ 65 * routines. */
80 case LHCALL_SEND_DMA:
81 send_dma(lg, regs->edx, regs->ebx);
82 break;
83 case LHCALL_LOAD_GDT:
84 load_guest_gdt(lg, regs->edx, regs->ebx);
85 break;
86 case LHCALL_LOAD_IDT_ENTRY:
87 load_guest_idt_entry(lg, regs->edx, regs->ebx, regs->ecx);
88 break;
89 case LHCALL_NEW_PGTABLE: 66 case LHCALL_NEW_PGTABLE:
90 guest_new_pagetable(lg, regs->edx); 67 guest_new_pagetable(lg, args->arg1);
91 break; 68 break;
92 case LHCALL_SET_STACK: 69 case LHCALL_SET_STACK:
93 guest_set_stack(lg, regs->edx, regs->ebx, regs->ecx); 70 guest_set_stack(lg, args->arg1, args->arg2, args->arg3);
94 break; 71 break;
95 case LHCALL_SET_PTE: 72 case LHCALL_SET_PTE:
96 guest_set_pte(lg, regs->edx, regs->ebx, mkgpte(regs->ecx)); 73 guest_set_pte(lg, args->arg1, args->arg2, __pte(args->arg3));
97 break; 74 break;
98 case LHCALL_SET_PMD: 75 case LHCALL_SET_PMD:
99 guest_set_pmd(lg, regs->edx, regs->ebx); 76 guest_set_pmd(lg, args->arg1, args->arg2);
100 break;
101 case LHCALL_LOAD_TLS:
102 guest_load_tls(lg, regs->edx);
103 break; 77 break;
104 case LHCALL_SET_CLOCKEVENT: 78 case LHCALL_SET_CLOCKEVENT:
105 guest_set_clockevent(lg, regs->edx); 79 guest_set_clockevent(lg, args->arg1);
106 break; 80 break;
107
108 case LHCALL_TS: 81 case LHCALL_TS:
109 /* This sets the TS flag, as we saw used in run_guest(). */ 82 /* This sets the TS flag, as we saw used in run_guest(). */
110 lg->ts = regs->edx; 83 lg->ts = args->arg1;
111 break; 84 break;
112 case LHCALL_HALT: 85 case LHCALL_HALT:
113 /* Similarly, this sets the halted flag for run_guest(). */ 86 /* Similarly, this sets the halted flag for run_guest(). */
114 lg->halted = 1; 87 lg->halted = 1;
115 break; 88 break;
89 case LHCALL_NOTIFY:
90 lg->pending_notify = args->arg1;
91 break;
116 default: 92 default:
117 kill_guest(lg, "Bad hypercall %li\n", regs->eax); 93 if (lguest_arch_do_hcall(lg, args))
94 kill_guest(lg, "Bad hypercall %li\n", args->arg0);
118 } 95 }
119} 96}
97/*:*/
120 98
121/* Asynchronous hypercalls are easy: we just look in the array in the Guest's 99/*H:124 Asynchronous hypercalls are easy: we just look in the array in the
122 * "struct lguest_data" and see if there are any new ones marked "ready". 100 * Guest's "struct lguest_data" to see if any new ones are marked "ready".
123 * 101 *
124 * We are careful to do these in order: obviously we respect the order the 102 * We are careful to do these in order: obviously we respect the order the
125 * Guest put them in the ring, but we also promise the Guest that they will 103 * Guest put them in the ring, but we also promise the Guest that they will
@@ -134,10 +112,9 @@ static void do_async_hcalls(struct lguest *lg)
134 if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st))) 112 if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
135 return; 113 return;
136 114
137
138 /* We process "struct lguest_data"s hcalls[] ring once. */ 115 /* We process "struct lguest_data"s hcalls[] ring once. */
139 for (i = 0; i < ARRAY_SIZE(st); i++) { 116 for (i = 0; i < ARRAY_SIZE(st); i++) {
140 struct lguest_regs regs; 117 struct hcall_args args;
141 /* We remember where we were up to from last time. This makes 118 /* We remember where we were up to from last time. This makes
142 * sure that the hypercalls are done in the order the Guest 119 * sure that the hypercalls are done in the order the Guest
143 * places them in the ring. */ 120 * places them in the ring. */
@@ -152,18 +129,16 @@ static void do_async_hcalls(struct lguest *lg)
152 if (++lg->next_hcall == LHCALL_RING_SIZE) 129 if (++lg->next_hcall == LHCALL_RING_SIZE)
153 lg->next_hcall = 0; 130 lg->next_hcall = 0;
154 131
155 /* We copy the hypercall arguments into a fake register 132 /* Copy the hypercall arguments into a local copy of
156 * structure. This makes life simple for do_hcall(). */ 133 * the hcall_args struct. */
157 if (get_user(regs.eax, &lg->lguest_data->hcalls[n].eax) 134 if (copy_from_user(&args, &lg->lguest_data->hcalls[n],
158 || get_user(regs.edx, &lg->lguest_data->hcalls[n].edx) 135 sizeof(struct hcall_args))) {
159 || get_user(regs.ecx, &lg->lguest_data->hcalls[n].ecx)
160 || get_user(regs.ebx, &lg->lguest_data->hcalls[n].ebx)) {
161 kill_guest(lg, "Fetching async hypercalls"); 136 kill_guest(lg, "Fetching async hypercalls");
162 break; 137 break;
163 } 138 }
164 139
165 /* Do the hypercall, same as a normal one. */ 140 /* Do the hypercall, same as a normal one. */
166 do_hcall(lg, &regs); 141 do_hcall(lg, &args);
167 142
168 /* Mark the hypercall done. */ 143 /* Mark the hypercall done. */
169 if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) { 144 if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
@@ -171,9 +146,9 @@ static void do_async_hcalls(struct lguest *lg)
171 break; 146 break;
172 } 147 }
173 148
174 /* Stop doing hypercalls if we've just done a DMA to the 149 /* Stop doing hypercalls if they want to notify the Launcher:
175 * Launcher: it needs to service this first. */ 150 * it needs to service this first. */
176 if (lg->dma_is_pending) 151 if (lg->pending_notify)
177 break; 152 break;
178 } 153 }
179} 154}
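The ring discipline above is worth restating: the Host resumes from next_hcall, walks at most one full ring in order, stops at the first slot that is not ready, and hands each slot back by writing 0xFF only after the call is done. A self-contained sketch (the ring size and status convention here are illustrative):

#include <stdio.h>

#define RING_SIZE 4
#define SLOT_DONE 0xFF	/* anything else means "ready" in this sketch */

static unsigned char status[RING_SIZE];
static unsigned int next_slot;

static void consume_ring(void (*do_call)(unsigned int))
{
	unsigned int i;

	/* At most one full pass over the ring, in order. */
	for (i = 0; i < RING_SIZE; i++) {
		unsigned int n = next_slot;

		if (status[n] == SLOT_DONE)
			break;		/* next entry not ready yet */
		if (++next_slot == RING_SIZE)
			next_slot = 0;
		do_call(n);
		status[n] = SLOT_DONE;	/* hand the slot back */
	}
}

static void run_call(unsigned int n)
{
	printf("servicing hypercall slot %u\n", n);
}

int main(void)
{
	consume_ring(run_call);	/* all slots start "ready" here */
	return 0;
}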
@@ -182,76 +157,35 @@ static void do_async_hcalls(struct lguest *lg)
182 * Guest makes a hypercall, we end up here to set things up: */ 157 * Guest makes a hypercall, we end up here to set things up: */
183static void initialize(struct lguest *lg) 158static void initialize(struct lguest *lg)
184{ 159{
185 u32 tsc_speed;
186 160
187 /* You can't do anything until you're initialized. The Guest knows the 161 /* You can't do anything until you're initialized. The Guest knows the
188 * rules, so we're unforgiving here. */ 162 * rules, so we're unforgiving here. */
189 if (lg->regs->eax != LHCALL_LGUEST_INIT) { 163 if (lg->hcall->arg0 != LHCALL_LGUEST_INIT) {
190 kill_guest(lg, "hypercall %li before LGUEST_INIT", 164 kill_guest(lg, "hypercall %li before INIT", lg->hcall->arg0);
191 lg->regs->eax);
192 return; 165 return;
193 } 166 }
194 167
195 /* We insist that the Time Stamp Counter exist and doesn't change with 168 if (lguest_arch_init_hypercalls(lg))
196 * cpu frequency. Some devious chip manufacturers decided that TSC
197 * changes could be handled in software. I decided that time going
198 * backwards might be good for benchmarks, but it's bad for users.
199 *
200 * We also insist that the TSC be stable: the kernel detects unreliable
201 * TSCs for its own purposes, and we use that here. */
202 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
203 tsc_speed = tsc_khz;
204 else
205 tsc_speed = 0;
206
207 /* The pointer to the Guest's "struct lguest_data" is the only
208 * argument. */
209 lg->lguest_data = (struct lguest_data __user *)lg->regs->edx;
210 /* If we check the address they gave is OK now, we can simply
211 * copy_to_user/from_user from now on rather than using lgread/lgwrite.
212 * I put this in to show that I'm not immune to writing stupid
213 * optimizations. */
214 if (!lguest_address_ok(lg, lg->regs->edx, sizeof(*lg->lguest_data))) {
215 kill_guest(lg, "bad guest page %p", lg->lguest_data); 169 kill_guest(lg, "bad guest page %p", lg->lguest_data);
216 return; 170
217 }
218 /* The Guest tells us where we're not to deliver interrupts by putting 171 /* The Guest tells us where we're not to deliver interrupts by putting
219 * the range of addresses into "struct lguest_data". */ 172 * the range of addresses into "struct lguest_data". */
220 if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) 173 if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
221 || get_user(lg->noirq_end, &lg->lguest_data->noirq_end) 174 || get_user(lg->noirq_end, &lg->lguest_data->noirq_end))
222 /* We tell the Guest that it can't use the top 4MB of virtual
223 * addresses used by the Switcher. */
224 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
225 || put_user(tsc_speed, &lg->lguest_data->tsc_khz)
226 /* We also give the Guest a unique id, as used in lguest_net.c. */
227 || put_user(lg->guestid, &lg->lguest_data->guestid))
228 kill_guest(lg, "bad guest page %p", lg->lguest_data); 175 kill_guest(lg, "bad guest page %p", lg->lguest_data);
229 176
230 /* We write the current time into the Guest's data page once now. */ 177 /* We write the current time into the Guest's data page once now. */
231 write_timestamp(lg); 178 write_timestamp(lg);
232 179
180 /* page_tables.c will also do some setup. */
181 page_table_guest_data_init(lg);
182
233 /* This is the one case where the above accesses might have been the 183 /* This is the one case where the above accesses might have been the
234 * first write to a Guest page. This may have caused a copy-on-write 184 * first write to a Guest page. This may have caused a copy-on-write
235 * fault, but the Guest might be referring to the old (read-only) 185 * fault, but the Guest might be referring to the old (read-only)
236 * page. */ 186 * page. */
237 guest_pagetable_clear_all(lg); 187 guest_pagetable_clear_all(lg);
238} 188}
239/* Now we've examined the hypercall code; our Guest can make requests. There
240 * is one other way we can do things for the Guest, as we see in
241 * emulate_insn(). */
242
243/*H:110 Tricky point: we mark the hypercall as "done" once we've done it.
244 * Normally we don't need to do this: the Guest will run again and update the
245 * trap number before we come back around the run_guest() loop to
246 * do_hypercalls().
247 *
248 * However, if we are signalled or the Guest sends DMA to the Launcher, that
249 * loop will exit without running the Guest. When it comes back it would try
250 * to re-run the hypercall. */
251static void clear_hcall(struct lguest *lg)
252{
253 lg->regs->trapnum = 255;
254}
255 189
256/*H:100 190/*H:100
257 * Hypercalls 191 * Hypercalls
@@ -261,16 +195,12 @@ static void clear_hcall(struct lguest *lg)
261 */ 195 */
262void do_hypercalls(struct lguest *lg) 196void do_hypercalls(struct lguest *lg)
263{ 197{
264 /* Not initialized yet? */ 198 /* Not initialized yet? This hypercall must do it. */
265 if (unlikely(!lg->lguest_data)) { 199 if (unlikely(!lg->lguest_data)) {
266 /* Did the Guest make a hypercall? We might have come back for 200 /* Set up the "struct lguest_data" */
267 * some other reason (an interrupt, a different trap). */ 201 initialize(lg);
268 if (lg->regs->trapnum == LGUEST_TRAP_ENTRY) { 202 /* Hcall is done. */
269 /* Set up the "struct lguest_data" */ 203 lg->hcall = NULL;
270 initialize(lg);
271 /* The hypercall is done. */
272 clear_hcall(lg);
273 }
274 return; 204 return;
275 } 205 }
276 206
@@ -280,12 +210,21 @@ void do_hypercalls(struct lguest *lg)
280 do_async_hcalls(lg); 210 do_async_hcalls(lg);
281 211
282 /* If we stopped reading the hypercall ring because the Guest did a 212 /* If we stopped reading the hypercall ring because the Guest did a
283 * SEND_DMA to the Launcher, we want to return now. Otherwise if the 213 * NOTIFY to the Launcher, we want to return now. Otherwise we do
284 * Guest asked us to do a hypercall, we do it. */ 214 * the hypercall. */
285 if (!lg->dma_is_pending && lg->regs->trapnum == LGUEST_TRAP_ENTRY) { 215 if (!lg->pending_notify) {
286 do_hcall(lg, lg->regs); 216 do_hcall(lg, lg->hcall);
287 /* The hypercall is done. */ 217 /* Tricky point: we reset the hcall pointer to mark the
288 clear_hcall(lg); 218 * hypercall as "done". We use the hcall pointer rather than
219 * the trap number to indicate a hypercall is pending.
220 * Normally it doesn't matter: the Guest will run again and
221 * update the trap number before we come back here.
222 *
223	 * However, if we are signalled or the Guest sends a NOTIFY to the
224 * Launcher, the run_guest() loop will exit without running the
225 * Guest. When it comes back it would try to re-run the
226 * hypercall. */
227 lg->hcall = NULL;
289 } 228 }
290} 229}
291 230
@@ -295,6 +234,6 @@ void write_timestamp(struct lguest *lg)
295{ 234{
296 struct timespec now; 235 struct timespec now;
297 ktime_get_real_ts(&now); 236 ktime_get_real_ts(&now);
298 if (put_user(now, &lg->lguest_data->time)) 237 if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec)))
299 kill_guest(lg, "Writing timestamp"); 238 kill_guest(lg, "Writing timestamp");
300} 239}
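The move from four get_user() calls to a single copy_from_user() works because the arguments now travel as one flat struct. A sketch of such an argument block, with the register mapping read off the old/new lines above (the real definition lives in the per-architecture headers):

/* Illustrative layout only: one flat struct means one
 * copy_from_user() fetches an entire hypercall. */
struct hcall_args_sketch {
	unsigned long arg0;	/* hypercall number (was regs->eax) */
	unsigned long arg1;	/* was regs->edx */
	unsigned long arg2;	/* was regs->ebx */
	unsigned long arg3;	/* was regs->ecx */
};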
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 39731232d8..82966982cb 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -12,8 +12,14 @@
12 * them first, so we also have a way of "reflecting" them into the Guest as if 12 * them first, so we also have a way of "reflecting" them into the Guest as if
13 * they had been delivered to it directly. :*/ 13 * they had been delivered to it directly. :*/
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15#include <linux/interrupt.h>
16#include <linux/module.h>
15#include "lg.h" 17#include "lg.h"
16 18
19/* Allow Guests to use a non-128 (ie. non-Linux) syscall trap. */
20static unsigned int syscall_vector = SYSCALL_VECTOR;
21module_param(syscall_vector, uint, 0444);
22
17/* The address of the interrupt handler is split into two bits: */ 23/* The address of the interrupt handler is split into two bits: */
18static unsigned long idt_address(u32 lo, u32 hi) 24static unsigned long idt_address(u32 lo, u32 hi)
19{ 25{
@@ -39,7 +45,7 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
39{ 45{
40	/* Stack grows downwards: move the stack, then write the value. */ 46	/* Stack grows downwards: move the stack, then write the value. */
41 *gstack -= 4; 47 *gstack -= 4;
42 lgwrite_u32(lg, *gstack, val); 48 lgwrite(lg, *gstack, u32, val);
43} 49}
44 50
45/*H:210 The set_guest_interrupt() routine actually delivers the interrupt or 51/*H:210 The set_guest_interrupt() routine actually delivers the interrupt or
@@ -56,8 +62,9 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
56 * it). */ 62 * it). */
57static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) 63static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
58{ 64{
59 unsigned long gstack; 65 unsigned long gstack, origstack;
60 u32 eflags, ss, irq_enable; 66 u32 eflags, ss, irq_enable;
67 unsigned long virtstack;
61 68
62 /* There are two cases for interrupts: one where the Guest is already 69 /* There are two cases for interrupts: one where the Guest is already
63 * in the kernel, and a more complex one where the Guest is in 70 * in the kernel, and a more complex one where the Guest is in
@@ -65,8 +72,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
65 if ((lg->regs->ss&0x3) != GUEST_PL) { 72 if ((lg->regs->ss&0x3) != GUEST_PL) {
66 /* The Guest told us their kernel stack with the SET_STACK 73 /* The Guest told us their kernel stack with the SET_STACK
67 * hypercall: both the virtual address and the segment */ 74 * hypercall: both the virtual address and the segment */
68 gstack = guest_pa(lg, lg->esp1); 75 virtstack = lg->esp1;
69 ss = lg->ss1; 76 ss = lg->ss1;
77
78 origstack = gstack = guest_pa(lg, virtstack);
70 /* We push the old stack segment and pointer onto the new 79 /* We push the old stack segment and pointer onto the new
71 * stack: when the Guest does an "iret" back from the interrupt 80 * stack: when the Guest does an "iret" back from the interrupt
72 * handler the CPU will notice they're dropping privilege 81 * handler the CPU will notice they're dropping privilege
@@ -75,8 +84,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
75 push_guest_stack(lg, &gstack, lg->regs->esp); 84 push_guest_stack(lg, &gstack, lg->regs->esp);
76 } else { 85 } else {
77 /* We're staying on the same Guest (kernel) stack. */ 86 /* We're staying on the same Guest (kernel) stack. */
78 gstack = guest_pa(lg, lg->regs->esp); 87 virtstack = lg->regs->esp;
79 ss = lg->regs->ss; 88 ss = lg->regs->ss;
89
90 origstack = gstack = guest_pa(lg, virtstack);
80 } 91 }
81 92
82 /* Remember that we never let the Guest actually disable interrupts, so 93 /* Remember that we never let the Guest actually disable interrupts, so
@@ -102,7 +113,7 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
102 /* Now we've pushed all the old state, we change the stack, the code 113 /* Now we've pushed all the old state, we change the stack, the code
103 * segment and the address to execute. */ 114 * segment and the address to execute. */
104 lg->regs->ss = ss; 115 lg->regs->ss = ss;
105 lg->regs->esp = gstack + lg->page_offset; 116 lg->regs->esp = virtstack + (gstack - origstack);
106 lg->regs->cs = (__KERNEL_CS|GUEST_PL); 117 lg->regs->cs = (__KERNEL_CS|GUEST_PL);
107 lg->regs->eip = idt_address(lo, hi); 118 lg->regs->eip = idt_address(lo, hi);
108 119
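The new esp arithmetic deserves a second look: the pushes moved the physical cursor (gstack) down from its starting point (origstack), and applying that same delta to the Guest-virtual stack pointer gives the new esp without assuming the old fixed page_offset mapping. A minimal sketch with illustrative addresses:

static unsigned long new_guest_esp(unsigned long virtstack,
				   unsigned long origstack,
				   unsigned long gstack)
{
	/* The delta is negative (the stack grows downwards);
	 * unsigned wraparound makes the addition come out right. */
	return virtstack + (gstack - origstack);
}

/* e.g. five 32-bit pushes:
 *   new_guest_esp(0xc0fff000, 0x07fff000, 0x07fff000 - 20)
 * == 0xc0ffefec, twenty bytes below the old virtual stack top. */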
@@ -165,7 +176,7 @@ void maybe_do_interrupt(struct lguest *lg)
165 /* Look at the IDT entry the Guest gave us for this interrupt. The 176 /* Look at the IDT entry the Guest gave us for this interrupt. The
166 * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip 177 * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
167 * over them. */ 178 * over them. */
168 idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq]; 179 idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
169 /* If they don't have a handler (yet?), we just ignore it */ 180 /* If they don't have a handler (yet?), we just ignore it */
170 if (idt_present(idt->a, idt->b)) { 181 if (idt_present(idt->a, idt->b)) {
171 /* OK, mark it no longer pending and deliver it. */ 182 /* OK, mark it no longer pending and deliver it. */
@@ -183,6 +194,47 @@ void maybe_do_interrupt(struct lguest *lg)
183 * timer interrupt. */ 194 * timer interrupt. */
184 write_timestamp(lg); 195 write_timestamp(lg);
185} 196}
197/*:*/
198
199/* Linux uses trap 128 for system calls. Plan 9 uses 64, and Ron Minnich sent
200 * me a patch, so we support that too. It'd be a big step for lguest if half
201 * the Plan 9 user base were to start using it.
202 *
203 * Actually now I think of it, it's possible that Ron *is* half the Plan 9
204 * userbase. Oh well. */
205static bool could_be_syscall(unsigned int num)
206{
207 /* Normal Linux SYSCALL_VECTOR or reserved vector? */
208 return num == SYSCALL_VECTOR || num == syscall_vector;
209}
210
211/* The syscall vector it wants must be unused by the Host. */
212bool check_syscall_vector(struct lguest *lg)
213{
214 u32 vector;
215
216 if (get_user(vector, &lg->lguest_data->syscall_vec))
217 return false;
218
219 return could_be_syscall(vector);
220}
221
222int init_interrupts(void)
223{
224 /* If they want some strange system call vector, reserve it now */
225 if (syscall_vector != SYSCALL_VECTOR
226 && test_and_set_bit(syscall_vector, used_vectors)) {
227 printk("lg: couldn't reserve syscall %u\n", syscall_vector);
228 return -EBUSY;
229 }
230 return 0;
231}
232
233void free_interrupts(void)
234{
235 if (syscall_vector != SYSCALL_VECTOR)
236 clear_bit(syscall_vector, used_vectors);
237}
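test_and_set_bit() returns the bit's previous value, which is what makes the reservation race-free: a nonzero return means another user already owns the vector. A userspace analogue of the pattern, using C11 atomics (illustrative only):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool vector_used[256];

/* Like test_and_set_bit(): succeed only if we flipped 0 -> 1. */
static bool reserve_vector(unsigned int vec)
{
	return !atomic_exchange(&vector_used[vec], true);
}

/* Like clear_bit(): give the vector back. */
static void release_vector(unsigned int vec)
{
	atomic_store(&vector_used[vec], false);
}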
186 238
187/*H:220 Now we've got the routines to deliver interrupts, delivering traps 239/*H:220 Now we've got the routines to deliver interrupts, delivering traps
188 * like page fault is easy. The only trick is that Intel decided that some 240 * like page fault is easy. The only trick is that Intel decided that some
@@ -197,14 +249,14 @@ int deliver_trap(struct lguest *lg, unsigned int num)
197{ 249{
198 /* Trap numbers are always 8 bit, but we set an impossible trap number 250 /* Trap numbers are always 8 bit, but we set an impossible trap number
199 * for traps inside the Switcher, so check that here. */ 251 * for traps inside the Switcher, so check that here. */
200 if (num >= ARRAY_SIZE(lg->idt)) 252 if (num >= ARRAY_SIZE(lg->arch.idt))
201 return 0; 253 return 0;
202 254
203 /* Early on the Guest hasn't set the IDT entries (or maybe it put a 255 /* Early on the Guest hasn't set the IDT entries (or maybe it put a
204 * bogus one in): if we fail here, the Guest will be killed. */ 256 * bogus one in): if we fail here, the Guest will be killed. */
205 if (!idt_present(lg->idt[num].a, lg->idt[num].b)) 257 if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b))
206 return 0; 258 return 0;
207 set_guest_interrupt(lg, lg->idt[num].a, lg->idt[num].b, has_err(num)); 259 set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b, has_err(num));
208 return 1; 260 return 1;
209} 261}
210 262
@@ -218,28 +270,20 @@ int deliver_trap(struct lguest *lg, unsigned int num)
218 * system calls down from 1750ns to 270ns. Plus, if lguest didn't do it, all 270 * system calls down from 1750ns to 270ns. Plus, if lguest didn't do it, all
219 * the other hypervisors would tease it. 271 * the other hypervisors would tease it.
220 * 272 *
221 * This routine determines if a trap can be delivered directly. */ 273 * This routine indicates if a particular trap number could be delivered
222static int direct_trap(const struct lguest *lg, 274 * directly. */
223 const struct desc_struct *trap, 275static int direct_trap(unsigned int num)
224 unsigned int num)
225{ 276{
226 /* Hardware interrupts don't go to the Guest at all (except system 277 /* Hardware interrupts don't go to the Guest at all (except system
227 * call). */ 278 * call). */
228 if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR) 279 if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num))
229 return 0; 280 return 0;
230 281
231 /* The Host needs to see page faults (for shadow paging and to save the 282 /* The Host needs to see page faults (for shadow paging and to save the
232 * fault address), general protection faults (in/out emulation) and 283 * fault address), general protection faults (in/out emulation) and
233 * device not available (TS handling), and of course, the hypercall 284 * device not available (TS handling), and of course, the hypercall
234 * trap. */ 285 * trap. */
235 if (num == 14 || num == 13 || num == 7 || num == LGUEST_TRAP_ENTRY) 286 return num != 14 && num != 13 && num != 7 && num != LGUEST_TRAP_ENTRY;
236 return 0;
237
238 /* Only trap gates (type 15) can go direct to the Guest. Interrupt
239 * gates (type 14) disable interrupts as they are entered, which we
240 * never let the Guest do. Not present entries (type 0x0) also can't
241 * go direct, of course 8) */
242 return idt_type(trap->a, trap->b) == 0xF;
243} 287}
244/*:*/ 288/*:*/
245 289
@@ -348,15 +392,11 @@ void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi)
348 * to copy this again. */ 392 * to copy this again. */
349 lg->changed |= CHANGED_IDT; 393 lg->changed |= CHANGED_IDT;
350 394
351 /* The IDT which we keep in "struct lguest" only contains 32 entries 395 /* Check that the Guest doesn't try to step outside the bounds. */
352 * for the traps and LGUEST_IRQS (32) entries for interrupts. We 396 if (num >= ARRAY_SIZE(lg->arch.idt))
353 * ignore attempts to set handlers for higher interrupt numbers, except 397 kill_guest(lg, "Setting idt entry %u", num);
354 * for the system call "interrupt" at 128: we have a special IDT entry 398 else
355 * for that. */ 399 set_trap(lg, &lg->arch.idt[num], num, lo, hi);
356 if (num < ARRAY_SIZE(lg->idt))
357 set_trap(lg, &lg->idt[num], num, lo, hi);
358 else if (num == SYSCALL_VECTOR)
359 set_trap(lg, &lg->syscall_idt, num, lo, hi);
360} 400}
361 401
362/* The default entry for each interrupt points into the Switcher routines which 402/* The default entry for each interrupt points into the Switcher routines which
@@ -399,20 +439,21 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt,
399 439
400 /* We can simply copy the direct traps, otherwise we use the default 440 /* We can simply copy the direct traps, otherwise we use the default
401 * ones in the Switcher: they will return to the Host. */ 441 * ones in the Switcher: they will return to the Host. */
402 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) { 442 for (i = 0; i < ARRAY_SIZE(lg->arch.idt); i++) {
403 if (direct_trap(lg, &lg->idt[i], i)) 443 /* If no Guest can ever override this trap, leave it alone. */
404 idt[i] = lg->idt[i]; 444 if (!direct_trap(i))
445 continue;
446
447 /* Only trap gates (type 15) can go direct to the Guest.
448 * Interrupt gates (type 14) disable interrupts as they are
449 * entered, which we never let the Guest do. Not present
450 * entries (type 0x0) also can't go direct, of course. */
451 if (idt_type(lg->arch.idt[i].a, lg->arch.idt[i].b) == 0xF)
452 idt[i] = lg->arch.idt[i];
405 else 453 else
454 /* Reset it to the default. */
406 default_idt_entry(&idt[i], i, def[i]); 455 default_idt_entry(&idt[i], i, def[i]);
407 } 456 }
408
409 /* Don't forget the system call trap! The IDT entries for other
410	 * interrupts never change, so no need to copy them. */
411 i = SYSCALL_VECTOR;
412 if (direct_trap(lg, &lg->syscall_idt, i))
413 idt[i] = lg->syscall_idt;
414 else
415 default_idt_entry(&idt[i], i, def[i]);
416} 457}
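The 0xF test encodes real x86 descriptor layout: bits 8..11 of a gate descriptor's high word hold the type, 0xF for a 32-bit trap gate and 0xE for an interrupt gate (which clears IF on entry), with 0x0 marking a not-present entry. A sketch of the decode, assuming the same (lo, hi) word pair the code uses:

/* Bits 8..11 of the high descriptor word are the gate type. */
static unsigned int gate_type(unsigned int lo, unsigned int hi)
{
	return (hi >> 8) & 0xF;
}

/* Only trap gates (0xF) may be handed straight to the Guest. */
static int can_go_direct(unsigned int lo, unsigned int hi)
{
	return gate_type(lo, hi) == 0xF;
}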
417 458
418void guest_set_clockevent(struct lguest *lg, unsigned long delta) 459void guest_set_clockevent(struct lguest *lg, unsigned long delta)
diff --git a/drivers/lguest/io.c b/drivers/lguest/io.c
deleted file mode 100644
index ea68613b43..0000000000
--- a/drivers/lguest/io.c
+++ /dev/null
@@ -1,626 +0,0 @@
1/*P:300 The I/O mechanism in lguest is simple yet flexible, allowing the Guest
2 * to talk to the Launcher or directly to another Guest. It uses familiar
3 * concepts of DMA and interrupts, plus some neat code stolen from
4 * futexes... :*/
5
6/* Copyright (C) 2006 Rusty Russell IBM Corporation
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22#include <linux/types.h>
23#include <linux/futex.h>
24#include <linux/jhash.h>
25#include <linux/mm.h>
26#include <linux/highmem.h>
27#include <linux/uaccess.h>
28#include "lg.h"
29
30/*L:300
31 * I/O
32 *
33 * Getting data in and out of the Guest is quite an art. There are numerous
34 * ways to do it, and they all suck differently. We try to keep things fairly
35 * close to "real" hardware so our Guest's drivers don't look like an alien
36 * visitation in the middle of the Linux code, and yet make sure that Guests
37 * can talk directly to other Guests, not just the Launcher.
38 *
39 * To do this, the Guest gives us a key when it binds or sends DMA buffers.
40 * The key corresponds to a "physical" address inside the Guest (ie. a virtual
41 * address inside the Launcher process). We don't, however, use this key
42 * directly.
43 *
44 * We want Guests which share memory to be able to DMA to each other: two
45 * Launchers can mmap memory the same file, then the Guests can communicate.
46 * Fortunately, the futex code provides us with a way to get a "union
47 * futex_key" corresponding to the memory lying at a virtual address: if the
48 * two processes share memory, the "union futex_key" for that memory will match
49 * even if the memory is mapped at different addresses in each. So we always
50 * convert the keys to "union futex_key"s to compare them.
51 *
52 * Before we dive into this though, we need to look at another set of helper
53 * routines used throughout the Host kernel code to access Guest memory.
54 :*/
55static struct list_head dma_hash[61];
56
57/* An unfortunate side effect of the Linux doubly-linked list implementation is
58 * that there's no good way to statically initialize an array of linked
59 * lists. */
60void lguest_io_init(void)
61{
62 unsigned int i;
63
64 for (i = 0; i < ARRAY_SIZE(dma_hash); i++)
65 INIT_LIST_HEAD(&dma_hash[i]);
66}
67
68/* FIXME: allow multi-page lengths. */
69static int check_dma_list(struct lguest *lg, const struct lguest_dma *dma)
70{
71 unsigned int i;
72
73 for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
74 if (!dma->len[i])
75 return 1;
76 if (!lguest_address_ok(lg, dma->addr[i], dma->len[i]))
77 goto kill;
78 if (dma->len[i] > PAGE_SIZE)
79 goto kill;
80 /* We could do over a page, but is it worth it? */
81 if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE)
82 goto kill;
83 }
84 return 1;
85
86kill:
87 kill_guest(lg, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]);
88 return 0;
89}
90
91/*L:330 This is our hash function, using the wonderful Jenkins hash.
92 *
93 * The futex key is a union with three parts: an unsigned long word, a pointer,
94 * and an int "offset". We could use jhash_2words() which takes three u32s.
95 * (Ok, the hash functions are great: the naming sucks though).
96 *
97 * It's nice to be portable to 64-bit platforms, so we use the more generic
98 * jhash2(), which takes an array of u32, the number of u32s, and an initial
99 * u32 to roll in. This is uglier, but breaks down to almost the same code on
100 * 32-bit platforms like this one.
101 *
102 * We want a position in the array, so we modulo ARRAY_SIZE(dma_hash) (ie. 61).
103 */
104static unsigned int hash(const union futex_key *key)
105{
106 return jhash2((u32*)&key->both.word,
107 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
108 key->both.offset)
109 % ARRAY_SIZE(dma_hash);
110}
111
112/* This is a convenience routine to compare two keys. It's a much bemoaned C
113 * weakness that it doesn't allow '==' on structures or unions, so we have to
114 * open-code it like this. */
115static inline int key_eq(const union futex_key *a, const union futex_key *b)
116{
117 return (a->both.word == b->both.word
118 && a->both.ptr == b->both.ptr
119 && a->both.offset == b->both.offset);
120}
121
122/*L:360 OK, when we need to actually free up a Guest's DMA array we do several
123 * things, so we have a convenient function to do it.
124 *
125 * The caller must hold a read lock on dmainfo owner's current->mm->mmap_sem
126 * for the drop_futex_key_refs(). */
127static void unlink_dma(struct lguest_dma_info *dmainfo)
128{
129 /* You locked this too, right? */
130 BUG_ON(!mutex_is_locked(&lguest_lock));
131 /* This is how we know that the entry is free. */
132 dmainfo->interrupt = 0;
133 /* Remove it from the hash table. */
134 list_del(&dmainfo->list);
135 /* Drop the references we were holding (to the inode or mm). */
136 drop_futex_key_refs(&dmainfo->key);
137}
138
139/*L:350 This is the routine which we call when the Guest asks to unregister a
140 * DMA array attached to a given key. Returns true if the array was found. */
141static int unbind_dma(struct lguest *lg,
142 const union futex_key *key,
143 unsigned long dmas)
144{
145 int i, ret = 0;
146
147 /* We don't bother with the hash table, just look through all this
148 * Guest's DMA arrays. */
149 for (i = 0; i < LGUEST_MAX_DMA; i++) {
150 /* In theory it could have more than one array on the same key,
151 * or one array on multiple keys, so we check both */
152 if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) {
153 unlink_dma(&lg->dma[i]);
154 ret = 1;
155 break;
156 }
157 }
158 return ret;
159}
160
161/*L:340 BIND_DMA: this is the hypercall which sets up an array of "struct
162 * lguest_dma" for receiving I/O.
163 *
164 * The Guest wants to bind an array of "struct lguest_dma"s to a particular key
165 * to receive input. This only happens when the Guest is setting up a new
166 * device, so it doesn't have to be very fast.
167 *
168 * It returns 1 on a successful registration (it can fail if we hit the limit
169 * of registrations for this Guest).
170 */
171int bind_dma(struct lguest *lg,
172 unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt)
173{
174 unsigned int i;
175 int ret = 0;
176 union futex_key key;
177 /* Futex code needs the mmap_sem. */
178 struct rw_semaphore *fshared = &current->mm->mmap_sem;
179
180 /* Invalid interrupt? (We could kill the guest here). */
181 if (interrupt >= LGUEST_IRQS)
182 return 0;
183
184 /* We need to grab the Big Lguest Lock, because other Guests may be
185 * trying to look through this Guest's DMAs to send something while
186 * we're doing this. */
187 mutex_lock(&lguest_lock);
188 down_read(fshared);
189 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
190 kill_guest(lg, "bad dma key %#lx", ukey);
191 goto unlock;
192 }
193
194 /* We want to keep this key valid once we drop mmap_sem, so we have to
195 * hold a reference. */
196 get_futex_key_refs(&key);
197
198 /* If the Guest specified an interrupt of 0, that means they want to
199 * unregister this array of "struct lguest_dma"s. */
200 if (interrupt == 0)
201 ret = unbind_dma(lg, &key, dmas);
202 else {
203 /* Look through this Guest's dma array for an unused entry. */
204 for (i = 0; i < LGUEST_MAX_DMA; i++) {
205 /* If the interrupt is non-zero, the entry is already
206 * used. */
207 if (lg->dma[i].interrupt)
208 continue;
209
210			/* OK, a free one! Fill in our details. */
211 lg->dma[i].dmas = dmas;
212 lg->dma[i].num_dmas = numdmas;
213 lg->dma[i].next_dma = 0;
214 lg->dma[i].key = key;
215 lg->dma[i].guestid = lg->guestid;
216 lg->dma[i].interrupt = interrupt;
217
218 /* Now we add it to the hash table: the position
219 * depends on the futex key that we got. */
220 list_add(&lg->dma[i].list, &dma_hash[hash(&key)]);
221 /* Success! */
222 ret = 1;
223 goto unlock;
224 }
225 }
226 /* If we didn't find a slot to put the key in, drop the reference
227 * again. */
228 drop_futex_key_refs(&key);
229unlock:
230 /* Unlock and out. */
231 up_read(fshared);
232 mutex_unlock(&lguest_lock);
233 return ret;
234}
235
236/*L:385 Note that our routines to access a different Guest's memory are called
237 * lgread_other() and lgwrite_other(): these names emphasize that they are only
238 * used when the Guest is *not* the current Guest.
239 *
240 * The interface for copying from another process's memory is called
241 * access_process_vm(), with a final argument of 0 for a read, and 1 for a
242 * write.
243 *
244 * We need lgread_other() to read the destination Guest's "struct lguest_dma"
245 * array. */
246static int lgread_other(struct lguest *lg,
247 void *buf, u32 addr, unsigned bytes)
248{
249 if (!lguest_address_ok(lg, addr, bytes)
250 || access_process_vm(lg->tsk, addr, buf, bytes, 0) != bytes) {
251 memset(buf, 0, bytes);
252 kill_guest(lg, "bad address in registered DMA struct");
253 return 0;
254 }
255 return 1;
256}
257
258/* "lgwrite()" to another Guest: used to update the destination "used_len" once
259 * we've transferred data into the buffer. */
260static int lgwrite_other(struct lguest *lg, u32 addr,
261 const void *buf, unsigned bytes)
262{
263 if (!lguest_address_ok(lg, addr, bytes)
264 || (access_process_vm(lg->tsk, addr, (void *)buf, bytes, 1)
265 != bytes)) {
266 kill_guest(lg, "bad address writing to registered DMA");
267 return 0;
268 }
269 return 1;
270}
271
272/*L:400 This is the generic engine which copies from a source "struct
273 * lguest_dma" from this Guest into another Guest's "struct lguest_dma". The
274 * destination Guest's pages have already been mapped, as contained in the
275 * pages array.
276 *
277 * If you're wondering if there's a nice "copy from one process to another"
278 * routine, so was I. But Linux isn't really set up to copy between two
279 * unrelated processes, so we have to write it ourselves.
280 */
281static u32 copy_data(struct lguest *srclg,
282 const struct lguest_dma *src,
283 const struct lguest_dma *dst,
284 struct page *pages[])
285{
286 unsigned int totlen, si, di, srcoff, dstoff;
287 void *maddr = NULL;
288
289 /* We return the total length transferred. */
290 totlen = 0;
291
292 /* We keep indexes into the source and destination "struct lguest_dma",
293 * and an offset within each region. */
294 si = di = 0;
295 srcoff = dstoff = 0;
296
297 /* We loop until the source or destination is exhausted. */
298 while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si]
299 && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) {
300 /* We can only transfer the rest of the src buffer, or as much
301 * as will fit into the destination buffer. */
302 u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff);
303
304 /* For systems using "highmem" we need to use kmap() to access
305 * the page we want. We often use the same page over and over,
306 * so rather than kmap() it on every loop, we set the maddr
307 * pointer to NULL when we need to move to the next
308 * destination page. */
309 if (!maddr)
310 maddr = kmap(pages[di]);
311
312 /* Copy directly from (this Guest's) source address to the
313 * destination Guest's kmap()ed buffer. Note that maddr points
314 * to the start of the page: we need to add the offset of the
315 * destination address and offset within the buffer. */
316
317 /* FIXME: This is not completely portable. I looked at
318 * copy_to_user_page(), and some arch's seem to need special
319 * flushes. x86 is fine. */
320 if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE,
321 (void __user *)src->addr[si], len) != 0) {
322 /* If a copy failed, it's the source's fault. */
323 kill_guest(srclg, "bad address in sending DMA");
324 totlen = 0;
325 break;
326 }
327
328 /* Increment the total and src & dst offsets */
329 totlen += len;
330 srcoff += len;
331 dstoff += len;
332
333 /* Presumably we reached the end of the src or dest buffers: */
334 if (srcoff == src->len[si]) {
335 /* Move to the next buffer at offset 0 */
336 si++;
337 srcoff = 0;
338 }
339 if (dstoff == dst->len[di]) {
340 /* We need to unmap that destination page and reset
341 * maddr ready for the next one. */
342 kunmap(pages[di]);
343 maddr = NULL;
344 di++;
345 dstoff = 0;
346 }
347 }
348
349 /* If we still had a page mapped at the end, unmap now. */
350 if (maddr)
351 kunmap(pages[di]);
352
353 return totlen;
354}
355
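copy_data() above is a classic two-cursor scatter-list copy: each step transfers min(remaining source, remaining destination) bytes and advances whichever cursor exhausted its segment. A self-contained sketch of the cursor discipline, with plain memcpy() standing in for the kmap() and copy_from_user() details:

#include <string.h>

#define MAX_SECTIONS 8	/* stands in for LGUEST_MAX_DMA_SECTIONS */

struct seg {
	unsigned char *base;
	unsigned int len;	/* 0 terminates the list */
};

static unsigned int sg_copy(const struct seg *src, const struct seg *dst)
{
	unsigned int si = 0, di = 0, srcoff = 0, dstoff = 0, tot = 0;

	while (si < MAX_SECTIONS && src[si].len &&
	       di < MAX_SECTIONS && dst[di].len) {
		/* Rest of this source chunk, capped by dest space. */
		unsigned int len = src[si].len - srcoff;
		if (dst[di].len - dstoff < len)
			len = dst[di].len - dstoff;

		memcpy(dst[di].base + dstoff, src[si].base + srcoff, len);
		tot += len;
		srcoff += len;
		dstoff += len;

		/* Advance whichever cursor finished its segment. */
		if (srcoff == src[si].len) { si++; srcoff = 0; }
		if (dstoff == dst[di].len) { di++; dstoff = 0; }
	}
	return tot;
}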
356/*L:390 This is how we transfer a "struct lguest_dma" from the source Guest
357 * (the current Guest which called SEND_DMA) to another Guest. */
358static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src,
359 struct lguest *dstlg, const struct lguest_dma *dst)
360{
361 int i;
362 u32 ret;
363 struct page *pages[LGUEST_MAX_DMA_SECTIONS];
364
365 /* We check that both source and destination "struct lguest_dma"s are
366 * within the bounds of the source and destination Guests */
367 if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src))
368 return 0;
369
370	/* We need to map the pages which correspond to each part of the
371	 * destination buffer. */
372 for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
373 if (dst->len[i] == 0)
374 break;
375 /* get_user_pages() is a complicated function, especially since
376 * we only want a single page. But it works, and returns the
377 * number of pages. Note that we're holding the destination's
378 * mmap_sem, as get_user_pages() requires. */
379 if (get_user_pages(dstlg->tsk, dstlg->mm,
380 dst->addr[i], 1, 1, 1, pages+i, NULL)
381 != 1) {
382 /* This means the destination gave us a bogus buffer */
383 kill_guest(dstlg, "Error mapping DMA pages");
384 ret = 0;
385 goto drop_pages;
386 }
387 }
388
389 /* Now copy the data until we run out of src or dst. */
390 ret = copy_data(srclg, src, dst, pages);
391
392drop_pages:
393 while (--i >= 0)
394 put_page(pages[i]);
395 return ret;
396}
397
398/*L:380 Transferring data from one Guest to another is not as simple as I'd
399 * like. Once we've found the "struct lguest_dma_info" bound to the same
400 * address as the send, we need to copy into it.
401 *
402 * This function returns true if the destination array was empty. */
403static int dma_transfer(struct lguest *srclg,
404 unsigned long udma,
405 struct lguest_dma_info *dst)
406{
407 struct lguest_dma dst_dma, src_dma;
408 struct lguest *dstlg;
409 u32 i, dma = 0;
410
411 /* From the "struct lguest_dma_info" we found in the hash, grab the
412 * Guest. */
413 dstlg = &lguests[dst->guestid];
414 /* Read in the source "struct lguest_dma" handed to SEND_DMA. */
415 lgread(srclg, &src_dma, udma, sizeof(src_dma));
416
417 /* We need the destination's mmap_sem, and we already hold the source's
418 * mmap_sem for the futex key lookup. Normally this would suggest that
419 * we could deadlock if the destination Guest was trying to send to
420 * this source Guest at the same time, which is another reason that all
421 * I/O is done under the big lguest_lock. */
422 down_read(&dstlg->mm->mmap_sem);
423
424 /* Look through the destination DMA array for an available buffer. */
425 for (i = 0; i < dst->num_dmas; i++) {
426 /* We keep a "next_dma" pointer which often helps us avoid
427 * looking at lots of previously-filled entries. */
428 dma = (dst->next_dma + i) % dst->num_dmas;
429 if (!lgread_other(dstlg, &dst_dma,
430 dst->dmas + dma * sizeof(struct lguest_dma),
431 sizeof(dst_dma))) {
432 goto fail;
433 }
434 if (!dst_dma.used_len)
435 break;
436 }
437
438 /* If we found a buffer, we do the actual data copy. */
439 if (i != dst->num_dmas) {
440 unsigned long used_lenp;
441 unsigned int ret;
442
443 ret = do_dma(srclg, &src_dma, dstlg, &dst_dma);
444 /* Put used length in the source "struct lguest_dma"'s used_len
445 * field. It's a little tricky to figure out where that is,
446 * though. */
447 lgwrite_u32(srclg,
448 udma+offsetof(struct lguest_dma, used_len), ret);
449		/* Transferring 0 bytes is OK if the source buffer was empty. */
450 if (ret == 0 && src_dma.len[0] != 0)
451 goto fail;
452
453 /* The destination Guest might be running on a different CPU:
454 * we have to make sure that it will see the "used_len" field
455 * change to non-zero *after* it sees the data we copied into
456 * the buffer. Hence a write memory barrier. */
457 wmb();
458 /* Figuring out where the destination's used_len field for this
459 * "struct lguest_dma" in the array is also a little ugly. */
460 used_lenp = dst->dmas
461 + dma * sizeof(struct lguest_dma)
462 + offsetof(struct lguest_dma, used_len);
463 lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret));
464 /* Move the cursor for next time. */
465 dst->next_dma++;
466 }
467 up_read(&dstlg->mm->mmap_sem);
468
469 /* We trigger the destination interrupt, even if the destination was
470 * empty and we didn't transfer anything: this gives them a chance to
471 * wake up and refill. */
472 set_bit(dst->interrupt, dstlg->irqs_pending);
473 /* Wake up the destination process. */
474 wake_up_process(dstlg->tsk);
475 /* If we passed the last "struct lguest_dma", the receive had no
476 * buffers left. */
477 return i == dst->num_dmas;
478
479fail:
480 up_read(&dstlg->mm->mmap_sem);
481 return 0;
482}
483
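The wmb() above enforces the publish order: the copied data must be visible before the nonzero used_len that announces it, and a reader on another CPU needs a matching acquire on its side. A userspace analogue with C11 atomics (illustrative, not the kernel's primitives):

#include <stdatomic.h>

static unsigned char buffer[64];
static atomic_uint used_len;	/* 0 means "empty" */

static void publish(unsigned int len)
{
	/* ... fill buffer[0..len) ... */
	/* Release: orders the buffer writes before the flag. */
	atomic_store_explicit(&used_len, len, memory_order_release);
}

static unsigned int consume(void)
{
	/* Acquire: if we see len != 0, buffer[0..len) is visible. */
	return atomic_load_explicit(&used_len, memory_order_acquire);
}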
484/*L:370 This is the counter-side to the BIND_DMA hypercall; the SEND_DMA
485 * hypercall. We find out who's listening, and send to them. */
486void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma)
487{
488 union futex_key key;
489 int empty = 0;
490 struct rw_semaphore *fshared = &current->mm->mmap_sem;
491
492again:
493 mutex_lock(&lguest_lock);
494 down_read(fshared);
495 /* Get the futex key for the key the Guest gave us */
496 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
497 kill_guest(lg, "bad sending DMA key");
498 goto unlock;
499 }
500 /* Since the key must be a multiple of 4, the futex key uses the lower
501 * bit of the "offset" field (which would always be 0) to indicate a
502 * mapping which is shared with other processes (ie. Guests). */
503 if (key.shared.offset & 1) {
504 struct lguest_dma_info *i;
505 /* Look through the hash for other Guests. */
506 list_for_each_entry(i, &dma_hash[hash(&key)], list) {
507 /* Don't send to ourselves. */
508 if (i->guestid == lg->guestid)
509 continue;
510 if (!key_eq(&key, &i->key))
511 continue;
512
513 /* If dma_transfer() tells us the destination has no
514 * available buffers, we increment "empty". */
515 empty += dma_transfer(lg, udma, i);
516 break;
517 }
518 /* If the destination is empty, we release our locks and
519 * give the destination Guest a brief chance to restock. */
520 if (empty == 1) {
521 /* Give any recipients one chance to restock. */
522 up_read(&current->mm->mmap_sem);
523 mutex_unlock(&lguest_lock);
524 /* Next time, we won't try again. */
525 empty++;
526 goto again;
527 }
528 } else {
529 /* Private mapping: Guest is sending to its Launcher. We set
530 * the "dma_is_pending" flag so that the main loop will exit
531 * and the Launcher's read() from /dev/lguest will return. */
532 lg->dma_is_pending = 1;
533 lg->pending_dma = udma;
534 lg->pending_key = ukey;
535 }
536unlock:
537 up_read(fshared);
538 mutex_unlock(&lguest_lock);
539}
540/*:*/
541
542void release_all_dma(struct lguest *lg)
543{
544 unsigned int i;
545
546 BUG_ON(!mutex_is_locked(&lguest_lock));
547
548 down_read(&lg->mm->mmap_sem);
549 for (i = 0; i < LGUEST_MAX_DMA; i++) {
550 if (lg->dma[i].interrupt)
551 unlink_dma(&lg->dma[i]);
552 }
553 up_read(&lg->mm->mmap_sem);
554}
555
556/*M:007 We only return a single DMA buffer to the Launcher, but it would be
557 * more efficient to return a pointer to the entire array of DMA buffers, which
558 * it can cache and choose one whenever it wants.
559 *
560 * Currently the Launcher uses a write to /dev/lguest, and the return value is
561 * the address of the DMA structure with the interrupt number placed in
562 * dma->used_len. If we wanted to return the entire array, we would need to
563 * return the address, array size and interrupt number: this seems to require an
564 * ioctl(). :*/
565
566/*L:320 This routine looks for a DMA buffer registered by the Guest on the
567 * given key (using the BIND_DMA hypercall). */
568unsigned long get_dma_buffer(struct lguest *lg,
569 unsigned long ukey, unsigned long *interrupt)
570{
571 unsigned long ret = 0;
572 union futex_key key;
573 struct lguest_dma_info *i;
574 struct rw_semaphore *fshared = &current->mm->mmap_sem;
575
576 /* Take the Big Lguest Lock to stop other Guests sending this Guest DMA
577 * at the same time. */
578 mutex_lock(&lguest_lock);
579 /* To match between Guests sharing the same underlying memory we steal
580 * code from the futex infrastructure. This requires that we hold the
581 * "mmap_sem" for our process (the Launcher), and pass it to the futex
582 * code. */
583 down_read(fshared);
584
585 /* This can fail if it's not a valid address, or if the address is not
586 * divisible by 4 (the futex code needs that, we don't really). */
587 if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
588 kill_guest(lg, "bad registered DMA buffer");
589 goto unlock;
590 }
591 /* Search the hash table for matching entries (the Launcher can only
592 * send to its own Guest for the moment, so the entry must be for this
593 * Guest) */
594 list_for_each_entry(i, &dma_hash[hash(&key)], list) {
595 if (key_eq(&key, &i->key) && i->guestid == lg->guestid) {
596 unsigned int j;
597 /* Look through the registered DMA array for an
598 * available buffer. */
599 for (j = 0; j < i->num_dmas; j++) {
600 struct lguest_dma dma;
601
602 ret = i->dmas + j * sizeof(struct lguest_dma);
603 lgread(lg, &dma, ret, sizeof(dma));
604 if (dma.used_len == 0)
605 break;
606 }
607 /* Store the interrupt the Guest wants when the buffer
608 * is used. */
609 *interrupt = i->interrupt;
610 break;
611 }
612 }
613unlock:
614 up_read(fshared);
615 mutex_unlock(&lguest_lock);
616 return ret;
617}
618/*:*/
619
620/*L:410 This really has completed the Launcher. Not only have we now finished
621 * the longest chapter in our journey, but this also means we are over halfway
622 * through!
623 *
624 * Enough prevaricating around the bush: it is time for us to dive into the
625 * core of the Host, in "make Host".
626 */
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 64f0abed31..d9144beca8 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -1,119 +1,25 @@
1#ifndef _LGUEST_H 1#ifndef _LGUEST_H
2#define _LGUEST_H 2#define _LGUEST_H
3 3
4#include <asm/desc.h>
5
6#define GDT_ENTRY_LGUEST_CS 10
7#define GDT_ENTRY_LGUEST_DS 11
8#define LGUEST_CS (GDT_ENTRY_LGUEST_CS * 8)
9#define LGUEST_DS (GDT_ENTRY_LGUEST_DS * 8)
10
11#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
12#include <linux/types.h> 5#include <linux/types.h>
13#include <linux/init.h> 6#include <linux/init.h>
14#include <linux/stringify.h> 7#include <linux/stringify.h>
15#include <linux/binfmts.h>
16#include <linux/futex.h>
17#include <linux/lguest.h> 8#include <linux/lguest.h>
18#include <linux/lguest_launcher.h> 9#include <linux/lguest_launcher.h>
19#include <linux/wait.h> 10#include <linux/wait.h>
20#include <linux/err.h> 11#include <linux/err.h>
21#include <asm/semaphore.h> 12#include <asm/semaphore.h>
22#include "irq_vectors.h"
23
24#define GUEST_PL 1
25 13
26struct lguest_regs 14#include <asm/lguest.h>
27{
28 /* Manually saved part. */
29 unsigned long ebx, ecx, edx;
30 unsigned long esi, edi, ebp;
31 unsigned long gs;
32 unsigned long eax;
33 unsigned long fs, ds, es;
34 unsigned long trapnum, errcode;
35 /* Trap pushed part */
36 unsigned long eip;
37 unsigned long cs;
38 unsigned long eflags;
39 unsigned long esp;
40 unsigned long ss;
41};
42 15
43void free_pagetables(void); 16void free_pagetables(void);
44int init_pagetables(struct page **switcher_page, unsigned int pages); 17int init_pagetables(struct page **switcher_page, unsigned int pages);
45 18
46/* Full 4G segment descriptors, suitable for CS and DS. */
47#define FULL_EXEC_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9b00})
48#define FULL_SEGMENT ((struct desc_struct){0x0000ffff, 0x00cf9300})
49
50struct lguest_dma_info
51{
52 struct list_head list;
53 union futex_key key;
54 unsigned long dmas;
55 u16 next_dma;
56 u16 num_dmas;
57 u16 guestid;
58 u8 interrupt; /* 0 when not registered */
59};
60
61/*H:310 The page-table code owes a great debt of gratitude to Andi Kleen. He
62 * reviewed the original code which used "u32" for all page table entries, and
63 * insisted that it would be far clearer with explicit typing. I thought it
64 * was overkill, but he was right: it is much clearer than it was before.
65 *
66 * We have separate types for the Guest's ptes & pgds and the shadow ptes &
67 * pgds. There's already a Linux type for these (pte_t and pgd_t) but they
68 * change depending on kernel config options (PAE). */
69
70/* Each entry is identical: lower 12 bits of flags and upper 20 bits for the
71 * "page frame number" (0 == first physical page, etc). They are different
72 * types so the compiler will warn us if we mix them improperly. */
73typedef union {
74 struct { unsigned flags:12, pfn:20; };
75 struct { unsigned long val; } raw;
76} spgd_t;
77typedef union {
78 struct { unsigned flags:12, pfn:20; };
79 struct { unsigned long val; } raw;
80} spte_t;
81typedef union {
82 struct { unsigned flags:12, pfn:20; };
83 struct { unsigned long val; } raw;
84} gpgd_t;
85typedef union {
86 struct { unsigned flags:12, pfn:20; };
87 struct { unsigned long val; } raw;
88} gpte_t;
89
90/* We have two convenient macros to convert a "raw" value as handed to us by
91 * the Guest into the correct Guest PGD or PTE type. */
92#define mkgpte(_val) ((gpte_t){.raw.val = _val})
93#define mkgpgd(_val) ((gpgd_t){.raw.val = _val})
94/*:*/
95
96struct pgdir 19struct pgdir
97{ 20{
98 unsigned long cr3; 21 unsigned long gpgdir;
99 spgd_t *pgdir; 22 pgd_t *pgdir;
100};
101
102/* This is a guest-specific page (mapped ro) into the guest. */
103struct lguest_ro_state
104{
105 /* Host information we need to restore when we switch back. */
106 u32 host_cr3;
107 struct Xgt_desc_struct host_idt_desc;
108 struct Xgt_desc_struct host_gdt_desc;
109 u32 host_sp;
110
111 /* Fields which are used when guest is running. */
112 struct Xgt_desc_struct guest_idt_desc;
113 struct Xgt_desc_struct guest_gdt_desc;
114 struct i386_hw_tss guest_tss;
115 struct desc_struct guest_idt[IDT_ENTRIES];
116 struct desc_struct guest_gdt[GDT_ENTRIES];
117}; 23};
118 24
119/* We have two pages shared with guests, per cpu. */ 25/* We have two pages shared with guests, per cpu. */
@@ -141,9 +47,11 @@ struct lguest
141 struct lguest_data __user *lguest_data; 47 struct lguest_data __user *lguest_data;
142 struct task_struct *tsk; 48 struct task_struct *tsk;
143 struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ 49 struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */
144 u16 guestid;
145 u32 pfn_limit; 50 u32 pfn_limit;
146 u32 page_offset; 51 /* This provides the offset to the base of guest-physical
52 * memory in the Launcher. */
53 void __user *mem_base;
54 unsigned long kernel_address;
147 u32 cr2; 55 u32 cr2;
148 int halted; 56 int halted;
149 int ts; 57 int ts;
@@ -151,6 +59,9 @@ struct lguest
151 u32 esp1; 59 u32 esp1;
152 u8 ss1; 60 u8 ss1;
153 61
62 /* If a hypercall was asked for, this points to the arguments. */
63 struct hcall_args *hcall;
64
154 /* Do we need to stop what we're doing and return to userspace? */ 65 /* Do we need to stop what we're doing and return to userspace? */
155 int break_out; 66 int break_out;
156 wait_queue_head_t break_wq; 67 wait_queue_head_t break_wq;
@@ -167,24 +78,15 @@ struct lguest
167 struct task_struct *wake; 78 struct task_struct *wake;
168 79
169 unsigned long noirq_start, noirq_end; 80 unsigned long noirq_start, noirq_end;
170 int dma_is_pending; 81 unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */
171 unsigned long pending_dma; /* struct lguest_dma */
172 unsigned long pending_key; /* address they're sending to */
173 82
174 unsigned int stack_pages; 83 unsigned int stack_pages;
175 u32 tsc_khz; 84 u32 tsc_khz;
176 85
177 struct lguest_dma_info dma[LGUEST_MAX_DMA];
178
179 /* Dead? */ 86 /* Dead? */
180 const char *dead; 87 const char *dead;
181 88
182 /* The GDT entries copied into lguest_ro_state when running. */ 89 struct lguest_arch arch;
183 struct desc_struct gdt[GDT_ENTRIES];
184
185 /* The IDT entries: some copied into lguest_ro_state when running. */
186 struct desc_struct idt[FIRST_EXTERNAL_VECTOR+LGUEST_IRQS];
187 struct desc_struct syscall_idt;
188 90
189 /* Virtual clock device */ 91 /* Virtual clock device */
190 struct hrtimer hrt; 92 struct hrtimer hrt;
@@ -193,19 +95,38 @@ struct lguest
193 DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); 95 DECLARE_BITMAP(irqs_pending, LGUEST_IRQS);
194}; 96};
195 97
196extern struct lguest lguests[];
197extern struct mutex lguest_lock; 98extern struct mutex lguest_lock;
198 99
199/* core.c: */ 100/* core.c: */
200u32 lgread_u32(struct lguest *lg, unsigned long addr);
201void lgwrite_u32(struct lguest *lg, unsigned long addr, u32 val);
202void lgread(struct lguest *lg, void *buf, unsigned long addr, unsigned len);
203void lgwrite(struct lguest *lg, unsigned long, const void *buf, unsigned len);
204int find_free_guest(void);
205int lguest_address_ok(const struct lguest *lg, 101int lguest_address_ok(const struct lguest *lg,
206 unsigned long addr, unsigned long len); 102 unsigned long addr, unsigned long len);
103void __lgread(struct lguest *, void *, unsigned long, unsigned);
104void __lgwrite(struct lguest *, unsigned long, const void *, unsigned);
105
106/*L:306 Using memory-copy operations like that is usually inconvenient, so we
107 * have the following helper macros which read and write a specific type (often
108 * an unsigned long).
109 *
110 * This reads into a variable of the given type then returns that. */
111#define lgread(lg, addr, type) \
112 ({ type _v; __lgread((lg), &_v, (addr), sizeof(_v)); _v; })
113
114/* This checks that the variable is of the given type, then writes it out. */
115#define lgwrite(lg, addr, type, val) \
116 do { \
117 typecheck(type, val); \
118 __lgwrite((lg), (addr), &(val), sizeof(val)); \
119 } while(0)
120/* (end of memory access helper routines) :*/
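Typical uses of the helpers above, assuming an lg and Guest addresses already in scope (an illustrative fragment, not taken from the patch):

u32 val = lgread(lg, addr, u32);		/* read one u32 and return it */
pte_t gpte = lgread(lg, pte_addr, pte_t);	/* works for any type */
lgwrite(lg, addr, u32, val);			/* typecheck(u32, val), then copy out */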
121
207int run_guest(struct lguest *lg, unsigned long __user *user); 122int run_guest(struct lguest *lg, unsigned long __user *user);
208 123
124/* Helper macros to obtain the low 12 or the upper 20 bits; this is only the
125 * first step in the migration to the kernel types. pte_pfn is already defined
126 * in the kernel. */
127#define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK)
128#define pte_flags(x) (pte_val(x) & ~PAGE_MASK)
129#define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT)
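A worked instance of the masks above, assuming 4KB pages (PAGE_SHIFT == 12, so PAGE_MASK == ~0xFFFUL):

unsigned long val = 0x12345067UL;	/* illustrative entry value */
unsigned long flags = val & 0xFFFUL;	/* 0x067: the low 12 bits */
unsigned long pfn = val >> 12;		/* 0x12345: the upper 20 bits */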
209 130
210/* interrupts_and_traps.c: */ 131/* interrupts_and_traps.c: */
211void maybe_do_interrupt(struct lguest *lg); 132void maybe_do_interrupt(struct lguest *lg);
@@ -219,6 +140,9 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt,
219 const unsigned long *def); 140 const unsigned long *def);
220void guest_set_clockevent(struct lguest *lg, unsigned long delta); 141void guest_set_clockevent(struct lguest *lg, unsigned long delta);
221void init_clockdev(struct lguest *lg); 142void init_clockdev(struct lguest *lg);
143bool check_syscall_vector(struct lguest *lg);
144int init_interrupts(void);
145void free_interrupts(void);
222 146
223/* segments.c: */ 147/* segments.c: */
224void setup_default_gdt_entries(struct lguest_ro_state *state); 148void setup_default_gdt_entries(struct lguest_ro_state *state);
@@ -232,28 +156,33 @@ void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt);
232int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); 156int init_guest_pagetable(struct lguest *lg, unsigned long pgtable);
233void free_guest_pagetable(struct lguest *lg); 157void free_guest_pagetable(struct lguest *lg);
234void guest_new_pagetable(struct lguest *lg, unsigned long pgtable); 158void guest_new_pagetable(struct lguest *lg, unsigned long pgtable);
235void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 i); 159void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
236void guest_pagetable_clear_all(struct lguest *lg); 160void guest_pagetable_clear_all(struct lguest *lg);
237void guest_pagetable_flush_user(struct lguest *lg); 161void guest_pagetable_flush_user(struct lguest *lg);
238void guest_set_pte(struct lguest *lg, unsigned long cr3, 162void guest_set_pte(struct lguest *lg, unsigned long gpgdir,
239 unsigned long vaddr, gpte_t val); 163 unsigned long vaddr, pte_t val);
240void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages); 164void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages);
241int demand_page(struct lguest *info, unsigned long cr2, int errcode); 165int demand_page(struct lguest *info, unsigned long cr2, int errcode);
242void pin_page(struct lguest *lg, unsigned long vaddr); 166void pin_page(struct lguest *lg, unsigned long vaddr);
167unsigned long guest_pa(struct lguest *lg, unsigned long vaddr);
168void page_table_guest_data_init(struct lguest *lg);
169
170/* <arch>/core.c: */
171void lguest_arch_host_init(void);
172void lguest_arch_host_fini(void);
173void lguest_arch_run_guest(struct lguest *lg);
174void lguest_arch_handle_trap(struct lguest *lg);
175int lguest_arch_init_hypercalls(struct lguest *lg);
176int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args);
177void lguest_arch_setup_regs(struct lguest *lg, unsigned long start);
178
179/* <arch>/switcher.S: */
180extern char start_switcher_text[], end_switcher_text[], switch_to_guest[];
243 181
244/* lguest_user.c: */ 182/* lguest_user.c: */
245int lguest_device_init(void); 183int lguest_device_init(void);
246void lguest_device_remove(void); 184void lguest_device_remove(void);
247 185
248/* io.c: */
249void lguest_io_init(void);
250int bind_dma(struct lguest *lg,
251 unsigned long key, unsigned long udma, u16 numdmas, u8 interrupt);
252void send_dma(struct lguest *info, unsigned long key, unsigned long udma);
253void release_all_dma(struct lguest *lg);
254unsigned long get_dma_buffer(struct lguest *lg, unsigned long key,
255 unsigned long *interrupt);
256
257/* hypercalls.c: */ 186/* hypercalls.c: */
258void do_hypercalls(struct lguest *lg); 187void do_hypercalls(struct lguest *lg);
259void write_timestamp(struct lguest *lg); 188void write_timestamp(struct lguest *lg);
@@ -292,9 +221,5 @@ do { \
292} while(0) 221} while(0)
293/* (End of aside) :*/ 222/* (End of aside) :*/
294 223
295static inline unsigned long guest_pa(struct lguest *lg, unsigned long vaddr)
296{
297 return vaddr - lg->page_offset;
298}
299#endif /* __ASSEMBLY__ */ 224#endif /* __ASSEMBLY__ */
300#endif /* _LGUEST_H */ 225#endif /* _LGUEST_H */
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
deleted file mode 100644
index 3ba337dde8..0000000000
--- a/drivers/lguest/lguest.c
+++ /dev/null
@@ -1,1108 +0,0 @@
1/*P:010
2 * A hypervisor allows multiple Operating Systems to run on a single machine.
3 * To quote David Wheeler: "Any problem in computer science can be solved with
4 * another layer of indirection."
5 *
6 * We keep things simple in two ways. First, we start with a normal Linux
7 * kernel and insert a module (lg.ko) which allows us to run other Linux
8 * kernels the same way we'd run processes. We call the first kernel the Host,
9 * and the others the Guests. The program which sets up and configures Guests
10 * (such as the example in Documentation/lguest/lguest.c) is called the
11 * Launcher.
12 *
13 * Secondly, we only run specially modified Guests, not normal kernels. When
14 * you set CONFIG_LGUEST to 'y' or 'm', this automatically sets
15 * CONFIG_LGUEST_GUEST=y, which compiles this file into the kernel so it knows
16 * how to be a Guest. This means that you can use the same kernel you boot
17 * normally (ie. as a Host) as a Guest.
18 *
19 * These Guests know that they cannot do privileged operations, such as disable
20 * interrupts, and that they have to ask the Host to do such things explicitly.
21 * This file consists of all the replacements for such low-level native
22 * hardware operations: these special Guest versions call the Host.
23 *
24 * So how does the kernel know it's a Guest? The Guest starts at a special
25 * entry point marked with a magic string, which sets up a few things then
26 * calls here. We replace the native functions in the various "paravirt"
27 * structures with our Guest versions, then boot like normal. :*/
28
29/*
30 * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
31 *
32 * This program is free software; you can redistribute it and/or modify
33 * it under the terms of the GNU General Public License as published by
34 * the Free Software Foundation; either version 2 of the License, or
35 * (at your option) any later version.
36 *
37 * This program is distributed in the hope that it will be useful, but
38 * WITHOUT ANY WARRANTY; without even the implied warranty of
39 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
40 * NON INFRINGEMENT. See the GNU General Public License for more
41 * details.
42 *
43 * You should have received a copy of the GNU General Public License
44 * along with this program; if not, write to the Free Software
45 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
46 */
47#include <linux/kernel.h>
48#include <linux/start_kernel.h>
49#include <linux/string.h>
50#include <linux/console.h>
51#include <linux/screen_info.h>
52#include <linux/irq.h>
53#include <linux/interrupt.h>
54#include <linux/clocksource.h>
55#include <linux/clockchips.h>
56#include <linux/lguest.h>
57#include <linux/lguest_launcher.h>
58#include <linux/lguest_bus.h>
59#include <asm/paravirt.h>
60#include <asm/param.h>
61#include <asm/page.h>
62#include <asm/pgtable.h>
63#include <asm/desc.h>
64#include <asm/setup.h>
65#include <asm/e820.h>
66#include <asm/mce.h>
67#include <asm/io.h>
68
69/*G:010 Welcome to the Guest!
70 *
71 * The Guest in our tale is a simple creature: identical to the Host but
72 * behaving in simplified but equivalent ways. In particular, the Guest is the
73 * same kernel as the Host (or at least, built from the same source code). :*/
74
75/* Declarations for definitions in lguest_guest.S */
76extern char lguest_noirq_start[], lguest_noirq_end[];
77extern const char lgstart_cli[], lgend_cli[];
78extern const char lgstart_sti[], lgend_sti[];
79extern const char lgstart_popf[], lgend_popf[];
80extern const char lgstart_pushf[], lgend_pushf[];
81extern const char lgstart_iret[], lgend_iret[];
82extern void lguest_iret(void);
83
84struct lguest_data lguest_data = {
85 .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
86 .noirq_start = (u32)lguest_noirq_start,
87 .noirq_end = (u32)lguest_noirq_end,
88 .blocked_interrupts = { 1 }, /* Block timer interrupts */
89};
90struct lguest_device_desc *lguest_devices;
91static cycle_t clock_base;
92
93/*G:035 Notice the lazy_hcall() below, rather than hcall(). This is our first
94 * real optimization trick!
95 *
96 * When lazy_mode is set, it means we're allowed to defer all hypercalls and do
97 * them as a batch when lazy_mode is eventually turned off. Because hypercalls
98 * are reasonably expensive, batching them up makes sense. For example, a
99 * large mmap might update dozens of page table entries: that code calls
100 * paravirt_enter_lazy_mmu(), does the dozen updates, then calls
101 * lguest_leave_lazy_mode().
102 *
103 * So, when we're in lazy mode, we call async_hypercall() to store the call for
104 * future processing. When lazy mode is turned off we issue a hypercall to
105 * flush the stored calls.
106 */
107static void lguest_leave_lazy_mode(void)
108{
109 paravirt_leave_lazy(paravirt_get_lazy_mode());
110 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
111}
112
113static void lazy_hcall(unsigned long call,
114 unsigned long arg1,
115 unsigned long arg2,
116 unsigned long arg3)
117{
118 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
119 hcall(call, arg1, arg2, arg3);
120 else
121 async_hcall(call, arg1, arg2, arg3);
122}
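(An aside on what the batching buys: under PARAVIRT_LAZY_MMU, a burst of page
table updates such as lguest_set_pte_at() below each lands in the ring via
async_hcall(); the single LHCALL_FLUSH_ASYNC issued by lguest_leave_lazy_mode()
is then one real hypercall, during which the Host runs through the whole stored
batch, instead of one world switch per update.)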
123
124/* async_hcall() is pretty simple: I'm quite proud of it really. We have a
125 * ring buffer of stored hypercalls which the Host will run through next time we
126 * do a normal hypercall. Each entry in the ring has 4 slots for the hypercall
127 * arguments, and a "hcall_status" word which is 0 if the call is ready to go,
128 * and 255 once the Host has finished with it.
129 *
130 * If we come around to a slot which hasn't been finished, then the table is
131 * full and we just make the hypercall directly. This has the nice side
132 * effect of causing the Host to run all the stored calls in the ring buffer
133 * which empties it for next time! */
134void async_hcall(unsigned long call,
135 unsigned long arg1, unsigned long arg2, unsigned long arg3)
136{
137 /* Note: This code assumes we're uniprocessor. */
138 static unsigned int next_call;
139 unsigned long flags;
140
141 /* Disable interrupts if not already disabled: we don't want an
142 * interrupt handler making a hypercall while we're already doing
143 * one! */
144 local_irq_save(flags);
145 if (lguest_data.hcall_status[next_call] != 0xFF) {
146 /* Table full, so do normal hcall which will flush table. */
147 hcall(call, arg1, arg2, arg3);
148 } else {
149 lguest_data.hcalls[next_call].eax = call;
150 lguest_data.hcalls[next_call].edx = arg1;
151 lguest_data.hcalls[next_call].ebx = arg2;
152 lguest_data.hcalls[next_call].ecx = arg3;
153 /* Arguments must all be written before we mark it to go */
154 wmb();
155 lguest_data.hcall_status[next_call] = 0;
156 if (++next_call == LHCALL_RING_SIZE)
157 next_call = 0;
158 }
159 local_irq_restore(flags);
160}
161/*:*/
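To make the ring protocol concrete, here is a minimal consumer-side sketch.
This is not the Host's actual code (that lives in drivers/lguest/hypercalls.c);
the names ring, status and drain_ring are invented for illustration:

	/* Illustrative consumer of the async-hypercall ring described above:
	 * run every entry the Guest has marked ready (status 0), hand the
	 * slot back by setting it to 0xFF (the same "empty" value the Guest
	 * initializes hcall_status to), and stop at the first unready one. */
	#define RING_SIZE 64
	struct entry { unsigned long eax, edx, ebx, ecx; };
	static struct entry ring[RING_SIZE];
	static unsigned char status[RING_SIZE] = { [0 ... RING_SIZE-1] = 0xFF };

	static void drain_ring(void (*run_entry)(struct entry *))
	{
		static unsigned int next;	/* consumer cursor */

		while (status[next] == 0) {
			run_entry(&ring[next]);
			status[next] = 0xFF;
			if (++next == RING_SIZE)
				next = 0;
		}
	}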
162
163/* Wrappers for the SEND_DMA and BIND_DMA hypercalls. This is mainly because
164 * Jeff Garzik complained that __pa() should never appear in drivers, and this
165 * helps remove most of them. But also, it wraps some ugliness. */
166void lguest_send_dma(unsigned long key, struct lguest_dma *dma)
167{
168 /* The hcall might not write this if something goes wrong */
169 dma->used_len = 0;
170 hcall(LHCALL_SEND_DMA, key, __pa(dma), 0);
171}
172
173int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas,
174 unsigned int num, u8 irq)
175{
176 /* This is the only hypercall which actually wants 5 arguments, and we
177 * only support 4. Fortunately the interrupt number is always less
178 * than 256, so we can pack it with the number of dmas in the final
179 * argument. */
180 if (!hcall(LHCALL_BIND_DMA, key, __pa(dmas), (num << 8) | irq))
181 return -ENOMEM;
182 return 0;
183}
184
185/* Unbinding is the same hypercall as binding, but with 0 num & irq. */
186void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas)
187{
188 hcall(LHCALL_BIND_DMA, key, __pa(dmas), 0);
189}
190
191/* For guests, device memory can be used as normal memory, so we cast away the
192 * __iomem to quieten sparse. */
193void *lguest_map(unsigned long phys_addr, unsigned long pages)
194{
195 return (__force void *)ioremap(phys_addr, PAGE_SIZE*pages);
196}
197
198void lguest_unmap(void *addr)
199{
200 iounmap((__force void __iomem *)addr);
201}
202
203/*G:033
204 * Here are our first native-instruction replacements: four functions for
205 * interrupt control.
206 *
207 * The simplest way of implementing these would be to have "turn interrupts
208 * off" and "turn interrupts on" hypercalls. Unfortunately, this is too slow:
209 * these are by far the most commonly called functions of those we override.
210 *
211 * So instead we keep an "irq_enabled" field inside our "struct lguest_data",
212 * which the Guest can update with a single instruction. The Host knows to
213 * check there when it wants to deliver an interrupt.
214 */
215
216/* save_flags() is expected to return the processor state (ie. "eflags"). The
217 * eflags word contains all kind of stuff, but in practice Linux only cares
218 * about the interrupt flag. Our "save_flags()" just returns that. */
219static unsigned long save_fl(void)
220{
221 return lguest_data.irq_enabled;
222}
223
224/* "restore_flags" just sets the flags back to the value given. */
225static void restore_fl(unsigned long flags)
226{
227 lguest_data.irq_enabled = flags;
228}
229
230/* Interrupts go off... */
231static void irq_disable(void)
232{
233 lguest_data.irq_enabled = 0;
234}
235
236/* Interrupts go on... */
237static void irq_enable(void)
238{
239 lguest_data.irq_enabled = X86_EFLAGS_IF;
240}
241/*:*/
242/*M:003 Note that we don't check for outstanding interrupts when we re-enable
243 * them (or when we unmask an interrupt). This seems to work for the moment,
244 * since interrupts are rare and we'll just get the interrupt on the next timer
245 * tick, but when we turn on CONFIG_NO_HZ, we should revisit this. One way
246 * would be to put the "irq_enabled" field in a page by itself, and have the
247 * Host write-protect it when an interrupt comes in when irqs are disabled.
248 * There will then be a page fault as soon as interrupts are re-enabled. :*/
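For the other side of this bargain, the Host's delivery test boils down to
something like the following sketch (the real logic lives in the Host's
interrupts_and_traps.c; this simplified, self-contained version is
illustrative only):

	/* Sketch: deliver only if the Guest's virtual interrupt flag is set
	 * and this interrupt isn't masked in the blocked_interrupts bitmap. */
	static int can_deliver(const struct lguest_data *data, unsigned int irq)
	{
		const unsigned long *mask = data->blocked_interrupts;

		if (!data->irq_enabled)		/* Guest did the "cli" thing */
			return 0;
		/* One bit per interrupt, as set_bit()/clear_bit() lay it out. */
		return !(mask[irq / BITS_PER_LONG]
			 & (1UL << (irq % BITS_PER_LONG)));
	}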
249
250/*G:034
251 * The Interrupt Descriptor Table (IDT).
252 *
253 * The IDT tells the processor what to do when an interrupt comes in. Each
254 * entry in the table is a 64-bit descriptor: this holds the privilege level,
255 * address of the handler, and... well, who cares? The Guest just asks the
256 * Host to make the change anyway, because the Host controls the real IDT.
257 */
258static void lguest_write_idt_entry(struct desc_struct *dt,
259 int entrynum, u32 low, u32 high)
260{
261 /* Keep the local copy up to date. */
262 write_dt_entry(dt, entrynum, low, high);
263 /* Tell Host about this new entry. */
264 hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high);
265}
266
267/* Changing to a different IDT is very rare: we keep the IDT up-to-date every
268 * time it is written, so we can simply loop through all entries and tell the
269 * Host about them. */
270static void lguest_load_idt(const struct Xgt_desc_struct *desc)
271{
272 unsigned int i;
273 struct desc_struct *idt = (void *)desc->address;
274
275 for (i = 0; i < (desc->size+1)/8; i++)
276 hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
277}
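(A note on the arithmetic used here and in lguest_load_gdt() below: the
descriptor-table "size" field is a standard x86 limit, ie. the table's length
in bytes minus one, and each descriptor is 8 bytes, so (desc->size+1)/8 is the
number of entries.)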
278
279/*
280 * The Global Descriptor Table.
281 *
282 * The Intel architecture defines another table, called the Global Descriptor
283 * Table (GDT). You tell the CPU where it is (and its size) using the "lgdt"
284 * instruction, and then several other instructions refer to entries in the
285 * table. There are three entries which the Switcher needs, so the Host simply
286 * controls the entire thing and the Guest asks it to make changes using the
287 * LOAD_GDT hypercall.
288 *
289 * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY
290 * hypercall and use that repeatedly to load a new IDT. I don't think it
291 * really matters, but wouldn't it be nice if they were the same?
292 */
293static void lguest_load_gdt(const struct Xgt_desc_struct *desc)
294{
295 BUG_ON((desc->size+1)/8 != GDT_ENTRIES);
296 hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0);
297}
298
299/* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
300 * then tell the Host to reload the entire thing. This operation is so rare
301 * that this naive implementation is reasonable. */
302static void lguest_write_gdt_entry(struct desc_struct *dt,
303 int entrynum, u32 low, u32 high)
304{
305 write_dt_entry(dt, entrynum, low, high);
306 hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0);
307}
308
309/* OK, I lied. There are three "thread local storage" GDT entries which change
310 * on every context switch (these three entries are how glibc implements
311 * __thread variables). So we have a hypercall specifically for this case. */
312static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
313{
314 /* There's one problem which normal hardware doesn't have: the Host
315 * can't handle us removing entries we're currently using. So we clear
316 * the GS register here: if it's needed it'll be reloaded anyway. */
317 loadsegment(gs, 0);
318 lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
319}
320
321/*G:038 That's enough excitement for now, back to ploughing through each of
322 * the different pv_ops structures (we're about 1/3 of the way through).
323 *
324 * This is the Local Descriptor Table, another weird Intel thingy. Linux only
325 * uses this for some strange applications like Wine. We don't do anything
326 * here, so they'll get an informative and friendly Segmentation Fault. */
327static void lguest_set_ldt(const void *addr, unsigned entries)
328{
329}
330
331/* This loads a GDT entry into the "Task Register": that entry points to a
332 * structure called the Task State Segment. Some comments scattered through the
333 * kernel code indicate that this was used for task switching in ages past, along
334 * with blood sacrifice and astrology.
335 *
336 * Now there's nothing interesting in here that we don't get told elsewhere.
337 * But the native version uses the "ltr" instruction, which makes the Host
338 * complain to the Guest about a Segmentation Fault and it'll oops. So we
339 * override the native version with a do-nothing version. */
340static void lguest_load_tr_desc(void)
341{
342}
343
344/* The "cpuid" instruction is a way of querying both the CPU identity
345 * (manufacturer, model, etc) and its features. It was introduced before the
346 * Pentium in 1993 and keeps getting extended by both Intel and AMD. As you
347 * might imagine, after a decade and a half this treatment, it is now a giant
348 * ball of hair. Its entry in the current Intel manual runs to 28 pages.
349 *
350 * This instruction even has its own Wikipedia entry. The Wikipedia entry
351 * has been translated into 4 languages. I am not making this up!
352 *
353 * We could get funky here and identify ourselves as "GenuineLguest", but
354 * instead we just use the real "cpuid" instruction. Then I pretty much turned
355 * off feature bits until the Guest booted. (Don't say that: you'll damage
356 * lguest sales!) Shut up, inner voice! (Hey, just pointing out that this is
357 * hardly future proof.) Noone's listening! They don't like you anyway,
358 * parenthetic weirdo!
359 *
360 * Replacing the cpuid so we can turn features off is great for the kernel, but
361 * anyone (including userspace) can just use the raw "cpuid" instruction and
362 * the Host won't even notice since it isn't privileged. So we try not to get
363 * too worked up about it. */
364static void lguest_cpuid(unsigned int *eax, unsigned int *ebx,
365 unsigned int *ecx, unsigned int *edx)
366{
367 int function = *eax;
368
369 native_cpuid(eax, ebx, ecx, edx);
370 switch (function) {
371 case 1: /* Basic feature request. */
372 /* We only allow the kernel to see SSE3, CMPXCHG16B and SSSE3 */
373 *ecx &= 0x00002201;
374 /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
375 *edx &= 0x07808101;
376 /* The Host can do a nice optimization if it knows that the
377 * kernel mappings (addresses above 0xC0000000 or whatever
378 * PAGE_OFFSET is set to) haven't changed. But Linux calls
379 * flush_tlb_user() for both user and kernel mappings unless
380 * the Page Global Enable (PGE) feature bit is set. */
381 *edx |= 0x00002000;
382 break;
383 case 0x80000000:
384 /* Futureproof this a little: if they ask how much extended
385 * processor information there is, limit it to known fields. */
386 if (*eax > 0x80000008)
387 *eax = 0x80000008;
388 break;
389 }
390}
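(Decoding those magic numbers, for the curious: 0x00002201 keeps ECX bits 0
(SSE3), 9 (SSSE3) and 13 (CMPXCHG16B); 0x07808101 keeps EDX bits 0 (FPU),
8 (CMPXCHG8B), 15 (CMOV), 23 (MMX), 24 (FXSR), 25 (SSE) and 26 (SSE2); and the
0x00002000 we OR in is EDX bit 13, the PGE bit the comment describes.)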
391
392/* Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4.
393 * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother
394 * it. The Host needs to know when the Guest wants to change them, so we have
395 * a whole series of functions like read_cr0() and write_cr0().
396 *
397 * We start with CR0. CR0 allows you to turn on and off all kinds of basic
398 * features, but Linux only really cares about one: the horrifically-named Task
399 * Switched (TS) bit at bit 3 (ie. 8)
400 *
401 * What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if
402 * the floating point unit is used. Which allows us to restore FPU state
403 * lazily after a task switch, and Linux uses that gratefully, but wouldn't a
404 * name like "FPUTRAP bit" be a little less cryptic?
405 *
406 * We store cr0 (and cr3) locally, because the Host never changes it. The
407 * Guest sometimes wants to read it and we'd prefer not to bother the Host
408 * unnecessarily. */
409static unsigned long current_cr0, current_cr3;
410static void lguest_write_cr0(unsigned long val)
411{
412 /* 8 == TS bit. */
413 lazy_hcall(LHCALL_TS, val & 8, 0, 0);
414 current_cr0 = val;
415}
416
417static unsigned long lguest_read_cr0(void)
418{
419 return current_cr0;
420}
421
422/* Intel provided a special instruction to clear the TS bit for people too cool
423 * to use write_cr0() to do it. This "clts" instruction is faster, because all
424 * the vowels have been optimized out. */
425static void lguest_clts(void)
426{
427 lazy_hcall(LHCALL_TS, 0, 0, 0);
428 current_cr0 &= ~8U;
429}
430
431/* CR2 is the virtual address of the last page fault, which the Guest only ever
432 * reads. The Host kindly writes this into our "struct lguest_data", so we
433 * just read it out of there. */
434static unsigned long lguest_read_cr2(void)
435{
436 return lguest_data.cr2;
437}
438
439/* CR3 is the current toplevel pagetable page: the principle is the same as
440 * cr0. Keep a local copy, and tell the Host when it changes. */
441static void lguest_write_cr3(unsigned long cr3)
442{
443 lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
444 current_cr3 = cr3;
445}
446
447static unsigned long lguest_read_cr3(void)
448{
449 return current_cr3;
450}
451
452/* CR4 is used to enable and disable PGE, but we don't care. */
453static unsigned long lguest_read_cr4(void)
454{
455 return 0;
456}
457
458static void lguest_write_cr4(unsigned long val)
459{
460}
461
462/*
463 * Page Table Handling.
464 *
465 * Now would be a good time to take a rest and grab a coffee or similarly
466 * relaxing stimulant. The easy parts are behind us, and the trek gradually
467 * winds uphill from here.
468 *
469 * Quick refresher: memory is divided into "pages" of 4096 bytes each. The CPU
470 * maps virtual addresses to physical addresses using "page tables". We could
471 * use one huge index of 1 million entries: each address is 4 bytes, so that's
472 * 1024 pages just to hold the page tables. But since most virtual addresses
473 * are unused, we use a two level index which saves space. The CR3 register
474 * contains the physical address of the top level "page directory" page, which
475 * contains physical addresses of up to 1024 second-level pages. Each of these
476 * second level pages contains up to 1024 physical addresses of actual pages,
477 * or Page Table Entries (PTEs).
478 *
479 * Here's a diagram, where arrows indicate physical addresses:
480 *
481 * CR3 ---> +---------+
482 * | --------->+---------+
483 * | | | PADDR1 |
484 * Top-level | | PADDR2 |
485 * (PMD) page | | |
486 * | | Lower-level |
487 * | | (PTE) page |
488 * | | | |
489 * .... ....
490 *
491 * So to convert a virtual address to a physical address, we look up the top
492 * level, which points us to the second level, which gives us the physical
493 * address of that page. If the top level entry was not present, or the second
494 * level entry was not present, then the virtual address is invalid (we
495 * say "the page was not mapped").
496 *
497 * Put another way, a 32-bit virtual address is divided up like so:
498 *
499 * 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
500 * |<---- 10 bits ---->|<---- 10 bits ---->|<------ 12 bits ------>|
501 * Index into top Index into second Offset within page
502 * page directory page pagetable page
503 *
504 * The kernel spends a lot of time changing both the top-level page directory
505 * and lower-level pagetable pages. The Guest doesn't know physical addresses,
506 * so while it maintains these page tables exactly like normal, it also needs
507 * to keep the Host informed whenever it makes a change: the Host will create
508 * the real page tables based on the Guests'.
509 */
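As a worked example of that lookup, here is a self-contained sketch of the
two-level walk; walk() and map_page() are invented names for illustration, not
kernel interfaces:

	#include <stdint.h>

	#define PRESENT 0x1u	/* low bit of an entry: "this entry is valid" */

	/* Translate a 32-bit virtual address using the scheme diagrammed
	 * above; map_page() stands in for whatever turns a physical page
	 * address into a pointer we can read. Returns 0 if unmapped. */
	static uint32_t walk(const uint32_t *pgdir, uint32_t vaddr,
			     uint32_t *(*map_page)(uint32_t paddr))
	{
		uint32_t pgd = pgdir[vaddr >> 22];	/* top 10 bits */
		const uint32_t *pte_page;
		uint32_t pte;

		if (!(pgd & PRESENT))
			return 0;			/* "not mapped" */
		pte_page = map_page(pgd & ~0xFFFu);
		pte = pte_page[(vaddr >> 12) & 0x3FF];	/* middle 10 bits */
		if (!(pte & PRESENT))
			return 0;
		return (pte & ~0xFFFu) | (vaddr & 0xFFF); /* page + offset */
	}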
510
511/* The Guest calls this to set a second-level entry (pte), ie. to map a page
512 * into a process' address space. We set the entry then tell the Host the
513 * toplevel and address this corresponds to. The Guest uses one pagetable per
514 * process, so we need to tell the Host which one we're changing (mm->pgd). */
515static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
516 pte_t *ptep, pte_t pteval)
517{
518 *ptep = pteval;
519 lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low);
520}
521
522/* The Guest calls this to set a top-level entry. Again, we set the entry then
523 * tell the Host which top-level page we changed, and the index of the entry we
524 * changed. */
525static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
526{
527 *pmdp = pmdval;
528 lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
529 (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
530}
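(The arguments deserve a word: __pa(pmdp) & PAGE_MASK is the physical address
of the page-directory page, and since each entry is 4 bytes,
(__pa(pmdp) & (PAGE_SIZE-1))/4 turns the entry's byte offset within that page
into its index, 0 through 1023.)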
531
532/* There are a couple of legacy places where the kernel sets a PTE, but we
533 * don't know the top level any more. This is useless for us, since we don't
534 * know which pagetable is changing or what address, so we just tell the Host
535 * to forget all of them. Fortunately, this is very rare.
536 *
537 * ... except in early boot when the kernel sets up the initial pagetables,
538 * which makes booting astonishingly slow. So we don't even tell the Host
539 * anything changed until we've done the first page table switch.
540 */
541static void lguest_set_pte(pte_t *ptep, pte_t pteval)
542{
543 *ptep = pteval;
544 /* Don't bother with hypercall before initial setup. */
545 if (current_cr3)
546 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
547}
548
549/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
550 * native page table operations. On native hardware you can set a new page
551 * table entry whenever you want, but if you want to remove one you have to do
552 * a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
553 *
554 * So the lguest_set_pte_at() and lguest_set_pmd() functions above are only
555 * called when a valid entry is written, not when it's removed (ie. marked not
556 * present). Instead, this is where we come when the Guest wants to remove a
557 * page table entry: we tell the Host to set that entry to 0 (ie. the present
558 * bit is zero). */
559static void lguest_flush_tlb_single(unsigned long addr)
560{
561 /* Simply set it to zero: if it was not, it will fault back in. */
562 lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
563}
564
565/* This is what happens after the Guest has removed a large number of entries.
566 * This tells the Host that any of the page table entries for userspace might
567 * have changed, ie. virtual addresses below PAGE_OFFSET. */
568static void lguest_flush_tlb_user(void)
569{
570 lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0);
571}
572
573/* This is called when the kernel page tables have changed. That's not very
574 * common (unless the Guest is using highmem, which makes the Guest extremely
575 * slow), so it's worth separating this from the user flushing above. */
576static void lguest_flush_tlb_kernel(void)
577{
578 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
579}
580
581/*
582 * The Unadvanced Programmable Interrupt Controller.
583 *
584 * This is an attempt to implement the simplest possible interrupt controller.
585 * I spent some time looking through routines like set_irq_chip_and_handler,
586 * set_irq_chip_and_handler_name, set_irq_chip_data and set_phasers_to_stun and
587 * I *think* this is as simple as it gets.
588 *
589 * We can tell the Host what interrupts we want blocked ready for using the
590 * lguest_data.interrupts bitmap, so disabling (aka "masking") them is as
591 * simple as setting a bit. We don't actually "ack" interrupts as such, we
592 * just mask and unmask them. I wonder if we should be cleverer?
593 */
594static void disable_lguest_irq(unsigned int irq)
595{
596 set_bit(irq, lguest_data.blocked_interrupts);
597}
598
599static void enable_lguest_irq(unsigned int irq)
600{
601 clear_bit(irq, lguest_data.blocked_interrupts);
602}
603
604/* This structure describes the lguest IRQ controller. */
605static struct irq_chip lguest_irq_controller = {
606 .name = "lguest",
607 .mask = disable_lguest_irq,
608 .mask_ack = disable_lguest_irq,
609 .unmask = enable_lguest_irq,
610};
611
612/* This sets up the Interrupt Descriptor Table (IDT) entry for each hardware
613 * interrupt (except 128, which is used for system calls), and then tells the
614 * Linux infrastructure that each interrupt is controlled by our level-based
615 * lguest interrupt controller. */
616static void __init lguest_init_IRQ(void)
617{
618 unsigned int i;
619
620 for (i = 0; i < LGUEST_IRQS; i++) {
621 int vector = FIRST_EXTERNAL_VECTOR + i;
622 if (vector != SYSCALL_VECTOR) {
623 set_intr_gate(vector, interrupt[i]);
624 set_irq_chip_and_handler(i, &lguest_irq_controller,
625 handle_level_irq);
626 }
627 }
628 /* This call is required to set up for 4k stacks, where we have
629 * separate stacks for hard and soft interrupts. */
630 irq_ctx_init(smp_processor_id());
631}
632
633/*
634 * Time.
635 *
636 * It would be far better for everyone if the Guest had its own clock, but
637 * until then the Host gives us the time on every interrupt.
638 */
639static unsigned long lguest_get_wallclock(void)
640{
641 return lguest_data.time.tv_sec;
642}
643
644static cycle_t lguest_clock_read(void)
645{
646 unsigned long sec, nsec;
647
648 /* If the Host tells us the TSC speed, we can trust that. */
649 if (lguest_data.tsc_khz)
650 return native_read_tsc();
651
652 /* If we can't use the TSC, we read the time value written by the Host.
653 * Since it's in two parts (seconds and nanoseconds), we risk reading
654 * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
655 * getting 99 and 0. As Linux tends to come apart under the stress of
656 * time travel, we must be careful: */
657 do {
658 /* First we read the seconds part. */
659 sec = lguest_data.time.tv_sec;
660 /* This read memory barrier tells the compiler and the CPU that
661 * this can't be reordered: we have to complete the above
662 * before going on. */
663 rmb();
664 /* Now we read the nanoseconds part. */
665 nsec = lguest_data.time.tv_nsec;
666 /* Make sure we've done that. */
667 rmb();
668 /* Now if the seconds part has changed, try again. */
669 } while (unlikely(lguest_data.time.tv_sec != sec));
670
671 /* Our non-TSC clock is in real nanoseconds. */
672 return sec*1000000000ULL + nsec;
673}
674
675/* This is what we tell the kernel is our clocksource. */
676static struct clocksource lguest_clock = {
677 .name = "lguest",
678 .rating = 400,
679 .read = lguest_clock_read,
680 .mask = CLOCKSOURCE_MASK(64),
681 .mult = 1 << 22,
682 .shift = 22,
683};
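Those mult and shift values look arbitrary, but the clocksource core converts
cycles to nanoseconds essentially like this simplified sketch (not the
kernel's exact helper):

	/* delta_ns = (delta_cycles * mult) >> shift. With mult = 1 << 22 and
	 * shift = 22 the conversion is an identity: just right for a clock
	 * whose "cycles" are already nanoseconds. */
	static inline unsigned long long cycles_to_ns(unsigned long long cyc,
						      unsigned mult,
						      unsigned shift)
	{
		return (cyc * mult) >> shift;
	}

When lguest_time_init() below finds a real TSC, it overwrites mult via
clocksource_khz2mult() so the same formula scales TSC ticks instead.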
684
685/* The "scheduler clock" is just our real clock, adjusted to start at zero */
686static unsigned long long lguest_sched_clock(void)
687{
688 return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
689}
690
691/* We also need a "struct clock_event_device": Linux asks us to set it to go
692 * off some time in the future. Actually, James Morris figured all this out, I
693 * just applied the patch. */
694static int lguest_clockevent_set_next_event(unsigned long delta,
695 struct clock_event_device *evt)
696{
697 if (delta < LG_CLOCK_MIN_DELTA) {
698 if (printk_ratelimit())
699 printk(KERN_DEBUG "%s: small delta %lu ns\n",
700 __FUNCTION__, delta);
701 return -ETIME;
702 }
703 hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0);
704 return 0;
705}
706
707static void lguest_clockevent_set_mode(enum clock_event_mode mode,
708 struct clock_event_device *evt)
709{
710 switch (mode) {
711 case CLOCK_EVT_MODE_UNUSED:
712 case CLOCK_EVT_MODE_SHUTDOWN:
713 /* A 0 argument shuts the clock down. */
714 hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0);
715 break;
716 case CLOCK_EVT_MODE_ONESHOT:
717 /* This is what we expect. */
718 break;
719 case CLOCK_EVT_MODE_PERIODIC:
720 BUG();
721 case CLOCK_EVT_MODE_RESUME:
722 break;
723 }
724}
725
726/* This describes our primitive timer chip. */
727static struct clock_event_device lguest_clockevent = {
728 .name = "lguest",
729 .features = CLOCK_EVT_FEAT_ONESHOT,
730 .set_next_event = lguest_clockevent_set_next_event,
731 .set_mode = lguest_clockevent_set_mode,
732 .rating = INT_MAX,
733 .mult = 1,
734 .shift = 0,
735 .min_delta_ns = LG_CLOCK_MIN_DELTA,
736 .max_delta_ns = LG_CLOCK_MAX_DELTA,
737};
738
739/* This is the Guest timer interrupt handler (hardware interrupt 0). We just
740 * call the clockevent infrastructure and it does whatever needs doing. */
741static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
742{
743 unsigned long flags;
744
745 /* Don't interrupt us while this is running. */
746 local_irq_save(flags);
747 lguest_clockevent.event_handler(&lguest_clockevent);
748 local_irq_restore(flags);
749}
750
751/* At some point in the boot process, we get asked to set up our timing
752 * infrastructure. The kernel doesn't expect timer interrupts before this, but
753 * we cleverly initialized the "blocked_interrupts" field of "struct
754 * lguest_data" so that timer interrupts were blocked until now. */
755static void lguest_time_init(void)
756{
757 /* Set up the timer interrupt (0) to go to our simple timer routine */
758 set_irq_handler(0, lguest_time_irq);
759
760 /* Our clock structure looks like arch/i386/kernel/tsc.c's if we can use
761 * the TSC, otherwise it's a dumb nanosecond-resolution clock. Either
762 * way, the "rating" is initialized so high that it's always chosen
763 * over any other clocksource. */
764 if (lguest_data.tsc_khz) {
765 lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
766 lguest_clock.shift);
767 lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS;
768 }
769 clock_base = lguest_clock_read();
770 clocksource_register(&lguest_clock);
771
772 /* Now we've set up our clock, we can use it as the scheduler clock */
773 pv_time_ops.sched_clock = lguest_sched_clock;
774
775 /* We can't set cpumask in the initializer: damn C limitations! Set it
776 * here and register our timer device. */
777 lguest_clockevent.cpumask = cpumask_of_cpu(0);
778 clockevents_register_device(&lguest_clockevent);
779
780 /* Finally, we unblock the timer interrupt. */
781 enable_lguest_irq(0);
782}
783
784/*
785 * Miscellaneous bits and pieces.
786 *
787 * Here is an oddball collection of functions which the Guest needs for things
788 * to work. They're pretty simple.
789 */
790
791/* The Guest needs to tell the host what stack it expects traps to use. For
792 * native hardware, this is part of the Task State Segment mentioned above in
793 * lguest_load_tr_desc(), but to help hypervisors there's this special call.
794 *
795 * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data
796 * segment), the privilege level (we're privilege level 1, the Host is 0 and
797 * will not tolerate us trying to use that), the stack pointer, and the number
798 * of pages in the stack. */
799static void lguest_load_esp0(struct tss_struct *tss,
800 struct thread_struct *thread)
801{
802 lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->esp0,
803 THREAD_SIZE/PAGE_SIZE);
804}
805
806/* Let's just say, I wouldn't do debugging under a Guest. */
807static void lguest_set_debugreg(int regno, unsigned long value)
808{
809 /* FIXME: Implement */
810}
811
812/* There are times when the kernel wants to make sure that no memory writes are
813 * caught in the cache (that they've all reached real hardware devices). This
814 * doesn't matter for the Guest which has virtual hardware.
815 *
816 * On the Pentium 4 and above, cpuid() indicates that the Cache Line Flush
817 * (clflush) instruction is available and the kernel uses that. Otherwise, it
818 * uses the older "Write Back and Invalidate Cache" (wbinvd) instruction.
819 * Unlike clflush, wbinvd can only be run at privilege level 0. So we can
820 * ignore clflush, but replace wbinvd.
821 */
822static void lguest_wbinvd(void)
823{
824}
825
826/* If the Guest expects to have an Advanced Programmable Interrupt Controller,
827 * we play dumb by ignoring writes and returning 0 for reads. So it's no
828 * longer Programmable nor Controlling anything, and I don't think 8 lines of
829 * code qualifies for Advanced. It will also never interrupt anything. It
830 * does, however, allow us to get through the Linux boot code. */
831#ifdef CONFIG_X86_LOCAL_APIC
832static void lguest_apic_write(unsigned long reg, unsigned long v)
833{
834}
835
836static unsigned long lguest_apic_read(unsigned long reg)
837{
838 return 0;
839}
840#endif
841
842/* STOP! Until an interrupt comes in. */
843static void lguest_safe_halt(void)
844{
845 hcall(LHCALL_HALT, 0, 0, 0);
846}
847
848/* Perhaps CRASH isn't the best name for this hypercall, but we use it to get a
849 * message out when we're crashing as well as elegant termination like powering
850 * off.
851 *
852 * Note that the Host always prefers that the Guest speak in physical addresses
853 * rather than virtual addresses, so we use __pa() here. */
854static void lguest_power_off(void)
855{
856 hcall(LHCALL_CRASH, __pa("Power down"), 0, 0);
857}
858
859/*
860 * Panicing.
861 *
862 * Don't. But if you did, this is what happens.
863 */
864static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
865{
866 hcall(LHCALL_CRASH, __pa(p), 0, 0);
867 /* The hcall won't return, but to keep gcc happy, we're "done". */
868 return NOTIFY_DONE;
869}
870
871static struct notifier_block paniced = {
872 .notifier_call = lguest_panic
873};
874
875/* Setting up memory is fairly easy. */
876static __init char *lguest_memory_setup(void)
877{
878 /* We do this here and not earlier because lockcheck barfs if we do it
879 * before start_kernel() */
880 atomic_notifier_chain_register(&panic_notifier_list, &paniced);
881
882 /* The Linux bootloader header contains an "e820" memory map: the
883 * Launcher populated the first entry with our memory limit. */
884 add_memory_region(boot_params.e820_map[0].addr,
885 boot_params.e820_map[0].size,
886 boot_params.e820_map[0].type);
887
888 /* This string is for the boot messages. */
889 return "LGUEST";
890}
891
892/*G:050
893 * Patching (Powerfully Placating Performance Pedants)
894 *
895 * We have already seen that pv_ops structures let us replace simple
896 * native instructions with calls to the appropriate back end all throughout
897 * the kernel. This allows the same kernel to run as a Guest and as a native
898 * kernel, but it's slow because of all the indirect branches.
899 *
900 * Remember that David Wheeler quote about "Any problem in computer science can
901 * be solved with another layer of indirection"? The rest of that quote is
902 * "... But that usually will create another problem." This is the first of
903 * those problems.
904 *
905 * Our current solution is to allow the paravirt back end to optionally patch
906 * over the indirect calls to replace them with something more efficient. We
907 * patch the four most commonly called functions: disable interrupts, enable
908 * interrupts, restore interrupts and save interrupts. We usually have 10
909 * bytes to patch into: the Guest versions of these operations are small enough
910 * that we can fit comfortably.
911 *
912 * First we need assembly templates of each of the patchable Guest operations,
913 * and these are in lguest_asm.S. */
914
915/*G:060 We construct a table from the assembler templates: */
916static const struct lguest_insns
917{
918 const char *start, *end;
919} lguest_insns[] = {
920 [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
921 [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
922 [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
923 [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
924};
925
926/* Now our patch routine is fairly simple (based on the native one in
927 * paravirt.c). If we have a replacement, we copy it in and return how much of
928 * the available space we used. */
929static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
930 unsigned long addr, unsigned len)
931{
932 unsigned int insn_len;
933
934 /* Don't do anything special if we don't have a replacement */
935 if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)
936 return paravirt_patch_default(type, clobber, ibuf, addr, len);
937
938 insn_len = lguest_insns[type].end - lguest_insns[type].start;
939
940 /* Similarly if we can't fit replacement (shouldn't happen, but let's
941 * be thorough). */
942 if (len < insn_len)
943 return paravirt_patch_default(type, clobber, ibuf, addr, len);
944
945 /* Copy in our instructions. */
946 memcpy(ibuf, lguest_insns[type].start, insn_len);
947 return insn_len;
948}
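Concretely (and roughly): at each patchable site the compiler emitted an
indirect call through the pv_ops structure; lguest_patch() overwrites those
bytes with the template between the lgstart_*/lgend_* markers, so for example
a call to irq_disable becomes the single
movl $0, lguest_data+LGUEST_DATA_irq_enabled instruction from lguest_asm.S,
with no branch at all.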
949
950/*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops
951 * structures in the kernel provide points for (almost) every routine we have
952 * to override to avoid privileged instructions. */
953__init void lguest_init(void *boot)
954{
955 /* Copy boot parameters first: the Launcher put the physical location
956 * in %esi, and head.S converted that to a virtual address and handed
957 * it to us. We use "__memcpy" because "memcpy" sometimes tries to do
958 * tricky things to go faster, and we're not ready for that. */
959 __memcpy(&boot_params, boot, PARAM_SIZE);
960 /* The boot parameters also tell us where the command-line is: save
961 * that, too. */
962 __memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr),
963 COMMAND_LINE_SIZE);
964
965 /* We're under lguest, paravirt is enabled, and we're running at
966 * privilege level 1, not 0 as normal. */
967 pv_info.name = "lguest";
968 pv_info.paravirt_enabled = 1;
969 pv_info.kernel_rpl = 1;
970
971 /* We set up all the lguest overrides for sensitive operations. These
972 * are detailed with the operations themselves. */
973
974 /* interrupt-related operations */
975 pv_irq_ops.init_IRQ = lguest_init_IRQ;
976 pv_irq_ops.save_fl = save_fl;
977 pv_irq_ops.restore_fl = restore_fl;
978 pv_irq_ops.irq_disable = irq_disable;
979 pv_irq_ops.irq_enable = irq_enable;
980 pv_irq_ops.safe_halt = lguest_safe_halt;
981
982 /* init-time operations */
983 pv_init_ops.memory_setup = lguest_memory_setup;
984 pv_init_ops.patch = lguest_patch;
985
986 /* Intercepts of various cpu instructions */
987 pv_cpu_ops.load_gdt = lguest_load_gdt;
988 pv_cpu_ops.cpuid = lguest_cpuid;
989 pv_cpu_ops.load_idt = lguest_load_idt;
990 pv_cpu_ops.iret = lguest_iret;
991 pv_cpu_ops.load_esp0 = lguest_load_esp0;
992 pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
993 pv_cpu_ops.set_ldt = lguest_set_ldt;
994 pv_cpu_ops.load_tls = lguest_load_tls;
995 pv_cpu_ops.set_debugreg = lguest_set_debugreg;
996 pv_cpu_ops.clts = lguest_clts;
997 pv_cpu_ops.read_cr0 = lguest_read_cr0;
998 pv_cpu_ops.write_cr0 = lguest_write_cr0;
999 pv_cpu_ops.read_cr4 = lguest_read_cr4;
1000 pv_cpu_ops.write_cr4 = lguest_write_cr4;
1001 pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
1002 pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
1003 pv_cpu_ops.wbinvd = lguest_wbinvd;
1004 pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
1005 pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
1006
1007 /* pagetable management */
1008 pv_mmu_ops.write_cr3 = lguest_write_cr3;
1009 pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user;
1010 pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single;
1011 pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
1012 pv_mmu_ops.set_pte = lguest_set_pte;
1013 pv_mmu_ops.set_pte_at = lguest_set_pte_at;
1014 pv_mmu_ops.set_pmd = lguest_set_pmd;
1015 pv_mmu_ops.read_cr2 = lguest_read_cr2;
1016 pv_mmu_ops.read_cr3 = lguest_read_cr3;
1017 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
1018 pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
1019
1020#ifdef CONFIG_X86_LOCAL_APIC
1021 /* apic read/write intercepts */
1022 pv_apic_ops.apic_write = lguest_apic_write;
1023 pv_apic_ops.apic_write_atomic = lguest_apic_write;
1024 pv_apic_ops.apic_read = lguest_apic_read;
1025#endif
1026
1027 /* time operations */
1028 pv_time_ops.get_wallclock = lguest_get_wallclock;
1029 pv_time_ops.time_init = lguest_time_init;
1030
1031 /* Now is a good time to look at the implementations of these functions
1032 * before returning to the rest of lguest_init(). */
1033
1034 /*G:070 Now we've seen all the paravirt_ops, we return to
1035 * lguest_init() where the rest of the fairly chaotic boot setup
1036 * occurs.
1037 *
1038 * The Host expects our first hypercall to tell it where our "struct
1039 * lguest_data" is, so we do that first. */
1040 hcall(LHCALL_LGUEST_INIT, __pa(&lguest_data), 0, 0);
1041
1042 /* The native boot code sets up initial page tables immediately after
1043 * the kernel itself, and sets init_pg_tables_end so they're not
1044 * clobbered. The Launcher places our initial pagetables somewhere at
1045 * the top of our physical memory, so we don't need extra space: set
1046 * init_pg_tables_end to the end of the kernel. */
1047 init_pg_tables_end = __pa(pg0);
1048
1049 /* Load the %fs segment register (the per-cpu segment register) with
1050 * the normal data segment to get through booting. */
1051 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
1052
1053 /* Clear the part of the kernel data which is expected to be zero.
1054 * Normally it will be anyway, but if we're loading from a bzImage with
1055 * CONFIG_RELOCATABLE=y, the relocations will be sitting here. */
1056 memset(__bss_start, 0, __bss_stop - __bss_start);
1057
1058 /* The Host uses the top of the Guest's virtual address space for the
1059 * Host<->Guest Switcher, and it tells us how much it needs in
1060 * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */
1061 reserve_top_address(lguest_data.reserve_mem);
1062
1063 /* If we don't initialize the lock dependency checker now, it crashes
1064 * paravirt_disable_iospace. */
1065 lockdep_init();
1066
1067 /* The IDE code spends about 3 seconds probing for disks: if we reserve
1068 * all the I/O ports up front it can't get them and so doesn't probe.
1069 * Other device drivers are similar (but less severe). This cuts the
1070 * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. */
1071 paravirt_disable_iospace();
1072
1073 /* This is messy CPU setup stuff which the native boot code does before
1074 * start_kernel, so we have to do, too: */
1075 cpu_detect(&new_cpu_data);
1076 /* head.S usually sets up the first capability word, so do it here. */
1077 new_cpu_data.x86_capability[0] = cpuid_edx(1);
1078
1079 /* Math is always hard! */
1080 new_cpu_data.hard_math = 1;
1081
1082#ifdef CONFIG_X86_MCE
1083 mce_disabled = 1;
1084#endif
1085#ifdef CONFIG_ACPI
1086 acpi_disabled = 1;
1087 acpi_ht = 0;
1088#endif
1089
1090 /* We set the preferred console to "hvc". This is the "hypervisor
1091 * virtual console" driver written by the PowerPC people, which we also
1092 * adapted for lguest's use. */
1093 add_preferred_console("hvc", 0, NULL);
1094
1095 /* Last of all, we set the power management poweroff hook to point to
1096 * the Guest routine to power off. */
1097 pm_power_off = lguest_power_off;
1098
1099 /* Now we're set up, call start_kernel() in init/main.c and we proceed
1100 * to boot as normal. It never returns. */
1101 start_kernel();
1102}
1103/*
1104 * This marks the end of stage II of our journey, The Guest.
1105 *
1106 * It is now time for us to explore the nooks and crannies of the three Guest
1107 * devices and complete our understanding of the Guest in "make Drivers".
1108 */
diff --git a/drivers/lguest/lguest_asm.S b/drivers/lguest/lguest_asm.S
deleted file mode 100644
index 1ddcd5cd20..0000000000
--- a/drivers/lguest/lguest_asm.S
+++ /dev/null
@@ -1,93 +0,0 @@
1#include <linux/linkage.h>
2#include <linux/lguest.h>
3#include <asm/asm-offsets.h>
4#include <asm/thread_info.h>
5#include <asm/processor-flags.h>
6
7/*G:020 This is where we begin: we have a magic signature which the launcher
8 * looks for. The plan is that the Linux boot protocol will be extended with a
9 * "platform type" field which will guide us here from the normal entry point,
10 * but for the moment this suffices. The normal boot code uses %esi for the
11 * boot header, so we do too. We convert it to a virtual address by adding
12 * PAGE_OFFSET, and hand it to lguest_init() as its argument (ie. %eax).
13 *
14 * The .section line puts this code in .init.text so it will be discarded after
15 * boot. */
16.section .init.text, "ax", @progbits
17.ascii "GenuineLguest"
18 /* Set up initial stack. */
19 movl $(init_thread_union+THREAD_SIZE),%esp
20 movl %esi, %eax
21 addl $__PAGE_OFFSET, %eax
22 jmp lguest_init
23
24/*G:055 We create a macro which puts the assembler code between lgstart_ and
25 * lgend_ markers. These templates are put in the .text section: they can't be
26 * discarded after boot as we may need to patch modules, too. */
27.text
28#define LGUEST_PATCH(name, insns...) \
29 lgstart_##name: insns; lgend_##name:; \
30 .globl lgstart_##name; .globl lgend_##name
31
32LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
33LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
34LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
35LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
36/*:*/
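For instance, the first LGUEST_PATCH line expands to:

	lgstart_cli: movl $0, lguest_data+LGUEST_DATA_irq_enabled; lgend_cli:;
	.globl lgstart_cli; .globl lgend_cli

giving lguest_patch() in lguest.c a labelled start and end for each template.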
37
38/* These demarcate the EIP range where the Host should never deliver interrupts. */
39.global lguest_noirq_start
40.global lguest_noirq_end
41
42/*M:004 When the Host reflects a trap or injects an interrupt into the Guest,
43 * it sets the eflags interrupt bit on the stack based on
44 * lguest_data.irq_enabled, so the Guest iret logic does the right thing when
45 * restoring it. However, when the Host sets the Guest up for direct traps,
46 * such as system calls, the processor is the one to push eflags onto the
47 * stack, and the interrupt bit will be 1 (in reality, interrupts are always
48 * enabled in the Guest).
49 *
50 * This turns out to be harmless: the only trap which should happen under Linux
51 * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc
52 * regions), which has to be reflected through the Host anyway. If another
53 * trap *does* go off when interrupts are disabled, the Guest will panic, and
54 * we'll never get to this iret! :*/
55
56/*G:045 There is one final paravirt_op that the Guest implements, and glancing
57 * at it you can see why I left it to last. It's *cool*! It's in *assembler*!
58 *
59 * The "iret" instruction is used to return from an interrupt or trap. The
60 * stack looks like this:
61 * old address
62 * old code segment & privilege level
63 * old processor flags ("eflags")
64 *
65 * The "iret" instruction pops those values off the stack and restores them all
66 * at once. The only problem is that eflags includes the Interrupt Flag which
67 * the Guest can't change: the CPU will simply ignore it when we do an "iret".
68 * So we have to copy eflags from the stack to lguest_data.irq_enabled before
69 * we do the "iret".
70 *
71 * There are two problems with this: firstly, we need to use a register to do
72 * the copy and secondly, the whole thing needs to be atomic. The first
73 * problem is easy to solve: push %eax on the stack so we can use it, and then
74 * restore it at the end just before the real "iret".
75 *
76 * The second is harder: copying eflags to lguest_data.irq_enabled will turn
77 * interrupts on before we're finished, so we could be interrupted before we
78 * return to userspace or wherever. Our solution to this is to surround the
79 * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the
80 * Host that it is *never* to interrupt us there, even if interrupts seem to be
81 * enabled. */
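(One detail worth spelling out: after the pushl %eax below, the stack holds
the saved %eax at 0(%esp), the return address at 4(%esp), the code segment at
8(%esp) and the old eflags at 12(%esp), which is why the copy reads 12(%esp).)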
82ENTRY(lguest_iret)
83 pushl %eax
84 movl 12(%esp), %eax
85lguest_noirq_start:
86 /* Note the %ss: segment prefix here. Normal data accesses use the
87 * "ds" segment, but that will have already been restored for whatever
88 * we're returning to (such as userspace): we can't trust it. The %ss:
89 * prefix makes sure we use the stack segment, which is still valid. */
90 movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
91 popl %eax
92 iret
93lguest_noirq_end:
diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c
deleted file mode 100644
index 57329788f8..0000000000
--- a/drivers/lguest/lguest_bus.c
+++ /dev/null
@@ -1,218 +0,0 @@
1/*P:050 Lguest guests use a very simple bus for devices. It's a simple array
2 * of device descriptors contained just above the top of normal memory. The
3 * lguest bus is 80% tedious boilerplate code. :*/
4#include <linux/init.h>
5#include <linux/bootmem.h>
6#include <linux/lguest_bus.h>
7#include <asm/io.h>
8#include <asm/paravirt.h>
9
10static ssize_t type_show(struct device *_dev,
11 struct device_attribute *attr, char *buf)
12{
13 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
14 return sprintf(buf, "%hu", lguest_devices[dev->index].type);
15}
16static ssize_t features_show(struct device *_dev,
17 struct device_attribute *attr, char *buf)
18{
19 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
20 return sprintf(buf, "%hx", lguest_devices[dev->index].features);
21}
22static ssize_t pfn_show(struct device *_dev,
23 struct device_attribute *attr, char *buf)
24{
25 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
26 return sprintf(buf, "%u", lguest_devices[dev->index].pfn);
27}
28static ssize_t status_show(struct device *_dev,
29 struct device_attribute *attr, char *buf)
30{
31 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
32 return sprintf(buf, "%hx", lguest_devices[dev->index].status);
33}
34static ssize_t status_store(struct device *_dev, struct device_attribute *attr,
35 const char *buf, size_t count)
36{
37 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
38 if (sscanf(buf, "%hi", &lguest_devices[dev->index].status) != 1)
39 return -EINVAL;
40 return count;
41}
42static struct device_attribute lguest_dev_attrs[] = {
43 __ATTR_RO(type),
44 __ATTR_RO(features),
45 __ATTR_RO(pfn),
46 __ATTR(status, 0644, status_show, status_store),
47 __ATTR_NULL
48};
49
50/*D:130 The generic bus infrastructure requires a function which says whether a
51 * device matches a driver. For us, it is simple: "struct lguest_driver"
52 * contains a "device_type" field which indicates what type of device it can
53 * handle, so we just cast the args and compare: */
54static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
55{
56 struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
57 struct lguest_driver *drv = container_of(_drv,struct lguest_driver,drv);
58
59 return (drv->device_type == lguest_devices[dev->index].type);
60}
61/*:*/
62
63struct lguest_bus {
64 struct bus_type bus;
65 struct device dev;
66};
67
68static struct lguest_bus lguest_bus = {
69 .bus = {
70 .name = "lguest",
71 .match = lguest_dev_match,
72 .dev_attrs = lguest_dev_attrs,
73 },
74 .dev = {
75 .parent = NULL,
76 .bus_id = "lguest",
77 }
78};
79
80/*D:140 This is the callback which occurs once the bus infrastructure matches
81 * up a device and driver, ie. in response to add_lguest_device() calling
82 * device_register(), or register_lguest_driver() calling driver_register().
83 *
84 * At the moment it's always the latter: the devices are added first, since
85 * scan_devices() is called from a "core_initcall", and the drivers themselves
86 * called later as a normal "initcall". But it would work the other way too.
87 *
88 * So now we have the happy couple, we add the status bit to indicate that we
89 * found a driver. If the driver truly loves the device, it will return
90 * happiness from its probe function (ok, perhaps this wasn't my greatest
91 * analogy), and we set the final "driver ok" bit so the Host sees it's all
92 * green. */
93static int lguest_dev_probe(struct device *_dev)
94{
95 int ret;
96 struct lguest_device*dev = container_of(_dev,struct lguest_device,dev);
97 struct lguest_driver*drv = container_of(dev->dev.driver,
98 struct lguest_driver, drv);
99
100 lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER;
101 ret = drv->probe(dev);
102 if (ret == 0)
103 lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER_OK;
104 return ret;
105}
106
107/* The last part of the bus infrastructure is the function lguest drivers use
108 * to register themselves. Firstly, we do nothing if there's no lguest bus
109 * (ie. this is not a Guest), otherwise we fill in the embedded generic "struct
110 * driver" fields and call the generic driver_register(). */
111int register_lguest_driver(struct lguest_driver *drv)
112{
113 if (!lguest_devices)
114 return 0;
115
116 drv->drv.bus = &lguest_bus.bus;
117 drv->drv.name = drv->name;
118 drv->drv.owner = drv->owner;
119 drv->drv.probe = lguest_dev_probe;
120
121 return driver_register(&drv->drv);
122}
123
124/* At the moment we build all the drivers into the kernel because they're so
125 * simple: 8144 bytes for all three of them as I type this. And as the console
126 * really needs to be built in, it's actually only 3527 bytes for the network
127 * and block drivers.
128 *
129 * If they get complex it will make sense for them to be modularized, so we
130 * need to explicitly export the symbol.
131 *
132 * I don't think non-GPL modules make sense, so it's a GPL-only export.
133 */
134EXPORT_SYMBOL_GPL(register_lguest_driver);
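For flavour, a driver would use this roughly as follows (a hypothetical
skeleton with invented "demo" names; the real console, network and block
drivers live elsewhere in the tree):

	/* Hypothetical skeleton of an lguest driver registration. */
	static int demo_probe(struct lguest_device *lgdev)
	{
		/* Set up the device; returning 0 means "happiness" (see above). */
		return 0;
	}

	static struct lguest_driver demo_drv = {
		.name		= "demo",
		.owner		= THIS_MODULE,
		.device_type	= 4,	/* made-up device type number */
		.probe		= demo_probe,
	};

	static int __init demo_init(void)
	{
		return register_lguest_driver(&demo_drv);
	}
	module_init(demo_init);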
135
136/*D:120 This is the core of the lguest bus: actually adding a new device.
137 * It's a separate function because it's neater that way, and because an
138 * earlier version of the code supported hotplug and unplug. They were removed
139 * early on because they were never used.
140 *
141 * As Andrew Tridgell says, "Untested code is buggy code".
142 *
143 * It's worth reading this carefully: we start with an index into the array of
144 * "struct lguest_device_desc"s indicating the device which is new: */
145static void add_lguest_device(unsigned int index)
146{
147 struct lguest_device *new;
148
149 /* Each "struct lguest_device_desc" has a "status" field, which the
150 * Guest updates as the device is probed. In the worst case, the Host
151 * can look at these bits to tell what part of device setup failed,
152 * even if the console isn't available. */
153 lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE;
154 new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL);
155 if (!new) {
156 printk(KERN_EMERG "Cannot allocate lguest device %u\n", index);
157 lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
158 return;
159 }
160
161 /* The "struct lguest_device" setup is pretty straightforward example
162 * code. */
163 new->index = index;
164 new->private = NULL;
165 memset(&new->dev, 0, sizeof(new->dev));
166 new->dev.parent = &lguest_bus.dev;
167 new->dev.bus = &lguest_bus.bus;
168 sprintf(new->dev.bus_id, "%u", index);
169
170 /* device_register() causes the bus infrastructure to look for a
171 * matching driver. */
172 if (device_register(&new->dev) != 0) {
173 printk(KERN_EMERG "Cannot register lguest device %u\n", index);
174 lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
175 kfree(new);
176 }
177}
178
179/*D:110 scan_devices() simply iterates through the device array. The type 0
180 * is reserved to mean "no device", and anything else means we have found a
181 * device: add it. */
182static void scan_devices(void)
183{
184 unsigned int i;
185
186 for (i = 0; i < LGUEST_MAX_DEVICES; i++)
187 if (lguest_devices[i].type)
188 add_lguest_device(i);
189}
190
191/*D:100 Fairly early in boot, lguest_bus_init() is called to set up the lguest
 192 * bus.  We check that we are a Guest by checking pv_info.name: there are
193 * other ways of checking, but this seems most obvious to me.
194 *
195 * So we can access the array of "struct lguest_device_desc"s easily, we map
196 * that memory and store the pointer in the global "lguest_devices". Then we
197 * register the bus with the core. Doing two registrations seems clunky to me,
198 * but it seems to be the correct sysfs incantation.
199 *
200 * Finally we call scan_devices() which adds all the devices found in the
201 * "struct lguest_device_desc" array. */
202static int __init lguest_bus_init(void)
203{
204 if (strcmp(pv_info.name, "lguest") != 0)
205 return 0;
206
207 /* Devices are in a single page above top of "normal" mem */
208 lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1);
209
210 if (bus_register(&lguest_bus.bus) != 0
211 || device_register(&lguest_bus.dev) != 0)
212 panic("lguest bus registration failed");
213
214 scan_devices();
215 return 0;
216}
217/* Do this after core stuff, before devices. */
218postcore_initcall(lguest_bus_init);
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
new file mode 100644
index 0000000000..71c64837b4
--- /dev/null
+++ b/drivers/lguest/lguest_device.c
@@ -0,0 +1,373 @@
1/*P:050 Lguest guests use a very simple method to describe devices. It's a
2 * series of device descriptors contained just above the top of normal
3 * memory.
4 *
5 * We use the standard "virtio" device infrastructure, which provides us with a
6 * console, a network and a block driver. Each one expects some configuration
7 * information and a "virtqueue" mechanism to send and receive data. :*/
8#include <linux/init.h>
9#include <linux/bootmem.h>
10#include <linux/lguest_launcher.h>
11#include <linux/virtio.h>
12#include <linux/virtio_config.h>
13#include <linux/interrupt.h>
14#include <linux/virtio_ring.h>
15#include <linux/err.h>
16#include <asm/io.h>
17#include <asm/paravirt.h>
18#include <asm/lguest_hcall.h>
19
21/* The pointer to our page of device descriptions. */
21static void *lguest_devices;
22
23/* Unique numbering for lguest devices. */
24static unsigned int dev_index;
25
26/* For Guests, device memory can be used as normal memory, so we cast away the
27 * __iomem to quieten sparse. */
28static inline void *lguest_map(unsigned long phys_addr, unsigned long pages)
29{
30 return (__force void *)ioremap(phys_addr, PAGE_SIZE*pages);
31}
32
33static inline void lguest_unmap(void *addr)
34{
35 iounmap((__force void __iomem *)addr);
36}
37
38/*D:100 Each lguest device is just a virtio device plus a pointer to its entry
39 * in the lguest_devices page. */
40struct lguest_device {
41 struct virtio_device vdev;
42
43 /* The entry in the lguest_devices page for this device. */
44 struct lguest_device_desc *desc;
45};
46
47/* Since the virtio infrastructure hands us a pointer to the virtio_device all
48 * the time, it helps to have a curt macro to get a pointer to the struct
49 * lguest_device it's enclosed in. */
50#define to_lgdev(vdev) container_of(vdev, struct lguest_device, vdev)
51
52/*D:130
53 * Device configurations
54 *
55 * The configuration information for a device consists of a series of fields.
56 * The device will look for these fields during setup.
57 *
58 * For us these fields come immediately after that device's descriptor in the
59 * lguest_devices page.
60 *
61 * Each field starts with a "type" byte, a "length" byte, then that number of
62 * bytes of configuration information. The device descriptor tells us the
63 * total configuration length so we know when we've reached the last field. */
64
65/* type + length bytes */
66#define FHDR_LEN 2
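
To make the layout concrete, a config area holding a single field might look like this (a sketch only: the type value 1 is invented for illustration):

	/* type = 1, length = 4, then four bytes of data: */
	u8 config[] = { 1, 4, 0x00, 0x10, 0x00, 0x00 };

	/* This field occupies FHDR_LEN + config[1] == 6 bytes, so if the
	 * descriptor's config_len says there's more, the next field's
	 * header starts at config[6]. */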
67
68/* This finds the first field of a given type for a device's configuration. */
69static void *lg_find(struct virtio_device *vdev, u8 type, unsigned int *len)
70{
71 struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
72 int i;
73
74 for (i = 0; i < desc->config_len; i += FHDR_LEN + desc->config[i+1]) {
75 if (desc->config[i] == type) {
76			/* Mark it used, so the Host can know we looked at it, and
77 * also so we won't find the same one twice. */
78 desc->config[i] |= 0x80;
79 /* Remember, the second byte is the length. */
80 *len = desc->config[i+1];
81 /* We return a pointer to the field header. */
82 return desc->config + i;
83 }
84 }
85
86 /* Not found: return NULL for failure. */
87 return NULL;
88}
89
90/* Once they've found a field, getting a copy of it is easy. */
91static void lg_get(struct virtio_device *vdev, void *token,
92 void *buf, unsigned len)
93{
94 /* Check they didn't ask for more than the length of the field! */
95 BUG_ON(len > ((u8 *)token)[1]);
96 memcpy(buf, token + FHDR_LEN, len);
97}
98
99/* Setting the contents is also trivial. */
100static void lg_set(struct virtio_device *vdev, void *token,
101 const void *buf, unsigned len)
102{
103 BUG_ON(len > ((u8 *)token)[1]);
104 memcpy(token + FHDR_LEN, buf, len);
105}
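
Putting find, get and set together, a driver's probe routine might fetch a field roughly like this (a hedged sketch: EXAMPLE_FIELD_TYPE is a made-up constant, not a real virtio config type):

	u32 val;
	unsigned int len;
	void *token = vdev->config->find(vdev, EXAMPLE_FIELD_TYPE, &len);
	if (token && len >= sizeof(val))
		vdev->config->get(vdev, token, &val, sizeof(val));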
106
107/* The operations to get and set the status word just access the status field
108 * of the device descriptor. */
109static u8 lg_get_status(struct virtio_device *vdev)
110{
111 return to_lgdev(vdev)->desc->status;
112}
113
114static void lg_set_status(struct virtio_device *vdev, u8 status)
115{
116 to_lgdev(vdev)->desc->status = status;
117}
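
The virtio core drives the same handshake we saw in the bus code through these two hooks, ORing in VIRTIO_CONFIG_S_ACKNOWLEDGE, then VIRTIO_CONFIG_S_DRIVER, and finally VIRTIO_CONFIG_S_DRIVER_OK. The last step looks roughly like this sketch:

	vdev->config->set_status(vdev, vdev->config->get_status(vdev)
				       | VIRTIO_CONFIG_S_DRIVER_OK);

Since lg_set_status() writes straight into the descriptor, the Host can watch that byte to see how far device setup got.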
118
119/*
120 * Virtqueues
121 *
122 * The other piece of infrastructure virtio needs is a "virtqueue": a way of
123 * the Guest device registering buffers for the other side to read from or
124 * write into (ie. send and receive buffers). Each device can have multiple
125 * virtqueues: for example the console has one queue for sending and one for
126 * receiving.
127 *
128 * Fortunately for us, a very fast shared-memory-plus-descriptors virtqueue
129 * already exists in virtio_ring.c. We just need to connect it up.
130 *
131 * We start with the information we need to keep about each virtqueue.
132 */
133
134/*D:140 This is the information we remember about each virtqueue. */
135struct lguest_vq_info
136{
137 /* A copy of the information contained in the device config. */
138 struct lguest_vqconfig config;
139
140 /* The address where we mapped the virtio ring, so we can unmap it. */
141 void *pages;
142};
143
144/* When the virtio_ring code wants to prod the Host, it calls us here and we
145 * make a hypercall. We hand the page number of the virtqueue so the Host
146 * knows which virtqueue we're talking about. */
147static void lg_notify(struct virtqueue *vq)
148{
149 /* We store our virtqueue information in the "priv" pointer of the
150 * virtqueue structure. */
151 struct lguest_vq_info *lvq = vq->priv;
152
153 hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0);
154}
155
156/* This routine finds the first virtqueue described in the configuration of
157 * this device and sets it up.
158 *
159 * This is kind of an ugly duckling. It'd be nicer to have a standard
160 * representation of a virtqueue in the configuration space, but it seems that
161 * everyone wants to do it differently. The KVM guys want the Guest to
162 * allocate its own pages and tell the Host where they are, but for lguest it's
163 * simpler for the Host to simply tell us where the pages are.
164 *
165 * So we provide devices with a "find virtqueue and set it up" function. */
166static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
167 bool (*callback)(struct virtqueue *vq))
168{
169 struct lguest_vq_info *lvq;
170 struct virtqueue *vq;
171 unsigned int len;
172 void *token;
173 int err;
174
175 /* Look for a field of the correct type to mark a virtqueue. Note that
176 * if this succeeds, then the type will be changed so it won't be found
177 * again, and future lg_find_vq() calls will find the next
178 * virtqueue (if any). */
179 token = vdev->config->find(vdev, VIRTIO_CONFIG_F_VIRTQUEUE, &len);
180 if (!token)
181 return ERR_PTR(-ENOENT);
182
183 lvq = kmalloc(sizeof(*lvq), GFP_KERNEL);
184 if (!lvq)
185 return ERR_PTR(-ENOMEM);
186
187 /* Note: we could use a configuration space inside here, just like we
188 * do for the device. This would allow expansion in future, because
189	 * our configuration system is designed to be extensible.  But this is
190 * way easier. */
191 if (len != sizeof(lvq->config)) {
192 dev_err(&vdev->dev, "Unexpected virtio config len %u\n", len);
193 err = -EIO;
194 goto free_lvq;
195 }
196 /* Make a copy of the "struct lguest_vqconfig" field. We need a copy
197 * because the config space might not be aligned correctly. */
198 vdev->config->get(vdev, token, &lvq->config, sizeof(lvq->config));
199
200 /* Figure out how many pages the ring will take, and map that memory */
201 lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT,
202 DIV_ROUND_UP(vring_size(lvq->config.num),
203 PAGE_SIZE));
204 if (!lvq->pages) {
205 err = -ENOMEM;
206 goto free_lvq;
207 }
208
209 /* OK, tell virtio_ring.c to set up a virtqueue now we know its size
210 * and we've got a pointer to its pages. */
211 vq = vring_new_virtqueue(lvq->config.num, vdev, lvq->pages,
212 lg_notify, callback);
213 if (!vq) {
214 err = -ENOMEM;
215 goto unmap;
216 }
217
218 /* Tell the interrupt for this virtqueue to go to the virtio_ring
219 * interrupt handler. */
220 /* FIXME: We used to have a flag for the Host to tell us we could use
221 * the interrupt as a source of randomness: it'd be nice to have that
222	 * back. */
223 err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED,
224 vdev->dev.bus_id, vq);
225 if (err)
226 goto destroy_vring;
227
228	/* Last of all we hook up our "struct lguest_vq_info" to the
229 * virtqueue's priv pointer. */
230 vq->priv = lvq;
231 return vq;
232
233destroy_vring:
234 vring_del_virtqueue(vq);
235unmap:
236 lguest_unmap(lvq->pages);
237free_lvq:
238 kfree(lvq);
239 return ERR_PTR(err);
240}
241/*:*/
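
From a driver's point of view the whole dance is short. A sketch, where my_intr_handler is a placeholder for whatever the driver wants run when the Host has used a buffer:

	static bool my_intr_handler(struct virtqueue *vq)
	{
		/* ... drain the used ring here ... */
		return true;
	}

	struct virtqueue *vq = vdev->config->find_vq(vdev, my_intr_handler);
	if (IS_ERR(vq))
		return PTR_ERR(vq);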
242
243/* Cleaning up a virtqueue is easy */
244static void lg_del_vq(struct virtqueue *vq)
245{
246 struct lguest_vq_info *lvq = vq->priv;
247
248 /* Tell virtio_ring.c to free the virtqueue. */
249 vring_del_virtqueue(vq);
250 /* Unmap the pages containing the ring. */
251 lguest_unmap(lvq->pages);
252 /* Free our own queue information. */
253 kfree(lvq);
254}
255
256/* The ops structure which hooks everything together. */
257static struct virtio_config_ops lguest_config_ops = {
258 .find = lg_find,
259 .get = lg_get,
260 .set = lg_set,
261 .get_status = lg_get_status,
262 .set_status = lg_set_status,
263 .find_vq = lg_find_vq,
264 .del_vq = lg_del_vq,
265};
266
267/* The root device for the lguest virtio devices. This makes them appear as
268 * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. */
269static struct device lguest_root = {
270 .parent = NULL,
271 .bus_id = "lguest",
272};
273
274/*D:120 This is the core of the lguest bus: actually adding a new device.
275 * It's a separate function because it's neater that way, and because an
276 * earlier version of the code supported hotplug and unplug. They were removed
277 * early on because they were never used.
278 *
279 * As Andrew Tridgell says, "Untested code is buggy code".
280 *
281 * It's worth reading this carefully: we start with a pointer to the new device
282 * descriptor in the "lguest_devices" page. */
283static void add_lguest_device(struct lguest_device_desc *d)
284{
285 struct lguest_device *ldev;
286
287 ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
288 if (!ldev) {
289 printk(KERN_EMERG "Cannot allocate lguest dev %u\n",
290 dev_index++);
291 return;
292 }
293
294	/* This device's parent is the lguest/ dir. */
295 ldev->vdev.dev.parent = &lguest_root;
296 /* We have a unique device index thanks to the dev_index counter. */
297 ldev->vdev.index = dev_index++;
298 /* The device type comes straight from the descriptor. There's also a
299 * device vendor field in the virtio_device struct, which we leave as
300 * 0. */
301 ldev->vdev.id.device = d->type;
302 /* We have a simple set of routines for querying the device's
303 * configuration information and setting its status. */
304 ldev->vdev.config = &lguest_config_ops;
305 /* And we remember the device's descriptor for lguest_config_ops. */
306 ldev->desc = d;
307
308 /* register_virtio_device() sets up the generic fields for the struct
309 * virtio_device and calls device_register(). This makes the bus
310 * infrastructure look for a matching driver. */
311 if (register_virtio_device(&ldev->vdev) != 0) {
312 printk(KERN_ERR "Failed to register lguest device %u\n",
313 ldev->vdev.index);
314 kfree(ldev);
315 }
316}
317
318/*D:110 scan_devices() simply iterates through the device page. The type 0 is
319 * reserved to mean "end of devices". */
320static void scan_devices(void)
321{
322 unsigned int i;
323 struct lguest_device_desc *d;
324
325 /* We start at the page beginning, and skip over each entry. */
326 for (i = 0; i < PAGE_SIZE; i += sizeof(*d) + d->config_len) {
327 d = lguest_devices + i;
328
329 /* Once we hit a zero, stop. */
330 if (d->type == 0)
331 break;
332
333 add_lguest_device(d);
334 }
335}
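
So the page the Host gives us is just descriptors packed back-to-back, each followed by its own config bytes and terminated by a zero type. Showing only the fields this file actually touches (the real header in linux/lguest_launcher.h may carry more), the shape is:

	struct lguest_device_desc {
		u8 type;	/* device type; 0 ends the array */
		u8 config_len;	/* bytes of config data after the header */
		u8 status;	/* the status byte lg_get_status() reads */
		u8 config[0];	/* config_len bytes of "type, len, data" fields */
	};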
336
337/*D:105 Fairly early in boot, lguest_devices_init() is called to set up the
338 * lguest device infrastructure. We check that we are a Guest by checking
339 * pv_info.name: there are other ways of checking, but this seems most
340 * obvious to me.
341 *
342 * So we can access the "struct lguest_device_desc"s easily, we map that memory
343 * and store the pointer in the global "lguest_devices". Then we register a
344 * root device from which all our devices will hang (this seems to be the
345 * correct sysfs incantation).
346 *
347 * Finally we call scan_devices() which adds all the devices found in the
348 * lguest_devices page. */
349static int __init lguest_devices_init(void)
350{
351 if (strcmp(pv_info.name, "lguest") != 0)
352 return 0;
353
354 if (device_register(&lguest_root) != 0)
355 panic("Could not register lguest root");
356
357 /* Devices are in a single page above top of "normal" mem */
358 lguest_devices = lguest_map(max_pfn<<PAGE_SHIFT, 1);
359
360 scan_devices();
361 return 0;
362}
363/* We do this after core stuff, but before the drivers. */
364postcore_initcall(lguest_devices_init);
365
366/*D:150 At this point in the journey we used to wade through the lguest
367 * devices themselves: net, block and console. Since they're all now virtio
368 * devices rather than lguest-specific, I've decided to ignore them. Mostly,
369 * they're kind of boring. But this does mean you'll never experience the
370 * thrill of reading the forbidden love scene buried deep in the block driver.
371 *
372 * "make Launcher" beckons, where we answer questions like "Where do Guests
373 * come from?", and "What do you do when someone asks for optimization?". */
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 80d1b58c76..ee405b3838 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -1,73 +1,17 @@
1/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher 1/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher
2 * controls and communicates with the Guest. For example, the first write will 2 * controls and communicates with the Guest. For example, the first write will
3 * tell us the memory size, pagetable, entry point and kernel address offset. 3 * tell us the Guest's memory layout, pagetable, entry point and kernel address
4 * A read will run the Guest until a signal is pending (-EINTR), or the Guest 4 * offset. A read will run the Guest until something happens, such as a signal
5 * does a DMA out to the Launcher. Writes are also used to get a DMA buffer 5 * or the Guest doing a NOTIFY out to the Launcher. :*/
6 * registered by the Guest and to send the Guest an interrupt. :*/
7#include <linux/uaccess.h> 6#include <linux/uaccess.h>
8#include <linux/miscdevice.h> 7#include <linux/miscdevice.h>
9#include <linux/fs.h> 8#include <linux/fs.h>
10#include "lg.h" 9#include "lg.h"
11 10
12/*L:030 setup_regs() doesn't really belong in this file, but it gives us an
13 * early glimpse deeper into the Host so it's worth having here.
14 *
15 * Most of the Guest's registers are left alone: we used get_zeroed_page() to
16 * allocate the structure, so they will be 0. */
17static void setup_regs(struct lguest_regs *regs, unsigned long start)
18{
19 /* There are four "segment" registers which the Guest needs to boot:
20 * The "code segment" register (cs) refers to the kernel code segment
21 * __KERNEL_CS, and the "data", "extra" and "stack" segment registers
22 * refer to the kernel data segment __KERNEL_DS.
23 *
24 * The privilege level is packed into the lower bits. The Guest runs
25 * at privilege level 1 (GUEST_PL).*/
26 regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
27 regs->cs = __KERNEL_CS|GUEST_PL;
28
29 /* The "eflags" register contains miscellaneous flags. Bit 1 (0x002)
30 * is supposed to always be "1". Bit 9 (0x200) controls whether
31 * interrupts are enabled. We always leave interrupts enabled while
32 * running the Guest. */
33 regs->eflags = 0x202;
34
35 /* The "Extended Instruction Pointer" register says where the Guest is
36 * running. */
37 regs->eip = start;
38
39 /* %esi points to our boot information, at physical address 0, so don't
40 * touch it. */
41}
42
43/*L:310 To send DMA into the Guest, the Launcher needs to be able to ask for a
44 * DMA buffer. This is done by writing LHREQ_GETDMA and the key to
45 * /dev/lguest. */
46static long user_get_dma(struct lguest *lg, const u32 __user *input)
47{
48 unsigned long key, udma, irq;
49
50 /* Fetch the key they wrote to us. */
51 if (get_user(key, input) != 0)
52 return -EFAULT;
53 /* Look for a free Guest DMA buffer bound to that key. */
54 udma = get_dma_buffer(lg, key, &irq);
55 if (!udma)
56 return -ENOENT;
57
58 /* We need to tell the Launcher what interrupt the Guest expects after
59 * the buffer is filled. We stash it in udma->used_len. */
60 lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq);
61
62 /* The (guest-physical) address of the DMA buffer is returned from
63 * the write(). */
64 return udma;
65}
66
67/*L:315 To force the Guest to stop running and return to the Launcher, the 11/*L:315 To force the Guest to stop running and return to the Launcher, the
68 * Waker writes LHREQ_BREAK and the value "1" to /dev/lguest. The 12 * Waker writes LHREQ_BREAK and the value "1" to /dev/lguest. The
69 * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */ 13 * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */
70static int break_guest_out(struct lguest *lg, const u32 __user *input) 14static int break_guest_out(struct lguest *lg, const unsigned long __user *input)
71{ 15{
72 unsigned long on; 16 unsigned long on;
73 17
@@ -90,9 +34,9 @@ static int break_guest_out(struct lguest *lg, const u32 __user *input)
90 34
91/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt 35/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
92 * number to /dev/lguest. */ 36 * number to /dev/lguest. */
93static int user_send_irq(struct lguest *lg, const u32 __user *input) 37static int user_send_irq(struct lguest *lg, const unsigned long __user *input)
94{ 38{
95 u32 irq; 39 unsigned long irq;
96 40
97 if (get_user(irq, input) != 0) 41 if (get_user(irq, input) != 0)
98 return -EFAULT; 42 return -EFAULT;
@@ -133,17 +77,19 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
133 return len; 77 return len;
134 } 78 }
135 79
136 /* If we returned from read() last time because the Guest sent DMA, 80 /* If we returned from read() last time because the Guest notified,
137 * clear the flag. */ 81 * clear the flag. */
138 if (lg->dma_is_pending) 82 if (lg->pending_notify)
139 lg->dma_is_pending = 0; 83 lg->pending_notify = 0;
140 84
141 /* Run the Guest until something interesting happens. */ 85 /* Run the Guest until something interesting happens. */
142 return run_guest(lg, (unsigned long __user *)user); 86 return run_guest(lg, (unsigned long __user *)user);
143} 87}
144 88
145/*L:020 The initialization write supplies 4 32-bit values (in addition to the 89/*L:020 The initialization write supplies 4 pointer-sized (32- or 64-bit)
146 * 32-bit LHREQ_INITIALIZE value). These are: 90 * values (in addition to the LHREQ_INITIALIZE value). These are:
91 *
92 * base: The start of the Guest-physical memory inside the Launcher memory.
147 * 93 *
148 * pfnlimit: The highest (Guest-physical) page number the Guest should be 94 * pfnlimit: The highest (Guest-physical) page number the Guest should be
149 * allowed to access. The Launcher has to live in Guest memory, so it sets 95 * allowed to access. The Launcher has to live in Guest memory, so it sets
@@ -153,23 +99,17 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
153 * pagetables (which are set up by the Launcher). 99 * pagetables (which are set up by the Launcher).
154 * 100 *
155 * start: The first instruction to execute ("eip" in x86-speak). 101 * start: The first instruction to execute ("eip" in x86-speak).
156 *
157 * page_offset: The PAGE_OFFSET constant in the Guest kernel. We should
158 * probably wean the code off this, but it's a very useful constant! Any
159 * address above this is within the Guest kernel, and any kernel address can
160 * quickly be converted from physical to virtual by adding PAGE_OFFSET. It's
161 * 0xC0000000 (3G) by default, but it's configurable at kernel build time.
162 */ 102 */
163static int initialize(struct file *file, const u32 __user *input) 103static int initialize(struct file *file, const unsigned long __user *input)
164{ 104{
165 /* "struct lguest" contains everything we (the Host) know about a 105 /* "struct lguest" contains everything we (the Host) know about a
166 * Guest. */ 106 * Guest. */
167 struct lguest *lg; 107 struct lguest *lg;
168 int err, i; 108 int err;
169 u32 args[4]; 109 unsigned long args[4];
170 110
171 /* We grab the Big Lguest lock, which protects the global array 111 /* We grab the Big Lguest lock, which protects against multiple
172 * "lguests" and multiple simultaneous initializations. */ 112 * simultaneous initializations. */
173 mutex_lock(&lguest_lock); 113 mutex_lock(&lguest_lock);
174 /* You can't initialize twice! Close the device and start again... */ 114 /* You can't initialize twice! Close the device and start again... */
175 if (file->private_data) { 115 if (file->private_data) {
@@ -182,20 +122,15 @@ static int initialize(struct file *file, const u32 __user *input)
182 goto unlock; 122 goto unlock;
183 } 123 }
184 124
185 /* Find an unused guest. */ 125 lg = kzalloc(sizeof(*lg), GFP_KERNEL);
186 i = find_free_guest(); 126 if (!lg) {
187 if (i < 0) { 127 err = -ENOMEM;
188 err = -ENOSPC;
189 goto unlock; 128 goto unlock;
190 } 129 }
191 /* OK, we have an index into the "lguest" array: "lg" is a convenient
192 * pointer. */
193 lg = &lguests[i];
194 130
195 /* Populate the easy fields of our "struct lguest" */ 131 /* Populate the easy fields of our "struct lguest" */
196 lg->guestid = i; 132 lg->mem_base = (void __user *)(long)args[0];
197 lg->pfn_limit = args[0]; 133 lg->pfn_limit = args[1];
198 lg->page_offset = args[3];
199 134
200 /* We need a complete page for the Guest registers: they are accessible 135 /* We need a complete page for the Guest registers: they are accessible
201 * to the Guest and we can only grant it access to whole pages. */ 136 * to the Guest and we can only grant it access to whole pages. */
@@ -210,17 +145,13 @@ static int initialize(struct file *file, const u32 __user *input)
210 /* Initialize the Guest's shadow page tables, using the toplevel 145 /* Initialize the Guest's shadow page tables, using the toplevel
211 * address the Launcher gave us. This allocates memory, so can 146 * address the Launcher gave us. This allocates memory, so can
212 * fail. */ 147 * fail. */
213 err = init_guest_pagetable(lg, args[1]); 148 err = init_guest_pagetable(lg, args[2]);
214 if (err) 149 if (err)
215 goto free_regs; 150 goto free_regs;
216 151
217 /* Now we initialize the Guest's registers, handing it the start 152 /* Now we initialize the Guest's registers, handing it the start
218 * address. */ 153 * address. */
219 setup_regs(lg->regs, args[2]); 154 lguest_arch_setup_regs(lg, args[3]);
220
221 /* There are a couple of GDT entries the Guest expects when first
222 * booting. */
223 setup_guest_gdt(lg);
224 155
225 /* The timer for lguest's clock needs initialization. */ 156 /* The timer for lguest's clock needs initialization. */
226 init_clockdev(lg); 157 init_clockdev(lg);
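
On the Launcher side, then, the very first write to /dev/lguest looks something like this (a sketch, assuming lguest_fd is already open; the order matches how initialize() pulls the values out of args[]):

	unsigned long args[] = {
		LHREQ_INITIALIZE,
		base,		/* start of Guest memory inside the Launcher */
		pfnlimit,	/* highest Guest-physical page number allowed */
		pgdir,		/* toplevel pagetable the Launcher built */
		start,		/* first instruction for the Guest to run */
	};
	write(lguest_fd, args, sizeof(args));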
@@ -260,18 +191,19 @@ unlock:
260/*L:010 The first operation the Launcher does must be a write. All writes 191/*L:010 The first operation the Launcher does must be a write. All writes
261 * start with a 32 bit number: for the first write this must be 192 * start with a 32 bit number: for the first write this must be
262 * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use 193 * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use
263 * writes of other values to get DMA buffers and send interrupts. */ 194 * writes of other values to send interrupts. */
264static ssize_t write(struct file *file, const char __user *input, 195static ssize_t write(struct file *file, const char __user *in,
265 size_t size, loff_t *off) 196 size_t size, loff_t *off)
266{ 197{
267 /* Once the guest is initialized, we hold the "struct lguest" in the 198 /* Once the guest is initialized, we hold the "struct lguest" in the
268 * file private data. */ 199 * file private data. */
269 struct lguest *lg = file->private_data; 200 struct lguest *lg = file->private_data;
270 u32 req; 201 const unsigned long __user *input = (const unsigned long __user *)in;
202 unsigned long req;
271 203
272 if (get_user(req, input) != 0) 204 if (get_user(req, input) != 0)
273 return -EFAULT; 205 return -EFAULT;
274 input += sizeof(req); 206 input++;
275 207
276 /* If you haven't initialized, you must do that first. */ 208 /* If you haven't initialized, you must do that first. */
277 if (req != LHREQ_INITIALIZE && !lg) 209 if (req != LHREQ_INITIALIZE && !lg)
@@ -287,13 +219,11 @@ static ssize_t write(struct file *file, const char __user *input,
287 219
288 switch (req) { 220 switch (req) {
289 case LHREQ_INITIALIZE: 221 case LHREQ_INITIALIZE:
290 return initialize(file, (const u32 __user *)input); 222 return initialize(file, input);
291 case LHREQ_GETDMA:
292 return user_get_dma(lg, (const u32 __user *)input);
293 case LHREQ_IRQ: 223 case LHREQ_IRQ:
294 return user_send_irq(lg, (const u32 __user *)input); 224 return user_send_irq(lg, input);
295 case LHREQ_BREAK: 225 case LHREQ_BREAK:
296 return break_guest_out(lg, (const u32 __user *)input); 226 return break_guest_out(lg, input);
297 default: 227 default:
298 return -EINVAL; 228 return -EINVAL;
299 } 229 }
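
So, for instance, injecting interrupt 9 into the Guest is just a two-word write (a sketch, reusing the hypothetical lguest_fd from above):

	unsigned long buf[] = { LHREQ_IRQ, 9 };
	write(lguest_fd, buf, sizeof(buf));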
@@ -319,8 +249,6 @@ static int close(struct inode *inode, struct file *file)
319 mutex_lock(&lguest_lock); 249 mutex_lock(&lguest_lock);
320 /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ 250 /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */
321 hrtimer_cancel(&lg->hrt); 251 hrtimer_cancel(&lg->hrt);
322 /* Free any DMA buffers the Guest had bound. */
323 release_all_dma(lg);
324 /* Free up the shadow page tables for the Guest. */ 252 /* Free up the shadow page tables for the Guest. */
325 free_guest_pagetable(lg); 253 free_guest_pagetable(lg);
326 /* Now all the memory cleanups are done, it's safe to release the 254 /* Now all the memory cleanups are done, it's safe to release the
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index b7a924ace6..2a45f0691c 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -13,6 +13,7 @@
13#include <linux/random.h> 13#include <linux/random.h>
14#include <linux/percpu.h> 14#include <linux/percpu.h>
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#include <asm/uaccess.h>
16#include "lg.h" 17#include "lg.h"
17 18
18/*M:008 We hold reference to pages, which prevents them from being swapped. 19/*M:008 We hold reference to pages, which prevents them from being swapped.
@@ -44,44 +45,32 @@
44 * (vii) Setting up the page tables initially. 45 * (vii) Setting up the page tables initially.
45 :*/ 46 :*/
46 47
47/* Pages are 4k long, and each page table entry is 4 bytes long, giving us 1024
48 * (or 2^10) entries per page. */
49#define PTES_PER_PAGE_SHIFT 10
50#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT)
51 48
52/* 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is 49/* 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is
53 * conveniently placed at the top 4MB, so it uses a separate, complete PTE 50 * conveniently placed at the top 4MB, so it uses a separate, complete PTE
54 * page. */ 51 * page. */
55#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1) 52#define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1)
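
A quick sanity check on that arithmetic: with 4k pages and 1024 entries per level, each top-level entry covers 1024 * 4k = 4MB, and the top-level index for an address is simply vaddr >> 22. The conventional 0xC0000000 kernel split therefore lands at entry 768, and the Switcher's top 4MB sits in the very last entry, 1023.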
56 53
57/* We actually need a separate PTE page for each CPU. Remember that after the 54/* We actually need a separate PTE page for each CPU. Remember that after the
58 * Switcher code itself comes two pages for each CPU, and we don't want this 55 * Switcher code itself comes two pages for each CPU, and we don't want this
59 * CPU's guest to see the pages of any other CPU. */ 56 * CPU's guest to see the pages of any other CPU. */
60static DEFINE_PER_CPU(spte_t *, switcher_pte_pages); 57static DEFINE_PER_CPU(pte_t *, switcher_pte_pages);
61#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) 58#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)
62 59
63/*H:320 With our shadow and Guest types established, we need to deal with 60/*H:320 With our shadow and Guest types established, we need to deal with
64 * them: the page table code is curly enough to need helper functions to keep 61 * them: the page table code is curly enough to need helper functions to keep
65 * it clear and clean. 62 * it clear and clean.
66 * 63 *
67 * The first helper takes a virtual address, and says which entry in the top 64 * There are two functions which return pointers to the shadow (aka "real")
68 * level page table deals with that address. Since each top level entry deals
69 * with 4M, this effectively divides by 4M. */
70static unsigned vaddr_to_pgd_index(unsigned long vaddr)
71{
72 return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
73}
74
75/* There are two functions which return pointers to the shadow (aka "real")
76 * page tables. 65 * page tables.
77 * 66 *
78 * spgd_addr() takes the virtual address and returns a pointer to the top-level 67 * spgd_addr() takes the virtual address and returns a pointer to the top-level
79 * page directory entry for that address. Since we keep track of several page 68 * page directory entry for that address. Since we keep track of several page
80 * tables, the "i" argument tells us which one we're interested in (it's 69 * tables, the "i" argument tells us which one we're interested in (it's
81 * usually the current one). */ 70 * usually the current one). */
82static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) 71static pgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr)
83{ 72{
84 unsigned int index = vaddr_to_pgd_index(vaddr); 73 unsigned int index = pgd_index(vaddr);
85 74
86 /* We kill any Guest trying to touch the Switcher addresses. */ 75 /* We kill any Guest trying to touch the Switcher addresses. */
87 if (index >= SWITCHER_PGD_INDEX) { 76 if (index >= SWITCHER_PGD_INDEX) {
@@ -95,28 +84,28 @@ static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr)
95/* This routine then takes the PGD entry given above, which contains the 84/* This routine then takes the PGD entry given above, which contains the
96 * address of the PTE page. It then returns a pointer to the PTE entry for the 85 * address of the PTE page. It then returns a pointer to the PTE entry for the
97 * given address. */ 86 * given address. */
98static spte_t *spte_addr(struct lguest *lg, spgd_t spgd, unsigned long vaddr) 87static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr)
99{ 88{
100 spte_t *page = __va(spgd.pfn << PAGE_SHIFT); 89 pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
101 /* You should never call this if the PGD entry wasn't valid */ 90 /* You should never call this if the PGD entry wasn't valid */
102 BUG_ON(!(spgd.flags & _PAGE_PRESENT)); 91 BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
103 return &page[(vaddr >> PAGE_SHIFT) % PTES_PER_PAGE]; 92 return &page[(vaddr >> PAGE_SHIFT) % PTRS_PER_PTE];
104} 93}
105 94
106/* These two functions just like the above two, except they access the Guest 95/* These two functions just like the above two, except they access the Guest
107 * page tables. Hence they return a Guest address. */ 96 * page tables. Hence they return a Guest address. */
108static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) 97static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr)
109{ 98{
110 unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT); 99 unsigned int index = vaddr >> (PGDIR_SHIFT);
111 return lg->pgdirs[lg->pgdidx].cr3 + index * sizeof(gpgd_t); 100 return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t);
112} 101}
113 102
114static unsigned long gpte_addr(struct lguest *lg, 103static unsigned long gpte_addr(struct lguest *lg,
115 gpgd_t gpgd, unsigned long vaddr) 104 pgd_t gpgd, unsigned long vaddr)
116{ 105{
117 unsigned long gpage = gpgd.pfn << PAGE_SHIFT; 106 unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT;
118 BUG_ON(!(gpgd.flags & _PAGE_PRESENT)); 107 BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT));
119 return gpage + ((vaddr>>PAGE_SHIFT) % PTES_PER_PAGE) * sizeof(gpte_t); 108 return gpage + ((vaddr>>PAGE_SHIFT) % PTRS_PER_PTE) * sizeof(pte_t);
120} 109}
121 110
122/*H:350 This routine takes a page number given by the Guest and converts it to 111/*H:350 This routine takes a page number given by the Guest and converts it to
@@ -149,53 +138,55 @@ static unsigned long get_pfn(unsigned long virtpfn, int write)
149 * entry can be a little tricky. The flags are (almost) the same, but the 138 * entry can be a little tricky. The flags are (almost) the same, but the
150 * Guest PTE contains a virtual page number: the CPU needs the real page 139 * Guest PTE contains a virtual page number: the CPU needs the real page
151 * number. */ 140 * number. */
152static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write) 141static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write)
153{ 142{
154 spte_t spte; 143 unsigned long pfn, base, flags;
155 unsigned long pfn;
156 144
157 /* The Guest sets the global flag, because it thinks that it is using 145 /* The Guest sets the global flag, because it thinks that it is using
158 * PGE. We only told it to use PGE so it would tell us whether it was 146 * PGE. We only told it to use PGE so it would tell us whether it was
159 * flushing a kernel mapping or a userspace mapping. We don't actually 147 * flushing a kernel mapping or a userspace mapping. We don't actually
160 * use the global bit, so throw it away. */ 148 * use the global bit, so throw it away. */
161 spte.flags = (gpte.flags & ~_PAGE_GLOBAL); 149 flags = (pte_flags(gpte) & ~_PAGE_GLOBAL);
150
151 /* The Guest's pages are offset inside the Launcher. */
152 base = (unsigned long)lg->mem_base / PAGE_SIZE;
162 153
163 /* We need a temporary "unsigned long" variable to hold the answer from 154 /* We need a temporary "unsigned long" variable to hold the answer from
164 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't 155 * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't
165 * fit in spte.pfn. get_pfn() finds the real physical number of the 156 * fit in spte.pfn. get_pfn() finds the real physical number of the
166 * page, given the virtual number. */ 157 * page, given the virtual number. */
167 pfn = get_pfn(gpte.pfn, write); 158 pfn = get_pfn(base + pte_pfn(gpte), write);
168 if (pfn == -1UL) { 159 if (pfn == -1UL) {
169 kill_guest(lg, "failed to get page %u", gpte.pfn); 160 kill_guest(lg, "failed to get page %lu", pte_pfn(gpte));
170 /* When we destroy the Guest, we'll go through the shadow page 161 /* When we destroy the Guest, we'll go through the shadow page
171 * tables and release_pte() them. Make sure we don't think 162 * tables and release_pte() them. Make sure we don't think
172 * this one is valid! */ 163 * this one is valid! */
173 spte.flags = 0; 164 flags = 0;
174 } 165 }
175 /* Now we assign the page number, and our shadow PTE is complete. */ 166 /* Now we assemble our shadow PTE from the page number and flags. */
176 spte.pfn = pfn; 167 return pfn_pte(pfn, __pgprot(flags));
177 return spte;
178} 168}
179 169
180/*H:460 And to complete the chain, release_pte() looks like this: */ 170/*H:460 And to complete the chain, release_pte() looks like this: */
181static void release_pte(spte_t pte) 171static void release_pte(pte_t pte)
182{ 172{
183 /* Remember that get_user_pages() took a reference to the page, in 173 /* Remember that get_user_pages() took a reference to the page, in
184 * get_pfn()? We have to put it back now. */ 174 * get_pfn()? We have to put it back now. */
185 if (pte.flags & _PAGE_PRESENT) 175 if (pte_flags(pte) & _PAGE_PRESENT)
186 put_page(pfn_to_page(pte.pfn)); 176 put_page(pfn_to_page(pte_pfn(pte)));
187} 177}
188/*:*/ 178/*:*/
189 179
190static void check_gpte(struct lguest *lg, gpte_t gpte) 180static void check_gpte(struct lguest *lg, pte_t gpte)
191{ 181{
192 if ((gpte.flags & (_PAGE_PWT|_PAGE_PSE)) || gpte.pfn >= lg->pfn_limit) 182 if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE))
183 || pte_pfn(gpte) >= lg->pfn_limit)
193 kill_guest(lg, "bad page table entry"); 184 kill_guest(lg, "bad page table entry");
194} 185}
195 186
196static void check_gpgd(struct lguest *lg, gpgd_t gpgd) 187static void check_gpgd(struct lguest *lg, pgd_t gpgd)
197{ 188{
198 if ((gpgd.flags & ~_PAGE_TABLE) || gpgd.pfn >= lg->pfn_limit) 189 if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || pgd_pfn(gpgd) >= lg->pfn_limit)
199 kill_guest(lg, "bad page directory entry"); 190 kill_guest(lg, "bad page directory entry");
200} 191}
201 192
@@ -211,21 +202,21 @@ static void check_gpgd(struct lguest *lg, gpgd_t gpgd)
211 * true. */ 202 * true. */
212int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) 203int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
213{ 204{
214 gpgd_t gpgd; 205 pgd_t gpgd;
215 spgd_t *spgd; 206 pgd_t *spgd;
216 unsigned long gpte_ptr; 207 unsigned long gpte_ptr;
217 gpte_t gpte; 208 pte_t gpte;
218 spte_t *spte; 209 pte_t *spte;
219 210
220 /* First step: get the top-level Guest page table entry. */ 211 /* First step: get the top-level Guest page table entry. */
221 gpgd = mkgpgd(lgread_u32(lg, gpgd_addr(lg, vaddr))); 212 gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t);
222 /* Toplevel not present? We can't map it in. */ 213 /* Toplevel not present? We can't map it in. */
223 if (!(gpgd.flags & _PAGE_PRESENT)) 214 if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
224 return 0; 215 return 0;
225 216
226 /* Now look at the matching shadow entry. */ 217 /* Now look at the matching shadow entry. */
227 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 218 spgd = spgd_addr(lg, lg->pgdidx, vaddr);
228 if (!(spgd->flags & _PAGE_PRESENT)) { 219 if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
229 /* No shadow entry: allocate a new shadow PTE page. */ 220 /* No shadow entry: allocate a new shadow PTE page. */
230 unsigned long ptepage = get_zeroed_page(GFP_KERNEL); 221 unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
231 /* This is not really the Guest's fault, but killing it is 222 /* This is not really the Guest's fault, but killing it is
@@ -238,34 +229,35 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
238 check_gpgd(lg, gpgd); 229 check_gpgd(lg, gpgd);
239 /* And we copy the flags to the shadow PGD entry. The page 230 /* And we copy the flags to the shadow PGD entry. The page
240 * number in the shadow PGD is the page we just allocated. */ 231 * number in the shadow PGD is the page we just allocated. */
241 spgd->raw.val = (__pa(ptepage) | gpgd.flags); 232 *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd));
242 } 233 }
243 234
244 /* OK, now we look at the lower level in the Guest page table: keep its 235 /* OK, now we look at the lower level in the Guest page table: keep its
245 * address, because we might update it later. */ 236 * address, because we might update it later. */
246 gpte_ptr = gpte_addr(lg, gpgd, vaddr); 237 gpte_ptr = gpte_addr(lg, gpgd, vaddr);
247 gpte = mkgpte(lgread_u32(lg, gpte_ptr)); 238 gpte = lgread(lg, gpte_ptr, pte_t);
248 239
249 /* If this page isn't in the Guest page tables, we can't page it in. */ 240 /* If this page isn't in the Guest page tables, we can't page it in. */
250 if (!(gpte.flags & _PAGE_PRESENT)) 241 if (!(pte_flags(gpte) & _PAGE_PRESENT))
251 return 0; 242 return 0;
252 243
253 /* Check they're not trying to write to a page the Guest wants 244 /* Check they're not trying to write to a page the Guest wants
254 * read-only (bit 2 of errcode == write). */ 245 * read-only (bit 2 of errcode == write). */
255 if ((errcode & 2) && !(gpte.flags & _PAGE_RW)) 246 if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW))
256 return 0; 247 return 0;
257 248
258 /* User access to a kernel page? (bit 3 == user access) */ 249 /* User access to a kernel page? (bit 3 == user access) */
259 if ((errcode & 4) && !(gpte.flags & _PAGE_USER)) 250 if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER))
260 return 0; 251 return 0;
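	/* For reference: in the x86 page fault error code the 1 bit means
	 * the page was present, the 2 bit means the fault was a write, and
	 * the 4 bit means it came from user mode: exactly the bits the
	 * "errcode & 2" and "errcode & 4" tests here are decoding. */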
261 252
262 /* Check that the Guest PTE flags are OK, and the page number is below 253 /* Check that the Guest PTE flags are OK, and the page number is below
263 * the pfn_limit (ie. not mapping the Launcher binary). */ 254 * the pfn_limit (ie. not mapping the Launcher binary). */
264 check_gpte(lg, gpte); 255 check_gpte(lg, gpte);
265 /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ 256 /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
266 gpte.flags |= _PAGE_ACCESSED; 257 gpte = pte_mkyoung(gpte);
258
267 if (errcode & 2) 259 if (errcode & 2)
268 gpte.flags |= _PAGE_DIRTY; 260 gpte = pte_mkdirty(gpte);
269 261
270 /* Get the pointer to the shadow PTE entry we're going to set. */ 262 /* Get the pointer to the shadow PTE entry we're going to set. */
271 spte = spte_addr(lg, *spgd, vaddr); 263 spte = spte_addr(lg, *spgd, vaddr);
@@ -275,21 +267,18 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
275 267
276 /* If this is a write, we insist that the Guest page is writable (the 268 /* If this is a write, we insist that the Guest page is writable (the
277 * final arg to gpte_to_spte()). */ 269 * final arg to gpte_to_spte()). */
278 if (gpte.flags & _PAGE_DIRTY) 270 if (pte_dirty(gpte))
279 *spte = gpte_to_spte(lg, gpte, 1); 271 *spte = gpte_to_spte(lg, gpte, 1);
280 else { 272 else
281 /* If this is a read, don't set the "writable" bit in the page 273 /* If this is a read, don't set the "writable" bit in the page
282 * table entry, even if the Guest says it's writable. That way 274 * table entry, even if the Guest says it's writable. That way
283		 * we come back here when a write does actually occur, so we can 275		 * we come back here when a write does actually occur, so we can
284 * update the Guest's _PAGE_DIRTY flag. */ 276 * update the Guest's _PAGE_DIRTY flag. */
285 gpte_t ro_gpte = gpte; 277 *spte = gpte_to_spte(lg, pte_wrprotect(gpte), 0);
286 ro_gpte.flags &= ~_PAGE_RW;
287 *spte = gpte_to_spte(lg, ro_gpte, 0);
288 }
289 278
290 /* Finally, we write the Guest PTE entry back: we've set the 279 /* Finally, we write the Guest PTE entry back: we've set the
291 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ 280 * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */
292 lgwrite_u32(lg, gpte_ptr, gpte.raw.val); 281 lgwrite(lg, gpte_ptr, pte_t, gpte);
293 282
294 /* We succeeded in mapping the page! */ 283 /* We succeeded in mapping the page! */
295 return 1; 284 return 1;
@@ -305,17 +294,18 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
305 * mapped by the shadow page tables, and is it writable? */ 294 * mapped by the shadow page tables, and is it writable? */
306static int page_writable(struct lguest *lg, unsigned long vaddr) 295static int page_writable(struct lguest *lg, unsigned long vaddr)
307{ 296{
308 spgd_t *spgd; 297 pgd_t *spgd;
309 unsigned long flags; 298 unsigned long flags;
310 299
311 /* Look at the top level entry: is it present? */ 300 /* Look at the top level entry: is it present? */
312 spgd = spgd_addr(lg, lg->pgdidx, vaddr); 301 spgd = spgd_addr(lg, lg->pgdidx, vaddr);
313 if (!(spgd->flags & _PAGE_PRESENT)) 302 if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
314 return 0; 303 return 0;
315 304
316 /* Check the flags on the pte entry itself: it must be present and 305 /* Check the flags on the pte entry itself: it must be present and
317 * writable. */ 306 * writable. */
318 flags = spte_addr(lg, *spgd, vaddr)->flags; 307 flags = pte_flags(*(spte_addr(lg, *spgd, vaddr)));
308
319 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); 309 return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
320} 310}
321 311
@@ -329,22 +319,22 @@ void pin_page(struct lguest *lg, unsigned long vaddr)
329} 319}
330 320
331/*H:450 If we chase down the release_pgd() code, it looks like this: */ 321/*H:450 If we chase down the release_pgd() code, it looks like this: */
332static void release_pgd(struct lguest *lg, spgd_t *spgd) 322static void release_pgd(struct lguest *lg, pgd_t *spgd)
333{ 323{
334 /* If the entry's not present, there's nothing to release. */ 324 /* If the entry's not present, there's nothing to release. */
335 if (spgd->flags & _PAGE_PRESENT) { 325 if (pgd_flags(*spgd) & _PAGE_PRESENT) {
336 unsigned int i; 326 unsigned int i;
337 /* Converting the pfn to find the actual PTE page is easy: turn 327 /* Converting the pfn to find the actual PTE page is easy: turn
338 * the page number into a physical address, then convert to a 328 * the page number into a physical address, then convert to a
339 * virtual address (easy for kernel pages like this one). */ 329 * virtual address (easy for kernel pages like this one). */
340 spte_t *ptepage = __va(spgd->pfn << PAGE_SHIFT); 330 pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
341 /* For each entry in the page, we might need to release it. */ 331 /* For each entry in the page, we might need to release it. */
342 for (i = 0; i < PTES_PER_PAGE; i++) 332 for (i = 0; i < PTRS_PER_PTE; i++)
343 release_pte(ptepage[i]); 333 release_pte(ptepage[i]);
344 /* Now we can free the page of PTEs */ 334 /* Now we can free the page of PTEs */
345 free_page((long)ptepage); 335 free_page((long)ptepage);
346		/* And zero out the PGD entry so we never release it twice. */ 336		/* And zero out the PGD entry so we never release it twice. */
347 spgd->raw.val = 0; 337 *spgd = __pgd(0);
348 } 338 }
349} 339}
350 340
@@ -356,7 +346,7 @@ static void flush_user_mappings(struct lguest *lg, int idx)
356{ 346{
357 unsigned int i; 347 unsigned int i;
358 /* Release every pgd entry up to the kernel's address. */ 348 /* Release every pgd entry up to the kernel's address. */
359 for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++) 349 for (i = 0; i < pgd_index(lg->kernel_address); i++)
360 release_pgd(lg, lg->pgdirs[idx].pgdir + i); 350 release_pgd(lg, lg->pgdirs[idx].pgdir + i);
361} 351}
362 352
@@ -369,6 +359,25 @@ void guest_pagetable_flush_user(struct lguest *lg)
369} 359}
370/*:*/ 360/*:*/
371 361
362/* We walk down the guest page tables to get a guest-physical address */
363unsigned long guest_pa(struct lguest *lg, unsigned long vaddr)
364{
365 pgd_t gpgd;
366 pte_t gpte;
367
368 /* First step: get the top-level Guest page table entry. */
369 gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t);
370 /* Toplevel not present? We can't map it in. */
371 if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
372 kill_guest(lg, "Bad address %#lx", vaddr);
373
374 gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t);
375 if (!(pte_flags(gpte) & _PAGE_PRESENT))
376 kill_guest(lg, "Bad address %#lx", vaddr);
377
378 return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK);
379}
380
372/* We keep several page tables. This is a simple routine to find the page 381/* We keep several page tables. This is a simple routine to find the page
373 * table (if any) corresponding to this top-level address the Guest has given 382 * table (if any) corresponding to this top-level address the Guest has given
374 * us. */ 383 * us. */
@@ -376,7 +385,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
376{ 385{
377 unsigned int i; 386 unsigned int i;
378 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 387 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
379 if (lg->pgdirs[i].cr3 == pgtable) 388 if (lg->pgdirs[i].gpgdir == pgtable)
380 break; 389 break;
381 return i; 390 return i;
382} 391}
@@ -385,7 +394,7 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
385 * allocate a new one (and so the kernel parts are not there), we set 394 * allocate a new one (and so the kernel parts are not there), we set
386 * blank_pgdir. */ 395 * blank_pgdir. */
387static unsigned int new_pgdir(struct lguest *lg, 396static unsigned int new_pgdir(struct lguest *lg,
388 unsigned long cr3, 397 unsigned long gpgdir,
389 int *blank_pgdir) 398 int *blank_pgdir)
390{ 399{
391 unsigned int next; 400 unsigned int next;
@@ -395,7 +404,7 @@ static unsigned int new_pgdir(struct lguest *lg,
395 next = random32() % ARRAY_SIZE(lg->pgdirs); 404 next = random32() % ARRAY_SIZE(lg->pgdirs);
396 /* If it's never been allocated at all before, try now. */ 405 /* If it's never been allocated at all before, try now. */
397 if (!lg->pgdirs[next].pgdir) { 406 if (!lg->pgdirs[next].pgdir) {
398 lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL); 407 lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
399 /* If the allocation fails, just keep using the one we have */ 408 /* If the allocation fails, just keep using the one we have */
400 if (!lg->pgdirs[next].pgdir) 409 if (!lg->pgdirs[next].pgdir)
401 next = lg->pgdidx; 410 next = lg->pgdidx;
@@ -405,7 +414,7 @@ static unsigned int new_pgdir(struct lguest *lg,
405 *blank_pgdir = 1; 414 *blank_pgdir = 1;
406 } 415 }
407 /* Record which Guest toplevel this shadows. */ 416 /* Record which Guest toplevel this shadows. */
408 lg->pgdirs[next].cr3 = cr3; 417 lg->pgdirs[next].gpgdir = gpgdir;
409 /* Release all the non-kernel mappings. */ 418 /* Release all the non-kernel mappings. */
410 flush_user_mappings(lg, next); 419 flush_user_mappings(lg, next);
411 420
@@ -472,26 +481,27 @@ void guest_pagetable_clear_all(struct lguest *lg)
472 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately. 481 * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately.
473 */ 482 */
474static void do_set_pte(struct lguest *lg, int idx, 483static void do_set_pte(struct lguest *lg, int idx,
475 unsigned long vaddr, gpte_t gpte) 484 unsigned long vaddr, pte_t gpte)
476{ 485{
477	/* Look up the matching shadow page directory entry. */ 486	/* Look up the matching shadow page directory entry. */
478 spgd_t *spgd = spgd_addr(lg, idx, vaddr); 487 pgd_t *spgd = spgd_addr(lg, idx, vaddr);
479 488
480 /* If the top level isn't present, there's no entry to update. */ 489 /* If the top level isn't present, there's no entry to update. */
481 if (spgd->flags & _PAGE_PRESENT) { 490 if (pgd_flags(*spgd) & _PAGE_PRESENT) {
482 /* Otherwise, we start by releasing the existing entry. */ 491 /* Otherwise, we start by releasing the existing entry. */
483 spte_t *spte = spte_addr(lg, *spgd, vaddr); 492 pte_t *spte = spte_addr(lg, *spgd, vaddr);
484 release_pte(*spte); 493 release_pte(*spte);
485 494
486 /* If they're setting this entry as dirty or accessed, we might 495 /* If they're setting this entry as dirty or accessed, we might
487 * as well put that entry they've given us in now. This shaves 496 * as well put that entry they've given us in now. This shaves
488 * 10% off a copy-on-write micro-benchmark. */ 497 * 10% off a copy-on-write micro-benchmark. */
489 if (gpte.flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) { 498 if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
490 check_gpte(lg, gpte); 499 check_gpte(lg, gpte);
491 *spte = gpte_to_spte(lg, gpte, gpte.flags&_PAGE_DIRTY); 500 *spte = gpte_to_spte(lg, gpte,
501 pte_flags(gpte) & _PAGE_DIRTY);
492 } else 502 } else
493 /* Otherwise we can demand_page() it in later. */ 503 /* Otherwise we can demand_page() it in later. */
494 spte->raw.val = 0; 504 *spte = __pte(0);
495 } 505 }
496} 506}
497 507
@@ -506,18 +516,18 @@ static void do_set_pte(struct lguest *lg, int idx,
506 * The benefit is that when we have to track a new page table, we can keep 516 * The benefit is that when we have to track a new page table, we can keep
507 * all the kernel mappings. This speeds up context switch immensely. */ 517 * all the kernel mappings. This speeds up context switch immensely. */
508void guest_set_pte(struct lguest *lg, 518void guest_set_pte(struct lguest *lg,
509 unsigned long cr3, unsigned long vaddr, gpte_t gpte) 519 unsigned long gpgdir, unsigned long vaddr, pte_t gpte)
510{ 520{
511 /* Kernel mappings must be changed on all top levels. Slow, but 521 /* Kernel mappings must be changed on all top levels. Slow, but
512 * doesn't happen often. */ 522 * doesn't happen often. */
513 if (vaddr >= lg->page_offset) { 523 if (vaddr >= lg->kernel_address) {
514 unsigned int i; 524 unsigned int i;
515 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) 525 for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
516 if (lg->pgdirs[i].pgdir) 526 if (lg->pgdirs[i].pgdir)
517 do_set_pte(lg, i, vaddr, gpte); 527 do_set_pte(lg, i, vaddr, gpte);
518 } else { 528 } else {
519 /* Is this page table one we have a shadow for? */ 529 /* Is this page table one we have a shadow for? */
520 int pgdir = find_pgdir(lg, cr3); 530 int pgdir = find_pgdir(lg, gpgdir);
521 if (pgdir != ARRAY_SIZE(lg->pgdirs)) 531 if (pgdir != ARRAY_SIZE(lg->pgdirs))
522 /* If so, do the update. */ 532 /* If so, do the update. */
523 do_set_pte(lg, pgdir, vaddr, gpte); 533 do_set_pte(lg, pgdir, vaddr, gpte);
@@ -538,7 +548,7 @@ void guest_set_pte(struct lguest *lg,
538 * 548 *
539 * So with that in mind here's our code to update a (top-level) PGD entry: 549 * So with that in mind here's our code to update a (top-level) PGD entry:
540 */ 550 */
541void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx) 551void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
542{ 552{
543 int pgdir; 553 int pgdir;
544 554
@@ -548,7 +558,7 @@ void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
548 return; 558 return;
549 559
550 /* If they're talking about a page table we have a shadow for... */ 560 /* If they're talking about a page table we have a shadow for... */
551 pgdir = find_pgdir(lg, cr3); 561 pgdir = find_pgdir(lg, gpgdir);
552 if (pgdir < ARRAY_SIZE(lg->pgdirs)) 562 if (pgdir < ARRAY_SIZE(lg->pgdirs))
553 /* ... throw it away. */ 563 /* ... throw it away. */
554 release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx); 564 release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
@@ -560,21 +570,34 @@ void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
560 * its first page table is. We set some things up here: */ 570 * its first page table is. We set some things up here: */
561int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) 571int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
562{ 572{
563 /* In flush_user_mappings() we loop from 0 to
564 * "vaddr_to_pgd_index(lg->page_offset)". This assumes it won't hit
565 * the Switcher mappings, so check that now. */
566 if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX)
567 return -EINVAL;
568 /* We start on the first shadow page table, and give it a blank PGD 573 /* We start on the first shadow page table, and give it a blank PGD
569 * page. */ 574 * page. */
570 lg->pgdidx = 0; 575 lg->pgdidx = 0;
571 lg->pgdirs[lg->pgdidx].cr3 = pgtable; 576 lg->pgdirs[lg->pgdidx].gpgdir = pgtable;
572 lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL); 577 lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL);
573 if (!lg->pgdirs[lg->pgdidx].pgdir) 578 if (!lg->pgdirs[lg->pgdidx].pgdir)
574 return -ENOMEM; 579 return -ENOMEM;
575 return 0; 580 return 0;
576} 581}
577 582
583/* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
584void page_table_guest_data_init(struct lguest *lg)
585{
586 /* We get the kernel address: above this is all kernel memory. */
587 if (get_user(lg->kernel_address, &lg->lguest_data->kernel_address)
588 /* We tell the Guest that it can't use the top 4MB of virtual
589 * addresses used by the Switcher. */
590 || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
591 || put_user(lg->pgdirs[lg->pgdidx].gpgdir, &lg->lguest_data->pgdir))
592 kill_guest(lg, "bad guest page %p", lg->lguest_data);
593
594 /* In flush_user_mappings() we loop from 0 to
595 * "pgd_index(lg->kernel_address)". This assumes it won't hit the
596 * Switcher mappings, so check that now. */
597 if (pgd_index(lg->kernel_address) >= SWITCHER_PGD_INDEX)
598 kill_guest(lg, "bad kernel address %#lx", lg->kernel_address);
599}
600
578/* When a Guest dies, our cleanup is fairly simple. */ 601/* When a Guest dies, our cleanup is fairly simple. */
579void free_guest_pagetable(struct lguest *lg) 602void free_guest_pagetable(struct lguest *lg)
580{ 603{
@@ -594,14 +617,14 @@ void free_guest_pagetable(struct lguest *lg)
594 * for each CPU already set up, we just need to hook them in. */ 617 * for each CPU already set up, we just need to hook them in. */
595void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) 618void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages)
596{ 619{
597 spte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); 620 pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
598 spgd_t switcher_pgd; 621 pgd_t switcher_pgd;
599 spte_t regs_pte; 622 pte_t regs_pte;
600 623
601 /* Make the last PGD entry for this Guest point to the Switcher's PTE 624 /* Make the last PGD entry for this Guest point to the Switcher's PTE
602 * page for this CPU (with appropriate flags). */ 625 * page for this CPU (with appropriate flags). */
603 switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT; 626 switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL);
604 switcher_pgd.flags = _PAGE_KERNEL; 627
605 lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; 628 lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
606 629
607 /* We also change the Switcher PTE page. When we're running the Guest, 630 /* We also change the Switcher PTE page. When we're running the Guest,
@@ -611,10 +634,8 @@ void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages)
611 * CPU's "struct lguest_pages": if we make sure the Guest's register 634 * CPU's "struct lguest_pages": if we make sure the Guest's register
612 * page is already mapped there, we don't have to copy them out 635 * page is already mapped there, we don't have to copy them out
613 * again. */ 636 * again. */
614 regs_pte.pfn = __pa(lg->regs_page) >> PAGE_SHIFT; 637 regs_pte = pfn_pte(__pa(lg->regs_page) >> PAGE_SHIFT, __pgprot(_PAGE_KERNEL));
615 regs_pte.flags = _PAGE_KERNEL; 638 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte;
616 switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTES_PER_PAGE]
617 = regs_pte;
618} 639}
619/*:*/ 640/*:*/
620 641
@@ -635,24 +656,25 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
635 unsigned int pages) 656 unsigned int pages)
636{ 657{
637 unsigned int i; 658 unsigned int i;
638 spte_t *pte = switcher_pte_page(cpu); 659 pte_t *pte = switcher_pte_page(cpu);
639 660
640 /* The first entries are easy: they map the Switcher code. */ 661 /* The first entries are easy: they map the Switcher code. */
641 for (i = 0; i < pages; i++) { 662 for (i = 0; i < pages; i++) {
642 pte[i].pfn = page_to_pfn(switcher_page[i]); 663 pte[i] = mk_pte(switcher_page[i],
643 pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED; 664 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
644 } 665 }
645 666
646 /* The only other thing we map is this CPU's pair of pages. */ 667 /* The only other thing we map is this CPU's pair of pages. */
647 i = pages + cpu*2; 668 i = pages + cpu*2;
648 669
649 /* First page (Guest registers) is writable from the Guest */ 670 /* First page (Guest registers) is writable from the Guest */
650 pte[i].pfn = page_to_pfn(switcher_page[i]); 671 pte[i] = pfn_pte(page_to_pfn(switcher_page[i]),
651 pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW; 672 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW));
673
652 /* The second page contains the "struct lguest_ro_state", and is 674 /* The second page contains the "struct lguest_ro_state", and is
653 * read-only. */ 675 * read-only. */
654 pte[i+1].pfn = page_to_pfn(switcher_page[i+1]); 676 pte[i+1] = pfn_pte(page_to_pfn(switcher_page[i+1]),
655 pte[i+1].flags = _PAGE_PRESENT|_PAGE_ACCESSED; 677 __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED));
656} 678}
657 679
658/*H:510 At boot or module load time, init_pagetables() allocates and populates 680/*H:510 At boot or module load time, init_pagetables() allocates and populates
@@ -662,7 +684,7 @@ __init int init_pagetables(struct page **switcher_page, unsigned int pages)
662 unsigned int i; 684 unsigned int i;
663 685
664 for_each_possible_cpu(i) { 686 for_each_possible_cpu(i) {
665 switcher_pte_page(i) = (spte_t *)get_zeroed_page(GFP_KERNEL); 687 switcher_pte_page(i) = (pte_t *)get_zeroed_page(GFP_KERNEL);
666 if (!switcher_pte_page(i)) { 688 if (!switcher_pte_page(i)) {
667 free_switcher_pte_pages(); 689 free_switcher_pte_pages();
668 return -ENOMEM; 690 return -ENOMEM;
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index 9b81119f46..c2434ec99f 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -73,14 +73,14 @@ static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
73 /* Segment descriptors contain a privilege level: the Guest is 73 /* Segment descriptors contain a privilege level: the Guest is
74 * sometimes careless and leaves this as 0, even though it's 74 * sometimes careless and leaves this as 0, even though it's
75 * running at privilege level 1. If so, we fix it here. */ 75 * running at privilege level 1. If so, we fix it here. */
76 if ((lg->gdt[i].b & 0x00006000) == 0) 76 if ((lg->arch.gdt[i].b & 0x00006000) == 0)
77 lg->gdt[i].b |= (GUEST_PL << 13); 77 lg->arch.gdt[i].b |= (GUEST_PL << 13);
78 78
79 /* Each descriptor has an "accessed" bit. If we don't set it 79 /* Each descriptor has an "accessed" bit. If we don't set it
80 * now, the CPU will try to set it when the Guest first loads 80 * now, the CPU will try to set it when the Guest first loads
81 * that entry into a segment register. But the GDT isn't 81 * that entry into a segment register. But the GDT isn't
82 * writable by the Guest, so bad things can happen. */ 82 * writable by the Guest, so bad things can happen. */
83 lg->gdt[i].b |= 0x00000100; 83 lg->arch.gdt[i].b |= 0x00000100;
84 } 84 }
85} 85}
86 86
@@ -106,12 +106,12 @@ void setup_default_gdt_entries(struct lguest_ro_state *state)
106void setup_guest_gdt(struct lguest *lg) 106void setup_guest_gdt(struct lguest *lg)
107{ 107{
108 /* Start with full 0-4G segments... */ 108 /* Start with full 0-4G segments... */
109 lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; 109 lg->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
110 lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; 110 lg->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
111 /* ...except the Guest is allowed to use them, so set the privilege 111 /* ...except the Guest is allowed to use them, so set the privilege
112 * level appropriately in the flags. */ 112 * level appropriately in the flags. */
113 lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); 113 lg->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
114 lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); 114 lg->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
115} 115}
116 116
117/* Like the IDT, we never simply use the GDT the Guest gives us. We set up the 117/* Like the IDT, we never simply use the GDT the Guest gives us. We set up the
@@ -126,7 +126,7 @@ void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt)
126 unsigned int i; 126 unsigned int i;
127 127
128 for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++) 128 for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++)
129 gdt[i] = lg->gdt[i]; 129 gdt[i] = lg->arch.gdt[i];
130} 130}
131 131
132/* This is the full version */ 132/* This is the full version */
@@ -138,7 +138,7 @@ void copy_gdt(const struct lguest *lg, struct desc_struct *gdt)
138 * replaced. See ignored_gdt() above. */ 138 * replaced. See ignored_gdt() above. */
139 for (i = 0; i < GDT_ENTRIES; i++) 139 for (i = 0; i < GDT_ENTRIES; i++)
140 if (!ignored_gdt(i)) 140 if (!ignored_gdt(i))
141 gdt[i] = lg->gdt[i]; 141 gdt[i] = lg->arch.gdt[i];
142} 142}
143 143
144/* This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT). */ 144/* This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT). */
@@ -146,12 +146,12 @@ void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num)
146{ 146{
147 /* We assume the Guest has the same number of GDT entries as the 147 /* We assume the Guest has the same number of GDT entries as the
148 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */ 148 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
149 if (num > ARRAY_SIZE(lg->gdt)) 149 if (num > ARRAY_SIZE(lg->arch.gdt))
150 kill_guest(lg, "too many gdt entries %i", num); 150 kill_guest(lg, "too many gdt entries %i", num);
151 151
152 /* We read the whole thing in, then fix it up. */ 152 /* We read the whole thing in, then fix it up. */
153 lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0])); 153 __lgread(lg, lg->arch.gdt, table, num * sizeof(lg->arch.gdt[0]));
154 fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt)); 154 fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->arch.gdt));
155 /* Mark that the GDT changed so the core knows it has to copy it again, 155 /* Mark that the GDT changed so the core knows it has to copy it again,
156 * even if the Guest is run on the same CPU. */ 156 * even if the Guest is run on the same CPU. */
157 lg->changed |= CHANGED_GDT; 157 lg->changed |= CHANGED_GDT;
@@ -159,9 +159,9 @@ void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num)
159 159
160void guest_load_tls(struct lguest *lg, unsigned long gtls) 160void guest_load_tls(struct lguest *lg, unsigned long gtls)
161{ 161{
162 struct desc_struct *tls = &lg->gdt[GDT_ENTRY_TLS_MIN]; 162 struct desc_struct *tls = &lg->arch.gdt[GDT_ENTRY_TLS_MIN];
163 163
164 lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES); 164 __lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES);
165 fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1); 165 fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1);
166 lg->changed |= CHANGED_GDT_TLS; 166 lg->changed |= CHANGED_GDT_TLS;
167} 167}
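Every change in segments.c is the same rename: lg->gdt becomes lg->arch.gdt, so the x86-only state now lives in a per-guest arch container. A rough sketch of what that container must hold, inferred from the fields this diff touches (anything beyond gdt and last_pagefault is an assumption):

        struct lguest_arch {
                struct desc_struct gdt[GDT_ENTRIES]; /* shadow GDT we fix up */
                unsigned long last_pagefault;        /* cr2 saved on trap 14 */
        };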
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
new file mode 100644
index 0000000000..9eed12d5a3
--- /dev/null
+++ b/drivers/lguest/x86/core.c
@@ -0,0 +1,577 @@
1/*
2 * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
3 * Copyright (C) 2007, Jes Sorensen <jes@sgi.com> SGI.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13 * NON INFRINGEMENT. See the GNU General Public License for more
14 * details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#include <linux/kernel.h>
21#include <linux/start_kernel.h>
22#include <linux/string.h>
23#include <linux/console.h>
24#include <linux/screen_info.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
27#include <linux/clocksource.h>
28#include <linux/clockchips.h>
29#include <linux/cpu.h>
30#include <linux/lguest.h>
31#include <linux/lguest_launcher.h>
32#include <asm/paravirt.h>
33#include <asm/param.h>
34#include <asm/page.h>
35#include <asm/pgtable.h>
36#include <asm/desc.h>
37#include <asm/setup.h>
38#include <asm/lguest.h>
39#include <asm/uaccess.h>
40#include <asm/i387.h>
41#include "../lg.h"
42
43static int cpu_had_pge;
44
45static struct {
46 unsigned long offset;
47 unsigned short segment;
48} lguest_entry;
49
50/* Offset from where switcher.S was compiled to where we've copied it */
51static unsigned long switcher_offset(void)
52{
53 return SWITCHER_ADDR - (unsigned long)start_switcher_text;
54}
55
56/* This cpu's struct lguest_pages. */
57static struct lguest_pages *lguest_pages(unsigned int cpu)
58{
59 return &(((struct lguest_pages *)
60 (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
61}
62
63static DEFINE_PER_CPU(struct lguest *, last_guest);
64
65/*S:010
66 * We are getting close to the Switcher.
67 *
68 * Remember that each CPU has two pages which are visible to the Guest when it
69 * runs on that CPU. This has to contain the state for that Guest: we copy the
70 * state in just before we run the Guest.
71 *
72 * Each Guest has "changed" flags which indicate what has changed in the Guest
73 * since it last ran. We saw this set in interrupts_and_traps.c and
74 * segments.c.
75 */
76static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
77{
78 /* Copying all this data can be quite expensive. We usually run the
79 * same Guest we ran last time (and that Guest hasn't run anywhere else
80 * meanwhile). If that's not the case, we pretend everything in the
81 * Guest has changed. */
82 if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {
83 __get_cpu_var(last_guest) = lg;
84 lg->last_pages = pages;
85 lg->changed = CHANGED_ALL;
86 }
87
88 /* These copies are pretty cheap, so we do them unconditionally: */
89 /* Save the current Host top-level page directory. */
90 pages->state.host_cr3 = __pa(current->mm->pgd);
91 /* Set up the Guest's page tables to see this CPU's pages (and no
92 * other CPU's pages). */
93 map_switcher_in_guest(lg, pages);
94 /* Set up the two "TSS" members which tell the CPU what stack to use
95 * for traps which go directly into the Guest (ie. traps at privilege
96 * level 1). */
97 pages->state.guest_tss.esp1 = lg->esp1;
98 pages->state.guest_tss.ss1 = lg->ss1;
99
100 /* Copy direct-to-Guest trap entries. */
101 if (lg->changed & CHANGED_IDT)
102 copy_traps(lg, pages->state.guest_idt, default_idt_entries);
103
104 /* Copy all GDT entries which the Guest can change. */
105 if (lg->changed & CHANGED_GDT)
106 copy_gdt(lg, pages->state.guest_gdt);
107 /* If only the TLS entries have changed, copy them. */
108 else if (lg->changed & CHANGED_GDT_TLS)
109 copy_gdt_tls(lg, pages->state.guest_gdt);
110
111 /* Mark the Guest as unchanged for next time. */
112 lg->changed = 0;
113}
114
115/* Finally: the code to actually call into the Switcher to run the Guest. */
116static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
117{
118 /* This is a dummy value we need for GCC's sake. */
119 unsigned int clobber;
120
121 /* Copy the guest-specific information into this CPU's "struct
122 * lguest_pages". */
123 copy_in_guest_info(lg, pages);
124
125 /* Set the trap number to 256 (impossible value). If we fault while
126 * switching to the Guest (bad segment registers or bug), this will
127 * cause us to abort the Guest. */
128 lg->regs->trapnum = 256;
129
130 /* Now: we push the "eflags" register on the stack, then do an "lcall".
131 * This is how we change from using the kernel code segment to using
132 * the dedicated lguest code segment, as well as jumping into the
133 * Switcher.
134 *
135 * The lcall also pushes the old code segment (KERNEL_CS) onto the
136 * stack, then the address of this call. This stack layout happens to
137 * exactly match the stack of an interrupt... */
138 asm volatile("pushf; lcall *lguest_entry"
139 /* This is how we tell GCC that %eax ("a") and %ebx ("b")
140 * are changed by this routine. The "=" means output. */
141 : "=a"(clobber), "=b"(clobber)
142 /* %eax contains the pages pointer. ("0" refers to the
143 * 0-th argument above, ie "a"). %ebx contains the
144 * physical address of the Guest's top-level page
145 * directory. */
146 : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
147 /* We tell gcc that all these registers could change,
148 * which means we don't have to save and restore them in
149 * the Switcher. */
150 : "memory", "%edx", "%ecx", "%edi", "%esi");
151}
152/*:*/
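Why does that stack layout "exactly match an interrupt"? Both sequences leave eflags, cs and eip on the stack in the same order, so the same frame can later be unwound with a single "iret". A sketch of the frame "pushf; lcall" builds (illustrative, not from the source):

        /* higher addresses                                    */
        /* [esp+8]  eflags  <- pushf                           */
        /* [esp+4]  cs      <- lcall pushed the old KERNEL_CS  */
        /* [esp+0]  eip     <- lcall pushed the return address */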
153
154/*H:040 This is the i386-specific code to setup and run the Guest. Interrupts
155 * are disabled: we own the CPU. */
156void lguest_arch_run_guest(struct lguest *lg)
157{
158 /* Remember the awfully-named TS bit? If the Guest has asked
159 * to set it we set it now, so we can trap and pass that trap
160 * to the Guest if it uses the FPU. */
161 if (lg->ts)
162 lguest_set_ts();
163
164 /* SYSENTER is an optimized way of doing system calls. We
165 * can't allow it because it always jumps to privilege level 0.
166 * A normal Guest won't try it because we don't advertise it in
167 * CPUID, but a malicious Guest (or malicious Guest userspace
168 * program) could, so we tell the CPU to disable it before
169 * running the Guest. */
170 if (boot_cpu_has(X86_FEATURE_SEP))
171 wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
172
173 /* Now we actually run the Guest. It will pop back out when
174 * something interesting happens, and we can examine its
175 * registers to see what it was doing. */
176 run_guest_once(lg, lguest_pages(raw_smp_processor_id()));
177
178 /* The "regs" pointer contains two extra entries which are not
179 * really registers: a trap number which says what interrupt or
180 * trap made the switcher code come back, and an error code
181 * which some traps set. */
182
183 /* If the Guest page faulted, then the cr2 register will tell
184 * us the bad virtual address. We have to grab this now,
185 * because once we re-enable interrupts an interrupt could
186 * fault and thus overwrite cr2, or we could even move off to a
187 * different CPU. */
188 if (lg->regs->trapnum == 14)
189 lg->arch.last_pagefault = read_cr2();
190 /* Similarly, if we took a trap because the Guest used the FPU,
191 * we have to restore the FPU it expects to see. */
192 else if (lg->regs->trapnum == 7)
193 math_state_restore();
194
195 /* Restore SYSENTER if it's supposed to be on. */
196 if (boot_cpu_has(X86_FEATURE_SEP))
197 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
198}
199
200/*H:130 Our Guest is usually so well behaved; it never tries to do things it
201 * isn't allowed to. Unfortunately, Linux's paravirtual infrastructure isn't
202 * quite complete, because it doesn't contain replacements for the Intel I/O
203 * instructions. As a result, the Guest sometimes fumbles across one during
204 * the boot process as it probes for various things which are usually attached
205 * to a PC.
206 *
207 * When the Guest uses one of these instructions, we get trap #13 (General
208 * Protection Fault) and come here. We see if it's one of those troublesome
209 * instructions and skip over it. We return true if we did. */
210static int emulate_insn(struct lguest *lg)
211{
212 u8 insn;
213 unsigned int insnlen = 0, in = 0, shift = 0;
214 /* The eip contains the *virtual* address of the Guest's instruction:
215 * guest_pa just subtracts the Guest's page_offset. */
216 unsigned long physaddr = guest_pa(lg, lg->regs->eip);
217
218 /* This must be the Guest kernel trying to do something, not userspace!
219 * The bottom two bits of the CS segment register are the privilege
220 * level. */
221 if ((lg->regs->cs & 3) != GUEST_PL)
222 return 0;
223
224 /* Decoding x86 instructions is icky. */
225 insn = lgread(lg, physaddr, u8);
226
227 /* 0x66 is an "operand prefix". It means it's using the upper 16 bits
228 of the eax register. */
229 if (insn == 0x66) {
230 shift = 16;
231 /* The instruction is 1 byte so far, read the next byte. */
232 insnlen = 1;
233 insn = lgread(lg, physaddr + insnlen, u8);
234 }
235
236 /* We can ignore the lower bit for the moment and decode the 4 opcodes
237 * we need to emulate. */
238 switch (insn & 0xFE) {
239 case 0xE4: /* in <next byte>,%al */
240 insnlen += 2;
241 in = 1;
242 break;
243 case 0xEC: /* in (%dx),%al */
244 insnlen += 1;
245 in = 1;
246 break;
247 case 0xE6: /* out %al,<next byte> */
248 insnlen += 2;
249 break;
250 case 0xEE: /* out %al,(%dx) */
251 insnlen += 1;
252 break;
253 default:
254 /* OK, we don't know what this is, can't emulate. */
255 return 0;
256 }
257
258 /* If it was an "IN" instruction, they expect the result to be read
259 * into %eax, so we change %eax. We always return all-ones, which
260 * traditionally means "there's nothing there". */
261 if (in) {
262 /* Lower bit tells us whether it's a 16 or 32 bit access */
263 if (insn & 0x1)
264 lg->regs->eax = 0xFFFFFFFF;
265 else
266 lg->regs->eax |= (0xFFFF << shift);
267 }
268 /* Finally, we've "done" the instruction, so move past it. */
269 lg->regs->eip += insnlen;
270 /* Success! */
271 return 1;
272}
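To make the decode table concrete, here is how the IN forms look on the wire (hand-assembled illustration; these are standard x86 encodings, not bytes from the source):

        e4 71      in $0x71,%al   /* 0xE4: port in the next byte, insnlen = 2 */
        ec         in (%dx),%al   /* 0xEC: port in %dx, insnlen = 1 */
        66 ed      in (%dx),%ax   /* 0x66 prefix first: 16-bit form, shift = 16 */

Masking with 0xFE folds each opcode together with its 32-bit sibling (0xE5, 0xED and so on), which is why the switch needs only four cases.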
273
274/*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
275void lguest_arch_handle_trap(struct lguest *lg)
276{
277 switch (lg->regs->trapnum) {
278 case 13: /* We've intercepted a GPF. */
279 /* Check if this was one of those annoying IN or OUT
280 * instructions which we need to emulate. If so, we
281 * just go back into the Guest after we've done it. */
282 if (lg->regs->errcode == 0) {
283 if (emulate_insn(lg))
284 return;
285 }
286 break;
287 case 14: /* We've intercepted a page fault. */
288 /* The Guest accessed a virtual address that wasn't
289 * mapped. This happens a lot: we don't actually set
290 * up most of the page tables for the Guest at all when
291 * we start: as it runs it asks for more and more, and
292 * we set them up as required. In this case, we don't
293 * even tell the Guest that the fault happened.
294 *
295 * The errcode tells whether this was a read or a
296 * write, and whether kernel or userspace code. */
297 if (demand_page(lg, lg->arch.last_pagefault, lg->regs->errcode))
298 return;
299
300 /* OK, it's really not there (or not OK): the Guest
301 * needs to know. We write out the cr2 value so it
302 * knows where the fault occurred.
303 *
304 * Note that if the Guest were really messed up, this
305 * could happen before it's done the INITIALIZE
306 * hypercall, so lg->lguest_data will be NULL */
307 if (lg->lguest_data &&
308 put_user(lg->arch.last_pagefault, &lg->lguest_data->cr2))
309 kill_guest(lg, "Writing cr2");
310 break;
311 case 7: /* We've intercepted a Device Not Available fault. */
312 /* If the Guest doesn't want to know, we already
313 * restored the Floating Point Unit, so we just
314 * continue without telling it. */
315 if (!lg->ts)
316 return;
317 break;
318 case 32 ... 255:
319 /* These values mean a real interrupt occurred, in which case
320 * the Host handler has already been run. We just do a
321 * friendly check if another process should now be run, then
322 * return to run the Guest again */
323 cond_resched();
324 return;
325 case LGUEST_TRAP_ENTRY:
326 /* Our 'struct hcall_args' maps directly over our regs: we set
327 * up the pointer now to indicate a hypercall is pending. */
328 lg->hcall = (struct hcall_args *)lg->regs;
329 return;
330 }
331
332 /* We didn't handle the trap, so it needs to go to the Guest. */
333 if (!deliver_trap(lg, lg->regs->trapnum))
334 /* If the Guest doesn't have a handler (either it hasn't
335 * registered any yet, or it's one of the faults we don't let
336 * it handle), it dies with a cryptic error message. */
337 kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
338 lg->regs->trapnum, lg->regs->eip,
339 lg->regs->trapnum == 14 ? lg->arch.last_pagefault
340 : lg->regs->errcode);
341}
342
343/* Now we can look at each of the routines this calls, in increasing order of
344 * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
345 * deliver_trap() and demand_page(). After all those, we'll be ready to
346 * examine the Switcher, and our philosophical understanding of the Host/Guest
347 * duality will be complete. :*/
348static void adjust_pge(void *on)
349{
350 if (on)
351 write_cr4(read_cr4() | X86_CR4_PGE);
352 else
353 write_cr4(read_cr4() & ~X86_CR4_PGE);
354}
355
356/*H:020 Now the Switcher is mapped and everything else is ready, we need to do
357 * some more i386-specific initialization. */
358void __init lguest_arch_host_init(void)
359{
360 int i;
361
362 /* Most of the i386/switcher.S doesn't care that it's been moved; on
363 * Intel, jumps are relative, and it doesn't access any references to
364 * external code or data.
365 *
366 * The only exception is the interrupt handlers in switcher.S: their
367 * addresses are placed in a table (default_idt_entries), so we need to
368 * update the table with the new addresses. switcher_offset() is a
369 * convenience function which returns the distance between the builtin
370 * switcher code and the high-mapped copy we just made. */
371 for (i = 0; i < IDT_ENTRIES; i++)
372 default_idt_entries[i] += switcher_offset();
373
374 /*
375 * Set up the Switcher's per-cpu areas.
376 *
377 * Each CPU gets two pages of its own within the high-mapped region
378 * (aka. "struct lguest_pages"). Much of this can be initialized now,
379 * but some depends on what Guest we are running (which is set up in
380 * copy_in_guest_info()).
381 */
382 for_each_possible_cpu(i) {
383 /* lguest_pages() returns this CPU's two pages. */
384 struct lguest_pages *pages = lguest_pages(i);
385 /* This is a convenience pointer to make the code fit one
386 * statement to a line. */
387 struct lguest_ro_state *state = &pages->state;
388
389 /* The Global Descriptor Table: the Host has a different one
390 * for each CPU. We keep a descriptor for the GDT which says
391 * where it is and how big it is (the size is actually the last
392 * byte, not the size, hence the "-1"). */
393 state->host_gdt_desc.size = GDT_SIZE-1;
394 state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
395
396 /* All CPUs on the Host use the same Interrupt Descriptor
397 * Table, so we just use store_idt(), which gets this CPU's IDT
398 * descriptor. */
399 store_idt(&state->host_idt_desc);
400
401 /* The descriptors for the Guest's GDT and IDT can be filled
402 * out now, too. We copy the GDT & IDT into ->guest_gdt and
403 * ->guest_idt before actually running the Guest. */
404 state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
405 state->guest_idt_desc.address = (long)&state->guest_idt;
406 state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
407 state->guest_gdt_desc.address = (long)&state->guest_gdt;
408
409 /* We know where we want the stack to be when the Guest enters
410 * the switcher: in pages->regs. The stack grows downwards, so
411 * we start it at the end of that structure. */
412 state->guest_tss.esp0 = (long)(&pages->regs + 1);
413 /* And this is the GDT entry to use for the stack: we keep a
414 * couple of special LGUEST entries. */
415 state->guest_tss.ss0 = LGUEST_DS;
416
417 /* x86 can have a fine-grained bitmap which indicates what I/O
418 * ports the process can use. We set it to the end of our
419 * structure, meaning "none". */
420 state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);
421
422 /* Some GDT entries are the same across all Guests, so we can
423 * set them up now. */
424 setup_default_gdt_entries(state);
425 /* Most IDT entries are the same for all Guests, too. */
426 setup_default_idt_entries(state, default_idt_entries);
427
428 /* The Host needs to be able to use the LGUEST segments on this
429 * CPU, too, so put them in the Host GDT. */
430 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
431 get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
432 }
433
434 /* In the Switcher, we want the %cs segment register to use the
435 * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
436 * it will be undisturbed when we switch. To change %cs and jump we
437 * need this structure to feed to Intel's "lcall" instruction. */
438 lguest_entry.offset = (long)switch_to_guest + switcher_offset();
439 lguest_entry.segment = LGUEST_CS;
440
441 /* Finally, we need to turn off "Page Global Enable". PGE is an
442 * optimization where page table entries are specially marked to show
443 * they never change. The Host kernel marks all the kernel pages this
444 * way because it's always present, even when userspace is running.
445 *
446 * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
447 * switch to the Guest kernel. If you don't disable this on all CPUs,
448 * you'll get really weird bugs that you'll chase for two days.
449 *
450 * I used to turn PGE off every time we switched to the Guest and back
451 * on when we returned, but that slowed the Switcher down noticeably. */
452
453 /* We don't need the complexity of CPUs coming and going while we're
454 * doing this. */
455 lock_cpu_hotplug();
456 if (cpu_has_pge) { /* We have a broader idea of "global". */
457 /* Remember that this was originally set (for cleanup). */
458 cpu_had_pge = 1;
459 /* adjust_pge is a helper function which sets or unsets the PGE
460 * bit on its CPU, depending on the argument (0 == unset). */
461 on_each_cpu(adjust_pge, (void *)0, 0, 1);
462 /* Turn off the feature in the global feature set. */
463 clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
464 }
465 unlock_cpu_hotplug();
466}
467/*:*/
468
469void __exit lguest_arch_host_fini(void)
470{
471 /* If we had PGE before we started, turn it back on now. */
472 lock_cpu_hotplug();
473 if (cpu_had_pge) {
474 set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
475 /* adjust_pge's argument "1" means set PGE. */
476 on_each_cpu(adjust_pge, (void *)1, 0, 1);
477 }
478 unlock_cpu_hotplug();
479}
480
481
482/*H:122 The i386-specific hypercalls simply farm out to the right functions. */
483int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args)
484{
485 switch (args->arg0) {
486 case LHCALL_LOAD_GDT:
487 load_guest_gdt(lg, args->arg1, args->arg2);
488 break;
489 case LHCALL_LOAD_IDT_ENTRY:
490 load_guest_idt_entry(lg, args->arg1, args->arg2, args->arg3);
491 break;
492 case LHCALL_LOAD_TLS:
493 guest_load_tls(lg, args->arg1);
494 break;
495 default:
496 /* Bad Guest. Bad! */
497 return -EIO;
498 }
499 return 0;
500}
501
502/*H:126 i386-specific hypercall initialization: */
503int lguest_arch_init_hypercalls(struct lguest *lg)
504{
505 u32 tsc_speed;
506
507 /* The pointer to the Guest's "struct lguest_data" is the only
508 * argument. We check that address now. */
509 if (!lguest_address_ok(lg, lg->hcall->arg1, sizeof(*lg->lguest_data)))
510 return -EFAULT;
511
512 /* Having checked it, we simply set lg->lguest_data to point straight
513 * into the Launcher's memory at the right place and then use
514 * copy_to_user/from_user from now on, instead of lgread/write. I put
515 * this in to show that I'm not immune to writing stupid
516 * optimizations. */
517 lg->lguest_data = lg->mem_base + lg->hcall->arg1;
518
519 /* We insist that the Time Stamp Counter exists and doesn't change with
520 * cpu frequency. Some devious chip manufacturers decided that TSC
521 * changes could be handled in software. I decided that time going
522 * backwards might be good for benchmarks, but it's bad for users.
523 *
524 * We also insist that the TSC be stable: the kernel detects unreliable
525 * TSCs for its own purposes, and we use that here. */
526 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable())
527 tsc_speed = tsc_khz;
528 else
529 tsc_speed = 0;
530 if (put_user(tsc_speed, &lg->lguest_data->tsc_khz))
531 return -EFAULT;
532
533 /* The interrupt code might not like the system call vector. */
534 if (!check_syscall_vector(lg))
535 kill_guest(lg, "bad syscall vector");
536
537 return 0;
538}
539/* Now we've examined the hypercall code; our Guest can make requests. There
540 * is one other way we can do things for the Guest, as we see in
541 * emulate_insn(). :*/
542
543/*L:030 lguest_arch_setup_regs()
544 *
545 * Most of the Guest's registers are left alone: we used get_zeroed_page() to
546 * allocate the structure, so they will be 0. */
547void lguest_arch_setup_regs(struct lguest *lg, unsigned long start)
548{
549 struct lguest_regs *regs = lg->regs;
550
551 /* There are four "segment" registers which the Guest needs to boot:
552 * The "code segment" register (cs) refers to the kernel code segment
553 * __KERNEL_CS, and the "data", "extra" and "stack" segment registers
554 * refer to the kernel data segment __KERNEL_DS.
555 *
556 * The privilege level is packed into the lower bits. The Guest runs
557 * at privilege level 1 (GUEST_PL).*/
558 regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
559 regs->cs = __KERNEL_CS|GUEST_PL;
560
561 /* The "eflags" register contains miscellaneous flags. Bit 1 (0x002)
562 * is supposed to always be "1". Bit 9 (0x200) controls whether
563 * interrupts are enabled. We always leave interrupts enabled while
564 * running the Guest. */
565 regs->eflags = 0x202;
566
567 /* The "Extended Instruction Pointer" register says where the Guest is
568 * running. */
569 regs->eip = start;
570
571 /* %esi points to our boot information, at physical address 0, so don't
572 * touch it. */
573 /* There are a couple of GDT entries the Guest expects when first
574 * booting. */
575
576 setup_guest_gdt(lg);
577}
diff --git a/drivers/lguest/switcher.S b/drivers/lguest/x86/switcher_32.S
index 7c9c230cc8..1010b90b11 100644
--- a/drivers/lguest/switcher.S
+++ b/drivers/lguest/x86/switcher_32.S
@@ -48,7 +48,8 @@
48#include <linux/linkage.h> 48#include <linux/linkage.h>
49#include <asm/asm-offsets.h> 49#include <asm/asm-offsets.h>
50#include <asm/page.h> 50#include <asm/page.h>
51#include "lg.h" 51#include <asm/segment.h>
52#include <asm/lguest.h>
52 53
53// We mark the start of the code to copy 54// We mark the start of the code to copy
54// It's placed in .text though it's never run here 55// It's placed in .text though it's never run here
@@ -132,6 +133,7 @@ ENTRY(switch_to_guest)
132 // The Guest's register page has been mapped 133 // The Guest's register page has been mapped
133 // Writable onto our %esp (stack) -- 134 // Writable onto our %esp (stack) --
134 // We can simply pop off all Guest regs. 135 // We can simply pop off all Guest regs.
136 popl %eax
135 popl %ebx 137 popl %ebx
136 popl %ecx 138 popl %ecx
137 popl %edx 139 popl %edx
@@ -139,7 +141,6 @@ ENTRY(switch_to_guest)
139 popl %edi 141 popl %edi
140 popl %ebp 142 popl %ebp
141 popl %gs 143 popl %gs
142 popl %eax
143 popl %fs 144 popl %fs
144 popl %ds 145 popl %ds
145 popl %es 146 popl %es
@@ -167,7 +168,6 @@ ENTRY(switch_to_guest)
167 pushl %es; \ 168 pushl %es; \
168 pushl %ds; \ 169 pushl %ds; \
169 pushl %fs; \ 170 pushl %fs; \
170 pushl %eax; \
171 pushl %gs; \ 171 pushl %gs; \
172 pushl %ebp; \ 172 pushl %ebp; \
173 pushl %edi; \ 173 pushl %edi; \
@@ -175,6 +175,7 @@ ENTRY(switch_to_guest)
175 pushl %edx; \ 175 pushl %edx; \
176 pushl %ecx; \ 176 pushl %ecx; \
177 pushl %ebx; \ 177 pushl %ebx; \
178 pushl %eax; \
178 /* Our stack and our code are using segments \ 179 /* Our stack and our code are using segments \
179 * Set in the TSS and IDT \ 180 * Set in the TSS and IDT \
180 * Yet if we were to touch data we'd use \ 181 * Yet if we were to touch data we'd use \
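Moving "popl %eax" to the front (and "pushl %eax" to the end of the save macro) only works if struct lguest_regs was reordered to match, since the pops consume the register page bottom-up. The field order these pops imply (a sketch; names past the visible pop sequence are assumptions):

        struct lguest_regs {
                unsigned long eax, ebx, ecx, edx;
                unsigned long esi, edi, ebp, gs;
                unsigned long fs, ds, es;
                /* trapnum, errcode and the iret frame follow */
        };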
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 927cb34c48..7c426d07a5 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -274,7 +274,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
274 if (bitmap->offset < 0) { 274 if (bitmap->offset < 0) {
275 /* DATA BITMAP METADATA */ 275 /* DATA BITMAP METADATA */
276 if (bitmap->offset 276 if (bitmap->offset
277 + page->index * (PAGE_SIZE/512) 277 + (long)(page->index * (PAGE_SIZE/512))
278 + size/512 > 0) 278 + size/512 > 0)
279 /* bitmap runs in to metadata */ 279 /* bitmap runs in to metadata */
280 return -EINVAL; 280 return -EINVAL;
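The new (long) cast matters because page->index is unsigned (a pgoff_t): mixing it with the signed bitmap->offset promotes the whole sum to unsigned long, so a negative result wraps to a huge positive value and the "> 0" overlap check misfires. A standalone illustration with made-up numbers:

        long offset = -16;                      /* bitmap sits before the superblock */
        unsigned long sectors = 1 * (4096/512); /* page->index * (PAGE_SIZE/512) = 8 */

        offset + sectors > 0        /* promoted to unsigned: ~0UL - 7, wrongly "overlaps" */
        offset + (long)sectors > 0  /* stays signed: -8, correctly no overlap */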
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 0eb5416798..ac54f697c5 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -348,16 +348,17 @@ static int crypt_convert(struct crypt_config *cc,
348 ctx->idx_out < ctx->bio_out->bi_vcnt) { 348 ctx->idx_out < ctx->bio_out->bi_vcnt) {
349 struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in); 349 struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in);
350 struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); 350 struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out);
351 struct scatterlist sg_in = { 351 struct scatterlist sg_in, sg_out;
352 .page = bv_in->bv_page, 352
353 .offset = bv_in->bv_offset + ctx->offset_in, 353 sg_init_table(&sg_in, 1);
354 .length = 1 << SECTOR_SHIFT 354 sg_set_page(&sg_in, bv_in->bv_page);
355 }; 355 sg_in.offset = bv_in->bv_offset + ctx->offset_in;
356 struct scatterlist sg_out = { 356 sg_in.length = 1 << SECTOR_SHIFT;
357 .page = bv_out->bv_page, 357
358 .offset = bv_out->bv_offset + ctx->offset_out, 358 sg_init_table(&sg_out, 1);
359 .length = 1 << SECTOR_SHIFT 359 sg_set_page(&sg_out, bv_out->bv_page);
360 }; 360 sg_out.offset = bv_out->bv_offset + ctx->offset_out;
361 sg_out.length = 1 << SECTOR_SHIFT;
361 362
362 ctx->offset_in += sg_in.length; 363 ctx->offset_in += sg_in.length;
363 if (ctx->offset_in >= bv_in->bv_len) { 364 if (ctx->offset_in >= bv_in->bv_len) {
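The on-stack designated initializers are gone because scatterlist entries now carry extra bookkeeping (the chained-sg termination magic) that only sg_init_table() sets up; filling in the fields of an uninitialized entry is no longer safe. The single-entry pattern as this tree uses it (a sketch; offset and length are still assigned directly here):

        struct scatterlist sg;

        sg_init_table(&sg, 1);        /* one entry, properly terminated */
        sg_set_page(&sg, page);       /* attach the page */
        sg.offset = off;              /* byte offset within that page */
        sg.length = len;              /* bytes to map */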
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8ee181a01f..80a67d789b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -376,7 +376,12 @@ static unsigned long get_stripe_work(struct stripe_head *sh)
376 ack++; 376 ack++;
377 377
378 sh->ops.count -= ack; 378 sh->ops.count -= ack;
379 BUG_ON(sh->ops.count < 0); 379 if (unlikely(sh->ops.count < 0)) {
380 printk(KERN_ERR "pending: %#lx ops.pending: %#lx ops.ack: %#lx "
381 "ops.complete: %#lx\n", pending, sh->ops.pending,
382 sh->ops.ack, sh->ops.complete);
383 BUG();
384 }
380 385
381 return pending; 386 return pending;
382} 387}
@@ -550,8 +555,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
550 } 555 }
551 } 556 }
552 } 557 }
553 clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack); 558 set_bit(STRIPE_OP_BIOFILL, &sh->ops.complete);
554 clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
555 559
556 return_io(return_bi); 560 return_io(return_bi);
557 561
@@ -2893,6 +2897,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2893 s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); 2897 s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
2894 /* Now to look around and see what can be done */ 2898 /* Now to look around and see what can be done */
2895 2899
2900 /* clean-up completed biofill operations */
2901 if (test_bit(STRIPE_OP_BIOFILL, &sh->ops.complete)) {
2902 clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
2903 clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
2904 clear_bit(STRIPE_OP_BIOFILL, &sh->ops.complete);
2905 }
2906
2896 rcu_read_lock(); 2907 rcu_read_lock();
2897 for (i=disks; i--; ) { 2908 for (i=disks; i--; ) {
2898 mdk_rdev_t *rdev; 2909 mdk_rdev_t *rdev;
diff --git a/drivers/media/common/ir-keymaps.c b/drivers/media/common/ir-keymaps.c
index aefcf28da1..185e8a860c 100644
--- a/drivers/media/common/ir-keymaps.c
+++ b/drivers/media/common/ir-keymaps.c
@@ -1074,41 +1074,41 @@ EXPORT_SYMBOL_GPL(ir_codes_manli);
1074/* Mike Baikov <mike@baikov.com> */ 1074/* Mike Baikov <mike@baikov.com> */
1075IR_KEYTAB_TYPE ir_codes_gotview7135[IR_KEYTAB_SIZE] = { 1075IR_KEYTAB_TYPE ir_codes_gotview7135[IR_KEYTAB_SIZE] = {
1076 1076
1077 [ 0x21 ] = KEY_POWER, 1077 [ 0x11 ] = KEY_POWER,
1078 [ 0x69 ] = KEY_TV, 1078 [ 0x35 ] = KEY_TV,
1079 [ 0x33 ] = KEY_0, 1079 [ 0x1b ] = KEY_0,
1080 [ 0x51 ] = KEY_1, 1080 [ 0x29 ] = KEY_1,
1081 [ 0x31 ] = KEY_2, 1081 [ 0x19 ] = KEY_2,
1082 [ 0x71 ] = KEY_3, 1082 [ 0x39 ] = KEY_3,
1083 [ 0x3b ] = KEY_4, 1083 [ 0x1f ] = KEY_4,
1084 [ 0x58 ] = KEY_5, 1084 [ 0x2c ] = KEY_5,
1085 [ 0x41 ] = KEY_6, 1085 [ 0x21 ] = KEY_6,
1086 [ 0x48 ] = KEY_7, 1086 [ 0x24 ] = KEY_7,
1087 [ 0x30 ] = KEY_8, 1087 [ 0x18 ] = KEY_8,
1088 [ 0x53 ] = KEY_9, 1088 [ 0x2b ] = KEY_9,
1089 [ 0x73 ] = KEY_AGAIN, /* LOOP */ 1089 [ 0x3b ] = KEY_AGAIN, /* LOOP */
1090 [ 0x0a ] = KEY_AUDIO, 1090 [ 0x06 ] = KEY_AUDIO,
1091 [ 0x61 ] = KEY_PRINT, /* PREVIEW */ 1091 [ 0x31 ] = KEY_PRINT, /* PREVIEW */
1092 [ 0x7a ] = KEY_VIDEO, 1092 [ 0x3e ] = KEY_VIDEO,
1093 [ 0x20 ] = KEY_CHANNELUP, 1093 [ 0x10 ] = KEY_CHANNELUP,
1094 [ 0x40 ] = KEY_CHANNELDOWN, 1094 [ 0x20 ] = KEY_CHANNELDOWN,
1095 [ 0x18 ] = KEY_VOLUMEDOWN, 1095 [ 0x0c ] = KEY_VOLUMEDOWN,
1096 [ 0x50 ] = KEY_VOLUMEUP, 1096 [ 0x28 ] = KEY_VOLUMEUP,
1097 [ 0x10 ] = KEY_MUTE, 1097 [ 0x08 ] = KEY_MUTE,
1098 [ 0x4a ] = KEY_SEARCH, 1098 [ 0x26 ] = KEY_SEARCH, /*SCAN*/
1099 [ 0x7b ] = KEY_SHUFFLE, /* SNAPSHOT */ 1099 [ 0x3f ] = KEY_SHUFFLE, /* SNAPSHOT */
1100 [ 0x22 ] = KEY_RECORD, 1100 [ 0x12 ] = KEY_RECORD,
1101 [ 0x62 ] = KEY_STOP, 1101 [ 0x32 ] = KEY_STOP,
1102 [ 0x78 ] = KEY_PLAY, 1102 [ 0x3c ] = KEY_PLAY,
1103 [ 0x39 ] = KEY_REWIND, 1103 [ 0x1d ] = KEY_REWIND,
1104 [ 0x59 ] = KEY_PAUSE, 1104 [ 0x2d ] = KEY_PAUSE,
1105 [ 0x19 ] = KEY_FORWARD, 1105 [ 0x0d ] = KEY_FORWARD,
1106 [ 0x09 ] = KEY_ZOOM, 1106 [ 0x05 ] = KEY_ZOOM, /*FULL*/
1107 1107
1108 [ 0x52 ] = KEY_F21, /* LIVE TIMESHIFT */ 1108 [ 0x2a ] = KEY_F21, /* LIVE TIMESHIFT */
1109 [ 0x1a ] = KEY_F22, /* MIN TIMESHIFT */ 1109 [ 0x0e ] = KEY_F22, /* MIN TIMESHIFT */
1110 [ 0x3a ] = KEY_F23, /* TIMESHIFT */ 1110 [ 0x1e ] = KEY_F23, /* TIMESHIFT */
1111 [ 0x70 ] = KEY_F24, /* NORMAL TIMESHIFT */ 1111 [ 0x38 ] = KEY_F24, /* NORMAL TIMESHIFT */
1112}; 1112};
1113 1113
1114EXPORT_SYMBOL_GPL(ir_codes_gotview7135); 1114EXPORT_SYMBOL_GPL(ir_codes_gotview7135);
diff --git a/drivers/media/common/saa7146_core.c b/drivers/media/common/saa7146_core.c
index 365a22118a..2b1f8b4be0 100644
--- a/drivers/media/common/saa7146_core.c
+++ b/drivers/media/common/saa7146_core.c
@@ -112,12 +112,13 @@ static struct scatterlist* vmalloc_to_sg(unsigned char *virt, int nr_pages)
112 sglist = kcalloc(nr_pages, sizeof(struct scatterlist), GFP_KERNEL); 112 sglist = kcalloc(nr_pages, sizeof(struct scatterlist), GFP_KERNEL);
113 if (NULL == sglist) 113 if (NULL == sglist)
114 return NULL; 114 return NULL;
115 sg_init_table(sglist, nr_pages);
115 for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) { 116 for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) {
116 pg = vmalloc_to_page(virt); 117 pg = vmalloc_to_page(virt);
117 if (NULL == pg) 118 if (NULL == pg)
118 goto err; 119 goto err;
119 BUG_ON(PageHighMem(pg)); 120 BUG_ON(PageHighMem(pg));
120 sglist[i].page = pg; 121 sg_set_page(&sglist[i], pg);
121 sglist[i].length = PAGE_SIZE; 122 sglist[i].length = PAGE_SIZE;
122 } 123 }
123 return sglist; 124 return sglist;
diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c
index a05e5c1822..db08b0a888 100644
--- a/drivers/media/dvb/cinergyT2/cinergyT2.c
+++ b/drivers/media/dvb/cinergyT2/cinergyT2.c
@@ -345,7 +345,9 @@ static int cinergyt2_start_feed(struct dvb_demux_feed *dvbdmxfeed)
345 struct dvb_demux *demux = dvbdmxfeed->demux; 345 struct dvb_demux *demux = dvbdmxfeed->demux;
346 struct cinergyt2 *cinergyt2 = demux->priv; 346 struct cinergyt2 *cinergyt2 = demux->priv;
347 347
348 if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->sem)) 348 if (cinergyt2->disconnect_pending)
349 return -EAGAIN;
350 if (mutex_lock_interruptible(&cinergyt2->sem))
349 return -ERESTARTSYS; 351 return -ERESTARTSYS;
350 352
351 if (cinergyt2->streaming == 0) 353 if (cinergyt2->streaming == 0)
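Every hunk in this driver makes the same split: the old combined test returned -ERESTARTSYS for both failure modes, but only an interrupted lock wait deserves that errno; a pending disconnect is a device-going-away condition better reported as -EAGAIN. The disentangled shape each call site now follows (generic sketch):

        if (dev->disconnect_pending)              /* device is being unplugged */
                return -EAGAIN;
        if (mutex_lock_interruptible(&dev->sem))  /* we caught a signal */
                return -ERESTARTSYS;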
@@ -361,7 +363,9 @@ static int cinergyt2_stop_feed(struct dvb_demux_feed *dvbdmxfeed)
361 struct dvb_demux *demux = dvbdmxfeed->demux; 363 struct dvb_demux *demux = dvbdmxfeed->demux;
362 struct cinergyt2 *cinergyt2 = demux->priv; 364 struct cinergyt2 *cinergyt2 = demux->priv;
363 365
364 if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->sem)) 366 if (cinergyt2->disconnect_pending)
367 return -EAGAIN;
368 if (mutex_lock_interruptible(&cinergyt2->sem))
365 return -ERESTARTSYS; 369 return -ERESTARTSYS;
366 370
367 if (--cinergyt2->streaming == 0) 371 if (--cinergyt2->streaming == 0)
@@ -481,12 +485,16 @@ static int cinergyt2_open (struct inode *inode, struct file *file)
481{ 485{
482 struct dvb_device *dvbdev = file->private_data; 486 struct dvb_device *dvbdev = file->private_data;
483 struct cinergyt2 *cinergyt2 = dvbdev->priv; 487 struct cinergyt2 *cinergyt2 = dvbdev->priv;
484 int err = -ERESTARTSYS; 488 int err = -EAGAIN;
485 489
486 if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->wq_sem)) 490 if (cinergyt2->disconnect_pending)
491 goto out;
492 err = mutex_lock_interruptible(&cinergyt2->wq_sem);
493 if (err)
487 goto out; 494 goto out;
488 495
489 if (mutex_lock_interruptible(&cinergyt2->sem)) 496 err = mutex_lock_interruptible(&cinergyt2->sem);
497 if (err)
490 goto out_unlock1; 498 goto out_unlock1;
491 499
492 if ((err = dvb_generic_open(inode, file))) 500 if ((err = dvb_generic_open(inode, file)))
@@ -550,7 +558,9 @@ static unsigned int cinergyt2_poll (struct file *file, struct poll_table_struct
550 struct cinergyt2 *cinergyt2 = dvbdev->priv; 558 struct cinergyt2 *cinergyt2 = dvbdev->priv;
551 unsigned int mask = 0; 559 unsigned int mask = 0;
552 560
553 if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->sem)) 561 if (cinergyt2->disconnect_pending)
562 return -EAGAIN;
563 if (mutex_lock_interruptible(&cinergyt2->sem))
554 return -ERESTARTSYS; 564 return -ERESTARTSYS;
555 565
556 poll_wait(file, &cinergyt2->poll_wq, wait); 566 poll_wait(file, &cinergyt2->poll_wq, wait);
@@ -625,7 +635,9 @@ static int cinergyt2_ioctl (struct inode *inode, struct file *file,
625 if (copy_from_user(&p, (void __user*) arg, sizeof(p))) 635 if (copy_from_user(&p, (void __user*) arg, sizeof(p)))
626 return -EFAULT; 636 return -EFAULT;
627 637
628 if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->sem)) 638 if (cinergyt2->disconnect_pending)
639 return -EAGAIN;
640 if (mutex_lock_interruptible(&cinergyt2->sem))
629 return -ERESTARTSYS; 641 return -ERESTARTSYS;
630 642
631 param->cmd = CINERGYT2_EP1_SET_TUNER_PARAMETERS; 643 param->cmd = CINERGYT2_EP1_SET_TUNER_PARAMETERS;
@@ -996,7 +1008,9 @@ static int cinergyt2_suspend (struct usb_interface *intf, pm_message_t state)
996{ 1008{
997 struct cinergyt2 *cinergyt2 = usb_get_intfdata (intf); 1009 struct cinergyt2 *cinergyt2 = usb_get_intfdata (intf);
998 1010
999 if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->wq_sem)) 1011 if (cinergyt2->disconnect_pending)
1012 return -EAGAIN;
1013 if (mutex_lock_interruptible(&cinergyt2->wq_sem))
1000 return -ERESTARTSYS; 1014 return -ERESTARTSYS;
1001 1015
1002 cinergyt2_suspend_rc(cinergyt2); 1016 cinergyt2_suspend_rc(cinergyt2);
@@ -1017,16 +1031,18 @@ static int cinergyt2_resume (struct usb_interface *intf)
1017{ 1031{
1018 struct cinergyt2 *cinergyt2 = usb_get_intfdata (intf); 1032 struct cinergyt2 *cinergyt2 = usb_get_intfdata (intf);
1019 struct dvbt_set_parameters_msg *param = &cinergyt2->param; 1033 struct dvbt_set_parameters_msg *param = &cinergyt2->param;
1020 int err = -ERESTARTSYS; 1034 int err = -EAGAIN;
1021 1035
1022 if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->wq_sem)) 1036 if (cinergyt2->disconnect_pending)
1037 goto out;
1038 err = mutex_lock_interruptible(&cinergyt2->wq_sem);
1039 if (err)
1023 goto out; 1040 goto out;
1024 1041
1025 if (mutex_lock_interruptible(&cinergyt2->sem)) 1042 err = mutex_lock_interruptible(&cinergyt2->sem);
1043 if (err)
1026 goto out_unlock1; 1044 goto out_unlock1;
1027 1045
1028 err = 0;
1029
1030 if (!cinergyt2->sleeping) { 1046 if (!cinergyt2->sleeping) {
1031 cinergyt2_sleep(cinergyt2, 0); 1047 cinergyt2_sleep(cinergyt2, 0);
1032 cinergyt2_command(cinergyt2, (char *) param, sizeof(*param), NULL, 0); 1048 cinergyt2_command(cinergyt2, (char *) param, sizeof(*param), NULL, 0);
diff --git a/drivers/media/dvb/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
index 084a508a03..89437fdab8 100644
--- a/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
+++ b/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
@@ -972,7 +972,7 @@ static int dvb_ca_en50221_thread(void *data)
972 /* main loop */ 972 /* main loop */
973 while (!kthread_should_stop()) { 973 while (!kthread_should_stop()) {
974 /* sleep for a bit */ 974 /* sleep for a bit */
975 while (!ca->wakeup) { 975 if (!ca->wakeup) {
976 set_current_state(TASK_INTERRUPTIBLE); 976 set_current_state(TASK_INTERRUPTIBLE);
977 schedule_timeout(ca->delay); 977 schedule_timeout(ca->delay);
978 if (kthread_should_stop()) 978 if (kthread_should_stop())
diff --git a/drivers/media/dvb/dvb-usb/dib0700_devices.c b/drivers/media/dvb/dvb-usb/dib0700_devices.c
index e8c4a86945..58452b5200 100644
--- a/drivers/media/dvb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/dvb/dvb-usb/dib0700_devices.c
@@ -828,7 +828,7 @@ MODULE_DEVICE_TABLE(usb, dib0700_usb_id_table);
828#define DIB0700_DEFAULT_DEVICE_PROPERTIES \ 828#define DIB0700_DEFAULT_DEVICE_PROPERTIES \
829 .caps = DVB_USB_IS_AN_I2C_ADAPTER, \ 829 .caps = DVB_USB_IS_AN_I2C_ADAPTER, \
830 .usb_ctrl = DEVICE_SPECIFIC, \ 830 .usb_ctrl = DEVICE_SPECIFIC, \
831 .firmware = "dvb-usb-dib0700-03-pre1.fw", \ 831 .firmware = "dvb-usb-dib0700-1.10.fw", \
832 .download_firmware = dib0700_download_firmware, \ 832 .download_firmware = dib0700_download_firmware, \
833 .no_reconnect = 1, \ 833 .no_reconnect = 1, \
834 .size_of_priv = sizeof(struct dib0700_state), \ 834 .size_of_priv = sizeof(struct dib0700_state), \
diff --git a/drivers/media/radio/miropcm20-radio.c b/drivers/media/radio/miropcm20-radio.c
index c7c9d1dc06..3ae56fef8c 100644
--- a/drivers/media/radio/miropcm20-radio.c
+++ b/drivers/media/radio/miropcm20-radio.c
@@ -229,7 +229,6 @@ static struct video_device pcm20_radio = {
229 .owner = THIS_MODULE, 229 .owner = THIS_MODULE,
230 .name = "Miro PCM 20 radio", 230 .name = "Miro PCM 20 radio",
231 .type = VID_TYPE_TUNER, 231 .type = VID_TYPE_TUNER,
232 .hardware = VID_HARDWARE_RTRACK,
233 .fops = &pcm20_fops, 232 .fops = &pcm20_fops,
234 .priv = &pcm20_unit 233 .priv = &pcm20_unit
235}; 234};
diff --git a/drivers/media/radio/radio-gemtek.c b/drivers/media/radio/radio-gemtek.c
index 0c963db036..5e4b9ddb23 100644
--- a/drivers/media/radio/radio-gemtek.c
+++ b/drivers/media/radio/radio-gemtek.c
@@ -554,7 +554,6 @@ static struct video_device gemtek_radio = {
554 .owner = THIS_MODULE, 554 .owner = THIS_MODULE,
555 .name = "GemTek Radio card", 555 .name = "GemTek Radio card",
556 .type = VID_TYPE_TUNER, 556 .type = VID_TYPE_TUNER,
557 .hardware = VID_HARDWARE_GEMTEK,
558 .fops = &gemtek_fops, 557 .fops = &gemtek_fops,
559 .vidioc_querycap = vidioc_querycap, 558 .vidioc_querycap = vidioc_querycap,
560 .vidioc_g_tuner = vidioc_g_tuner, 559 .vidioc_g_tuner = vidioc_g_tuner,
diff --git a/drivers/media/video/arv.c b/drivers/media/video/arv.c
index 19e9929ffa..c94a4d0f28 100644
--- a/drivers/media/video/arv.c
+++ b/drivers/media/video/arv.c
@@ -755,7 +755,6 @@ static struct video_device ar_template = {
755 .owner = THIS_MODULE, 755 .owner = THIS_MODULE,
756 .name = "Colour AR VGA", 756 .name = "Colour AR VGA",
757 .type = VID_TYPE_CAPTURE, 757 .type = VID_TYPE_CAPTURE,
758 .hardware = VID_HARDWARE_ARV,
759 .fops = &ar_fops, 758 .fops = &ar_fops,
760 .release = ar_release, 759 .release = ar_release,
761 .minor = -1, 760 .minor = -1,
diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index 7a332b3efe..9feeb636ff 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -3877,7 +3877,6 @@ static struct video_device bttv_video_template =
3877 .name = "UNSET", 3877 .name = "UNSET",
3878 .type = VID_TYPE_CAPTURE|VID_TYPE_TUNER| 3878 .type = VID_TYPE_CAPTURE|VID_TYPE_TUNER|
3879 VID_TYPE_CLIPPING|VID_TYPE_SCALES, 3879 VID_TYPE_CLIPPING|VID_TYPE_SCALES,
3880 .hardware = VID_HARDWARE_BT848,
3881 .fops = &bttv_fops, 3880 .fops = &bttv_fops,
3882 .minor = -1, 3881 .minor = -1,
3883}; 3882};
@@ -3886,7 +3885,6 @@ static struct video_device bttv_vbi_template =
3886{ 3885{
3887 .name = "bt848/878 vbi", 3886 .name = "bt848/878 vbi",
3888 .type = VID_TYPE_TUNER|VID_TYPE_TELETEXT, 3887 .type = VID_TYPE_TUNER|VID_TYPE_TELETEXT,
3889 .hardware = VID_HARDWARE_BT848,
3890 .fops = &bttv_fops, 3888 .fops = &bttv_fops,
3891 .minor = -1, 3889 .minor = -1,
3892}; 3890};
@@ -4034,7 +4032,6 @@ static struct video_device radio_template =
4034{ 4032{
4035 .name = "bt848/878 radio", 4033 .name = "bt848/878 radio",
4036 .type = VID_TYPE_TUNER, 4034 .type = VID_TYPE_TUNER,
4037 .hardware = VID_HARDWARE_BT848,
4038 .fops = &radio_fops, 4035 .fops = &radio_fops,
4039 .minor = -1, 4036 .minor = -1,
4040}; 4037};
diff --git a/drivers/media/video/bw-qcam.c b/drivers/media/video/bw-qcam.c
index 7f7e3d3398..5842352559 100644
--- a/drivers/media/video/bw-qcam.c
+++ b/drivers/media/video/bw-qcam.c
@@ -899,7 +899,6 @@ static struct video_device qcam_template=
899 .owner = THIS_MODULE, 899 .owner = THIS_MODULE,
900 .name = "Connectix Quickcam", 900 .name = "Connectix Quickcam",
901 .type = VID_TYPE_CAPTURE, 901 .type = VID_TYPE_CAPTURE,
902 .hardware = VID_HARDWARE_QCAM_BW,
903 .fops = &qcam_fops, 902 .fops = &qcam_fops,
904}; 903};
905 904
diff --git a/drivers/media/video/c-qcam.c b/drivers/media/video/c-qcam.c
index f76c6a6c37..cf1546b5a7 100644
--- a/drivers/media/video/c-qcam.c
+++ b/drivers/media/video/c-qcam.c
@@ -699,7 +699,6 @@ static struct video_device qcam_template=
699 .owner = THIS_MODULE, 699 .owner = THIS_MODULE,
700 .name = "Colour QuickCam", 700 .name = "Colour QuickCam",
701 .type = VID_TYPE_CAPTURE, 701 .type = VID_TYPE_CAPTURE,
702 .hardware = VID_HARDWARE_QCAM_C,
703 .fops = &qcam_fops, 702 .fops = &qcam_fops,
704}; 703};
705 704
diff --git a/drivers/media/video/cpia.c b/drivers/media/video/cpia.c
index a1d02e5ce0..7c630f5ee7 100644
--- a/drivers/media/video/cpia.c
+++ b/drivers/media/video/cpia.c
@@ -65,10 +65,6 @@ MODULE_PARM_DESC(colorspace_conv,
65 65
66#define ABOUT "V4L-Driver for Vision CPiA based cameras" 66#define ABOUT "V4L-Driver for Vision CPiA based cameras"
67 67
68#ifndef VID_HARDWARE_CPIA
69#define VID_HARDWARE_CPIA 24 /* FIXME -> from linux/videodev.h */
70#endif
71
72#define CPIA_MODULE_CPIA (0<<5) 68#define CPIA_MODULE_CPIA (0<<5)
73#define CPIA_MODULE_SYSTEM (1<<5) 69#define CPIA_MODULE_SYSTEM (1<<5)
74#define CPIA_MODULE_VP_CTRL (5<<5) 70#define CPIA_MODULE_VP_CTRL (5<<5)
@@ -3804,7 +3800,6 @@ static struct video_device cpia_template = {
3804 .owner = THIS_MODULE, 3800 .owner = THIS_MODULE,
3805 .name = "CPiA Camera", 3801 .name = "CPiA Camera",
3806 .type = VID_TYPE_CAPTURE, 3802 .type = VID_TYPE_CAPTURE,
3807 .hardware = VID_HARDWARE_CPIA,
3808 .fops = &cpia_fops, 3803 .fops = &cpia_fops,
3809}; 3804};
3810 3805
diff --git a/drivers/media/video/cpia2/cpia2_v4l.c b/drivers/media/video/cpia2/cpia2_v4l.c
index e3aaba1e0e..e378abec80 100644
--- a/drivers/media/video/cpia2/cpia2_v4l.c
+++ b/drivers/media/video/cpia2/cpia2_v4l.c
@@ -86,10 +86,6 @@ MODULE_LICENSE("GPL");
86 86
87#define ABOUT "V4L-Driver for Vision CPiA2 based cameras" 87#define ABOUT "V4L-Driver for Vision CPiA2 based cameras"
88 88
89#ifndef VID_HARDWARE_CPIA2
90#error "VID_HARDWARE_CPIA2 should have been defined in linux/videodev.h"
91#endif
92
93struct control_menu_info { 89struct control_menu_info {
94 int value; 90 int value;
95 char name[32]; 91 char name[32];
@@ -1942,7 +1938,6 @@ static struct video_device cpia2_template = {
1942 .type= VID_TYPE_CAPTURE, 1938 .type= VID_TYPE_CAPTURE,
1943 .type2 = V4L2_CAP_VIDEO_CAPTURE | 1939 .type2 = V4L2_CAP_VIDEO_CAPTURE |
1944 V4L2_CAP_STREAMING, 1940 V4L2_CAP_STREAMING,
1945 .hardware= VID_HARDWARE_CPIA2,
1946 .minor= -1, 1941 .minor= -1,
1947 .fops= &fops_template, 1942 .fops= &fops_template,
1948 .release= video_device_release, 1943 .release= video_device_release,
diff --git a/drivers/media/video/cx23885/cx23885-core.c b/drivers/media/video/cx23885/cx23885-core.c
index af16505bd2..3cdd136477 100644
--- a/drivers/media/video/cx23885/cx23885-core.c
+++ b/drivers/media/video/cx23885/cx23885-core.c
@@ -793,7 +793,7 @@ static int cx23885_dev_setup(struct cx23885_dev *dev)
793 dev->pci->subsystem_device); 793 dev->pci->subsystem_device);
794 794
795 cx23885_devcount--; 795 cx23885_devcount--;
796 goto fail_free; 796 return -ENODEV;
797 } 797 }
798 798
799 /* PCIe stuff */ 799 /* PCIe stuff */
@@ -835,10 +835,6 @@ static int cx23885_dev_setup(struct cx23885_dev *dev)
835 } 835 }
836 836
837 return 0; 837 return 0;
838
839fail_free:
840 kfree(dev);
841 return -ENODEV;
842} 838}
843 839
844void cx23885_dev_unregister(struct cx23885_dev *dev) 840void cx23885_dev_unregister(struct cx23885_dev *dev)
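
The cx23885 hunk above replaces a goto into a kfree()ing error label with a plain return -ENODEV, which suggests the allocation is owned and freed by the caller. A hedged sketch of that ownership rule, with invented names:

#include <linux/errno.h>

struct demo_dev { int duplicate; };

static int demo_setup(struct demo_dev *dev)
{
	/* dev was allocated by the caller: freeing it here would make
	 * the caller's own error handling free it a second time */
	if (dev->duplicate)
		return -ENODEV;
	return 0;
}
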
diff --git a/drivers/media/video/cx88/cx88-alsa.c b/drivers/media/video/cx88/cx88-alsa.c
index 141dadf7cf..40ffd7a557 100644
--- a/drivers/media/video/cx88/cx88-alsa.c
+++ b/drivers/media/video/cx88/cx88-alsa.c
@@ -39,6 +39,7 @@
39#include <sound/pcm_params.h> 39#include <sound/pcm_params.h>
40#include <sound/control.h> 40#include <sound/control.h>
41#include <sound/initval.h> 41#include <sound/initval.h>
42#include <sound/tlv.h>
42 43
43#include "cx88.h" 44#include "cx88.h"
44#include "cx88-reg.h" 45#include "cx88-reg.h"
@@ -82,6 +83,7 @@ typedef struct cx88_audio_dev snd_cx88_card_t;
82 83
83 84
84 85
86
85/**************************************************************************** 87/****************************************************************************
86 Module global static vars 88 Module global static vars
87 ****************************************************************************/ 89 ****************************************************************************/
@@ -545,8 +547,8 @@ static int __devinit snd_cx88_pcm(snd_cx88_card_t *chip, int device, char *name)
545/**************************************************************************** 547/****************************************************************************
546 CONTROL INTERFACE 548 CONTROL INTERFACE
547 ****************************************************************************/ 549 ****************************************************************************/
548static int snd_cx88_capture_volume_info(struct snd_kcontrol *kcontrol, 550static int snd_cx88_volume_info(struct snd_kcontrol *kcontrol,
549 struct snd_ctl_elem_info *info) 551 struct snd_ctl_elem_info *info)
550{ 552{
551 info->type = SNDRV_CTL_ELEM_TYPE_INTEGER; 553 info->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
552 info->count = 2; 554 info->count = 2;
@@ -556,9 +558,8 @@ static int snd_cx88_capture_volume_info(struct snd_kcontrol *kcontrol,
556 return 0; 558 return 0;
557} 559}
558 560
559/* OK - TODO: test it */ 561static int snd_cx88_volume_get(struct snd_kcontrol *kcontrol,
560static int snd_cx88_capture_volume_get(struct snd_kcontrol *kcontrol, 562 struct snd_ctl_elem_value *value)
561 struct snd_ctl_elem_value *value)
562{ 563{
563 snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol); 564 snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
564 struct cx88_core *core=chip->core; 565 struct cx88_core *core=chip->core;
@@ -573,8 +574,8 @@ static int snd_cx88_capture_volume_get(struct snd_kcontrol *kcontrol,
573} 574}
574 575
575/* OK - TODO: test it */ 576/* OK - TODO: test it */
576static int snd_cx88_capture_volume_put(struct snd_kcontrol *kcontrol, 577static int snd_cx88_volume_put(struct snd_kcontrol *kcontrol,
577 struct snd_ctl_elem_value *value) 578 struct snd_ctl_elem_value *value)
578{ 579{
579 snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol); 580 snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
580 struct cx88_core *core=chip->core; 581 struct cx88_core *core=chip->core;
@@ -605,14 +606,67 @@ static int snd_cx88_capture_volume_put(struct snd_kcontrol *kcontrol,
605 return changed; 606 return changed;
606} 607}
607 608
608static struct snd_kcontrol_new snd_cx88_capture_volume = { 609static const DECLARE_TLV_DB_SCALE(snd_cx88_db_scale, -6300, 100, 0);
610
611static struct snd_kcontrol_new snd_cx88_volume = {
612 .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
613 .access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
614 SNDRV_CTL_ELEM_ACCESS_TLV_READ,
615 .name = "Playback Volume",
616 .info = snd_cx88_volume_info,
617 .get = snd_cx88_volume_get,
618 .put = snd_cx88_volume_put,
619 .tlv.p = snd_cx88_db_scale,
620};
621
622static int snd_cx88_switch_get(struct snd_kcontrol *kcontrol,
623 struct snd_ctl_elem_value *value)
624{
625 snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
626 struct cx88_core *core = chip->core;
627 u32 bit = kcontrol->private_value;
628
629 value->value.integer.value[0] = !(cx_read(AUD_VOL_CTL) & bit);
630 return 0;
631}
632
633static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol,
634 struct snd_ctl_elem_value *value)
635{
636 snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
637 struct cx88_core *core = chip->core;
638 u32 bit = kcontrol->private_value;
639 int ret = 0;
640 u32 vol;
641
642 spin_lock_irq(&chip->reg_lock);
643 vol = cx_read(AUD_VOL_CTL);
644 if (value->value.integer.value[0] != !(vol & bit)) {
645 vol ^= bit;
646 cx_write(AUD_VOL_CTL, vol);
647 ret = 1;
648 }
649 spin_unlock_irq(&chip->reg_lock);
650 return ret;
651}
652
653static struct snd_kcontrol_new snd_cx88_dac_switch = {
609 .iface = SNDRV_CTL_ELEM_IFACE_MIXER, 654 .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
610 .name = "Capture Volume", 655 .name = "Playback Switch",
611 .info = snd_cx88_capture_volume_info, 656 .info = snd_ctl_boolean_mono_info,
612 .get = snd_cx88_capture_volume_get, 657 .get = snd_cx88_switch_get,
613 .put = snd_cx88_capture_volume_put, 658 .put = snd_cx88_switch_put,
659 .private_value = (1<<8),
614}; 660};
615 661
662static struct snd_kcontrol_new snd_cx88_source_switch = {
663 .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
664 .name = "Capture Switch",
665 .info = snd_ctl_boolean_mono_info,
666 .get = snd_cx88_switch_get,
667 .put = snd_cx88_switch_put,
668 .private_value = (1<<6),
669};
616 670
617/**************************************************************************** 671/****************************************************************************
618 Basic Flow for Sound Devices 672 Basic Flow for Sound Devices
@@ -762,7 +816,13 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci,
762 if (err < 0) 816 if (err < 0)
763 goto error; 817 goto error;
764 818
765 err = snd_ctl_add(card, snd_ctl_new1(&snd_cx88_capture_volume, chip)); 819 err = snd_ctl_add(card, snd_ctl_new1(&snd_cx88_volume, chip));
820 if (err < 0)
821 goto error;
822 err = snd_ctl_add(card, snd_ctl_new1(&snd_cx88_dac_switch, chip));
823 if (err < 0)
824 goto error;
825 err = snd_ctl_add(card, snd_ctl_new1(&snd_cx88_source_switch, chip));
766 if (err < 0) 826 if (err < 0)
767 goto error; 827 goto error;
768 828
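
The cx88-alsa hunks above attach a dB-scale TLV to the renamed volume control and add playback and capture switches. A minimal sketch of a TLV-annotated ALSA mixer control, assuming a 0..63 hardware attenuator mapped to -63..0 dB; the demo_* callbacks are placeholders:

#include <sound/control.h>
#include <sound/tlv.h>

static int demo_volume_info(struct snd_kcontrol *k, struct snd_ctl_elem_info *i);
static int demo_volume_get(struct snd_kcontrol *k, struct snd_ctl_elem_value *v);
static int demo_volume_put(struct snd_kcontrol *k, struct snd_ctl_elem_value *v);

/* -63.00 dB .. 0 dB in 1.00 dB steps; final 0 = no mute at minimum */
static const DECLARE_TLV_DB_SCALE(demo_db_scale, -6300, 100, 0);

static struct snd_kcontrol_new demo_volume = {
	.iface  = SNDRV_CTL_ELEM_IFACE_MIXER,
	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
		  SNDRV_CTL_ELEM_ACCESS_TLV_READ,
	.name   = "Playback Volume",
	.info   = demo_volume_info,
	.get    = demo_volume_get,
	.put    = demo_volume_put,
	.tlv.p  = demo_db_scale,	/* lets mixers show real dB values */
};
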
diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c
index 6d6f5048d7..f33f0b4714 100644
--- a/drivers/media/video/cx88/cx88-blackbird.c
+++ b/drivers/media/video/cx88/cx88-blackbird.c
@@ -527,44 +527,6 @@ static void blackbird_codec_settings(struct cx8802_dev *dev)
527 cx2341x_update(dev, blackbird_mbox_func, NULL, &dev->params); 527 cx2341x_update(dev, blackbird_mbox_func, NULL, &dev->params);
528} 528}
529 529
530static struct v4l2_mpeg_compression default_mpeg_params = {
531 .st_type = V4L2_MPEG_PS_2,
532 .st_bitrate = {
533 .mode = V4L2_BITRATE_CBR,
534 .min = 0,
535 .target = 0,
536 .max = 0
537 },
538 .ts_pid_pmt = 16,
539 .ts_pid_audio = 260,
540 .ts_pid_video = 256,
541 .ts_pid_pcr = 259,
542 .ps_size = 0,
543 .au_type = V4L2_MPEG_AU_2_II,
544 .au_bitrate = {
545 .mode = V4L2_BITRATE_CBR,
546 .min = 224,
547 .target = 224,
548 .max = 224
549 },
550 .au_sample_rate = 48000,
551 .au_pesid = 0,
552 .vi_type = V4L2_MPEG_VI_2,
553 .vi_aspect_ratio = V4L2_MPEG_ASPECT_4_3,
554 .vi_bitrate = {
555 .mode = V4L2_BITRATE_CBR,
556 .min = 4000,
557 .target = 4500,
558 .max = 6000
559 },
560 .vi_frame_rate = 25,
561 .vi_frames_per_gop = 12,
562 .vi_bframes_count = 2,
563 .vi_pesid = 0,
564 .closed_gops = 1,
565 .pulldown = 0
566};
567
568static int blackbird_initialize_codec(struct cx8802_dev *dev) 530static int blackbird_initialize_codec(struct cx8802_dev *dev)
569{ 531{
570 struct cx88_core *core = dev->core; 532 struct cx88_core *core = dev->core;
@@ -852,23 +814,6 @@ static int vidioc_streamoff(struct file *file, void *priv, enum v4l2_buf_type i)
852 return videobuf_streamoff(&fh->mpegq); 814 return videobuf_streamoff(&fh->mpegq);
853} 815}
854 816
855static int vidioc_g_mpegcomp (struct file *file, void *fh,
856 struct v4l2_mpeg_compression *f)
857{
858 printk(KERN_WARNING "VIDIOC_G_MPEGCOMP is obsolete. "
859 "Replace with VIDIOC_G_EXT_CTRLS!");
860 memcpy(f,&default_mpeg_params,sizeof(*f));
861 return 0;
862}
863
864static int vidioc_s_mpegcomp (struct file *file, void *fh,
865 struct v4l2_mpeg_compression *f)
866{
867 printk(KERN_WARNING "VIDIOC_S_MPEGCOMP is obsolete. "
868 "Replace with VIDIOC_S_EXT_CTRLS!");
869 return 0;
870}
871
872static int vidioc_g_ext_ctrls (struct file *file, void *priv, 817static int vidioc_g_ext_ctrls (struct file *file, void *priv,
873 struct v4l2_ext_controls *f) 818 struct v4l2_ext_controls *f)
874{ 819{
@@ -1216,8 +1161,6 @@ static struct video_device cx8802_mpeg_template =
1216 .vidioc_dqbuf = vidioc_dqbuf, 1161 .vidioc_dqbuf = vidioc_dqbuf,
1217 .vidioc_streamon = vidioc_streamon, 1162 .vidioc_streamon = vidioc_streamon,
1218 .vidioc_streamoff = vidioc_streamoff, 1163 .vidioc_streamoff = vidioc_streamoff,
1219 .vidioc_g_mpegcomp = vidioc_g_mpegcomp,
1220 .vidioc_s_mpegcomp = vidioc_s_mpegcomp,
1221 .vidioc_g_ext_ctrls = vidioc_g_ext_ctrls, 1164 .vidioc_g_ext_ctrls = vidioc_g_ext_ctrls,
1222 .vidioc_s_ext_ctrls = vidioc_s_ext_ctrls, 1165 .vidioc_s_ext_ctrls = vidioc_s_ext_ctrls,
1223 .vidioc_try_ext_ctrls = vidioc_try_ext_ctrls, 1166 .vidioc_try_ext_ctrls = vidioc_try_ext_ctrls,
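
With VIDIOC_G/S_MPEGCOMP removed above, applications set MPEG parameters through the extended-control API instead. A hedged userspace sketch; the device node and bitrate are illustrative values, not taken from the patch:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

static int demo_set_bitrate(const char *devnode, int bitrate)
{
	struct v4l2_ext_control ctrl = {
		.id    = V4L2_CID_MPEG_VIDEO_BITRATE,
		.value = bitrate,
	};
	struct v4l2_ext_controls ctrls = {
		.ctrl_class = V4L2_CTRL_CLASS_MPEG,
		.count      = 1,
		.controls   = &ctrl,
	};
	int fd = open(devnode, O_RDWR);
	int ret;

	if (fd < 0)
		return -1;
	ret = ioctl(fd, VIDIOC_S_EXT_CTRLS, &ctrls);
	close(fd);
	return ret;
}
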
diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c
index d16e5c6d21..fce19caf9d 100644
--- a/drivers/media/video/cx88/cx88-dvb.c
+++ b/drivers/media/video/cx88/cx88-dvb.c
@@ -475,8 +475,9 @@ static int dvb_register(struct cx8802_dev *dev)
475 break; 475 break;
476 case CX88_BOARD_DNTV_LIVE_DVB_T_PRO: 476 case CX88_BOARD_DNTV_LIVE_DVB_T_PRO:
477#if defined(CONFIG_VIDEO_CX88_VP3054) || (defined(CONFIG_VIDEO_CX88_VP3054_MODULE) && defined(MODULE)) 477#if defined(CONFIG_VIDEO_CX88_VP3054) || (defined(CONFIG_VIDEO_CX88_VP3054_MODULE) && defined(MODULE))
478 /* MT352 is on a secondary I2C bus made from some GPIO lines */
478 dev->dvb.frontend = dvb_attach(mt352_attach, &dntv_live_dvbt_pro_config, 479 dev->dvb.frontend = dvb_attach(mt352_attach, &dntv_live_dvbt_pro_config,
479 &((struct vp3054_i2c_state *)dev->card_priv)->adap); 480 &dev->vp3054->adap);
480 if (dev->dvb.frontend != NULL) { 481 if (dev->dvb.frontend != NULL) {
481 dvb_attach(dvb_pll_attach, dev->dvb.frontend, 0x61, 482 dvb_attach(dvb_pll_attach, dev->dvb.frontend, 0x61,
482 &dev->core->i2c_adap, DVB_PLL_FMD1216ME); 483 &dev->core->i2c_adap, DVB_PLL_FMD1216ME);
diff --git a/drivers/media/video/cx88/cx88-mpeg.c b/drivers/media/video/cx88/cx88-mpeg.c
index a652f294d2..448c673809 100644
--- a/drivers/media/video/cx88/cx88-mpeg.c
+++ b/drivers/media/video/cx88/cx88-mpeg.c
@@ -79,7 +79,8 @@ static int cx8802_start_dma(struct cx8802_dev *dev,
79{ 79{
80 struct cx88_core *core = dev->core; 80 struct cx88_core *core = dev->core;
81 81
82 dprintk(1, "cx8802_start_dma w: %d, h: %d, f: %d\n", dev->width, dev->height, buf->vb.field); 82 dprintk(1, "cx8802_start_dma w: %d, h: %d, f: %d\n",
83 buf->vb.width, buf->vb.height, buf->vb.field);
83 84
84 /* setup fifo + format */ 85 /* setup fifo + format */
85 cx88_sram_channel_setup(core, &cx88_sram_channels[SRAM_CH28], 86 cx88_sram_channel_setup(core, &cx88_sram_channels[SRAM_CH28],
@@ -177,7 +178,6 @@ static int cx8802_restart_queue(struct cx8802_dev *dev,
177 struct cx88_dmaqueue *q) 178 struct cx88_dmaqueue *q)
178{ 179{
179 struct cx88_buffer *buf; 180 struct cx88_buffer *buf;
180 struct list_head *item;
181 181
182 dprintk( 1, "cx8802_restart_queue\n" ); 182 dprintk( 1, "cx8802_restart_queue\n" );
183 if (list_empty(&q->active)) 183 if (list_empty(&q->active))
@@ -223,10 +223,8 @@ static int cx8802_restart_queue(struct cx8802_dev *dev,
223 dprintk(2,"restart_queue [%p/%d]: restart dma\n", 223 dprintk(2,"restart_queue [%p/%d]: restart dma\n",
224 buf, buf->vb.i); 224 buf, buf->vb.i);
225 cx8802_start_dma(dev, q, buf); 225 cx8802_start_dma(dev, q, buf);
226 list_for_each(item,&q->active) { 226 list_for_each_entry(buf, &q->active, vb.queue)
227 buf = list_entry(item, struct cx88_buffer, vb.queue);
228 buf->count = q->count++; 227 buf->count = q->count++;
229 }
230 mod_timer(&q->timeout, jiffies+BUFFER_TIMEOUT); 228 mod_timer(&q->timeout, jiffies+BUFFER_TIMEOUT);
231 return 0; 229 return 0;
232} 230}
@@ -572,42 +570,29 @@ int cx8802_resume_common(struct pci_dev *pci_dev)
572 return 0; 570 return 0;
573} 571}
574 572
573#if defined(CONFIG_VIDEO_CX88_BLACKBIRD) || \
574 defined(CONFIG_VIDEO_CX88_BLACKBIRD_MODULE)
575struct cx8802_dev * cx8802_get_device(struct inode *inode) 575struct cx8802_dev * cx8802_get_device(struct inode *inode)
576{ 576{
577 int minor = iminor(inode); 577 int minor = iminor(inode);
578 struct cx8802_dev *h = NULL; 578 struct cx8802_dev *dev;
579 struct list_head *list;
580 579
581 list_for_each(list,&cx8802_devlist) { 580 list_for_each_entry(dev, &cx8802_devlist, devlist)
582 h = list_entry(list, struct cx8802_dev, devlist); 581 if (dev->mpeg_dev && dev->mpeg_dev->minor == minor)
583 if (h->mpeg_dev && h->mpeg_dev->minor == minor) 582 return dev;
584 return h;
585 }
586 583
587 return NULL; 584 return NULL;
588} 585}
586EXPORT_SYMBOL(cx8802_get_device);
587#endif
589 588
590struct cx8802_driver * cx8802_get_driver(struct cx8802_dev *dev, enum cx88_board_type btype) 589struct cx8802_driver * cx8802_get_driver(struct cx8802_dev *dev, enum cx88_board_type btype)
591{ 590{
592 struct cx8802_dev *h = NULL; 591 struct cx8802_driver *d;
593 struct cx8802_driver *d = NULL;
594 struct list_head *list;
595 struct list_head *list2;
596
597 list_for_each(list,&cx8802_devlist) {
598 h = list_entry(list, struct cx8802_dev, devlist);
599 if (h != dev)
600 continue;
601
602 list_for_each(list2, &h->drvlist.devlist) {
603 d = list_entry(list2, struct cx8802_driver, devlist);
604 592
605 /* only unregister the correct driver type */ 593 list_for_each_entry(d, &dev->drvlist, drvlist)
606 if (d->type_id == btype) { 594 if (d->type_id == btype)
607 return d; 595 return d;
608 }
609 }
610 }
611 596
612 return NULL; 597 return NULL;
613} 598}
@@ -671,10 +656,9 @@ static int cx8802_check_driver(struct cx8802_driver *drv)
671 656
672int cx8802_register_driver(struct cx8802_driver *drv) 657int cx8802_register_driver(struct cx8802_driver *drv)
673{ 658{
674 struct cx8802_dev *h; 659 struct cx8802_dev *dev;
675 struct cx8802_driver *driver; 660 struct cx8802_driver *driver;
676 struct list_head *list; 661 int err, i = 0;
677 int err = 0, i = 0;
678 662
679 printk(KERN_INFO 663 printk(KERN_INFO
680 "cx88/2: registering cx8802 driver, type: %s access: %s\n", 664 "cx88/2: registering cx8802 driver, type: %s access: %s\n",
@@ -686,14 +670,12 @@ int cx8802_register_driver(struct cx8802_driver *drv)
686 return err; 670 return err;
687 } 671 }
688 672
689 list_for_each(list,&cx8802_devlist) { 673 list_for_each_entry(dev, &cx8802_devlist, devlist) {
690 h = list_entry(list, struct cx8802_dev, devlist);
691
692 printk(KERN_INFO 674 printk(KERN_INFO
693 "%s/2: subsystem: %04x:%04x, board: %s [card=%d]\n", 675 "%s/2: subsystem: %04x:%04x, board: %s [card=%d]\n",
694 h->core->name, h->pci->subsystem_vendor, 676 dev->core->name, dev->pci->subsystem_vendor,
695 h->pci->subsystem_device, h->core->board.name, 677 dev->pci->subsystem_device, dev->core->board.name,
696 h->core->boardnr); 678 dev->core->boardnr);
697 679
698 /* Bring up a new struct for each driver instance */ 680 /* Bring up a new struct for each driver instance */
699 driver = kzalloc(sizeof(*drv),GFP_KERNEL); 681 driver = kzalloc(sizeof(*drv),GFP_KERNEL);
@@ -701,7 +683,7 @@ int cx8802_register_driver(struct cx8802_driver *drv)
701 return -ENOMEM; 683 return -ENOMEM;
702 684
703 /* Snapshot of the driver registration data */ 685 /* Snapshot of the driver registration data */
704 drv->core = h->core; 686 drv->core = dev->core;
705 drv->suspend = cx8802_suspend_common; 687 drv->suspend = cx8802_suspend_common;
706 drv->resume = cx8802_resume_common; 688 drv->resume = cx8802_resume_common;
707 drv->request_acquire = cx8802_request_acquire; 689 drv->request_acquire = cx8802_request_acquire;
@@ -712,49 +694,38 @@ int cx8802_register_driver(struct cx8802_driver *drv)
712 if (err == 0) { 694 if (err == 0) {
713 i++; 695 i++;
714 mutex_lock(&drv->core->lock); 696 mutex_lock(&drv->core->lock);
715 list_add_tail(&driver->devlist,&h->drvlist.devlist); 697 list_add_tail(&driver->drvlist, &dev->drvlist);
716 mutex_unlock(&drv->core->lock); 698 mutex_unlock(&drv->core->lock);
717 } else { 699 } else {
718 printk(KERN_ERR 700 printk(KERN_ERR
719 "%s/2: cx8802 probe failed, err = %d\n", 701 "%s/2: cx8802 probe failed, err = %d\n",
720 h->core->name, err); 702 dev->core->name, err);
721 } 703 }
722 704
723 } 705 }
724 if (i == 0)
725 err = -ENODEV;
726 else
727 err = 0;
728 706
729 return err; 707 return i ? 0 : -ENODEV;
730} 708}
731 709
732int cx8802_unregister_driver(struct cx8802_driver *drv) 710int cx8802_unregister_driver(struct cx8802_driver *drv)
733{ 711{
734 struct cx8802_dev *h; 712 struct cx8802_dev *dev;
735 struct cx8802_driver *d; 713 struct cx8802_driver *d, *dtmp;
736 struct list_head *list; 714 int err = 0;
737 struct list_head *list2, *q;
738 int err = 0, i = 0;
739 715
740 printk(KERN_INFO 716 printk(KERN_INFO
741 "cx88/2: unregistering cx8802 driver, type: %s access: %s\n", 717 "cx88/2: unregistering cx8802 driver, type: %s access: %s\n",
742 drv->type_id == CX88_MPEG_DVB ? "dvb" : "blackbird", 718 drv->type_id == CX88_MPEG_DVB ? "dvb" : "blackbird",
743 drv->hw_access == CX8802_DRVCTL_SHARED ? "shared" : "exclusive"); 719 drv->hw_access == CX8802_DRVCTL_SHARED ? "shared" : "exclusive");
744 720
745 list_for_each(list,&cx8802_devlist) { 721 list_for_each_entry(dev, &cx8802_devlist, devlist) {
746 i++;
747 h = list_entry(list, struct cx8802_dev, devlist);
748
749 printk(KERN_INFO 722 printk(KERN_INFO
750 "%s/2: subsystem: %04x:%04x, board: %s [card=%d]\n", 723 "%s/2: subsystem: %04x:%04x, board: %s [card=%d]\n",
751 h->core->name, h->pci->subsystem_vendor, 724 dev->core->name, dev->pci->subsystem_vendor,
752 h->pci->subsystem_device, h->core->board.name, 725 dev->pci->subsystem_device, dev->core->board.name,
753 h->core->boardnr); 726 dev->core->boardnr);
754
755 list_for_each_safe(list2, q, &h->drvlist.devlist) {
756 d = list_entry(list2, struct cx8802_driver, devlist);
757 727
728 list_for_each_entry_safe(d, dtmp, &dev->drvlist, drvlist) {
758 /* only unregister the correct driver type */ 729 /* only unregister the correct driver type */
759 if (d->type_id != drv->type_id) 730 if (d->type_id != drv->type_id)
760 continue; 731 continue;
@@ -762,12 +733,12 @@ int cx8802_unregister_driver(struct cx8802_driver *drv)
762 err = d->remove(d); 733 err = d->remove(d);
763 if (err == 0) { 734 if (err == 0) {
764 mutex_lock(&drv->core->lock); 735 mutex_lock(&drv->core->lock);
765 list_del(list2); 736 list_del(&d->drvlist);
766 mutex_unlock(&drv->core->lock); 737 mutex_unlock(&drv->core->lock);
738 kfree(d);
767 } else 739 } else
768 printk(KERN_ERR "%s/2: cx8802 driver remove " 740 printk(KERN_ERR "%s/2: cx8802 driver remove "
769 "failed (%d)\n", h->core->name, err); 741 "failed (%d)\n", dev->core->name, err);
770
771 } 742 }
772 743
773 } 744 }
@@ -805,7 +776,7 @@ static int __devinit cx8802_probe(struct pci_dev *pci_dev,
805 if (err != 0) 776 if (err != 0)
806 goto fail_free; 777 goto fail_free;
807 778
808 INIT_LIST_HEAD(&dev->drvlist.devlist); 779 INIT_LIST_HEAD(&dev->drvlist);
809 list_add_tail(&dev->devlist,&cx8802_devlist); 780 list_add_tail(&dev->devlist,&cx8802_devlist);
810 781
811 /* Maintain a reference so cx88-video can query the 8802 device. */ 782 /* Maintain a reference so cx88-video can query the 8802 device. */
@@ -825,23 +796,30 @@ static int __devinit cx8802_probe(struct pci_dev *pci_dev,
825static void __devexit cx8802_remove(struct pci_dev *pci_dev) 796static void __devexit cx8802_remove(struct pci_dev *pci_dev)
826{ 797{
827 struct cx8802_dev *dev; 798 struct cx8802_dev *dev;
828 struct cx8802_driver *h;
829 struct list_head *list;
830 799
831 dev = pci_get_drvdata(pci_dev); 800 dev = pci_get_drvdata(pci_dev);
832 801
833 dprintk( 1, "%s\n", __FUNCTION__); 802 dprintk( 1, "%s\n", __FUNCTION__);
834 803
835 list_for_each(list,&dev->drvlist.devlist) { 804 if (!list_empty(&dev->drvlist)) {
836 h = list_entry(list, struct cx8802_driver, devlist); 805 struct cx8802_driver *drv, *tmp;
837 dprintk( 1, " ->driver\n"); 806 int err;
838 if (h->remove == NULL) { 807
839 printk(KERN_ERR "%s .. skipping driver, no probe function\n", __FUNCTION__); 808 printk(KERN_WARNING "%s/2: Trying to remove cx8802 driver "
840 continue; 809 "while cx8802 sub-drivers still loaded?!\n",
810 dev->core->name);
811
812 list_for_each_entry_safe(drv, tmp, &dev->drvlist, drvlist) {
813 err = drv->remove(drv);
814 if (err == 0) {
815 mutex_lock(&drv->core->lock);
816 list_del(&drv->drvlist);
817 mutex_unlock(&drv->core->lock);
818 } else
819 printk(KERN_ERR "%s/2: cx8802 driver remove "
820 "failed (%d)\n", dev->core->name, err);
821 kfree(drv);
841 } 822 }
842 printk(KERN_INFO "%s .. Removing driver type %d\n", __FUNCTION__, h->type_id);
843 cx8802_unregister_driver(h);
844 list_del(&dev->drvlist.devlist);
845 } 823 }
846 824
847 /* Destroy any 8802 reference. */ 825 /* Destroy any 8802 reference. */
@@ -901,7 +879,6 @@ EXPORT_SYMBOL(cx8802_fini_common);
901 879
902EXPORT_SYMBOL(cx8802_register_driver); 880EXPORT_SYMBOL(cx8802_register_driver);
903EXPORT_SYMBOL(cx8802_unregister_driver); 881EXPORT_SYMBOL(cx8802_unregister_driver);
904EXPORT_SYMBOL(cx8802_get_device);
905EXPORT_SYMBOL(cx8802_get_driver); 882EXPORT_SYMBOL(cx8802_get_driver);
906/* ----------------------------------------------------------- */ 883/* ----------------------------------------------------------- */
907/* 884/*
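
Several hunks above convert open-coded list_for_each()/list_entry() walks into list_for_each_entry(), and into the _safe variant where nodes are deleted mid-walk. A self-contained sketch of both ideas with invented types:

#include <linux/list.h>
#include <linux/slab.h>

struct demo_node {
	struct list_head link;
	int id;
};

static void demo_prune(struct list_head *head, int id)
{
	struct demo_node *n, *tmp;

	/* _safe keeps a lookahead pointer, so the current node may be
	 * unlinked and freed without breaking the walk */
	list_for_each_entry_safe(n, tmp, head, link) {
		if (n->id != id)
			continue;
		list_del(&n->link);
		kfree(n);
	}
}
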
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index 231ae6c4dd..5ee05f8f3f 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -1675,7 +1675,6 @@ static struct video_device cx8800_radio_template =
1675{ 1675{
1676 .name = "cx8800-radio", 1676 .name = "cx8800-radio",
1677 .type = VID_TYPE_TUNER, 1677 .type = VID_TYPE_TUNER,
1678 .hardware = 0,
1679 .fops = &radio_fops, 1678 .fops = &radio_fops,
1680 .minor = -1, 1679 .minor = -1,
1681 .vidioc_querycap = radio_querycap, 1680 .vidioc_querycap = radio_querycap,
diff --git a/drivers/media/video/cx88/cx88-vp3054-i2c.c b/drivers/media/video/cx88/cx88-vp3054-i2c.c
index 77c3788923..6ce5af4884 100644
--- a/drivers/media/video/cx88/cx88-vp3054-i2c.c
+++ b/drivers/media/video/cx88/cx88-vp3054-i2c.c
@@ -41,7 +41,7 @@ static void vp3054_bit_setscl(void *data, int state)
41{ 41{
42 struct cx8802_dev *dev = data; 42 struct cx8802_dev *dev = data;
43 struct cx88_core *core = dev->core; 43 struct cx88_core *core = dev->core;
44 struct vp3054_i2c_state *vp3054_i2c = dev->card_priv; 44 struct vp3054_i2c_state *vp3054_i2c = dev->vp3054;
45 45
46 if (state) { 46 if (state) {
47 vp3054_i2c->state |= 0x0001; /* SCL high */ 47 vp3054_i2c->state |= 0x0001; /* SCL high */
@@ -58,7 +58,7 @@ static void vp3054_bit_setsda(void *data, int state)
58{ 58{
59 struct cx8802_dev *dev = data; 59 struct cx8802_dev *dev = data;
60 struct cx88_core *core = dev->core; 60 struct cx88_core *core = dev->core;
61 struct vp3054_i2c_state *vp3054_i2c = dev->card_priv; 61 struct vp3054_i2c_state *vp3054_i2c = dev->vp3054;
62 62
63 if (state) { 63 if (state) {
64 vp3054_i2c->state |= 0x0002; /* SDA high */ 64 vp3054_i2c->state |= 0x0002; /* SDA high */
@@ -113,10 +113,10 @@ int vp3054_i2c_probe(struct cx8802_dev *dev)
113 if (core->boardnr != CX88_BOARD_DNTV_LIVE_DVB_T_PRO) 113 if (core->boardnr != CX88_BOARD_DNTV_LIVE_DVB_T_PRO)
114 return 0; 114 return 0;
115 115
116 dev->card_priv = kzalloc(sizeof(*vp3054_i2c), GFP_KERNEL); 116 vp3054_i2c = kzalloc(sizeof(*vp3054_i2c), GFP_KERNEL);
117 if (dev->card_priv == NULL) 117 if (vp3054_i2c == NULL)
118 return -ENOMEM; 118 return -ENOMEM;
119 vp3054_i2c = dev->card_priv; 119 dev->vp3054 = vp3054_i2c;
120 120
121 memcpy(&vp3054_i2c->algo, &vp3054_i2c_algo_template, 121 memcpy(&vp3054_i2c->algo, &vp3054_i2c_algo_template,
122 sizeof(vp3054_i2c->algo)); 122 sizeof(vp3054_i2c->algo));
@@ -139,8 +139,8 @@ int vp3054_i2c_probe(struct cx8802_dev *dev)
139 if (0 != rc) { 139 if (0 != rc) {
140 printk("%s: vp3054_i2c register FAILED\n", core->name); 140 printk("%s: vp3054_i2c register FAILED\n", core->name);
141 141
142 kfree(dev->card_priv); 142 kfree(dev->vp3054);
143 dev->card_priv = NULL; 143 dev->vp3054 = NULL;
144 } 144 }
145 145
146 return rc; 146 return rc;
@@ -148,7 +148,7 @@ int vp3054_i2c_probe(struct cx8802_dev *dev)
148 148
149void vp3054_i2c_remove(struct cx8802_dev *dev) 149void vp3054_i2c_remove(struct cx8802_dev *dev)
150{ 150{
151 struct vp3054_i2c_state *vp3054_i2c = dev->card_priv; 151 struct vp3054_i2c_state *vp3054_i2c = dev->vp3054;
152 152
153 if (vp3054_i2c == NULL || 153 if (vp3054_i2c == NULL ||
154 dev->core->boardnr != CX88_BOARD_DNTV_LIVE_DVB_T_PRO) 154 dev->core->boardnr != CX88_BOARD_DNTV_LIVE_DVB_T_PRO)
diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h
index 42e0a9b8c5..eb296bdecb 100644
--- a/drivers/media/video/cx88/cx88.h
+++ b/drivers/media/video/cx88/cx88.h
@@ -412,7 +412,9 @@ struct cx8802_suspend_state {
412 412
413struct cx8802_driver { 413struct cx8802_driver {
414 struct cx88_core *core; 414 struct cx88_core *core;
415 struct list_head devlist; 415
416 /* List of drivers attached to device */
417 struct list_head drvlist;
416 418
417 /* Type of driver and access required */ 419 /* Type of driver and access required */
418 enum cx88_board_type type_id; 420 enum cx88_board_type type_id;
@@ -453,27 +455,33 @@ struct cx8802_dev {
453 455
454 /* for blackbird only */ 456 /* for blackbird only */
455 struct list_head devlist; 457 struct list_head devlist;
458#if defined(CONFIG_VIDEO_CX88_BLACKBIRD) || \
459 defined(CONFIG_VIDEO_CX88_BLACKBIRD_MODULE)
456 struct video_device *mpeg_dev; 460 struct video_device *mpeg_dev;
457 u32 mailbox; 461 u32 mailbox;
458 int width; 462 int width;
459 int height; 463 int height;
460 464
465 /* mpeg params */
466 struct cx2341x_mpeg_params params;
467#endif
468
461#if defined(CONFIG_VIDEO_CX88_DVB) || defined(CONFIG_VIDEO_CX88_DVB_MODULE) 469#if defined(CONFIG_VIDEO_CX88_DVB) || defined(CONFIG_VIDEO_CX88_DVB_MODULE)
462 /* for dvb only */ 470 /* for dvb only */
463 struct videobuf_dvb dvb; 471 struct videobuf_dvb dvb;
472#endif
464 473
465 void *card_priv; 474#if defined(CONFIG_VIDEO_CX88_VP3054) || \
475 defined(CONFIG_VIDEO_CX88_VP3054_MODULE)
476 /* For VP3054 secondary I2C bus support */
477 struct vp3054_i2c_state *vp3054;
466#endif 478#endif
467 /* for switching modulation types */ 479 /* for switching modulation types */
468 unsigned char ts_gen_cntrl; 480 unsigned char ts_gen_cntrl;
469 481
470 /* mpeg params */
471 struct cx2341x_mpeg_params params;
472
473 /* List of attached drivers */ 482 /* List of attached drivers */
474 struct cx8802_driver drvlist; 483 struct list_head drvlist;
475 struct work_struct request_module_wk; 484 struct work_struct request_module_wk;
476
477}; 485};
478 486
479/* ----------------------------------------------------------- */ 487/* ----------------------------------------------------------- */
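
The cx88.h hunk above moves blackbird- and VP3054-specific members behind config guards so they exist only when the matching sub-driver can be built, whether built-in or as a module. A sketch with a placeholder option name:

#include <linux/list.h>

struct demo_dev {
	struct list_head devlist;
#if defined(CONFIG_DEMO_FEATURE) || defined(CONFIG_DEMO_FEATURE_MODULE)
	/* present only when the optional sub-driver is buildable,
	 * keeping the struct smaller for everyone else */
	void *feature_state;
#endif
	struct list_head drvlist;
};
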
diff --git a/drivers/media/video/em28xx/em28xx-core.c b/drivers/media/video/em28xx/em28xx-core.c
index d3282ec62c..d56484f204 100644
--- a/drivers/media/video/em28xx/em28xx-core.c
+++ b/drivers/media/video/em28xx/em28xx-core.c
@@ -648,7 +648,7 @@ void em28xx_uninit_isoc(struct em28xx *dev)
648 */ 648 */
649int em28xx_init_isoc(struct em28xx *dev) 649int em28xx_init_isoc(struct em28xx *dev)
650{ 650{
651 /* change interface to 3 which allowes the biggest packet sizes */ 651 /* change interface to 3 which allows the biggest packet sizes */
652 int i, errCode; 652 int i, errCode;
653 const int sb_size = EM28XX_NUM_PACKETS * dev->max_pkt_size; 653 const int sb_size = EM28XX_NUM_PACKETS * dev->max_pkt_size;
654 654
@@ -673,6 +673,7 @@ int em28xx_init_isoc(struct em28xx *dev)
673 ("unable to allocate %i bytes for transfer buffer %i\n", 673 ("unable to allocate %i bytes for transfer buffer %i\n",
674 sb_size, i); 674 sb_size, i);
675 em28xx_uninit_isoc(dev); 675 em28xx_uninit_isoc(dev);
676 usb_free_urb(urb);
676 return -ENOMEM; 677 return -ENOMEM;
677 } 678 }
678 memset(dev->transfer_buffer[i], 0, sb_size); 679 memset(dev->transfer_buffer[i], 0, sb_size);
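
The em28xx hunk above plugs a leak: when the transfer-buffer allocation fails, the urb allocated just before it must be freed too. A minimal sketch of the corrected error path, with invented names:

#include <linux/usb.h>
#include <linux/slab.h>

static int demo_alloc_urb(struct urb **out, size_t size)
{
	struct urb *urb = usb_alloc_urb(0, GFP_KERNEL);
	void *buf;

	if (!urb)
		return -ENOMEM;
	buf = kzalloc(size, GFP_KERNEL);
	if (!buf) {
		usb_free_urb(urb);	/* this was the leaked object */
		return -ENOMEM;
	}
	urb->transfer_buffer = buf;
	*out = urb;
	return 0;
}
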
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index e467682aab..a4c2a90712 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -1617,7 +1617,6 @@ static int em28xx_init_dev(struct em28xx **devhandle, struct usb_device *udev,
1617 1617
1618 /* Fills VBI device info */ 1618 /* Fills VBI device info */
1619 dev->vbi_dev->type = VFL_TYPE_VBI; 1619 dev->vbi_dev->type = VFL_TYPE_VBI;
1620 dev->vbi_dev->hardware = 0;
1621 dev->vbi_dev->fops = &em28xx_v4l_fops; 1620 dev->vbi_dev->fops = &em28xx_v4l_fops;
1622 dev->vbi_dev->minor = -1; 1621 dev->vbi_dev->minor = -1;
1623 dev->vbi_dev->dev = &dev->udev->dev; 1622 dev->vbi_dev->dev = &dev->udev->dev;
@@ -1629,7 +1628,6 @@ static int em28xx_init_dev(struct em28xx **devhandle, struct usb_device *udev,
1629 dev->vdev->type = VID_TYPE_CAPTURE; 1628 dev->vdev->type = VID_TYPE_CAPTURE;
1630 if (dev->has_tuner) 1629 if (dev->has_tuner)
1631 dev->vdev->type |= VID_TYPE_TUNER; 1630 dev->vdev->type |= VID_TYPE_TUNER;
1632 dev->vdev->hardware = 0;
1633 dev->vdev->fops = &em28xx_v4l_fops; 1631 dev->vdev->fops = &em28xx_v4l_fops;
1634 dev->vdev->minor = -1; 1632 dev->vdev->minor = -1;
1635 dev->vdev->dev = &dev->udev->dev; 1633 dev->vdev->dev = &dev->udev->dev;
diff --git a/drivers/media/video/et61x251/et61x251_core.c b/drivers/media/video/et61x251/et61x251_core.c
index d5fef4c01c..d19d73b81e 100644
--- a/drivers/media/video/et61x251/et61x251_core.c
+++ b/drivers/media/video/et61x251/et61x251_core.c
@@ -2585,7 +2585,6 @@ et61x251_usb_probe(struct usb_interface* intf, const struct usb_device_id* id)
2585 strcpy(cam->v4ldev->name, "ET61X[12]51 PC Camera"); 2585 strcpy(cam->v4ldev->name, "ET61X[12]51 PC Camera");
2586 cam->v4ldev->owner = THIS_MODULE; 2586 cam->v4ldev->owner = THIS_MODULE;
2587 cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES; 2587 cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES;
2588 cam->v4ldev->hardware = 0;
2589 cam->v4ldev->fops = &et61x251_fops; 2588 cam->v4ldev->fops = &et61x251_fops;
2590 cam->v4ldev->minor = video_nr[dev_nr]; 2589 cam->v4ldev->minor = video_nr[dev_nr];
2591 cam->v4ldev->release = video_device_release; 2590 cam->v4ldev->release = video_device_release;
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index d98dd0d1e3..29779d8bf7 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -528,6 +528,7 @@ static int ir_probe(struct i2c_adapter *adap)
528 break; 528 break;
529 case I2C_HW_B_CX2388x: 529 case I2C_HW_B_CX2388x:
530 probe = probe_cx88; 530 probe = probe_cx88;
531 break;
531 case I2C_HW_B_CX23885: 532 case I2C_HW_B_CX23885:
532 probe = probe_cx23885; 533 probe = probe_cx23885;
533 break; 534 break;
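
The ir-kbd-i2c hunk above adds a missing break, without which the I2C_HW_B_CX2388x case fell through and its probe table was silently overwritten by the CX23885 one. A generic illustration of the fall-through bug, with invented names:

enum demo_adapter { DEMO_CX2388X, DEMO_CX23885 };

static int demo_pick_probe(enum demo_adapter id)
{
	int probe = 0;

	switch (id) {
	case DEMO_CX2388X:
		probe = 1;
		break;	/* the missing break: without it, execution
			 * falls through and probe becomes 2 */
	case DEMO_CX23885:
		probe = 2;
		break;
	}
	return probe;
}
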
diff --git a/drivers/media/video/ivtv/ivtv-driver.c b/drivers/media/video/ivtv/ivtv-driver.c
index fd7a932e1d..6d2dd8764f 100644
--- a/drivers/media/video/ivtv/ivtv-driver.c
+++ b/drivers/media/video/ivtv/ivtv-driver.c
@@ -1003,8 +1003,6 @@ static int __devinit ivtv_probe(struct pci_dev *dev,
1003 1003
1004 IVTV_DEBUG_INFO("base addr: 0x%08x\n", itv->base_addr); 1004 IVTV_DEBUG_INFO("base addr: 0x%08x\n", itv->base_addr);
1005 1005
1006 mutex_lock(&itv->serialize_lock);
1007
1008 /* PCI Device Setup */ 1006 /* PCI Device Setup */
1009 if ((retval = ivtv_setup_pci(itv, dev, pci_id)) != 0) { 1007 if ((retval = ivtv_setup_pci(itv, dev, pci_id)) != 0) {
1010 if (retval == -EIO) 1008 if (retval == -EIO)
@@ -1064,7 +1062,7 @@ static int __devinit ivtv_probe(struct pci_dev *dev,
1064 IVTV_DEBUG_INFO("activating i2c...\n"); 1062 IVTV_DEBUG_INFO("activating i2c...\n");
1065 if (init_ivtv_i2c(itv)) { 1063 if (init_ivtv_i2c(itv)) {
1066 IVTV_ERR("Could not initialize i2c\n"); 1064 IVTV_ERR("Could not initialize i2c\n");
1067 goto free_irq; 1065 goto free_io;
1068 } 1066 }
1069 1067
1070 IVTV_DEBUG_INFO("Active card count: %d.\n", ivtv_cards_active); 1068 IVTV_DEBUG_INFO("Active card count: %d.\n", ivtv_cards_active);
@@ -1176,7 +1174,11 @@ static int __devinit ivtv_probe(struct pci_dev *dev,
1176 IVTV_ERR("Failed to register irq %d\n", retval); 1174 IVTV_ERR("Failed to register irq %d\n", retval);
1177 goto free_streams; 1175 goto free_streams;
1178 } 1176 }
1179 mutex_unlock(&itv->serialize_lock); 1177 retval = ivtv_streams_register(itv);
1178 if (retval) {
1179 IVTV_ERR("Error %d registering devices\n", retval);
1180 goto free_irq;
1181 }
1180 IVTV_INFO("Initialized card #%d: %s\n", itv->num, itv->card_name); 1182 IVTV_INFO("Initialized card #%d: %s\n", itv->num, itv->card_name);
1181 return 0; 1183 return 0;
1182 1184
@@ -1195,7 +1197,6 @@ static int __devinit ivtv_probe(struct pci_dev *dev,
1195 release_mem_region(itv->base_addr + IVTV_DECODER_OFFSET, IVTV_DECODER_SIZE); 1197 release_mem_region(itv->base_addr + IVTV_DECODER_OFFSET, IVTV_DECODER_SIZE);
1196 free_workqueue: 1198 free_workqueue:
1197 destroy_workqueue(itv->irq_work_queues); 1199 destroy_workqueue(itv->irq_work_queues);
1198 mutex_unlock(&itv->serialize_lock);
1199 err: 1200 err:
1200 if (retval == 0) 1201 if (retval == 0)
1201 retval = -ENODEV; 1202 retval = -ENODEV;
diff --git a/drivers/media/video/ivtv/ivtv-driver.h b/drivers/media/video/ivtv/ivtv-driver.h
index 3bda1df63c..49ce14d14a 100644
--- a/drivers/media/video/ivtv/ivtv-driver.h
+++ b/drivers/media/video/ivtv/ivtv-driver.h
@@ -51,6 +51,7 @@
51#include <linux/unistd.h> 51#include <linux/unistd.h>
52#include <linux/byteorder/swab.h> 52#include <linux/byteorder/swab.h>
53#include <linux/pagemap.h> 53#include <linux/pagemap.h>
54#include <linux/scatterlist.h>
54#include <linux/workqueue.h> 55#include <linux/workqueue.h>
55#include <linux/mutex.h> 56#include <linux/mutex.h>
56#include <asm/uaccess.h> 57#include <asm/uaccess.h>
diff --git a/drivers/media/video/ivtv/ivtv-fileops.c b/drivers/media/video/ivtv/ivtv-fileops.c
index da50fa4a72..a200a8a95a 100644
--- a/drivers/media/video/ivtv/ivtv-fileops.c
+++ b/drivers/media/video/ivtv/ivtv-fileops.c
@@ -822,6 +822,11 @@ int ivtv_v4l2_close(struct inode *inode, struct file *filp)
822 crystal_freq.flags = 0; 822 crystal_freq.flags = 0;
823 ivtv_saa7115(itv, VIDIOC_INT_S_CRYSTAL_FREQ, &crystal_freq); 823 ivtv_saa7115(itv, VIDIOC_INT_S_CRYSTAL_FREQ, &crystal_freq);
824 } 824 }
825 if (atomic_read(&itv->capturing) > 0) {
826 /* Undo video mute */
827 ivtv_vapi(itv, CX2341X_ENC_MUTE_VIDEO, 1,
828 itv->params.video_mute | (itv->params.video_mute_yuv << 8));
829 }
825 /* Done! Unmute and continue. */ 830 /* Done! Unmute and continue. */
826 ivtv_unmute(itv); 831 ivtv_unmute(itv);
827 ivtv_release_stream(s); 832 ivtv_release_stream(s);
@@ -892,6 +897,7 @@ static int ivtv_serialized_open(struct ivtv_stream *s, struct file *filp)
892 if (atomic_read(&itv->capturing) > 0) { 897 if (atomic_read(&itv->capturing) > 0) {
893 /* switching to radio while capture is 898 /* switching to radio while capture is
894 in progress is not polite */ 899 in progress is not polite */
900 ivtv_release_stream(s);
895 kfree(item); 901 kfree(item);
896 return -EBUSY; 902 return -EBUSY;
897 } 903 }
@@ -947,7 +953,7 @@ int ivtv_v4l2_open(struct inode *inode, struct file *filp)
947 if (itv == NULL) { 953 if (itv == NULL) {
948 /* Couldn't find a device registered 954 /* Couldn't find a device registered
949 on that minor, shouldn't happen! */ 955 on that minor, shouldn't happen! */
950 IVTV_WARN("No ivtv device found on minor %d\n", minor); 956 printk(KERN_WARNING "No ivtv device found on minor %d\n", minor);
951 return -ENXIO; 957 return -ENXIO;
952 } 958 }
953 959
diff --git a/drivers/media/video/ivtv/ivtv-ioctl.c b/drivers/media/video/ivtv/ivtv-ioctl.c
index 206eee7542..fd6826f472 100644
--- a/drivers/media/video/ivtv/ivtv-ioctl.c
+++ b/drivers/media/video/ivtv/ivtv-ioctl.c
@@ -555,6 +555,7 @@ static int ivtv_try_or_set_fmt(struct ivtv *itv, int streamtype,
555 555
556 /* set window size */ 556 /* set window size */
557 if (fmt->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) { 557 if (fmt->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
558 struct cx2341x_mpeg_params *p = &itv->params;
558 int w = fmt->fmt.pix.width; 559 int w = fmt->fmt.pix.width;
559 int h = fmt->fmt.pix.height; 560 int h = fmt->fmt.pix.height;
560 561
@@ -566,17 +567,19 @@ static int ivtv_try_or_set_fmt(struct ivtv *itv, int streamtype,
566 fmt->fmt.pix.width = w; 567 fmt->fmt.pix.width = w;
567 fmt->fmt.pix.height = h; 568 fmt->fmt.pix.height = h;
568 569
569 if (!set_fmt || (itv->params.width == w && itv->params.height == h)) 570 if (!set_fmt || (p->width == w && p->height == h))
570 return 0; 571 return 0;
571 if (atomic_read(&itv->capturing) > 0) 572 if (atomic_read(&itv->capturing) > 0)
572 return -EBUSY; 573 return -EBUSY;
573 574
574 itv->params.width = w; 575 p->width = w;
575 itv->params.height = h; 576 p->height = h;
576 if (w != 720 || h != (itv->is_50hz ? 576 : 480)) 577 if (w != 720 || h != (itv->is_50hz ? 576 : 480))
577 itv->params.video_temporal_filter = 0; 578 p->video_temporal_filter = 0;
578 else 579 else
579 itv->params.video_temporal_filter = 8; 580 p->video_temporal_filter = 8;
581 if (p->video_encoding == V4L2_MPEG_VIDEO_ENCODING_MPEG_1)
582 fmt->fmt.pix.width /= 2;
580 itv->video_dec_func(itv, VIDIOC_S_FMT, fmt); 583 itv->video_dec_func(itv, VIDIOC_S_FMT, fmt);
581 return ivtv_get_fmt(itv, streamtype, fmt); 584 return ivtv_get_fmt(itv, streamtype, fmt);
582 } 585 }
diff --git a/drivers/media/video/ivtv/ivtv-streams.c b/drivers/media/video/ivtv/ivtv-streams.c
index fd135985e7..aa03e61ef3 100644
--- a/drivers/media/video/ivtv/ivtv-streams.c
+++ b/drivers/media/video/ivtv/ivtv-streams.c
@@ -166,10 +166,9 @@ static void ivtv_stream_init(struct ivtv *itv, int type)
166 ivtv_queue_init(&s->q_io); 166 ivtv_queue_init(&s->q_io);
167} 167}
168 168
169static int ivtv_reg_dev(struct ivtv *itv, int type) 169static int ivtv_prep_dev(struct ivtv *itv, int type)
170{ 170{
171 struct ivtv_stream *s = &itv->streams[type]; 171 struct ivtv_stream *s = &itv->streams[type];
172 int vfl_type = ivtv_stream_info[type].vfl_type;
173 int minor_offset = ivtv_stream_info[type].minor_offset; 172 int minor_offset = ivtv_stream_info[type].minor_offset;
174 int minor; 173 int minor;
175 174
@@ -187,15 +186,12 @@ static int ivtv_reg_dev(struct ivtv *itv, int type)
187 if (type >= IVTV_DEC_STREAM_TYPE_MPG && !(itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT)) 186 if (type >= IVTV_DEC_STREAM_TYPE_MPG && !(itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT))
188 return 0; 187 return 0;
189 188
190 if (minor_offset >= 0) 189 /* card number + user defined offset + device offset */
191 /* card number + user defined offset + device offset */ 190 minor = itv->num + ivtv_first_minor + minor_offset;
192 minor = itv->num + ivtv_first_minor + minor_offset;
193 else
194 minor = -1;
195 191
196 /* User explicitly selected 0 buffers for these streams, so don't 192 /* User explicitly selected 0 buffers for these streams, so don't
197 create them. */ 193 create them. */
198 if (minor >= 0 && ivtv_stream_info[type].dma != PCI_DMA_NONE && 194 if (ivtv_stream_info[type].dma != PCI_DMA_NONE &&
199 itv->options.kilobytes[type] == 0) { 195 itv->options.kilobytes[type] == 0) {
200 IVTV_INFO("Disabled %s device\n", ivtv_stream_info[type].name); 196 IVTV_INFO("Disabled %s device\n", ivtv_stream_info[type].name);
201 return 0; 197 return 0;
@@ -223,21 +219,53 @@ static int ivtv_reg_dev(struct ivtv *itv, int type)
223 s->v4l2dev->fops = ivtv_stream_info[type].fops; 219 s->v4l2dev->fops = ivtv_stream_info[type].fops;
224 s->v4l2dev->release = video_device_release; 220 s->v4l2dev->release = video_device_release;
225 221
226 if (minor >= 0) { 222 return 0;
227 /* Register device. First try the desired minor, then any free one. */ 223}
228 if (video_register_device(s->v4l2dev, vfl_type, minor) && 224
229 video_register_device(s->v4l2dev, vfl_type, -1)) { 225/* Initialize v4l2 variables and prepare v4l2 devices */
230 IVTV_ERR("Couldn't register v4l2 device for %s minor %d\n", 226int ivtv_streams_setup(struct ivtv *itv)
231 s->name, minor); 227{
232 video_device_release(s->v4l2dev); 228 int type;
233 s->v4l2dev = NULL; 229
234 return -ENOMEM; 230 /* Setup V4L2 Devices */
235 } 231 for (type = 0; type < IVTV_MAX_STREAMS; type++) {
232 /* Prepare device */
233 if (ivtv_prep_dev(itv, type))
234 break;
235
236 if (itv->streams[type].v4l2dev == NULL)
237 continue;
238
239 /* Allocate Stream */
240 if (ivtv_stream_alloc(&itv->streams[type]))
241 break;
236 } 242 }
237 else { 243 if (type == IVTV_MAX_STREAMS)
238 /* Don't register a 'hidden' stream (OSD) */
239 IVTV_INFO("Created framebuffer stream for %s\n", s->name);
240 return 0; 244 return 0;
245
246 /* One or more streams could not be initialized. Clean 'em all up. */
247 ivtv_streams_cleanup(itv);
248 return -ENOMEM;
249}
250
251static int ivtv_reg_dev(struct ivtv *itv, int type)
252{
253 struct ivtv_stream *s = &itv->streams[type];
254 int vfl_type = ivtv_stream_info[type].vfl_type;
255 int minor;
256
257 if (s->v4l2dev == NULL)
258 return 0;
259
260 minor = s->v4l2dev->minor;
261 /* Register device. First try the desired minor, then any free one. */
262 if (video_register_device(s->v4l2dev, vfl_type, minor) &&
263 video_register_device(s->v4l2dev, vfl_type, -1)) {
264 IVTV_ERR("Couldn't register v4l2 device for %s minor %d\n",
265 s->name, minor);
266 video_device_release(s->v4l2dev);
267 s->v4l2dev = NULL;
268 return -ENOMEM;
241 } 269 }
242 270
243 switch (vfl_type) { 271 switch (vfl_type) {
@@ -262,27 +290,18 @@ static int ivtv_reg_dev(struct ivtv *itv, int type)
262 return 0; 290 return 0;
263} 291}
264 292
265/* Initialize v4l2 variables and register v4l2 devices */ 293/* Register v4l2 devices */
266int ivtv_streams_setup(struct ivtv *itv) 294int ivtv_streams_register(struct ivtv *itv)
267{ 295{
268 int type; 296 int type;
297 int err = 0;
269 298
270 /* Setup V4L2 Devices */ 299 /* Register V4L2 devices */
271 for (type = 0; type < IVTV_MAX_STREAMS; type++) { 300 for (type = 0; type < IVTV_MAX_STREAMS; type++)
272 /* Register Device */ 301 err |= ivtv_reg_dev(itv, type);
273 if (ivtv_reg_dev(itv, type))
274 break;
275
276 if (itv->streams[type].v4l2dev == NULL)
277 continue;
278 302
279 /* Allocate Stream */ 303 if (err == 0)
280 if (ivtv_stream_alloc(&itv->streams[type]))
281 break;
282 }
283 if (type == IVTV_MAX_STREAMS) {
284 return 0; 304 return 0;
285 }
286 305
287 /* One or more streams could not be initialized. Clean 'em all up. */ 306 /* One or more streams could not be initialized. Clean 'em all up. */
288 ivtv_streams_cleanup(itv); 307 ivtv_streams_cleanup(itv);
@@ -303,11 +322,8 @@ void ivtv_streams_cleanup(struct ivtv *itv)
303 continue; 322 continue;
304 323
305 ivtv_stream_free(&itv->streams[type]); 324 ivtv_stream_free(&itv->streams[type]);
306 /* Free Device */ 325 /* Unregister device */
307 if (vdev->minor == -1) /* 'Hidden' never registered stream (OSD) */ 326 video_unregister_device(vdev);
308 video_device_release(vdev);
309 else /* All others, just unregister. */
310 video_unregister_device(vdev);
311 } 327 }
312} 328}
313 329
@@ -425,6 +441,7 @@ int ivtv_start_v4l2_encode_stream(struct ivtv_stream *s)
425{ 441{
426 u32 data[CX2341X_MBOX_MAX_DATA]; 442 u32 data[CX2341X_MBOX_MAX_DATA];
427 struct ivtv *itv = s->itv; 443 struct ivtv *itv = s->itv;
444 struct cx2341x_mpeg_params *p = &itv->params;
428 int captype = 0, subtype = 0; 445 int captype = 0, subtype = 0;
429 int enable_passthrough = 0; 446 int enable_passthrough = 0;
430 447
@@ -445,7 +462,7 @@ int ivtv_start_v4l2_encode_stream(struct ivtv_stream *s)
445 } 462 }
446 itv->mpg_data_received = itv->vbi_data_inserted = 0; 463 itv->mpg_data_received = itv->vbi_data_inserted = 0;
447 itv->dualwatch_jiffies = jiffies; 464 itv->dualwatch_jiffies = jiffies;
448 itv->dualwatch_stereo_mode = itv->params.audio_properties & 0x0300; 465 itv->dualwatch_stereo_mode = p->audio_properties & 0x0300;
449 itv->search_pack_header = 0; 466 itv->search_pack_header = 0;
450 break; 467 break;
451 468
@@ -477,9 +494,6 @@ int ivtv_start_v4l2_encode_stream(struct ivtv_stream *s)
477 s->subtype = subtype; 494 s->subtype = subtype;
478 s->buffers_stolen = 0; 495 s->buffers_stolen = 0;
479 496
480 /* mute/unmute video */
481 ivtv_vapi(itv, CX2341X_ENC_MUTE_VIDEO, 1, test_bit(IVTV_F_I_RADIO_USER, &itv->i_flags) ? 1 : 0);
482
483 /* Clear Streamoff flags in case left from last capture */ 497 /* Clear Streamoff flags in case left from last capture */
484 clear_bit(IVTV_F_S_STREAMOFF, &s->s_flags); 498 clear_bit(IVTV_F_S_STREAMOFF, &s->s_flags);
485 499
@@ -536,7 +550,12 @@ int ivtv_start_v4l2_encode_stream(struct ivtv_stream *s)
536 itv->pgm_info_offset, itv->pgm_info_num); 550 itv->pgm_info_offset, itv->pgm_info_num);
537 551
538 /* Setup API for Stream */ 552 /* Setup API for Stream */
539 cx2341x_update(itv, ivtv_api_func, NULL, &itv->params); 553 cx2341x_update(itv, ivtv_api_func, NULL, p);
554
555 /* mute if capturing radio */
556 if (test_bit(IVTV_F_I_RADIO_USER, &itv->i_flags))
557 ivtv_vapi(itv, CX2341X_ENC_MUTE_VIDEO, 1,
558 1 | (p->video_mute_yuv << 8));
540 } 559 }
541 560
542 /* Vsync Setup */ 561 /* Vsync Setup */
@@ -585,6 +604,7 @@ static int ivtv_setup_v4l2_decode_stream(struct ivtv_stream *s)
585{ 604{
586 u32 data[CX2341X_MBOX_MAX_DATA]; 605 u32 data[CX2341X_MBOX_MAX_DATA];
587 struct ivtv *itv = s->itv; 606 struct ivtv *itv = s->itv;
607 struct cx2341x_mpeg_params *p = &itv->params;
588 int datatype; 608 int datatype;
589 609
590 if (s->v4l2dev == NULL) 610 if (s->v4l2dev == NULL)
@@ -623,7 +643,7 @@ static int ivtv_setup_v4l2_decode_stream(struct ivtv_stream *s)
623 break; 643 break;
624 } 644 }
625 if (ivtv_vapi(itv, CX2341X_DEC_SET_DECODER_SOURCE, 4, datatype, 645 if (ivtv_vapi(itv, CX2341X_DEC_SET_DECODER_SOURCE, 4, datatype,
626 itv->params.width, itv->params.height, itv->params.audio_properties)) { 646 p->width, p->height, p->audio_properties)) {
627 IVTV_DEBUG_WARN("Couldn't initialize decoder source\n"); 647 IVTV_DEBUG_WARN("Couldn't initialize decoder source\n");
628 } 648 }
629 return 0; 649 return 0;
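
The ivtv-streams changes above split device creation into a prepare/allocate phase (ivtv_streams_setup) and a later registration phase (ivtv_streams_register) called from ivtv_probe once the card is fully initialised, so user space never opens a half-set-up node. The shape of that two-phase pattern, with hypothetical names:

struct demo_card;

int demo_streams_setup(struct demo_card *card);     /* allocate only */
int demo_streams_register(struct demo_card *card);  /* create dev nodes */
void demo_streams_cleanup(struct demo_card *card);

int demo_init(struct demo_card *card)
{
	int err = demo_streams_setup(card);

	if (err)
		return err;
	/* expose device nodes only once everything else is ready */
	err = demo_streams_register(card);
	if (err)
		demo_streams_cleanup(card);
	return err;
}
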
diff --git a/drivers/media/video/ivtv/ivtv-streams.h b/drivers/media/video/ivtv/ivtv-streams.h
index 8f5f5b1c7c..3d76a415fb 100644
--- a/drivers/media/video/ivtv/ivtv-streams.h
+++ b/drivers/media/video/ivtv/ivtv-streams.h
@@ -22,6 +22,7 @@
22#define IVTV_STREAMS_H 22#define IVTV_STREAMS_H
23 23
24int ivtv_streams_setup(struct ivtv *itv); 24int ivtv_streams_setup(struct ivtv *itv);
25int ivtv_streams_register(struct ivtv *itv);
25void ivtv_streams_cleanup(struct ivtv *itv); 26void ivtv_streams_cleanup(struct ivtv *itv);
26 27
27/* Capture related */ 28/* Capture related */
diff --git a/drivers/media/video/ivtv/ivtv-udma.c b/drivers/media/video/ivtv/ivtv-udma.c
index c4626d1cdf..912b424e52 100644
--- a/drivers/media/video/ivtv/ivtv-udma.c
+++ b/drivers/media/video/ivtv/ivtv-udma.c
@@ -63,10 +63,10 @@ int ivtv_udma_fill_sg_list (struct ivtv_user_dma *dma, struct ivtv_dma_page_info
63 memcpy(page_address(dma->bouncemap[map_offset]) + offset, src, len); 63 memcpy(page_address(dma->bouncemap[map_offset]) + offset, src, len);
64 kunmap_atomic(src, KM_BOUNCE_READ); 64 kunmap_atomic(src, KM_BOUNCE_READ);
65 local_irq_restore(flags); 65 local_irq_restore(flags);
66 dma->SGlist[map_offset].page = dma->bouncemap[map_offset]; 66 sg_set_page(&dma->SGlist[map_offset], dma->bouncemap[map_offset]);
67 } 67 }
68 else { 68 else {
69 dma->SGlist[map_offset].page = dma->map[map_offset]; 69 sg_set_page(&dma->SGlist[map_offset], dma->map[map_offset]);
70 } 70 }
71 offset = 0; 71 offset = 0;
72 map_offset++; 72 map_offset++;
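
The ivtv-udma hunk above switches from assigning sg->page directly to the sg_set_page() helper, shown here in the two-argument form the hunk uses; later kernels extend it with length and offset arguments. Sketch:

#include <linux/scatterlist.h>

static void demo_fill_sg(struct scatterlist *sg, struct page *page)
{
	sg_set_page(sg, page);	/* replaces: sg->page = page; */
}
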
diff --git a/drivers/media/video/ivtv/ivtv-yuv.c b/drivers/media/video/ivtv/ivtv-yuv.c
index e2288f224a..9091c4837b 100644
--- a/drivers/media/video/ivtv/ivtv-yuv.c
+++ b/drivers/media/video/ivtv/ivtv-yuv.c
@@ -710,7 +710,7 @@ static u32 ivtv_yuv_window_setup (struct ivtv *itv, struct yuv_frame_info *windo
710 710
711 /* If there's nothing safe to display, we may as well stop now */ 711 /* If there's nothing safe to display, we may as well stop now */
712 if ((int)window->dst_w <= 2 || (int)window->dst_h <= 2 || (int)window->src_w <= 2 || (int)window->src_h <= 2) { 712 if ((int)window->dst_w <= 2 || (int)window->dst_h <= 2 || (int)window->src_w <= 2 || (int)window->src_h <= 2) {
713 return 0; 713 return IVTV_YUV_UPDATE_INVALID;
714 } 714 }
715 715
716 /* Ensure video remains inside OSD area */ 716 /* Ensure video remains inside OSD area */
@@ -791,7 +791,7 @@ static u32 ivtv_yuv_window_setup (struct ivtv *itv, struct yuv_frame_info *windo
791 791
792 /* Check again. If there's nothing safe to display, stop now */ 792 /* Check again. If there's nothing safe to display, stop now */
793 if ((int)window->dst_w <= 2 || (int)window->dst_h <= 2 || (int)window->src_w <= 2 || (int)window->src_h <= 2) { 793 if ((int)window->dst_w <= 2 || (int)window->dst_h <= 2 || (int)window->src_w <= 2 || (int)window->src_h <= 2) {
794 return 0; 794 return IVTV_YUV_UPDATE_INVALID;
795 } 795 }
796 796
797 /* Both x offset & width are linked, so they have to be done together */ 797 /* Both x offset & width are linked, so they have to be done together */
@@ -840,110 +840,118 @@ void ivtv_yuv_work_handler (struct ivtv *itv)
840 if (!(yuv_update = ivtv_yuv_window_setup (itv, &window))) 840 if (!(yuv_update = ivtv_yuv_window_setup (itv, &window)))
841 return; 841 return;
842 842
843 /* Update horizontal settings */ 843 if (yuv_update & IVTV_YUV_UPDATE_INVALID) {
844 if (yuv_update & IVTV_YUV_UPDATE_HORIZONTAL) 844 write_reg(0x01008080, 0x2898);
845 ivtv_yuv_handle_horizontal(itv, &window); 845 } else if (yuv_update) {
846 write_reg(0x00108080, 0x2898);
846 847
847 if (yuv_update & IVTV_YUV_UPDATE_VERTICAL) 848 if (yuv_update & IVTV_YUV_UPDATE_HORIZONTAL)
848 ivtv_yuv_handle_vertical(itv, &window); 849 ivtv_yuv_handle_horizontal(itv, &window);
850
851 if (yuv_update & IVTV_YUV_UPDATE_VERTICAL)
852 ivtv_yuv_handle_vertical(itv, &window);
853 }
849 854
850 memcpy(&itv->yuv_info.old_frame_info, &window, sizeof (itv->yuv_info.old_frame_info)); 855 memcpy(&itv->yuv_info.old_frame_info, &window, sizeof (itv->yuv_info.old_frame_info));
851} 856}
852 857
853static void ivtv_yuv_init (struct ivtv *itv) 858static void ivtv_yuv_init (struct ivtv *itv)
854{ 859{
860 struct yuv_playback_info *yi = &itv->yuv_info;
861
855 IVTV_DEBUG_YUV("ivtv_yuv_init\n"); 862 IVTV_DEBUG_YUV("ivtv_yuv_init\n");
856 863
857 /* Take a snapshot of the current register settings */ 864 /* Take a snapshot of the current register settings */
858 itv->yuv_info.reg_2834 = read_reg(0x02834); 865 yi->reg_2834 = read_reg(0x02834);
859 itv->yuv_info.reg_2838 = read_reg(0x02838); 866 yi->reg_2838 = read_reg(0x02838);
860 itv->yuv_info.reg_283c = read_reg(0x0283c); 867 yi->reg_283c = read_reg(0x0283c);
861 itv->yuv_info.reg_2840 = read_reg(0x02840); 868 yi->reg_2840 = read_reg(0x02840);
862 itv->yuv_info.reg_2844 = read_reg(0x02844); 869 yi->reg_2844 = read_reg(0x02844);
863 itv->yuv_info.reg_2848 = read_reg(0x02848); 870 yi->reg_2848 = read_reg(0x02848);
864 itv->yuv_info.reg_2854 = read_reg(0x02854); 871 yi->reg_2854 = read_reg(0x02854);
865 itv->yuv_info.reg_285c = read_reg(0x0285c); 872 yi->reg_285c = read_reg(0x0285c);
866 itv->yuv_info.reg_2864 = read_reg(0x02864); 873 yi->reg_2864 = read_reg(0x02864);
867 itv->yuv_info.reg_2870 = read_reg(0x02870); 874 yi->reg_2870 = read_reg(0x02870);
868 itv->yuv_info.reg_2874 = read_reg(0x02874); 875 yi->reg_2874 = read_reg(0x02874);
869 itv->yuv_info.reg_2898 = read_reg(0x02898); 876 yi->reg_2898 = read_reg(0x02898);
870 itv->yuv_info.reg_2890 = read_reg(0x02890); 877 yi->reg_2890 = read_reg(0x02890);
871 878
872 itv->yuv_info.reg_289c = read_reg(0x0289c); 879 yi->reg_289c = read_reg(0x0289c);
873 itv->yuv_info.reg_2918 = read_reg(0x02918); 880 yi->reg_2918 = read_reg(0x02918);
874 itv->yuv_info.reg_291c = read_reg(0x0291c); 881 yi->reg_291c = read_reg(0x0291c);
875 itv->yuv_info.reg_2920 = read_reg(0x02920); 882 yi->reg_2920 = read_reg(0x02920);
876 itv->yuv_info.reg_2924 = read_reg(0x02924); 883 yi->reg_2924 = read_reg(0x02924);
877 itv->yuv_info.reg_2928 = read_reg(0x02928); 884 yi->reg_2928 = read_reg(0x02928);
878 itv->yuv_info.reg_292c = read_reg(0x0292c); 885 yi->reg_292c = read_reg(0x0292c);
879 itv->yuv_info.reg_2930 = read_reg(0x02930); 886 yi->reg_2930 = read_reg(0x02930);
880 itv->yuv_info.reg_2934 = read_reg(0x02934); 887 yi->reg_2934 = read_reg(0x02934);
881 itv->yuv_info.reg_2938 = read_reg(0x02938); 888 yi->reg_2938 = read_reg(0x02938);
882 itv->yuv_info.reg_293c = read_reg(0x0293c); 889 yi->reg_293c = read_reg(0x0293c);
883 itv->yuv_info.reg_2940 = read_reg(0x02940); 890 yi->reg_2940 = read_reg(0x02940);
884 itv->yuv_info.reg_2944 = read_reg(0x02944); 891 yi->reg_2944 = read_reg(0x02944);
885 itv->yuv_info.reg_2948 = read_reg(0x02948); 892 yi->reg_2948 = read_reg(0x02948);
886 itv->yuv_info.reg_294c = read_reg(0x0294c); 893 yi->reg_294c = read_reg(0x0294c);
887 itv->yuv_info.reg_2950 = read_reg(0x02950); 894 yi->reg_2950 = read_reg(0x02950);
888 itv->yuv_info.reg_2954 = read_reg(0x02954); 895 yi->reg_2954 = read_reg(0x02954);
889 itv->yuv_info.reg_2958 = read_reg(0x02958); 896 yi->reg_2958 = read_reg(0x02958);
890 itv->yuv_info.reg_295c = read_reg(0x0295c); 897 yi->reg_295c = read_reg(0x0295c);
891 itv->yuv_info.reg_2960 = read_reg(0x02960); 898 yi->reg_2960 = read_reg(0x02960);
892 itv->yuv_info.reg_2964 = read_reg(0x02964); 899 yi->reg_2964 = read_reg(0x02964);
893 itv->yuv_info.reg_2968 = read_reg(0x02968); 900 yi->reg_2968 = read_reg(0x02968);
894 itv->yuv_info.reg_296c = read_reg(0x0296c); 901 yi->reg_296c = read_reg(0x0296c);
895 itv->yuv_info.reg_2970 = read_reg(0x02970); 902 yi->reg_2970 = read_reg(0x02970);
896 903
897 itv->yuv_info.v_filter_1 = -1; 904 yi->v_filter_1 = -1;
898 itv->yuv_info.v_filter_2 = -1; 905 yi->v_filter_2 = -1;
899 itv->yuv_info.h_filter = -1; 906 yi->h_filter = -1;
900 907
901 /* Set some valid size info */ 908 /* Set some valid size info */
902 itv->yuv_info.osd_x_offset = read_reg(0x02a04) & 0x00000FFF; 909 yi->osd_x_offset = read_reg(0x02a04) & 0x00000FFF;
903 itv->yuv_info.osd_y_offset = (read_reg(0x02a04) >> 16) & 0x00000FFF; 910 yi->osd_y_offset = (read_reg(0x02a04) >> 16) & 0x00000FFF;
904 911
905 /* Bit 2 of reg 2878 indicates current decoder output format 912 /* Bit 2 of reg 2878 indicates current decoder output format
906 0 : NTSC 1 : PAL */ 913 0 : NTSC 1 : PAL */
907 if (read_reg(0x2878) & 4) 914 if (read_reg(0x2878) & 4)
908 itv->yuv_info.decode_height = 576; 915 yi->decode_height = 576;
909 else 916 else
910 itv->yuv_info.decode_height = 480; 917 yi->decode_height = 480;
911 918
912 /* If no visible size set, assume full size */ 919 if (!itv->osd_info) {
913 if (!itv->yuv_info.osd_vis_w) 920 yi->osd_vis_w = 720 - yi->osd_x_offset;
914 itv->yuv_info.osd_vis_w = 720 - itv->yuv_info.osd_x_offset; 921 yi->osd_vis_h = yi->decode_height - yi->osd_y_offset;
915
916 if (!itv->yuv_info.osd_vis_h) {
917 itv->yuv_info.osd_vis_h = itv->yuv_info.decode_height - itv->yuv_info.osd_y_offset;
918 } else { 922 } else {
919 /* If output video standard has changed, requested height may 923 /* If no visible size set, assume full size */
920 not be legal */ 924 if (!yi->osd_vis_w)
921 if (itv->yuv_info.osd_vis_h + itv->yuv_info.osd_y_offset > itv->yuv_info.decode_height) { 925 yi->osd_vis_w = 720 - yi->osd_x_offset;
922 IVTV_DEBUG_WARN("Clipping yuv output - fb size (%d) exceeds video standard limit (%d)\n", 926
923 itv->yuv_info.osd_vis_h + itv->yuv_info.osd_y_offset, 927 if (!yi->osd_vis_h)
924 itv->yuv_info.decode_height); 928 yi->osd_vis_h = yi->decode_height - yi->osd_y_offset;
925 itv->yuv_info.osd_vis_h = itv->yuv_info.decode_height - itv->yuv_info.osd_y_offset; 929 else {
930 /* If output video standard has changed, requested height may
931 not be legal */
932 if (yi->osd_vis_h + yi->osd_y_offset > yi->decode_height) {
933 IVTV_DEBUG_WARN("Clipping yuv output - fb size (%d) exceeds video standard limit (%d)\n",
934 yi->osd_vis_h + yi->osd_y_offset,
935 yi->decode_height);
936 yi->osd_vis_h = yi->decode_height - yi->osd_y_offset;
937 }
926 } 938 }
927 } 939 }
928 940
929 /* We need a buffer for blanking when Y plane is offset - non-fatal if we can't get one */ 941 /* We need a buffer for blanking when Y plane is offset - non-fatal if we can't get one */
930 itv->yuv_info.blanking_ptr = kzalloc(720*16,GFP_KERNEL); 942 yi->blanking_ptr = kzalloc(720*16, GFP_KERNEL);
931 if (itv->yuv_info.blanking_ptr) { 943 if (yi->blanking_ptr)
932 itv->yuv_info.blanking_dmaptr = pci_map_single(itv->dev, itv->yuv_info.blanking_ptr, 720*16, PCI_DMA_TODEVICE); 944 yi->blanking_dmaptr = pci_map_single(itv->dev, yi->blanking_ptr, 720*16, PCI_DMA_TODEVICE);
933 }
934 else { 945 else {
935 itv->yuv_info.blanking_dmaptr = 0; 946 yi->blanking_dmaptr = 0;
936 IVTV_DEBUG_WARN ("Failed to allocate yuv blanking buffer\n"); 947 IVTV_DEBUG_WARN("Failed to allocate yuv blanking buffer\n");
937 } 948 }
938 949
939 IVTV_DEBUG_WARN("Enable video output\n");
940 write_reg_sync(0x00108080, 0x2898);
941
942 /* Enable YUV decoder output */ 950 /* Enable YUV decoder output */
943 write_reg_sync(0x01, IVTV_REG_VDM); 951 write_reg_sync(0x01, IVTV_REG_VDM);
944 952
945 set_bit(IVTV_F_I_DECODING_YUV, &itv->i_flags); 953 set_bit(IVTV_F_I_DECODING_YUV, &itv->i_flags);
946 atomic_set(&itv->yuv_info.next_dma_frame,0); 954 atomic_set(&yi->next_dma_frame, 0);
947} 955}
948 956
949int ivtv_yuv_prep_frame(struct ivtv *itv, struct ivtv_dma_frame *args) 957int ivtv_yuv_prep_frame(struct ivtv *itv, struct ivtv_dma_frame *args)
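The ivtv-yuv.c hunk is mostly mechanical, caching &itv->yuv_info in a local yi pointer; the behavioural change is that when no framebuffer is registered (itv->osd_info is NULL) the OSD geometry is now taken straight from the hardware instead of being clipped against stale values. The blanking buffer follows the usual allocate-then-map pattern; a minimal sketch, with a matching teardown that is assumed here rather than shown in the hunk:

        /* yi caches &itv->yuv_info, as in the hunk above */
        yi->blanking_ptr = kzalloc(720 * 16, GFP_KERNEL);
        if (yi->blanking_ptr)
                yi->blanking_dmaptr = pci_map_single(itv->dev, yi->blanking_ptr,
                                                     720 * 16, PCI_DMA_TODEVICE);
        else
                yi->blanking_dmaptr = 0;        /* non-fatal: blanking is skipped */

        /* assumed release path (not part of this hunk): unmap before freeing */
        if (yi->blanking_ptr) {
                pci_unmap_single(itv->dev, yi->blanking_dmaptr,
                                 720 * 16, PCI_DMA_TODEVICE);
                kfree(yi->blanking_ptr);
        }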
diff --git a/drivers/media/video/ivtv/ivtv-yuv.h b/drivers/media/video/ivtv/ivtv-yuv.h
index f7215eeca0..3b966f0a20 100644
--- a/drivers/media/video/ivtv/ivtv-yuv.h
+++ b/drivers/media/video/ivtv/ivtv-yuv.h
@@ -34,6 +34,7 @@
34 34
35#define IVTV_YUV_UPDATE_HORIZONTAL 0x01 35#define IVTV_YUV_UPDATE_HORIZONTAL 0x01
36#define IVTV_YUV_UPDATE_VERTICAL 0x02 36#define IVTV_YUV_UPDATE_VERTICAL 0x02
37#define IVTV_YUV_UPDATE_INVALID 0x04
37 38
38extern const u32 yuv_offset[4]; 39extern const u32 yuv_offset[4];
39 40
diff --git a/drivers/media/video/ivtv/ivtvfb.c b/drivers/media/video/ivtv/ivtvfb.c
index 9684048fe5..52ffd154a3 100644
--- a/drivers/media/video/ivtv/ivtvfb.c
+++ b/drivers/media/video/ivtv/ivtvfb.c
@@ -55,7 +55,6 @@
55static int ivtvfb_card_id = -1; 55static int ivtvfb_card_id = -1;
56static int ivtvfb_debug = 0; 56static int ivtvfb_debug = 0;
57static int osd_laced; 57static int osd_laced;
58static int osd_compat;
59static int osd_depth; 58static int osd_depth;
60static int osd_upper; 59static int osd_upper;
61static int osd_left; 60static int osd_left;
@@ -65,7 +64,6 @@ static int osd_xres;
65module_param(ivtvfb_card_id, int, 0444); 64module_param(ivtvfb_card_id, int, 0444);
66module_param_named(debug,ivtvfb_debug, int, 0644); 65module_param_named(debug,ivtvfb_debug, int, 0644);
67module_param(osd_laced, bool, 0444); 66module_param(osd_laced, bool, 0444);
68module_param(osd_compat, bool, 0444);
69module_param(osd_depth, int, 0444); 67module_param(osd_depth, int, 0444);
70module_param(osd_upper, int, 0444); 68module_param(osd_upper, int, 0444);
71module_param(osd_left, int, 0444); 69module_param(osd_left, int, 0444);
@@ -80,12 +78,6 @@ MODULE_PARM_DESC(debug,
80 "Debug level (bitmask). Default: errors only\n" 78 "Debug level (bitmask). Default: errors only\n"
81 "\t\t\t(debug = 3 gives full debugging)"); 79 "\t\t\t(debug = 3 gives full debugging)");
82 80
83MODULE_PARM_DESC(osd_compat,
84 "Compatibility mode - Display size is locked (use for old X drivers)\n"
85 "\t\t\t0=off\n"
86 "\t\t\t1=on\n"
87 "\t\t\tdefault off");
88
89/* Why upper, left, xres, yres, depth, laced ? To match terminology used 81/* Why upper, left, xres, yres, depth, laced ? To match terminology used
90 by fbset. 82 by fbset.
91 Why start at 1 for left & upper coordinate ? Because X doesn't allow 0 */ 83 Why start at 1 for left & upper coordinate ? Because X doesn't allow 0 */
@@ -166,9 +158,6 @@ struct osd_info {
166 unsigned long fb_end_aligned_physaddr; 158 unsigned long fb_end_aligned_physaddr;
167#endif 159#endif
168 160
169 /* Current osd mode */
170 int osd_mode;
171
172 /* Store the buffer offset */ 161 /* Store the buffer offset */
173 int set_osd_coords_x; 162 int set_osd_coords_x;
174 int set_osd_coords_y; 163 int set_osd_coords_y;
@@ -470,13 +459,11 @@ static int ivtvfb_set_var(struct ivtv *itv, struct fb_var_screeninfo *var)
470 IVTVFB_DEBUG_WARN("ivtvfb_set_var - Invalid bpp\n"); 459 IVTVFB_DEBUG_WARN("ivtvfb_set_var - Invalid bpp\n");
471 } 460 }
472 461
473 /* Change osd mode if needed. 462 /* Set video mode. Although rare, the display can become scrambled even
474 Although rare, things can go wrong. The extra mode 463 if we don't change mode. Always 'bounce' to osd_mode via mode 0 */
475 change seems to help... */ 464 if (osd_mode != -1) {
476 if (osd_mode != -1 && osd_mode != oi->osd_mode) {
477 ivtv_vapi(itv, CX2341X_OSD_SET_PIXEL_FORMAT, 1, 0); 465 ivtv_vapi(itv, CX2341X_OSD_SET_PIXEL_FORMAT, 1, 0);
478 ivtv_vapi(itv, CX2341X_OSD_SET_PIXEL_FORMAT, 1, osd_mode); 466 ivtv_vapi(itv, CX2341X_OSD_SET_PIXEL_FORMAT, 1, osd_mode);
479 oi->osd_mode = osd_mode;
480 } 467 }
481 468
482 oi->bits_per_pixel = var->bits_per_pixel; 469 oi->bits_per_pixel = var->bits_per_pixel;
@@ -579,14 +566,6 @@ static int _ivtvfb_check_var(struct fb_var_screeninfo *var, struct ivtv *itv)
579 osd_height_limit = 480; 566 osd_height_limit = 480;
580 } 567 }
581 568
582 /* Check the bits per pixel */
583 if (osd_compat) {
584 if (var->bits_per_pixel != 32) {
585 IVTVFB_DEBUG_WARN("Invalid colour mode: %d\n", var->bits_per_pixel);
586 return -EINVAL;
587 }
588 }
589
590 if (var->bits_per_pixel == 8 || var->bits_per_pixel == 32) { 569 if (var->bits_per_pixel == 8 || var->bits_per_pixel == 32) {
591 var->transp.offset = 24; 570 var->transp.offset = 24;
592 var->transp.length = 8; 571 var->transp.length = 8;
@@ -638,32 +617,20 @@ static int _ivtvfb_check_var(struct fb_var_screeninfo *var, struct ivtv *itv)
638 } 617 }
639 618
640 /* Check the resolution */ 619 /* Check the resolution */
641 if (osd_compat) { 620 if (var->xres > IVTV_OSD_MAX_WIDTH || var->yres > osd_height_limit) {
642 if (var->xres != oi->ivtvfb_defined.xres || 621 IVTVFB_DEBUG_WARN("Invalid resolution: %dx%d\n",
643 var->yres != oi->ivtvfb_defined.yres || 622 var->xres, var->yres);
644 var->xres_virtual != oi->ivtvfb_defined.xres_virtual || 623 return -EINVAL;
645 var->yres_virtual != oi->ivtvfb_defined.yres_virtual) {
646 IVTVFB_DEBUG_WARN("Invalid resolution: %dx%d (virtual %dx%d)\n",
647 var->xres, var->yres, var->xres_virtual, var->yres_virtual);
648 return -EINVAL;
649 }
650 } 624 }
651 else {
652 if (var->xres > IVTV_OSD_MAX_WIDTH || var->yres > osd_height_limit) {
653 IVTVFB_DEBUG_WARN("Invalid resolution: %dx%d\n",
654 var->xres, var->yres);
655 return -EINVAL;
656 }
657 625
 658 /* Max horizontal size is 1023 @ 32bpp, 2046 @ 16bpp, 4092 @ 8bpp */ 626 /* Max horizontal size is 1023 @ 32bpp, 2046 @ 16bpp, 4092 @ 8bpp */
659 if (var->xres_virtual > 4095 / (var->bits_per_pixel / 8) || 627 if (var->xres_virtual > 4095 / (var->bits_per_pixel / 8) ||
660 var->xres_virtual * var->yres_virtual * (var->bits_per_pixel / 8) > oi->video_buffer_size || 628 var->xres_virtual * var->yres_virtual * (var->bits_per_pixel / 8) > oi->video_buffer_size ||
661 var->xres_virtual < var->xres || 629 var->xres_virtual < var->xres ||
662 var->yres_virtual < var->yres) { 630 var->yres_virtual < var->yres) {
663 IVTVFB_DEBUG_WARN("Invalid virtual resolution: %dx%d\n", 631 IVTVFB_DEBUG_WARN("Invalid virtual resolution: %dx%d\n",
664 var->xres_virtual, var->yres_virtual); 632 var->xres_virtual, var->yres_virtual);
665 return -EINVAL; 633 return -EINVAL;
666 }
667 } 634 }
668 635
669 /* Some extra checks if in 8 bit mode */ 636 /* Some extra checks if in 8 bit mode */
@@ -877,17 +844,15 @@ static int ivtvfb_init_vidmode(struct ivtv *itv)
877 844
878 /* Color mode */ 845 /* Color mode */
879 846
880 if (osd_compat) osd_depth = 32; 847 if (osd_depth != 8 && osd_depth != 16 && osd_depth != 32)
881 if (osd_depth != 8 && osd_depth != 16 && osd_depth != 32) osd_depth = 8; 848 osd_depth = 8;
882 oi->bits_per_pixel = osd_depth; 849 oi->bits_per_pixel = osd_depth;
883 oi->bytes_per_pixel = oi->bits_per_pixel / 8; 850 oi->bytes_per_pixel = oi->bits_per_pixel / 8;
884 851
885 /* Invalidate current osd mode to force a mode switch later */
886 oi->osd_mode = -1;
887
888 /* Horizontal size & position */ 852 /* Horizontal size & position */
889 853
890 if (osd_xres > 720) osd_xres = 720; 854 if (osd_xres > 720)
855 osd_xres = 720;
891 856
892 /* Must be a multiple of 4 for 8bpp & 2 for 16bpp */ 857 /* Must be a multiple of 4 for 8bpp & 2 for 16bpp */
893 if (osd_depth == 8) 858 if (osd_depth == 8)
@@ -895,10 +860,7 @@ static int ivtvfb_init_vidmode(struct ivtv *itv)
895 else if (osd_depth == 16) 860 else if (osd_depth == 16)
896 osd_xres &= ~1; 861 osd_xres &= ~1;
897 862
898 if (osd_xres) 863 start_window.width = osd_xres ? osd_xres : 640;
899 start_window.width = osd_xres;
900 else
901 start_window.width = osd_compat ? 720: 640;
902 864
903 /* Check horizontal start (osd_left). */ 865 /* Check horizontal start (osd_left). */
904 if (osd_left && osd_left + start_window.width > 721) { 866 if (osd_left && osd_left + start_window.width > 721) {
@@ -921,10 +883,7 @@ static int ivtvfb_init_vidmode(struct ivtv *itv)
921 if (osd_yres > max_height) 883 if (osd_yres > max_height)
922 osd_yres = max_height; 884 osd_yres = max_height;
923 885
924 if (osd_yres) 886 start_window.height = osd_yres ? osd_yres : itv->is_50hz ? 480 : 400;
925 start_window.height = osd_yres;
926 else
927 start_window.height = osd_compat ? max_height : (itv->is_50hz ? 480 : 400);
928 887
929 /* Check vertical start (osd_upper). */ 888 /* Check vertical start (osd_upper). */
930 if (osd_upper + start_window.height > max_height + 1) { 889 if (osd_upper + start_window.height > max_height + 1) {
@@ -1127,10 +1086,6 @@ static int ivtvfb_init_card(struct ivtv *itv)
1127 /* Enable the osd */ 1086 /* Enable the osd */
1128 ivtvfb_blank(FB_BLANK_UNBLANK, &itv->osd_info->ivtvfb_info); 1087 ivtvfb_blank(FB_BLANK_UNBLANK, &itv->osd_info->ivtvfb_info);
1129 1088
1130 /* Note if we're running in compatibility mode */
1131 if (osd_compat)
1132 IVTVFB_INFO("Running in compatibility mode. Display resize & mode change disabled\n");
1133
1134 /* Allocate DMA */ 1089 /* Allocate DMA */
1135 ivtv_udma_alloc(itv); 1090 ivtv_udma_alloc(itv);
1136 return 0; 1091 return 0;
@@ -1177,9 +1132,12 @@ static void ivtvfb_cleanup(void)
1177 for (i = 0; i < ivtv_cards_active; i++) { 1132 for (i = 0; i < ivtv_cards_active; i++) {
1178 itv = ivtv_cards[i]; 1133 itv = ivtv_cards[i];
1179 if (itv && (itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT) && itv->osd_info) { 1134 if (itv && (itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT) && itv->osd_info) {
1135 if (unregister_framebuffer(&itv->osd_info->ivtvfb_info)) {
1136 IVTVFB_WARN("Framebuffer %d is in use, cannot unload\n", i);
1137 return;
1138 }
1180 IVTVFB_DEBUG_INFO("Unregister framebuffer %d\n", i); 1139 IVTVFB_DEBUG_INFO("Unregister framebuffer %d\n", i);
1181 ivtvfb_blank(FB_BLANK_POWERDOWN, &itv->osd_info->ivtvfb_info); 1140 ivtvfb_blank(FB_BLANK_POWERDOWN, &itv->osd_info->ivtvfb_info);
1182 unregister_framebuffer(&itv->osd_info->ivtvfb_info);
1183 ivtvfb_release_buffers(itv); 1141 ivtvfb_release_buffers(itv);
1184 itv->osd_video_pbase = 0; 1142 itv->osd_video_pbase = 0;
1185 } 1143 }
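The cleanup change turns module unload into a checked operation: unregister_framebuffer() fails while userspace still holds the framebuffer open, and the old code blanked and freed the buffers regardless. Condensed, with an illustrative helper name that is not in the driver:

        static void ivtvfb_cleanup_one(struct ivtv *itv, int i)
        {
                /* refuse to tear down while the fb is still open */
                if (unregister_framebuffer(&itv->osd_info->ivtvfb_info)) {
                        IVTVFB_WARN("Framebuffer %d is in use, cannot unload\n", i);
                        return;
                }
                ivtvfb_blank(FB_BLANK_POWERDOWN, &itv->osd_info->ivtvfb_info);
                ivtvfb_release_buffers(itv);
                itv->osd_video_pbase = 0;
        }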
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 69283926a8..c311632904 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -1762,7 +1762,6 @@ static struct video_device meye_template = {
1762 .owner = THIS_MODULE, 1762 .owner = THIS_MODULE,
1763 .name = "meye", 1763 .name = "meye",
1764 .type = VID_TYPE_CAPTURE, 1764 .type = VID_TYPE_CAPTURE,
1765 .hardware = VID_HARDWARE_MEYE,
1766 .fops = &meye_fops, 1765 .fops = &meye_fops,
1767 .release = video_device_release, 1766 .release = video_device_release,
1768 .minor = -1, 1767 .minor = -1,
diff --git a/drivers/media/video/ov511.c b/drivers/media/video/ov511.c
index b8d4ac0d93..d55d5800ef 100644
--- a/drivers/media/video/ov511.c
+++ b/drivers/media/video/ov511.c
@@ -4668,7 +4668,6 @@ static struct video_device vdev_template = {
4668 .owner = THIS_MODULE, 4668 .owner = THIS_MODULE,
4669 .name = "OV511 USB Camera", 4669 .name = "OV511 USB Camera",
4670 .type = VID_TYPE_CAPTURE, 4670 .type = VID_TYPE_CAPTURE,
4671 .hardware = VID_HARDWARE_OV511,
4672 .fops = &ov511_fops, 4671 .fops = &ov511_fops,
4673 .release = video_device_release, 4672 .release = video_device_release,
4674 .minor = -1, 4673 .minor = -1,
diff --git a/drivers/media/video/planb.c b/drivers/media/video/planb.c
index 0ef73d9d58..ce4b2f9791 100644
--- a/drivers/media/video/planb.c
+++ b/drivers/media/video/planb.c
@@ -2013,7 +2013,6 @@ static struct video_device planb_template=
2013 .owner = THIS_MODULE, 2013 .owner = THIS_MODULE,
2014 .name = PLANB_DEVICE_NAME, 2014 .name = PLANB_DEVICE_NAME,
2015 .type = VID_TYPE_OVERLAY, 2015 .type = VID_TYPE_OVERLAY,
2016 .hardware = VID_HARDWARE_PLANB,
2017 .open = planb_open, 2016 .open = planb_open,
2018 .close = planb_close, 2017 .close = planb_close,
2019 .read = planb_read, 2018 .read = planb_read,
diff --git a/drivers/media/video/pms.c b/drivers/media/video/pms.c
index b5a67f0dd1..6820c2aabd 100644
--- a/drivers/media/video/pms.c
+++ b/drivers/media/video/pms.c
@@ -895,7 +895,6 @@ static struct video_device pms_template=
895 .owner = THIS_MODULE, 895 .owner = THIS_MODULE,
896 .name = "Mediavision PMS", 896 .name = "Mediavision PMS",
897 .type = VID_TYPE_CAPTURE, 897 .type = VID_TYPE_CAPTURE,
898 .hardware = VID_HARDWARE_PMS,
899 .fops = &pms_fops, 898 .fops = &pms_fops,
900}; 899};
901 900
diff --git a/drivers/media/video/pvrusb2/pvrusb2-encoder.c b/drivers/media/video/pvrusb2/pvrusb2-encoder.c
index 20b614436d..205087a3e1 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-encoder.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-encoder.c
@@ -209,6 +209,11 @@ static int pvr2_encoder_cmd(void *ctxt,
209 209
210 LOCK_TAKE(hdw->ctl_lock); do { 210 LOCK_TAKE(hdw->ctl_lock); do {
211 211
212 if (!hdw->flag_encoder_ok) {
213 ret = -EIO;
214 break;
215 }
216
212 retry_flag = 0; 217 retry_flag = 0;
213 try_count++; 218 try_count++;
214 ret = 0; 219 ret = 0;
@@ -273,6 +278,7 @@ static int pvr2_encoder_cmd(void *ctxt,
273 ret = -EBUSY; 278 ret = -EBUSY;
274 } 279 }
275 if (ret) { 280 if (ret) {
281 hdw->flag_encoder_ok = 0;
276 pvr2_trace( 282 pvr2_trace(
277 PVR2_TRACE_ERROR_LEGS, 283 PVR2_TRACE_ERROR_LEGS,
278 "Giving up on command." 284 "Giving up on command."
diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h b/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h
index 985d9ae7f5..f873994b08 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h
+++ b/drivers/media/video/pvrusb2/pvrusb2-hdw-internal.h
@@ -225,11 +225,12 @@ struct pvr2_hdw {
225 unsigned int cmd_debug_write_len; // 225 unsigned int cmd_debug_write_len; //
226 unsigned int cmd_debug_read_len; // 226 unsigned int cmd_debug_read_len; //
227 227
228 int flag_ok; // device in known good state 228 int flag_ok; /* device in known good state */
229 int flag_disconnected; // flag_ok == 0 due to disconnect 229 int flag_disconnected; /* flag_ok == 0 due to disconnect */
230 int flag_init_ok; // true if structure is fully initialized 230 int flag_init_ok; /* true if structure is fully initialized */
231 int flag_streaming_enabled; // true if streaming should be on 231 int flag_streaming_enabled; /* true if streaming should be on */
232 int fw1_state; // current situation with fw1 232 int fw1_state; /* current situation with fw1 */
233 int flag_encoder_ok; /* True if encoder is healthy */
233 234
234 int flag_decoder_is_tuned; 235 int flag_decoder_is_tuned;
235 236
diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw.c b/drivers/media/video/pvrusb2/pvrusb2-hdw.c
index 27b12b4b5c..402c594882 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-hdw.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-hdw.c
@@ -1248,6 +1248,8 @@ int pvr2_upload_firmware2(struct pvr2_hdw *hdw)
1248 time we configure the encoder, then we'll fully configure it. */ 1248 time we configure the encoder, then we'll fully configure it. */
1249 hdw->enc_cur_valid = 0; 1249 hdw->enc_cur_valid = 0;
1250 1250
1251 hdw->flag_encoder_ok = 0;
1252
1251 /* First prepare firmware loading */ 1253 /* First prepare firmware loading */
1252 ret |= pvr2_write_register(hdw, 0x0048, 0xffffffff); /*interrupt mask*/ 1254 ret |= pvr2_write_register(hdw, 0x0048, 0xffffffff); /*interrupt mask*/
1253 ret |= pvr2_hdw_gpio_chg_dir(hdw,0xffffffff,0x00000088); /*gpio dir*/ 1255 ret |= pvr2_hdw_gpio_chg_dir(hdw,0xffffffff,0x00000088); /*gpio dir*/
@@ -1346,6 +1348,7 @@ int pvr2_upload_firmware2(struct pvr2_hdw *hdw)
1346 pvr2_trace(PVR2_TRACE_ERROR_LEGS, 1348 pvr2_trace(PVR2_TRACE_ERROR_LEGS,
1347 "firmware2 upload post-proc failure"); 1349 "firmware2 upload post-proc failure");
1348 } else { 1350 } else {
1351 hdw->flag_encoder_ok = !0;
1349 hdw->subsys_enabled_mask |= (1<<PVR2_SUBSYS_B_ENC_FIRMWARE); 1352 hdw->subsys_enabled_mask |= (1<<PVR2_SUBSYS_B_ENC_FIRMWARE);
1350 } 1353 }
1351 return ret; 1354 return ret;
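Taken together, the three pvrusb2 hunks add a one-way health gate for the encoder: flag_encoder_ok is cleared before firmware upload, set only once the upload post-processing succeeds, and cleared again whenever a command is finally given up on, so later commands fail fast with -EIO instead of retrying against a wedged encoder. Schematically (fragments from the hunks, not a complete function):

        /* in pvr2_upload_firmware2(), before the upload: */
        hdw->flag_encoder_ok = 0;

        /* in pvr2_upload_firmware2(), after post-upload checks pass: */
        hdw->flag_encoder_ok = !0;

        /* at the top of pvr2_encoder_cmd()'s locked section: */
        if (!hdw->flag_encoder_ok) {
                ret = -EIO;
                break;
        }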
diff --git a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
index 4563b3df8a..7a596ea7cf 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
@@ -1121,15 +1121,12 @@ static const struct file_operations vdev_fops = {
1121}; 1121};
1122 1122
1123 1123
1124#define VID_HARDWARE_PVRUSB2 38 /* FIXME : need a good value */
1125
1126static struct video_device vdev_template = { 1124static struct video_device vdev_template = {
1127 .owner = THIS_MODULE, 1125 .owner = THIS_MODULE,
1128 .type = VID_TYPE_CAPTURE | VID_TYPE_TUNER, 1126 .type = VID_TYPE_CAPTURE | VID_TYPE_TUNER,
1129 .type2 = (V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VBI_CAPTURE 1127 .type2 = (V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VBI_CAPTURE
1130 | V4L2_CAP_TUNER | V4L2_CAP_AUDIO 1128 | V4L2_CAP_TUNER | V4L2_CAP_AUDIO
1131 | V4L2_CAP_READWRITE), 1129 | V4L2_CAP_READWRITE),
1132 .hardware = VID_HARDWARE_PVRUSB2,
1133 .fops = &vdev_fops, 1130 .fops = &vdev_fops,
1134}; 1131};
1135 1132
diff --git a/drivers/media/video/pwc/pwc-if.c b/drivers/media/video/pwc/pwc-if.c
index 950da25421..7300ace8f4 100644
--- a/drivers/media/video/pwc/pwc-if.c
+++ b/drivers/media/video/pwc/pwc-if.c
@@ -166,7 +166,6 @@ static struct video_device pwc_template = {
166 .owner = THIS_MODULE, 166 .owner = THIS_MODULE,
167 .name = "Philips Webcam", /* Filled in later */ 167 .name = "Philips Webcam", /* Filled in later */
168 .type = VID_TYPE_CAPTURE, 168 .type = VID_TYPE_CAPTURE,
169 .hardware = VID_HARDWARE_PWC,
170 .release = video_device_release, 169 .release = video_device_release,
171 .fops = &pwc_fops, 170 .fops = &pwc_fops,
172 .minor = -1, 171 .minor = -1,
diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c
index 57f1f5d409..002e70a33a 100644
--- a/drivers/media/video/saa7134/saa6752hs.c
+++ b/drivers/media/video/saa7134/saa6752hs.c
@@ -71,7 +71,6 @@ static const struct v4l2_format v4l2_format_table[] =
71 71
72struct saa6752hs_state { 72struct saa6752hs_state {
73 struct i2c_client client; 73 struct i2c_client client;
74 struct v4l2_mpeg_compression old_params;
75 struct saa6752hs_mpeg_params params; 74 struct saa6752hs_mpeg_params params;
76 enum saa6752hs_videoformat video_format; 75 enum saa6752hs_videoformat video_format;
77 v4l2_std_id standard; 76 v4l2_std_id standard;
@@ -161,35 +160,6 @@ static struct saa6752hs_mpeg_params param_defaults =
161 .au_l2_bitrate = V4L2_MPEG_AUDIO_L2_BITRATE_256K, 160 .au_l2_bitrate = V4L2_MPEG_AUDIO_L2_BITRATE_256K,
162}; 161};
163 162
164static struct v4l2_mpeg_compression old_param_defaults =
165{
166 .st_type = V4L2_MPEG_TS_2,
167 .st_bitrate = {
168 .mode = V4L2_BITRATE_CBR,
169 .target = 7000,
170 },
171
172 .ts_pid_pmt = 16,
173 .ts_pid_video = 260,
174 .ts_pid_audio = 256,
175 .ts_pid_pcr = 259,
176
177 .vi_type = V4L2_MPEG_VI_2,
178 .vi_aspect_ratio = V4L2_MPEG_ASPECT_4_3,
179 .vi_bitrate = {
180 .mode = V4L2_BITRATE_VBR,
181 .target = 4000,
182 .max = 6000,
183 },
184
185 .au_type = V4L2_MPEG_AU_2_II,
186 .au_bitrate = {
187 .mode = V4L2_BITRATE_CBR,
188 .target = 256,
189 },
190
191};
192
193/* ---------------------------------------------------------------------- */ 163/* ---------------------------------------------------------------------- */
194 164
195static int saa6752hs_chip_command(struct i2c_client* client, 165static int saa6752hs_chip_command(struct i2c_client* client,
@@ -362,74 +332,6 @@ static void saa6752hs_set_subsampling(struct i2c_client* client,
362} 332}
363 333
364 334
365static void saa6752hs_old_set_params(struct i2c_client* client,
366 struct v4l2_mpeg_compression* params)
367{
368 struct saa6752hs_state *h = i2c_get_clientdata(client);
369
370 /* check PIDs */
371 if (params->ts_pid_pmt <= MPEG_PID_MAX) {
372 h->old_params.ts_pid_pmt = params->ts_pid_pmt;
373 h->params.ts_pid_pmt = params->ts_pid_pmt;
374 }
375 if (params->ts_pid_pcr <= MPEG_PID_MAX) {
376 h->old_params.ts_pid_pcr = params->ts_pid_pcr;
377 h->params.ts_pid_pcr = params->ts_pid_pcr;
378 }
379 if (params->ts_pid_video <= MPEG_PID_MAX) {
380 h->old_params.ts_pid_video = params->ts_pid_video;
381 h->params.ts_pid_video = params->ts_pid_video;
382 }
383 if (params->ts_pid_audio <= MPEG_PID_MAX) {
384 h->old_params.ts_pid_audio = params->ts_pid_audio;
385 h->params.ts_pid_audio = params->ts_pid_audio;
386 }
387
388 /* check bitrate parameters */
389 if ((params->vi_bitrate.mode == V4L2_BITRATE_CBR) ||
390 (params->vi_bitrate.mode == V4L2_BITRATE_VBR)) {
391 h->old_params.vi_bitrate.mode = params->vi_bitrate.mode;
392 h->params.vi_bitrate_mode = (params->vi_bitrate.mode == V4L2_BITRATE_VBR) ?
393 V4L2_MPEG_VIDEO_BITRATE_MODE_VBR : V4L2_MPEG_VIDEO_BITRATE_MODE_CBR;
394 }
395 if (params->vi_bitrate.mode != V4L2_BITRATE_NONE)
396 h->old_params.st_bitrate.target = params->st_bitrate.target;
397 if (params->vi_bitrate.mode != V4L2_BITRATE_NONE)
398 h->old_params.vi_bitrate.target = params->vi_bitrate.target;
399 if (params->vi_bitrate.mode == V4L2_BITRATE_VBR)
400 h->old_params.vi_bitrate.max = params->vi_bitrate.max;
401 if (params->au_bitrate.mode != V4L2_BITRATE_NONE)
402 h->old_params.au_bitrate.target = params->au_bitrate.target;
403
404 /* aspect ratio */
405 if (params->vi_aspect_ratio == V4L2_MPEG_ASPECT_4_3 ||
406 params->vi_aspect_ratio == V4L2_MPEG_ASPECT_16_9) {
407 h->old_params.vi_aspect_ratio = params->vi_aspect_ratio;
408 if (params->vi_aspect_ratio == V4L2_MPEG_ASPECT_4_3)
409 h->params.vi_aspect = V4L2_MPEG_VIDEO_ASPECT_4x3;
410 else
411 h->params.vi_aspect = V4L2_MPEG_VIDEO_ASPECT_16x9;
412 }
413
414 /* range checks */
415 if (h->old_params.st_bitrate.target > MPEG_TOTAL_TARGET_BITRATE_MAX)
416 h->old_params.st_bitrate.target = MPEG_TOTAL_TARGET_BITRATE_MAX;
417 if (h->old_params.vi_bitrate.target > MPEG_VIDEO_TARGET_BITRATE_MAX)
418 h->old_params.vi_bitrate.target = MPEG_VIDEO_TARGET_BITRATE_MAX;
419 if (h->old_params.vi_bitrate.max > MPEG_VIDEO_MAX_BITRATE_MAX)
420 h->old_params.vi_bitrate.max = MPEG_VIDEO_MAX_BITRATE_MAX;
421 h->params.vi_bitrate = params->vi_bitrate.target;
422 h->params.vi_bitrate_peak = params->vi_bitrate.max;
423 if (h->old_params.au_bitrate.target <= 256) {
424 h->old_params.au_bitrate.target = 256;
425 h->params.au_l2_bitrate = V4L2_MPEG_AUDIO_L2_BITRATE_256K;
426 }
427 else {
428 h->old_params.au_bitrate.target = 384;
429 h->params.au_l2_bitrate = V4L2_MPEG_AUDIO_L2_BITRATE_384K;
430 }
431}
432
433static int handle_ctrl(struct saa6752hs_mpeg_params *params, 335static int handle_ctrl(struct saa6752hs_mpeg_params *params,
434 struct v4l2_ext_control *ctrl, unsigned int cmd) 336 struct v4l2_ext_control *ctrl, unsigned int cmd)
435{ 337{
@@ -697,7 +599,6 @@ static int saa6752hs_attach(struct i2c_adapter *adap, int addr, int kind)
697 return -ENOMEM; 599 return -ENOMEM;
698 h->client = client_template; 600 h->client = client_template;
699 h->params = param_defaults; 601 h->params = param_defaults;
700 h->old_params = old_param_defaults;
701 h->client.adapter = adap; 602 h->client.adapter = adap;
702 h->client.addr = addr; 603 h->client.addr = addr;
703 604
@@ -734,23 +635,11 @@ saa6752hs_command(struct i2c_client *client, unsigned int cmd, void *arg)
734{ 635{
735 struct saa6752hs_state *h = i2c_get_clientdata(client); 636 struct saa6752hs_state *h = i2c_get_clientdata(client);
736 struct v4l2_ext_controls *ctrls = arg; 637 struct v4l2_ext_controls *ctrls = arg;
737 struct v4l2_mpeg_compression *old_params = arg;
738 struct saa6752hs_mpeg_params params; 638 struct saa6752hs_mpeg_params params;
739 int err = 0; 639 int err = 0;
740 int i; 640 int i;
741 641
742 switch (cmd) { 642 switch (cmd) {
743 case VIDIOC_S_MPEGCOMP:
744 if (NULL == old_params) {
745 /* apply settings and start encoder */
746 saa6752hs_init(client);
747 break;
748 }
749 saa6752hs_old_set_params(client, old_params);
750 /* fall through */
751 case VIDIOC_G_MPEGCOMP:
752 *old_params = h->old_params;
753 break;
754 case VIDIOC_S_EXT_CTRLS: 643 case VIDIOC_S_EXT_CTRLS:
755 if (ctrls->ctrl_class != V4L2_CTRL_CLASS_MPEG) 644 if (ctrls->ctrl_class != V4L2_CTRL_CLASS_MPEG)
756 return -EINVAL; 645 return -EINVAL;
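With VIDIOC_S_MPEGCOMP and VIDIOC_G_MPEGCOMP gone from saa6752hs, encoder parameters are reachable only through the extended-control API. A hedged userspace sketch of the replacement call (the function name, control choice, and value are illustrative only; fd is an open /dev/videoN descriptor):

        #include <stdio.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include <linux/videodev2.h>

        static int set_bitrate(int fd)
        {
                struct v4l2_ext_control ctrl;
                struct v4l2_ext_controls ctrls;

                memset(&ctrl, 0, sizeof(ctrl));
                ctrl.id = V4L2_CID_MPEG_VIDEO_BITRATE;
                ctrl.value = 4000000;           /* example: 4 Mbit/s */

                memset(&ctrls, 0, sizeof(ctrls));
                ctrls.ctrl_class = V4L2_CTRL_CLASS_MPEG;
                ctrls.count = 1;
                ctrls.controls = &ctrl;

                if (ioctl(fd, VIDIOC_S_EXT_CTRLS, &ctrls) < 0) {
                        perror("VIDIOC_S_EXT_CTRLS");
                        return -1;
                }
                return 0;
        }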
diff --git a/drivers/media/video/saa7134/saa7134-core.c b/drivers/media/video/saa7134/saa7134-core.c
index 1a4a24471f..a499eea379 100644
--- a/drivers/media/video/saa7134/saa7134-core.c
+++ b/drivers/media/video/saa7134/saa7134-core.c
@@ -429,7 +429,7 @@ int saa7134_set_dmabits(struct saa7134_dev *dev)
429 429
430 assert_spin_locked(&dev->slock); 430 assert_spin_locked(&dev->slock);
431 431
432 if (dev->inresume) 432 if (dev->insuspend)
433 return 0; 433 return 0;
434 434
435 /* video capture -- dma 0 + video task A */ 435 /* video capture -- dma 0 + video task A */
@@ -563,6 +563,9 @@ static irqreturn_t saa7134_irq(int irq, void *dev_id)
563 unsigned long report,status; 563 unsigned long report,status;
564 int loop, handled = 0; 564 int loop, handled = 0;
565 565
566 if (dev->insuspend)
567 goto out;
568
566 for (loop = 0; loop < 10; loop++) { 569 for (loop = 0; loop < 10; loop++) {
567 report = saa_readl(SAA7134_IRQ_REPORT); 570 report = saa_readl(SAA7134_IRQ_REPORT);
568 status = saa_readl(SAA7134_IRQ_STATUS); 571 status = saa_readl(SAA7134_IRQ_STATUS);
@@ -1163,6 +1166,7 @@ static void __devexit saa7134_finidev(struct pci_dev *pci_dev)
1163 kfree(dev); 1166 kfree(dev);
1164} 1167}
1165 1168
1169#ifdef CONFIG_PM
1166static int saa7134_suspend(struct pci_dev *pci_dev , pm_message_t state) 1170static int saa7134_suspend(struct pci_dev *pci_dev , pm_message_t state)
1167{ 1171{
1168 1172
@@ -1176,6 +1180,19 @@ static int saa7134_suspend(struct pci_dev *pci_dev , pm_message_t state)
1176 saa_writel(SAA7134_IRQ2, 0); 1180 saa_writel(SAA7134_IRQ2, 0);
1177 saa_writel(SAA7134_MAIN_CTRL, 0); 1181 saa_writel(SAA7134_MAIN_CTRL, 0);
1178 1182
1183 synchronize_irq(pci_dev->irq);
1184 dev->insuspend = 1;
1185
1186 /* Disable timeout timers - if we have active buffers, we will
 1187 fill them on resume */
1188
1189 del_timer(&dev->video_q.timeout);
1190 del_timer(&dev->vbi_q.timeout);
1191 del_timer(&dev->ts_q.timeout);
1192
1193 if (dev->remote)
1194 saa7134_ir_stop(dev);
1195
1179 pci_set_power_state(pci_dev, pci_choose_state(pci_dev, state)); 1196 pci_set_power_state(pci_dev, pci_choose_state(pci_dev, state));
1180 pci_save_state(pci_dev); 1197 pci_save_state(pci_dev);
1181 1198
@@ -1194,24 +1211,27 @@ static int saa7134_resume(struct pci_dev *pci_dev)
 1194 /* Do things that are done in saa7134_initdev, 1211 /* Do things that are done in saa7134_initdev,
 1195 except for initializing memory structures. */ 1212 except for initializing memory structures. */
1196 1213
1197 dev->inresume = 1;
1198 saa7134_board_init1(dev); 1214 saa7134_board_init1(dev);
1199 1215
1216 /* saa7134_hwinit1 */
1200 if (saa7134_boards[dev->board].video_out) 1217 if (saa7134_boards[dev->board].video_out)
1201 saa7134_videoport_init(dev); 1218 saa7134_videoport_init(dev);
1202
1203 if (card_has_mpeg(dev)) 1219 if (card_has_mpeg(dev))
1204 saa7134_ts_init_hw(dev); 1220 saa7134_ts_init_hw(dev);
1205 1221 if (dev->remote)
1222 saa7134_ir_start(dev, dev->remote);
1206 saa7134_hw_enable1(dev); 1223 saa7134_hw_enable1(dev);
1207 saa7134_set_decoder(dev); 1224
1208 saa7134_i2c_call_clients(dev, VIDIOC_S_STD, &dev->tvnorm->id); 1225
1209 saa7134_board_init2(dev); 1226 saa7134_board_init2(dev);
1210 saa7134_hw_enable2(dev);
1211 1227
1228 /*saa7134_hwinit2*/
1229 saa7134_set_tvnorm_hw(dev);
1212 saa7134_tvaudio_setmute(dev); 1230 saa7134_tvaudio_setmute(dev);
1213 saa7134_tvaudio_setvolume(dev, dev->ctl_volume); 1231 saa7134_tvaudio_setvolume(dev, dev->ctl_volume);
1232 saa7134_tvaudio_do_scan(dev);
1214 saa7134_enable_i2s(dev); 1233 saa7134_enable_i2s(dev);
1234 saa7134_hw_enable2(dev);
1215 1235
1216 /*resume unfinished buffer(s)*/ 1236 /*resume unfinished buffer(s)*/
1217 spin_lock_irqsave(&dev->slock, flags); 1237 spin_lock_irqsave(&dev->slock, flags);
@@ -1219,13 +1239,19 @@ static int saa7134_resume(struct pci_dev *pci_dev)
1219 saa7134_buffer_requeue(dev, &dev->vbi_q); 1239 saa7134_buffer_requeue(dev, &dev->vbi_q);
1220 saa7134_buffer_requeue(dev, &dev->ts_q); 1240 saa7134_buffer_requeue(dev, &dev->ts_q);
1221 1241
 1242 /* FIXME: Disable DMA audio sound - temporarily, until proper support
 1243 is implemented */
1244
1245 dev->dmasound.dma_running = 0;
1246
1222 /* start DMA now*/ 1247 /* start DMA now*/
1223 dev->inresume = 0; 1248 dev->insuspend = 0;
1224 saa7134_set_dmabits(dev); 1249 saa7134_set_dmabits(dev);
1225 spin_unlock_irqrestore(&dev->slock, flags); 1250 spin_unlock_irqrestore(&dev->slock, flags);
1226 1251
1227 return 0; 1252 return 0;
1228} 1253}
1254#endif
1229 1255
1230/* ----------------------------------------------------------- */ 1256/* ----------------------------------------------------------- */
1231 1257
@@ -1262,8 +1288,10 @@ static struct pci_driver saa7134_pci_driver = {
1262 .id_table = saa7134_pci_tbl, 1288 .id_table = saa7134_pci_tbl,
1263 .probe = saa7134_initdev, 1289 .probe = saa7134_initdev,
1264 .remove = __devexit_p(saa7134_finidev), 1290 .remove = __devexit_p(saa7134_finidev),
1291#ifdef CONFIG_PM
1265 .suspend = saa7134_suspend, 1292 .suspend = saa7134_suspend,
1266 .resume = saa7134_resume 1293 .resume = saa7134_resume
1294#endif
1267}; 1295};
1268 1296
1269static int saa7134_init(void) 1297static int saa7134_init(void)
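The saa7134 core changes rename inresume to insuspend and widen its meaning to "hardware is powered down": both the IRQ handler and saa7134_set_dmabits() bail out while it is set, and the suspend path masks the chip's interrupts and calls synchronize_irq() before raising the flag, so no handler can race the transition. In outline (condensed from the hunks; the drvdata lookup is assumed):

        static int saa7134_suspend(struct pci_dev *pci_dev, pm_message_t state)
        {
                struct saa7134_dev *dev = pci_get_drvdata(pci_dev);

                /* mask chip interrupts, then wait out any running handler */
                saa_writel(SAA7134_IRQ1, 0);
                saa_writel(SAA7134_IRQ2, 0);
                saa_writel(SAA7134_MAIN_CTRL, 0);
                synchronize_irq(pci_dev->irq);
                dev->insuspend = 1;

                /* timeout timers would otherwise refire on dead hardware */
                del_timer(&dev->video_q.timeout);

                pci_set_power_state(pci_dev, pci_choose_state(pci_dev, state));
                pci_save_state(pci_dev);
                return 0;
        }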
diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c
index 34ca874dd7..75d0c5bf46 100644
--- a/drivers/media/video/saa7134/saa7134-empress.c
+++ b/drivers/media/video/saa7134/saa7134-empress.c
@@ -284,17 +284,6 @@ static int ts_do_ioctl(struct inode *inode, struct file *file,
284 case VIDIOC_S_CTRL: 284 case VIDIOC_S_CTRL:
285 return saa7134_common_ioctl(dev, cmd, arg); 285 return saa7134_common_ioctl(dev, cmd, arg);
286 286
287 case VIDIOC_S_MPEGCOMP:
288 printk(KERN_WARNING "VIDIOC_S_MPEGCOMP is obsolete. "
289 "Replace with VIDIOC_S_EXT_CTRLS!");
290 saa7134_i2c_call_clients(dev, VIDIOC_S_MPEGCOMP, arg);
291 ts_init_encoder(dev);
292 return 0;
293 case VIDIOC_G_MPEGCOMP:
294 printk(KERN_WARNING "VIDIOC_G_MPEGCOMP is obsolete. "
295 "Replace with VIDIOC_G_EXT_CTRLS!");
296 saa7134_i2c_call_clients(dev, VIDIOC_G_MPEGCOMP, arg);
297 return 0;
298 case VIDIOC_S_EXT_CTRLS: 287 case VIDIOC_S_EXT_CTRLS:
299 /* count == 0 is abused in saa6752hs.c, so that special 288 /* count == 0 is abused in saa6752hs.c, so that special
300 case is handled here explicitly. */ 289 case is handled here explicitly. */
@@ -342,7 +331,6 @@ static struct video_device saa7134_empress_template =
342 .name = "saa7134-empress", 331 .name = "saa7134-empress",
343 .type = 0 /* FIXME */, 332 .type = 0 /* FIXME */,
344 .type2 = 0 /* FIXME */, 333 .type2 = 0 /* FIXME */,
345 .hardware = 0,
346 .fops = &ts_fops, 334 .fops = &ts_fops,
347 .minor = -1, 335 .minor = -1,
348}; 336};
diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c
index 80d2644f76..3abaa1b8ac 100644
--- a/drivers/media/video/saa7134/saa7134-input.c
+++ b/drivers/media/video/saa7134/saa7134-input.c
@@ -44,6 +44,14 @@ module_param(ir_rc5_remote_gap, int, 0644);
44static int ir_rc5_key_timeout = 115; 44static int ir_rc5_key_timeout = 115;
45module_param(ir_rc5_key_timeout, int, 0644); 45module_param(ir_rc5_key_timeout, int, 0644);
46 46
47static int repeat_delay = 500;
48module_param(repeat_delay, int, 0644);
 49MODULE_PARM_DESC(repeat_delay, "delay before key repeat starts");
50static int repeat_period = 33;
51module_param(repeat_period, int, 0644);
 52MODULE_PARM_DESC(repeat_period, "repeat period between "
 53 "keypresses when key is down");
54
47#define dprintk(fmt, arg...) if (ir_debug) \ 55#define dprintk(fmt, arg...) if (ir_debug) \
48 printk(KERN_DEBUG "%s/ir: " fmt, dev->name , ## arg) 56 printk(KERN_DEBUG "%s/ir: " fmt, dev->name , ## arg)
49#define i2cdprintk(fmt, arg...) if (ir_debug) \ 57#define i2cdprintk(fmt, arg...) if (ir_debug) \
@@ -59,6 +67,13 @@ static int build_key(struct saa7134_dev *dev)
59 struct card_ir *ir = dev->remote; 67 struct card_ir *ir = dev->remote;
60 u32 gpio, data; 68 u32 gpio, data;
61 69
 70 /* here come the additional handshake steps for some cards */
71 switch (dev->board) {
72 case SAA7134_BOARD_GOTVIEW_7135:
73 saa_setb(SAA7134_GPIO_GPSTATUS1, 0x80);
74 saa_clearb(SAA7134_GPIO_GPSTATUS1, 0x80);
75 break;
76 }
62 /* rising SAA7134_GPIO_GPRESCAN reads the status */ 77 /* rising SAA7134_GPIO_GPRESCAN reads the status */
63 saa_clearb(SAA7134_GPIO_GPMODE3,SAA7134_GPIO_GPRESCAN); 78 saa_clearb(SAA7134_GPIO_GPMODE3,SAA7134_GPIO_GPRESCAN);
64 saa_setb(SAA7134_GPIO_GPMODE3,SAA7134_GPIO_GPRESCAN); 79 saa_setb(SAA7134_GPIO_GPMODE3,SAA7134_GPIO_GPRESCAN);
@@ -159,7 +174,7 @@ static void saa7134_input_timer(unsigned long data)
159 mod_timer(&ir->timer, jiffies + msecs_to_jiffies(ir->polling)); 174 mod_timer(&ir->timer, jiffies + msecs_to_jiffies(ir->polling));
160} 175}
161 176
162static void saa7134_ir_start(struct saa7134_dev *dev, struct card_ir *ir) 177void saa7134_ir_start(struct saa7134_dev *dev, struct card_ir *ir)
163{ 178{
164 if (ir->polling) { 179 if (ir->polling) {
165 setup_timer(&ir->timer, saa7134_input_timer, 180 setup_timer(&ir->timer, saa7134_input_timer,
@@ -182,7 +197,7 @@ static void saa7134_ir_start(struct saa7134_dev *dev, struct card_ir *ir)
182 } 197 }
183} 198}
184 199
185static void saa7134_ir_stop(struct saa7134_dev *dev) 200void saa7134_ir_stop(struct saa7134_dev *dev)
186{ 201{
187 if (dev->remote->polling) 202 if (dev->remote->polling)
188 del_timer_sync(&dev->remote->timer); 203 del_timer_sync(&dev->remote->timer);
@@ -285,10 +300,10 @@ int saa7134_input_init1(struct saa7134_dev *dev)
285 break; 300 break;
286 case SAA7134_BOARD_GOTVIEW_7135: 301 case SAA7134_BOARD_GOTVIEW_7135:
287 ir_codes = ir_codes_gotview7135; 302 ir_codes = ir_codes_gotview7135;
288 mask_keycode = 0x0003EC; 303 mask_keycode = 0x0003CC;
289 mask_keyup = 0x008000;
290 mask_keydown = 0x000010; 304 mask_keydown = 0x000010;
291 polling = 50; // ms 305 polling = 5; /* ms */
306 saa_setb(SAA7134_GPIO_GPMODE1, 0x80);
292 break; 307 break;
293 case SAA7134_BOARD_VIDEOMATE_TV_PVR: 308 case SAA7134_BOARD_VIDEOMATE_TV_PVR:
294 case SAA7134_BOARD_VIDEOMATE_GOLD_PLUS: 309 case SAA7134_BOARD_VIDEOMATE_GOLD_PLUS:
@@ -386,6 +401,10 @@ int saa7134_input_init1(struct saa7134_dev *dev)
386 if (err) 401 if (err)
387 goto err_out_stop; 402 goto err_out_stop;
388 403
404 /* the remote isn't as bouncy as a keyboard */
405 ir->dev->rep[REP_DELAY] = repeat_delay;
406 ir->dev->rep[REP_PERIOD] = repeat_period;
407
389 return 0; 408 return 0;
390 409
391 err_out_stop: 410 err_out_stop:
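Setting rep[REP_DELAY] and rep[REP_PERIOD] on the input device overrides the input core's keyboard-oriented autorepeat timing (roughly 250 ms delay / 33 ms period by default in kernels of this era) with values better suited to an IR remote. Minimal usage, as in the hunk:

        static int repeat_delay = 500;          /* ms before key repeat starts */
        static int repeat_period = 33;          /* ms between repeats */

        /* after input_register_device(ir->dev) succeeds: */
        ir->dev->rep[REP_DELAY] = repeat_delay;
        ir->dev->rep[REP_PERIOD] = repeat_period;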
diff --git a/drivers/media/video/saa7134/saa7134-tvaudio.c b/drivers/media/video/saa7134/saa7134-tvaudio.c
index 1b9e39a5ea..f8e304c762 100644
--- a/drivers/media/video/saa7134/saa7134-tvaudio.c
+++ b/drivers/media/video/saa7134/saa7134-tvaudio.c
@@ -27,6 +27,7 @@
27#include <linux/kthread.h> 27#include <linux/kthread.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/delay.h> 29#include <linux/delay.h>
30#include <linux/freezer.h>
30#include <asm/div64.h> 31#include <asm/div64.h>
31 32
32#include "saa7134-reg.h" 33#include "saa7134-reg.h"
@@ -231,7 +232,7 @@ static void mute_input_7134(struct saa7134_dev *dev)
231 } 232 }
232 233
233 if (dev->hw_mute == mute && 234 if (dev->hw_mute == mute &&
234 dev->hw_input == in && !dev->inresume) { 235 dev->hw_input == in && !dev->insuspend) {
235 dprintk("mute/input: nothing to do [mute=%d,input=%s]\n", 236 dprintk("mute/input: nothing to do [mute=%d,input=%s]\n",
236 mute,in->name); 237 mute,in->name);
237 return; 238 return;
@@ -502,13 +503,17 @@ static int tvaudio_thread(void *data)
502 unsigned int i, audio, nscan; 503 unsigned int i, audio, nscan;
503 int max1,max2,carrier,rx,mode,lastmode,default_carrier; 504 int max1,max2,carrier,rx,mode,lastmode,default_carrier;
504 505
505 allow_signal(SIGTERM); 506
507 set_freezable();
508
506 for (;;) { 509 for (;;) {
507 tvaudio_sleep(dev,-1); 510 tvaudio_sleep(dev,-1);
508 if (kthread_should_stop() || signal_pending(current)) 511 if (kthread_should_stop())
509 goto done; 512 goto done;
510 513
511 restart: 514 restart:
515 try_to_freeze();
516
512 dev->thread.scan1 = dev->thread.scan2; 517 dev->thread.scan1 = dev->thread.scan2;
513 dprintk("tvaudio thread scan start [%d]\n",dev->thread.scan1); 518 dprintk("tvaudio thread scan start [%d]\n",dev->thread.scan1);
514 dev->tvaudio = NULL; 519 dev->tvaudio = NULL;
@@ -612,9 +617,12 @@ static int tvaudio_thread(void *data)
612 617
613 lastmode = 42; 618 lastmode = 42;
614 for (;;) { 619 for (;;) {
620
621 try_to_freeze();
622
615 if (tvaudio_sleep(dev,5000)) 623 if (tvaudio_sleep(dev,5000))
616 goto restart; 624 goto restart;
617 if (kthread_should_stop() || signal_pending(current)) 625 if (kthread_should_stop())
618 break; 626 break;
619 if (UNSET == dev->thread.mode) { 627 if (UNSET == dev->thread.mode) {
620 rx = tvaudio_getstereo(dev,&tvaudio[i]); 628 rx = tvaudio_getstereo(dev,&tvaudio[i]);
@@ -630,6 +638,7 @@ static int tvaudio_thread(void *data)
630 } 638 }
631 639
632 done: 640 done:
641 dev->thread.stopped = 1;
633 return 0; 642 return 0;
634} 643}
635 644
@@ -777,7 +786,8 @@ static int tvaudio_thread_ddep(void *data)
777 struct saa7134_dev *dev = data; 786 struct saa7134_dev *dev = data;
778 u32 value, norms, clock; 787 u32 value, norms, clock;
779 788
780 allow_signal(SIGTERM); 789
790 set_freezable();
781 791
782 clock = saa7134_boards[dev->board].audio_clock; 792 clock = saa7134_boards[dev->board].audio_clock;
783 if (UNSET != audio_clock_override) 793 if (UNSET != audio_clock_override)
@@ -790,10 +800,13 @@ static int tvaudio_thread_ddep(void *data)
790 800
791 for (;;) { 801 for (;;) {
792 tvaudio_sleep(dev,-1); 802 tvaudio_sleep(dev,-1);
793 if (kthread_should_stop() || signal_pending(current)) 803 if (kthread_should_stop())
794 goto done; 804 goto done;
795 805
796 restart: 806 restart:
807
808 try_to_freeze();
809
797 dev->thread.scan1 = dev->thread.scan2; 810 dev->thread.scan1 = dev->thread.scan2;
798 dprintk("tvaudio thread scan start [%d]\n",dev->thread.scan1); 811 dprintk("tvaudio thread scan start [%d]\n",dev->thread.scan1);
799 812
@@ -870,6 +883,7 @@ static int tvaudio_thread_ddep(void *data)
870 } 883 }
871 884
872 done: 885 done:
886 dev->thread.stopped = 1;
873 return 0; 887 return 0;
874} 888}
875 889
@@ -997,7 +1011,7 @@ int saa7134_tvaudio_init2(struct saa7134_dev *dev)
997int saa7134_tvaudio_fini(struct saa7134_dev *dev) 1011int saa7134_tvaudio_fini(struct saa7134_dev *dev)
998{ 1012{
999 /* shutdown tvaudio thread */ 1013 /* shutdown tvaudio thread */
1000 if (dev->thread.thread) 1014 if (dev->thread.thread && !dev->thread.stopped)
1001 kthread_stop(dev->thread.thread); 1015 kthread_stop(dev->thread.thread);
1002 1016
1003 saa_andorb(SAA7134_ANALOG_IO_SELECT, 0x07, 0x00); /* LINE1 */ 1017 saa_andorb(SAA7134_ANALOG_IO_SELECT, 0x07, 0x00); /* LINE1 */
@@ -1013,7 +1027,9 @@ int saa7134_tvaudio_do_scan(struct saa7134_dev *dev)
1013 } else if (dev->thread.thread) { 1027 } else if (dev->thread.thread) {
1014 dev->thread.mode = UNSET; 1028 dev->thread.mode = UNSET;
1015 dev->thread.scan2++; 1029 dev->thread.scan2++;
1016 wake_up_process(dev->thread.thread); 1030
1031 if (!dev->insuspend && !dev->thread.stopped)
1032 wake_up_process(dev->thread.thread);
1017 } else { 1033 } else {
1018 dev->automute = 0; 1034 dev->automute = 0;
1019 saa7134_tvaudio_setmute(dev); 1035 saa7134_tvaudio_setmute(dev);
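Both tvaudio threads previously died on SIGTERM, which interacts badly with suspend; they now use the freezer (hence the new linux/freezer.h include) and exit only via kthread_should_stop(). The new thread.stopped flag records that the thread function has returned, so saa7134_tvaudio_fini() and the scan path never stop or wake a thread that is already gone. The resulting loop shape, roughly:

        static int tvaudio_thread(void *data)
        {
                struct saa7134_dev *dev = data;

                set_freezable();                /* participate in suspend/resume */
                for (;;) {
                        tvaudio_sleep(dev, -1);
                        if (kthread_should_stop())
                                break;
                        try_to_freeze();        /* park here while suspended */
                        /* ... carrier scan work ... */
                }
                dev->thread.stopped = 1;
                return 0;
        }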
diff --git a/drivers/media/video/saa7134/saa7134-video.c b/drivers/media/video/saa7134/saa7134-video.c
index 471b92793c..3b9ffb4b64 100644
--- a/drivers/media/video/saa7134/saa7134-video.c
+++ b/drivers/media/video/saa7134/saa7134-video.c
@@ -560,15 +560,8 @@ void set_tvnorm(struct saa7134_dev *dev, struct saa7134_tvnorm *norm)
560 560
561 dev->crop_current = dev->crop_defrect; 561 dev->crop_current = dev->crop_defrect;
562 562
563 saa7134_set_decoder(dev); 563 saa7134_set_tvnorm_hw(dev);
564 564
565 if (card_in(dev, dev->ctl_input).tv) {
566 if ((card(dev).tuner_type == TUNER_PHILIPS_TDA8290)
567 && ((card(dev).tuner_config == 1)
568 || (card(dev).tuner_config == 2)))
569 saa7134_set_gpio(dev, 22, 5);
570 saa7134_i2c_call_clients(dev, VIDIOC_S_STD, &norm->id);
571 }
572} 565}
573 566
574static void video_mux(struct saa7134_dev *dev, int input) 567static void video_mux(struct saa7134_dev *dev, int input)
@@ -579,7 +572,8 @@ static void video_mux(struct saa7134_dev *dev, int input)
579 saa7134_tvaudio_setinput(dev, &card_in(dev, input)); 572 saa7134_tvaudio_setinput(dev, &card_in(dev, input));
580} 573}
581 574
582void saa7134_set_decoder(struct saa7134_dev *dev) 575
576static void saa7134_set_decoder(struct saa7134_dev *dev)
583{ 577{
584 int luma_control, sync_control, mux; 578 int luma_control, sync_control, mux;
585 579
@@ -630,6 +624,19 @@ void saa7134_set_decoder(struct saa7134_dev *dev)
630 saa_writeb(SAA7134_RAW_DATA_OFFSET, 0x80); 624 saa_writeb(SAA7134_RAW_DATA_OFFSET, 0x80);
631} 625}
632 626
627void saa7134_set_tvnorm_hw(struct saa7134_dev *dev)
628{
629 saa7134_set_decoder(dev);
630
631 if (card_in(dev, dev->ctl_input).tv) {
632 if ((card(dev).tuner_type == TUNER_PHILIPS_TDA8290)
633 && ((card(dev).tuner_config == 1)
634 || (card(dev).tuner_config == 2)))
635 saa7134_set_gpio(dev, 22, 5);
636 saa7134_i2c_call_clients(dev, VIDIOC_S_STD, &dev->tvnorm->id);
637 }
638}
639
633static void set_h_prescale(struct saa7134_dev *dev, int task, int prescale) 640static void set_h_prescale(struct saa7134_dev *dev, int task, int prescale)
634{ 641{
635 static const struct { 642 static const struct {
@@ -2352,7 +2359,6 @@ struct video_device saa7134_video_template =
2352 .name = "saa7134-video", 2359 .name = "saa7134-video",
2353 .type = VID_TYPE_CAPTURE|VID_TYPE_TUNER| 2360 .type = VID_TYPE_CAPTURE|VID_TYPE_TUNER|
2354 VID_TYPE_CLIPPING|VID_TYPE_SCALES, 2361 VID_TYPE_CLIPPING|VID_TYPE_SCALES,
2355 .hardware = 0,
2356 .fops = &video_fops, 2362 .fops = &video_fops,
2357 .minor = -1, 2363 .minor = -1,
2358}; 2364};
@@ -2361,7 +2367,6 @@ struct video_device saa7134_vbi_template =
2361{ 2367{
2362 .name = "saa7134-vbi", 2368 .name = "saa7134-vbi",
2363 .type = VID_TYPE_TUNER|VID_TYPE_TELETEXT, 2369 .type = VID_TYPE_TUNER|VID_TYPE_TELETEXT,
2364 .hardware = 0,
2365 .fops = &video_fops, 2370 .fops = &video_fops,
2366 .minor = -1, 2371 .minor = -1,
2367}; 2372};
@@ -2370,7 +2375,6 @@ struct video_device saa7134_radio_template =
2370{ 2375{
2371 .name = "saa7134-radio", 2376 .name = "saa7134-radio",
2372 .type = VID_TYPE_TUNER, 2377 .type = VID_TYPE_TUNER,
2373 .hardware = 0,
2374 .fops = &radio_fops, 2378 .fops = &radio_fops,
2375 .minor = -1, 2379 .minor = -1,
2376}; 2380};
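saa7134_set_decoder() becomes static, and the exported entry point is now saa7134_set_tvnorm_hw(), which pairs the register programming with the tuner notification so that set_tvnorm() and the resume path configure the norm identically. Condensed (the TDA8290 GPIO quirk from the hunk is elided):

        void saa7134_set_tvnorm_hw(struct saa7134_dev *dev)
        {
                saa7134_set_decoder(dev);       /* program the saa713x registers */

                /* notify the tuner, but only when a TV input is selected */
                if (card_in(dev, dev->ctl_input).tv)
                        saa7134_i2c_call_clients(dev, VIDIOC_S_STD,
                                                 &dev->tvnorm->id);
        }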
diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h
index 28ec6804bd..66a390c321 100644
--- a/drivers/media/video/saa7134/saa7134.h
+++ b/drivers/media/video/saa7134/saa7134.h
@@ -333,6 +333,7 @@ struct saa7134_thread {
333 unsigned int scan1; 333 unsigned int scan1;
334 unsigned int scan2; 334 unsigned int scan2;
335 unsigned int mode; 335 unsigned int mode;
336 unsigned int stopped;
336}; 337};
337 338
338/* buffer for one video/vbi/ts frame */ 339/* buffer for one video/vbi/ts frame */
@@ -524,7 +525,7 @@ struct saa7134_dev {
524 unsigned int hw_mute; 525 unsigned int hw_mute;
525 int last_carrier; 526 int last_carrier;
526 int nosignal; 527 int nosignal;
527 unsigned int inresume; 528 unsigned int insuspend;
528 529
529 /* SAA7134_MPEG_* */ 530 /* SAA7134_MPEG_* */
530 struct saa7134_ts ts; 531 struct saa7134_ts ts;
@@ -632,7 +633,7 @@ extern struct video_device saa7134_radio_template;
632 633
633void set_tvnorm(struct saa7134_dev *dev, struct saa7134_tvnorm *norm); 634void set_tvnorm(struct saa7134_dev *dev, struct saa7134_tvnorm *norm);
634int saa7134_videoport_init(struct saa7134_dev *dev); 635int saa7134_videoport_init(struct saa7134_dev *dev);
635void saa7134_set_decoder(struct saa7134_dev *dev); 636void saa7134_set_tvnorm_hw(struct saa7134_dev *dev);
636 637
637int saa7134_common_ioctl(struct saa7134_dev *dev, 638int saa7134_common_ioctl(struct saa7134_dev *dev,
638 unsigned int cmd, void *arg); 639 unsigned int cmd, void *arg);
@@ -706,6 +707,8 @@ int saa7134_input_init1(struct saa7134_dev *dev);
706void saa7134_input_fini(struct saa7134_dev *dev); 707void saa7134_input_fini(struct saa7134_dev *dev);
707void saa7134_input_irq(struct saa7134_dev *dev); 708void saa7134_input_irq(struct saa7134_dev *dev);
708void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir); 709void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir);
710void saa7134_ir_start(struct saa7134_dev *dev, struct card_ir *ir);
711void saa7134_ir_stop(struct saa7134_dev *dev);
709 712
710 713
711/* 714/*
diff --git a/drivers/media/video/se401.c b/drivers/media/video/se401.c
index 93fb04ed99..d5d7d6cf73 100644
--- a/drivers/media/video/se401.c
+++ b/drivers/media/video/se401.c
@@ -1231,7 +1231,6 @@ static struct video_device se401_template = {
1231 .owner = THIS_MODULE, 1231 .owner = THIS_MODULE,
1232 .name = "se401 USB camera", 1232 .name = "se401 USB camera",
1233 .type = VID_TYPE_CAPTURE, 1233 .type = VID_TYPE_CAPTURE,
1234 .hardware = VID_HARDWARE_SE401,
1235 .fops = &se401_fops, 1234 .fops = &se401_fops,
1236}; 1235};
1237 1236
diff --git a/drivers/media/video/sn9c102/sn9c102_core.c b/drivers/media/video/sn9c102/sn9c102_core.c
index 6991e06f76..511847912c 100644
--- a/drivers/media/video/sn9c102/sn9c102_core.c
+++ b/drivers/media/video/sn9c102/sn9c102_core.c
@@ -3319,7 +3319,6 @@ sn9c102_usb_probe(struct usb_interface* intf, const struct usb_device_id* id)
3319 strcpy(cam->v4ldev->name, "SN9C1xx PC Camera"); 3319 strcpy(cam->v4ldev->name, "SN9C1xx PC Camera");
3320 cam->v4ldev->owner = THIS_MODULE; 3320 cam->v4ldev->owner = THIS_MODULE;
3321 cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES; 3321 cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES;
3322 cam->v4ldev->hardware = 0;
3323 cam->v4ldev->fops = &sn9c102_fops; 3322 cam->v4ldev->fops = &sn9c102_fops;
3324 cam->v4ldev->minor = video_nr[dev_nr]; 3323 cam->v4ldev->minor = video_nr[dev_nr];
3325 cam->v4ldev->release = video_device_release; 3324 cam->v4ldev->release = video_device_release;
diff --git a/drivers/media/video/stradis.c b/drivers/media/video/stradis.c
index eb220461ac..3fb85af5d1 100644
--- a/drivers/media/video/stradis.c
+++ b/drivers/media/video/stradis.c
@@ -1917,7 +1917,6 @@ static const struct file_operations saa_fops = {
1917static struct video_device saa_template = { 1917static struct video_device saa_template = {
1918 .name = "SAA7146A", 1918 .name = "SAA7146A",
1919 .type = VID_TYPE_CAPTURE | VID_TYPE_OVERLAY, 1919 .type = VID_TYPE_CAPTURE | VID_TYPE_OVERLAY,
1920 .hardware = VID_HARDWARE_SAA7146,
1921 .fops = &saa_fops, 1920 .fops = &saa_fops,
1922 .minor = -1, 1921 .minor = -1,
1923}; 1922};
diff --git a/drivers/media/video/stv680.c b/drivers/media/video/stv680.c
index 9e009a7ab8..afc32aa56f 100644
--- a/drivers/media/video/stv680.c
+++ b/drivers/media/video/stv680.c
@@ -1398,7 +1398,6 @@ static struct video_device stv680_template = {
1398 .owner = THIS_MODULE, 1398 .owner = THIS_MODULE,
1399 .name = "STV0680 USB camera", 1399 .name = "STV0680 USB camera",
1400 .type = VID_TYPE_CAPTURE, 1400 .type = VID_TYPE_CAPTURE,
1401 .hardware = VID_HARDWARE_SE401,
1402 .fops = &stv680_fops, 1401 .fops = &stv680_fops,
1403 .release = video_device_release, 1402 .release = video_device_release,
1404 .minor = -1, 1403 .minor = -1,
diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index 94843086cd..6a777604f0 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -113,7 +113,7 @@ static void fe_standby(struct tuner *t)
113static int fe_has_signal(struct tuner *t) 113static int fe_has_signal(struct tuner *t)
114{ 114{
115 struct dvb_tuner_ops *fe_tuner_ops = &t->fe.ops.tuner_ops; 115 struct dvb_tuner_ops *fe_tuner_ops = &t->fe.ops.tuner_ops;
116 u16 strength; 116 u16 strength = 0;
117 117
118 if (fe_tuner_ops->get_rf_strength) 118 if (fe_tuner_ops->get_rf_strength)
119 fe_tuner_ops->get_rf_strength(&t->fe, &strength); 119 fe_tuner_ops->get_rf_strength(&t->fe, &strength);
diff --git a/drivers/media/video/usbvideo/usbvideo.c b/drivers/media/video/usbvideo/usbvideo.c
index 37ce36b9e5..fb434b5602 100644
--- a/drivers/media/video/usbvideo/usbvideo.c
+++ b/drivers/media/video/usbvideo/usbvideo.c
@@ -952,7 +952,6 @@ static const struct file_operations usbvideo_fops = {
952static const struct video_device usbvideo_template = { 952static const struct video_device usbvideo_template = {
953 .owner = THIS_MODULE, 953 .owner = THIS_MODULE,
954 .type = VID_TYPE_CAPTURE, 954 .type = VID_TYPE_CAPTURE,
955 .hardware = VID_HARDWARE_CPIA,
956 .fops = &usbvideo_fops, 955 .fops = &usbvideo_fops,
957}; 956};
958 957
diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c
index db3c9e3deb..da1ba02111 100644
--- a/drivers/media/video/usbvideo/vicam.c
+++ b/drivers/media/video/usbvideo/vicam.c
@@ -1074,7 +1074,6 @@ static struct video_device vicam_template = {
1074 .owner = THIS_MODULE, 1074 .owner = THIS_MODULE,
1075 .name = "ViCam-based USB Camera", 1075 .name = "ViCam-based USB Camera",
1076 .type = VID_TYPE_CAPTURE, 1076 .type = VID_TYPE_CAPTURE,
1077 .hardware = VID_HARDWARE_VICAM,
1078 .fops = &vicam_fops, 1077 .fops = &vicam_fops,
1079 .minor = -1, 1078 .minor = -1,
1080}; 1079};
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index e2f3c01cfa..36e689fa16 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -1400,7 +1400,6 @@ static const struct file_operations usbvision_fops = {
1400static struct video_device usbvision_video_template = { 1400static struct video_device usbvision_video_template = {
1401 .owner = THIS_MODULE, 1401 .owner = THIS_MODULE,
1402 .type = VID_TYPE_TUNER | VID_TYPE_CAPTURE, 1402 .type = VID_TYPE_TUNER | VID_TYPE_CAPTURE,
1403 .hardware = VID_HARDWARE_USBVISION,
1404 .fops = &usbvision_fops, 1403 .fops = &usbvision_fops,
1405 .name = "usbvision-video", 1404 .name = "usbvision-video",
1406 .release = video_device_release, 1405 .release = video_device_release,
@@ -1455,7 +1454,6 @@ static struct video_device usbvision_radio_template=
1455{ 1454{
1456 .owner = THIS_MODULE, 1455 .owner = THIS_MODULE,
1457 .type = VID_TYPE_TUNER, 1456 .type = VID_TYPE_TUNER,
1458 .hardware = VID_HARDWARE_USBVISION,
1459 .fops = &usbvision_radio_fops, 1457 .fops = &usbvision_radio_fops,
1460 .name = "usbvision-radio", 1458 .name = "usbvision-radio",
1461 .release = video_device_release, 1459 .release = video_device_release,
@@ -1492,7 +1490,6 @@ static struct video_device usbvision_vbi_template=
1492{ 1490{
1493 .owner = THIS_MODULE, 1491 .owner = THIS_MODULE,
1494 .type = VID_TYPE_TUNER, 1492 .type = VID_TYPE_TUNER,
1495 .hardware = VID_HARDWARE_USBVISION,
1496 .fops = &usbvision_vbi_fops, 1493 .fops = &usbvision_vbi_fops,
1497 .release = video_device_release, 1494 .release = video_device_release,
1498 .name = "usbvision-vbi", 1495 .name = "usbvision-vbi",
diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index 321249240d..1141b4bf41 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -317,8 +317,6 @@ static const char *v4l2_ioctls[] = {
317 [_IOC_NR(VIDIOC_ENUM_FMT)] = "VIDIOC_ENUM_FMT", 317 [_IOC_NR(VIDIOC_ENUM_FMT)] = "VIDIOC_ENUM_FMT",
318 [_IOC_NR(VIDIOC_G_FMT)] = "VIDIOC_G_FMT", 318 [_IOC_NR(VIDIOC_G_FMT)] = "VIDIOC_G_FMT",
319 [_IOC_NR(VIDIOC_S_FMT)] = "VIDIOC_S_FMT", 319 [_IOC_NR(VIDIOC_S_FMT)] = "VIDIOC_S_FMT",
320 [_IOC_NR(VIDIOC_G_MPEGCOMP)] = "VIDIOC_G_MPEGCOMP",
321 [_IOC_NR(VIDIOC_S_MPEGCOMP)] = "VIDIOC_S_MPEGCOMP",
322 [_IOC_NR(VIDIOC_REQBUFS)] = "VIDIOC_REQBUFS", 320 [_IOC_NR(VIDIOC_REQBUFS)] = "VIDIOC_REQBUFS",
323 [_IOC_NR(VIDIOC_QUERYBUF)] = "VIDIOC_QUERYBUF", 321 [_IOC_NR(VIDIOC_QUERYBUF)] = "VIDIOC_QUERYBUF",
324 [_IOC_NR(VIDIOC_G_FBUF)] = "VIDIOC_G_FBUF", 322 [_IOC_NR(VIDIOC_G_FBUF)] = "VIDIOC_G_FBUF",
diff --git a/drivers/media/video/videobuf-core.c b/drivers/media/video/videobuf-core.c
index 5599a36490..89a44f16f0 100644
--- a/drivers/media/video/videobuf-core.c
+++ b/drivers/media/video/videobuf-core.c
@@ -967,6 +967,7 @@ int videobuf_cgmbuf(struct videobuf_queue *q,
967 967
968 return 0; 968 return 0;
969} 969}
970EXPORT_SYMBOL_GPL(videobuf_cgmbuf);
970#endif 971#endif
971 972
972/* --------------------------------------------------------------------- */ 973/* --------------------------------------------------------------------- */
@@ -985,7 +986,6 @@ EXPORT_SYMBOL_GPL(videobuf_reqbufs);
985EXPORT_SYMBOL_GPL(videobuf_querybuf); 986EXPORT_SYMBOL_GPL(videobuf_querybuf);
986EXPORT_SYMBOL_GPL(videobuf_qbuf); 987EXPORT_SYMBOL_GPL(videobuf_qbuf);
987EXPORT_SYMBOL_GPL(videobuf_dqbuf); 988EXPORT_SYMBOL_GPL(videobuf_dqbuf);
988EXPORT_SYMBOL_GPL(videobuf_cgmbuf);
989EXPORT_SYMBOL_GPL(videobuf_streamon); 989EXPORT_SYMBOL_GPL(videobuf_streamon);
990EXPORT_SYMBOL_GPL(videobuf_streamoff); 990EXPORT_SYMBOL_GPL(videobuf_streamoff);
991 991
diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index 3eb6123227..9ab94a749d 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -27,6 +27,7 @@
27#include <linux/pci.h> 27#include <linux/pci.h>
28#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
29#include <linux/pagemap.h> 29#include <linux/pagemap.h>
30#include <linux/scatterlist.h>
30#include <asm/page.h> 31#include <asm/page.h>
31#include <asm/pgtable.h> 32#include <asm/pgtable.h>
32 33
@@ -60,12 +61,13 @@ videobuf_vmalloc_to_sg(unsigned char *virt, int nr_pages)
60 sglist = kcalloc(nr_pages, sizeof(struct scatterlist), GFP_KERNEL); 61 sglist = kcalloc(nr_pages, sizeof(struct scatterlist), GFP_KERNEL);
61 if (NULL == sglist) 62 if (NULL == sglist)
62 return NULL; 63 return NULL;
64 sg_init_table(sglist, nr_pages);
63 for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) { 65 for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) {
64 pg = vmalloc_to_page(virt); 66 pg = vmalloc_to_page(virt);
65 if (NULL == pg) 67 if (NULL == pg)
66 goto err; 68 goto err;
67 BUG_ON(PageHighMem(pg)); 69 BUG_ON(PageHighMem(pg));
68 sglist[i].page = pg; 70 sg_set_page(&sglist[i], pg);
69 sglist[i].length = PAGE_SIZE; 71 sglist[i].length = PAGE_SIZE;
70 } 72 }
71 return sglist; 73 return sglist;
@@ -86,13 +88,14 @@ videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset)
86 sglist = kcalloc(nr_pages, sizeof(*sglist), GFP_KERNEL); 88 sglist = kcalloc(nr_pages, sizeof(*sglist), GFP_KERNEL);
87 if (NULL == sglist) 89 if (NULL == sglist)
88 return NULL; 90 return NULL;
91 sg_init_table(sglist, nr_pages);
89 92
90 if (NULL == pages[0]) 93 if (NULL == pages[0])
91 goto nopage; 94 goto nopage;
92 if (PageHighMem(pages[0])) 95 if (PageHighMem(pages[0]))
93 /* DMA to highmem pages might not work */ 96 /* DMA to highmem pages might not work */
94 goto highmem; 97 goto highmem;
95 sglist[0].page = pages[0]; 98 sg_set_page(&sglist[0], pages[0]);
96 sglist[0].offset = offset; 99 sglist[0].offset = offset;
97 sglist[0].length = PAGE_SIZE - offset; 100 sglist[0].length = PAGE_SIZE - offset;
98 for (i = 1; i < nr_pages; i++) { 101 for (i = 1; i < nr_pages; i++) {
@@ -100,7 +103,7 @@ videobuf_pages_to_sg(struct page **pages, int nr_pages, int offset)
100 goto nopage; 103 goto nopage;
101 if (PageHighMem(pages[i])) 104 if (PageHighMem(pages[i]))
102 goto highmem; 105 goto highmem;
103 sglist[i].page = pages[i]; 106 sg_set_page(&sglist[i], pages[i]);
104 sglist[i].length = PAGE_SIZE; 107 sglist[i].length = PAGE_SIZE;
105 } 108 }
106 return sglist; 109 return sglist;
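[Editor's note: the videobuf conversion above follows one pattern throughout — allocate, sg_init_table(), then sg_set_page() per entry. A minimal sketch of that pattern, mirroring the patched videobuf_vmalloc_to_sg() and assuming the transitional two-argument sg_set_page() this series uses:]

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>

/* Build an sg list over a vmalloc'ed buffer, one page per entry. */
static struct scatterlist *sketch_vmalloc_to_sg(unsigned char *virt, int nr_pages)
{
	struct scatterlist *sglist;
	int i;

	sglist = kcalloc(nr_pages, sizeof(*sglist), GFP_KERNEL);
	if (!sglist)
		return NULL;
	/* Initialize all entries (and the end marker) before filling them. */
	sg_init_table(sglist, nr_pages);
	for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) {
		struct page *pg = vmalloc_to_page(virt);
		if (!pg) {
			kfree(sglist);
			return NULL;
		}
		sg_set_page(&sglist[i], pg);	/* two-arg form of this series */
		sglist[i].length = PAGE_SIZE;
	}
	return sglist;
}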
diff --git a/drivers/media/video/videocodec.c b/drivers/media/video/videocodec.c
index f2bbd7a4d5..87951ec825 100644
--- a/drivers/media/video/videocodec.c
+++ b/drivers/media/video/videocodec.c
@@ -86,8 +86,8 @@ videocodec_attach (struct videocodec_master *master)
86 } 86 }
87 87
88 dprintk(2, 88 dprintk(2,
89 "videocodec_attach: '%s', type: %x, flags %lx, magic %lx\n", 89 "videocodec_attach: '%s', flags %lx, magic %lx\n",
90 master->name, master->type, master->flags, master->magic); 90 master->name, master->flags, master->magic);
91 91
92 if (!h) { 92 if (!h) {
93 dprintk(1, 93 dprintk(1,
diff --git a/drivers/media/video/videodev.c b/drivers/media/video/videodev.c
index 8d8e517b34..9611c39902 100644
--- a/drivers/media/video/videodev.c
+++ b/drivers/media/video/videodev.c
@@ -1313,48 +1313,6 @@ static int __video_do_ioctl(struct inode *inode, struct file *file,
1313 ret=vfd->vidioc_cropcap(file, fh, p); 1313 ret=vfd->vidioc_cropcap(file, fh, p);
1314 break; 1314 break;
1315 } 1315 }
1316 case VIDIOC_G_MPEGCOMP:
1317 {
1318 struct v4l2_mpeg_compression *p=arg;
1319
1320 /*FIXME: Several fields not shown */
1321 if (!vfd->vidioc_g_mpegcomp)
1322 break;
1323 ret=vfd->vidioc_g_mpegcomp(file, fh, p);
1324 if (!ret)
1325 dbgarg (cmd, "ts_pid_pmt=%d, ts_pid_audio=%d,"
1326 " ts_pid_video=%d, ts_pid_pcr=%d, "
1327 "ps_size=%d, au_sample_rate=%d, "
1328 "au_pesid=%c, vi_frame_rate=%d, "
1329 "vi_frames_per_gop=%d, "
1330 "vi_bframes_count=%d, vi_pesid=%c\n",
1331 p->ts_pid_pmt,p->ts_pid_audio,
1332 p->ts_pid_video,p->ts_pid_pcr,
1333 p->ps_size, p->au_sample_rate,
1334 p->au_pesid, p->vi_frame_rate,
1335 p->vi_frames_per_gop,
1336 p->vi_bframes_count, p->vi_pesid);
1337 break;
1338 }
1339 case VIDIOC_S_MPEGCOMP:
1340 {
1341 struct v4l2_mpeg_compression *p=arg;
1342 /*FIXME: Several fields not shown */
1343 if (!vfd->vidioc_s_mpegcomp)
1344 break;
1345 dbgarg (cmd, "ts_pid_pmt=%d, ts_pid_audio=%d, "
1346 "ts_pid_video=%d, ts_pid_pcr=%d, ps_size=%d, "
1347 "au_sample_rate=%d, au_pesid=%c, "
1348 "vi_frame_rate=%d, vi_frames_per_gop=%d, "
1349 "vi_bframes_count=%d, vi_pesid=%c\n",
1350 p->ts_pid_pmt,p->ts_pid_audio, p->ts_pid_video,
1351 p->ts_pid_pcr, p->ps_size, p->au_sample_rate,
1352 p->au_pesid, p->vi_frame_rate,
1353 p->vi_frames_per_gop, p->vi_bframes_count,
1354 p->vi_pesid);
1355 ret=vfd->vidioc_s_mpegcomp(file, fh, p);
1356 break;
1357 }
1358 case VIDIOC_G_JPEGCOMP: 1316 case VIDIOC_G_JPEGCOMP:
1359 { 1317 {
1360 struct v4l2_jpegcompression *p=arg; 1318 struct v4l2_jpegcompression *p=arg;
diff --git a/drivers/media/video/vivi.c b/drivers/media/video/vivi.c
index b532aa280a..ee73dc7513 100644
--- a/drivers/media/video/vivi.c
+++ b/drivers/media/video/vivi.c
@@ -1119,7 +1119,6 @@ static const struct file_operations vivi_fops = {
1119static struct video_device vivi = { 1119static struct video_device vivi = {
1120 .name = "vivi", 1120 .name = "vivi",
1121 .type = VID_TYPE_CAPTURE, 1121 .type = VID_TYPE_CAPTURE,
1122 .hardware = 0,
1123 .fops = &vivi_fops, 1122 .fops = &vivi_fops,
1124 .minor = -1, 1123 .minor = -1,
1125// .release = video_device_release, 1124// .release = video_device_release,
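[Editor's note: the .hardware removals in this and the following drivers all reduce to the same shape. A hypothetical video_device template after the change — names here are illustrative, not from any of these drivers:]

#include <linux/videodev.h>

static struct file_operations example_fops;	/* assumed fops, not from the patch */

static struct video_device example_template = {
	.name  = "example-cam",
	.type  = VID_TYPE_CAPTURE,
	/* no .hardware initializer any more */
	.fops  = &example_fops,
	.minor = -1,
};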
diff --git a/drivers/media/video/w9966.c b/drivers/media/video/w9966.c
index 4736640863..08aaae07c7 100644
--- a/drivers/media/video/w9966.c
+++ b/drivers/media/video/w9966.c
@@ -196,7 +196,6 @@ static struct video_device w9966_template = {
196 .owner = THIS_MODULE, 196 .owner = THIS_MODULE,
197 .name = W9966_DRIVERNAME, 197 .name = W9966_DRIVERNAME,
198 .type = VID_TYPE_CAPTURE | VID_TYPE_SCALES, 198 .type = VID_TYPE_CAPTURE | VID_TYPE_SCALES,
199 .hardware = VID_HARDWARE_W9966,
200 .fops = &w9966_fops, 199 .fops = &w9966_fops,
201}; 200};
202 201
diff --git a/drivers/media/video/w9968cf.c b/drivers/media/video/w9968cf.c
index 9e7f3e685d..2ae1430f5f 100644
--- a/drivers/media/video/w9968cf.c
+++ b/drivers/media/video/w9968cf.c
@@ -3549,7 +3549,6 @@ w9968cf_usb_probe(struct usb_interface* intf, const struct usb_device_id* id)
3549 strcpy(cam->v4ldev->name, symbolic(camlist, mod_id)); 3549 strcpy(cam->v4ldev->name, symbolic(camlist, mod_id));
3550 cam->v4ldev->owner = THIS_MODULE; 3550 cam->v4ldev->owner = THIS_MODULE;
3551 cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES; 3551 cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES;
3552 cam->v4ldev->hardware = VID_HARDWARE_W9968CF;
3553 cam->v4ldev->fops = &w9968cf_fops; 3552 cam->v4ldev->fops = &w9968cf_fops;
3554 cam->v4ldev->minor = video_nr[dev_nr]; 3553 cam->v4ldev->minor = video_nr[dev_nr];
3555 cam->v4ldev->release = video_device_release; 3554 cam->v4ldev->release = video_device_release;
diff --git a/drivers/media/video/zc0301/zc0301_core.c b/drivers/media/video/zc0301/zc0301_core.c
index 08a93c31c0..2c5665c824 100644
--- a/drivers/media/video/zc0301/zc0301_core.c
+++ b/drivers/media/video/zc0301/zc0301_core.c
@@ -1985,7 +1985,6 @@ zc0301_usb_probe(struct usb_interface* intf, const struct usb_device_id* id)
1985 strcpy(cam->v4ldev->name, "ZC0301[P] PC Camera"); 1985 strcpy(cam->v4ldev->name, "ZC0301[P] PC Camera");
1986 cam->v4ldev->owner = THIS_MODULE; 1986 cam->v4ldev->owner = THIS_MODULE;
1987 cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES; 1987 cam->v4ldev->type = VID_TYPE_CAPTURE | VID_TYPE_SCALES;
1988 cam->v4ldev->hardware = 0;
1989 cam->v4ldev->fops = &zc0301_fops; 1988 cam->v4ldev->fops = &zc0301_fops;
1990 cam->v4ldev->minor = video_nr[dev_nr]; 1989 cam->v4ldev->minor = video_nr[dev_nr];
1991 cam->v4ldev->release = video_device_release; 1990 cam->v4ldev->release = video_device_release;
diff --git a/drivers/media/video/zoran_card.c b/drivers/media/video/zoran_card.c
index 48da36a15f..6e0ac4c5c3 100644
--- a/drivers/media/video/zoran_card.c
+++ b/drivers/media/video/zoran_card.c
@@ -1235,8 +1235,14 @@ zoran_setup_videocodec (struct zoran *zr,
1235 return m; 1235 return m;
1236 } 1236 }
1237 1237
1238 m->magic = 0L; /* magic not used */ 1238 /* magic and type are unused for the master struct. They make
1239 m->type = VID_HARDWARE_ZR36067; 1239 sense only in codec structs.
1240 In the past, .type was initialized to the old V4L1 .hardware
1241 value, VID_HARDWARE_ZR36067.
1242 */
1243 m->magic = 0L;
1244 m->type = 0;
1245
1240 m->flags = CODEC_FLAG_ENCODER | CODEC_FLAG_DECODER; 1246 m->flags = CODEC_FLAG_ENCODER | CODEC_FLAG_DECODER;
1241 strncpy(m->name, ZR_DEVNAME(zr), sizeof(m->name)); 1247 strncpy(m->name, ZR_DEVNAME(zr), sizeof(m->name));
1242 m->data = zr; 1248 m->data = zr;
diff --git a/drivers/media/video/zoran_driver.c b/drivers/media/video/zoran_driver.c
index 419e5af785..dd3d7d2c8b 100644
--- a/drivers/media/video/zoran_driver.c
+++ b/drivers/media/video/zoran_driver.c
@@ -60,7 +60,6 @@
60 60
61#include <linux/spinlock.h> 61#include <linux/spinlock.h>
62#define MAP_NR(x) virt_to_page(x) 62#define MAP_NR(x) virt_to_page(x)
63#define ZORAN_HARDWARE VID_HARDWARE_ZR36067
64#define ZORAN_VID_TYPE ( \ 63#define ZORAN_VID_TYPE ( \
65 VID_TYPE_CAPTURE | \ 64 VID_TYPE_CAPTURE | \
66 VID_TYPE_OVERLAY | \ 65 VID_TYPE_OVERLAY | \
@@ -4659,7 +4658,6 @@ struct video_device zoran_template __devinitdata = {
4659#ifdef CONFIG_VIDEO_V4L2 4658#ifdef CONFIG_VIDEO_V4L2
4660 .type2 = ZORAN_V4L2_VID_FLAGS, 4659 .type2 = ZORAN_V4L2_VID_FLAGS,
4661#endif 4660#endif
4662 .hardware = ZORAN_HARDWARE,
4663 .fops = &zoran_fops, 4661 .fops = &zoran_fops,
4664 .release = &zoran_vdev_release, 4662 .release = &zoran_vdev_release,
4665 .minor = -1 4663 .minor = -1
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index a5d0354bbb..9203a0b221 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -13,6 +13,7 @@
13#include <linux/blkdev.h> 13#include <linux/blkdev.h>
14#include <linux/freezer.h> 14#include <linux/freezer.h>
15#include <linux/kthread.h> 15#include <linux/kthread.h>
16#include <linux/scatterlist.h>
16 17
17#include <linux/mmc/card.h> 18#include <linux/mmc/card.h>
18#include <linux/mmc/host.h> 19#include <linux/mmc/host.h>
@@ -153,19 +154,21 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
153 blk_queue_max_hw_segments(mq->queue, bouncesz / 512); 154 blk_queue_max_hw_segments(mq->queue, bouncesz / 512);
154 blk_queue_max_segment_size(mq->queue, bouncesz); 155 blk_queue_max_segment_size(mq->queue, bouncesz);
155 156
156 mq->sg = kzalloc(sizeof(struct scatterlist), 157 mq->sg = kmalloc(sizeof(struct scatterlist),
157 GFP_KERNEL); 158 GFP_KERNEL);
158 if (!mq->sg) { 159 if (!mq->sg) {
159 ret = -ENOMEM; 160 ret = -ENOMEM;
160 goto cleanup_queue; 161 goto cleanup_queue;
161 } 162 }
163 sg_init_table(mq->sg, 1);
162 164
163 mq->bounce_sg = kzalloc(sizeof(struct scatterlist) * 165 mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
164 bouncesz / 512, GFP_KERNEL); 166 bouncesz / 512, GFP_KERNEL);
165 if (!mq->bounce_sg) { 167 if (!mq->bounce_sg) {
166 ret = -ENOMEM; 168 ret = -ENOMEM;
167 goto cleanup_queue; 169 goto cleanup_queue;
168 } 170 }
171 sg_init_table(mq->bounce_sg, bouncesz / 512);
169 } 172 }
170 } 173 }
171#endif 174#endif
@@ -302,12 +305,12 @@ static void copy_sg(struct scatterlist *dst, unsigned int dst_len,
302 BUG_ON(dst_len == 0); 305 BUG_ON(dst_len == 0);
303 306
304 if (dst_size == 0) { 307 if (dst_size == 0) {
305 dst_buf = page_address(dst->page) + dst->offset; 308 dst_buf = sg_virt(dst);
306 dst_size = dst->length; 309 dst_size = dst->length;
307 } 310 }
308 311
309 if (src_size == 0) { 312 if (src_size == 0) {
310 src_buf = page_address(src->page) + src->offset; 313 src_buf = sg_virt(src);
311 src_size = src->length; 314 src_size = src->length;
312 } 315 }
313 316
@@ -353,9 +356,7 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq)
353 return 1; 356 return 1;
354 } 357 }
355 358
356 mq->sg[0].page = virt_to_page(mq->bounce_buf); 359 sg_init_one(mq->sg, mq->bounce_buf, 0);
357 mq->sg[0].offset = offset_in_page(mq->bounce_buf);
358 mq->sg[0].length = 0;
359 360
360 while (sg_len) { 361 while (sg_len) {
361 mq->sg[0].length += mq->bounce_sg[sg_len - 1].length; 362 mq->sg[0].length += mq->bounce_sg[sg_len - 1].length;
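[Editor's note: two helpers carry most of this conversion. sg_virt() is shorthand for the open-coded lowmem lookup, and sg_init_one() replaces the manual page/offset/length triple. A minimal sketch under the sg API of this series (hypothetical function names):]

#include <linux/mm.h>
#include <linux/scatterlist.h>

/* sg_virt(sg) is equivalent to the old open-coded form: */
static void *sketch_sg_buffer(struct scatterlist *sg)
{
	return page_address(sg_page(sg)) + sg->offset;	/* == sg_virt(sg) */
}

/* sg_init_one() sets up a single-entry list over a lowmem buffer;
 * length 0 here because the caller accumulates it afterwards,
 * exactly as the patched mmc_queue_map_sg() does. */
static void sketch_map_bounce(struct scatterlist *sg, void *bounce_buf)
{
	sg_init_one(sg, bounce_buf, 0);
}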
diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index 7a452c2ad1..b1edcefdd4 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -149,7 +149,7 @@ static inline void at91_mci_sg_to_dma(struct at91mci_host *host, struct mmc_data
149 149
150 sg = &data->sg[i]; 150 sg = &data->sg[i];
151 151
152 sgbuffer = kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset; 152 sgbuffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
153 amount = min(size, sg->length); 153 amount = min(size, sg->length);
154 size -= amount; 154 size -= amount;
155 155
@@ -226,7 +226,7 @@ static void at91_mci_pre_dma_read(struct at91mci_host *host)
226 sg = &data->sg[host->transfer_index++]; 226 sg = &data->sg[host->transfer_index++];
227 pr_debug("sg = %p\n", sg); 227 pr_debug("sg = %p\n", sg);
228 228
229 sg->dma_address = dma_map_page(NULL, sg->page, sg->offset, sg->length, DMA_FROM_DEVICE); 229 sg->dma_address = dma_map_page(NULL, sg_page(sg), sg->offset, sg->length, DMA_FROM_DEVICE);
230 230
231 pr_debug("dma address = %08X, length = %d\n", sg->dma_address, sg->length); 231 pr_debug("dma address = %08X, length = %d\n", sg->dma_address, sg->length);
232 232
@@ -283,7 +283,7 @@ static void at91_mci_post_dma_read(struct at91mci_host *host)
283 int index; 283 int index;
284 284
285 /* Swap the contents of the buffer */ 285 /* Swap the contents of the buffer */
286 buffer = kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset; 286 buffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
287 pr_debug("buffer = %p, length = %d\n", buffer, sg->length); 287 pr_debug("buffer = %p, length = %d\n", buffer, sg->length);
288 288
289 for (index = 0; index < (sg->length / 4); index++) 289 for (index = 0; index < (sg->length / 4); index++)
@@ -292,7 +292,7 @@ static void at91_mci_post_dma_read(struct at91mci_host *host)
292 kunmap_atomic(buffer, KM_BIO_SRC_IRQ); 292 kunmap_atomic(buffer, KM_BIO_SRC_IRQ);
293 } 293 }
294 294
295 flush_dcache_page(sg->page); 295 flush_dcache_page(sg_page(sg));
296 } 296 }
297 297
298 /* Is there another transfer to trigger? */ 298 /* Is there another transfer to trigger? */
diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c
index 92c4d0dfee..bcbb6d247b 100644
--- a/drivers/mmc/host/au1xmmc.c
+++ b/drivers/mmc/host/au1xmmc.c
@@ -340,7 +340,7 @@ static void au1xmmc_send_pio(struct au1xmmc_host *host)
340 340
341 /* This is the pointer to the data buffer */ 341 /* This is the pointer to the data buffer */
342 sg = &data->sg[host->pio.index]; 342 sg = &data->sg[host->pio.index];
343 sg_ptr = page_address(sg->page) + sg->offset + host->pio.offset; 343 sg_ptr = sg_virt(sg) + host->pio.offset;
344 344
345 /* This is the space left inside the buffer */ 345 /* This is the space left inside the buffer */
346 sg_len = data->sg[host->pio.index].length - host->pio.offset; 346 sg_len = data->sg[host->pio.index].length - host->pio.offset;
@@ -400,7 +400,7 @@ static void au1xmmc_receive_pio(struct au1xmmc_host *host)
400 400
401 if (host->pio.index < host->dma.len) { 401 if (host->pio.index < host->dma.len) {
402 sg = &data->sg[host->pio.index]; 402 sg = &data->sg[host->pio.index];
403 sg_ptr = page_address(sg->page) + sg->offset + host->pio.offset; 403 sg_ptr = sg_virt(sg) + host->pio.offset;
404 404
405 /* This is the space left inside the buffer */ 405 /* This is the space left inside the buffer */
406 sg_len = sg_dma_len(&data->sg[host->pio.index]) - host->pio.offset; 406 sg_len = sg_dma_len(&data->sg[host->pio.index]) - host->pio.offset;
@@ -613,14 +613,11 @@ au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data)
613 613
614 if (host->flags & HOST_F_XMIT){ 614 if (host->flags & HOST_F_XMIT){
615 ret = au1xxx_dbdma_put_source_flags(channel, 615 ret = au1xxx_dbdma_put_source_flags(channel,
616 (void *) (page_address(sg->page) + 616 (void *) sg_virt(sg), len, flags);
617 sg->offset),
618 len, flags);
619 } 617 }
620 else { 618 else {
621 ret = au1xxx_dbdma_put_dest_flags(channel, 619 ret = au1xxx_dbdma_put_dest_flags(channel,
622 (void *) (page_address(sg->page) + 620 (void *) sg_virt(sg),
623 sg->offset),
624 len, flags); 621 len, flags);
625 } 622 }
626 623
diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c
index 6ebc41e759..fc72e1fadb 100644
--- a/drivers/mmc/host/imxmmc.c
+++ b/drivers/mmc/host/imxmmc.c
@@ -262,7 +262,7 @@ static void imxmci_setup_data(struct imxmci_host *host, struct mmc_data *data)
262 } 262 }
263 263
264 /* Convert back to virtual address */ 264 /* Convert back to virtual address */
265 host->data_ptr = (u16*)(page_address(data->sg->page) + data->sg->offset); 265 host->data_ptr = (u16*)sg_virt(sg);
266 host->data_cnt = 0; 266 host->data_cnt = 0;
267 267
268 clear_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events); 268 clear_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events);
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 7ae18eaed6..12c2d807c1 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -813,7 +813,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
813 && dir == DMA_FROM_DEVICE) 813 && dir == DMA_FROM_DEVICE)
814 dir = DMA_BIDIRECTIONAL; 814 dir = DMA_BIDIRECTIONAL;
815 815
816 dma_addr = dma_map_page(dma_dev, sg->page, 0, 816 dma_addr = dma_map_page(dma_dev, sg_page(sg), 0,
817 PAGE_SIZE, dir); 817 PAGE_SIZE, dir);
818 if (direction == DMA_TO_DEVICE) 818 if (direction == DMA_TO_DEVICE)
819 t->tx_dma = dma_addr + sg->offset; 819 t->tx_dma = dma_addr + sg->offset;
@@ -822,7 +822,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
822 } 822 }
823 823
824 /* allow pio too; we don't allow highmem */ 824 /* allow pio too; we don't allow highmem */
825 kmap_addr = kmap(sg->page); 825 kmap_addr = kmap(sg_page(sg));
826 if (direction == DMA_TO_DEVICE) 826 if (direction == DMA_TO_DEVICE)
827 t->tx_buf = kmap_addr + sg->offset; 827 t->tx_buf = kmap_addr + sg->offset;
828 else 828 else
@@ -855,8 +855,8 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
855 855
856 /* discard mappings */ 856 /* discard mappings */
857 if (direction == DMA_FROM_DEVICE) 857 if (direction == DMA_FROM_DEVICE)
858 flush_kernel_dcache_page(sg->page); 858 flush_kernel_dcache_page(sg_page(sg));
859 kunmap(sg->page); 859 kunmap(sg_page(sg));
860 if (dma_dev) 860 if (dma_dev)
861 dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir); 861 dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir);
862 862
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 000e6a9197..0f39c490f0 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -169,7 +169,7 @@ static inline char *mmci_kmap_atomic(struct mmci_host *host, unsigned long *flag
169 struct scatterlist *sg = host->sg_ptr; 169 struct scatterlist *sg = host->sg_ptr;
170 170
171 local_irq_save(*flags); 171 local_irq_save(*flags);
172 return kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset; 172 return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
173} 173}
174 174
175static inline void mmci_kunmap_atomic(struct mmci_host *host, void *buffer, unsigned long *flags) 175static inline void mmci_kunmap_atomic(struct mmci_host *host, void *buffer, unsigned long *flags)
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 60a67dfcda..971e18b91f 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -24,10 +24,10 @@
24#include <linux/mmc/host.h> 24#include <linux/mmc/host.h>
25#include <linux/mmc/card.h> 25#include <linux/mmc/card.h>
26#include <linux/clk.h> 26#include <linux/clk.h>
27#include <linux/scatterlist.h>
27 28
28#include <asm/io.h> 29#include <asm/io.h>
29#include <asm/irq.h> 30#include <asm/irq.h>
30#include <asm/scatterlist.h>
31#include <asm/mach-types.h> 31#include <asm/mach-types.h>
32 32
33#include <asm/arch/board.h> 33#include <asm/arch/board.h>
@@ -383,7 +383,7 @@ mmc_omap_sg_to_buf(struct mmc_omap_host *host)
383 383
384 sg = host->data->sg + host->sg_idx; 384 sg = host->data->sg + host->sg_idx;
385 host->buffer_bytes_left = sg->length; 385 host->buffer_bytes_left = sg->length;
386 host->buffer = page_address(sg->page) + sg->offset; 386 host->buffer = sg_virt(sg);
387 if (host->buffer_bytes_left > host->total_bytes_left) 387 if (host->buffer_bytes_left > host->total_bytes_left)
388 host->buffer_bytes_left = host->total_bytes_left; 388 host->buffer_bytes_left = host->total_bytes_left;
389} 389}
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index b397121b94..d7c5b94d8c 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -13,6 +13,7 @@
13#include <linux/highmem.h> 13#include <linux/highmem.h>
14#include <linux/pci.h> 14#include <linux/pci.h>
15#include <linux/dma-mapping.h> 15#include <linux/dma-mapping.h>
16#include <linux/scatterlist.h>
16 17
17#include <linux/mmc/host.h> 18#include <linux/mmc/host.h>
18 19
@@ -231,7 +232,7 @@ static void sdhci_deactivate_led(struct sdhci_host *host)
231 232
232static inline char* sdhci_sg_to_buffer(struct sdhci_host* host) 233static inline char* sdhci_sg_to_buffer(struct sdhci_host* host)
233{ 234{
234 return page_address(host->cur_sg->page) + host->cur_sg->offset; 235 return sg_virt(host->cur_sg);
235} 236}
236 237
237static inline int sdhci_next_sg(struct sdhci_host* host) 238static inline int sdhci_next_sg(struct sdhci_host* host)
diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c
index 9b904795eb..c11a3d2560 100644
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c
@@ -192,7 +192,7 @@ static void tifm_sd_transfer_data(struct tifm_sd *host)
192 } 192 }
193 off = sg[host->sg_pos].offset + host->block_pos; 193 off = sg[host->sg_pos].offset + host->block_pos;
194 194
195 pg = nth_page(sg[host->sg_pos].page, off >> PAGE_SHIFT); 195 pg = nth_page(sg_page(&sg[host->sg_pos]), off >> PAGE_SHIFT);
196 p_off = offset_in_page(off); 196 p_off = offset_in_page(off);
197 p_cnt = PAGE_SIZE - p_off; 197 p_cnt = PAGE_SIZE - p_off;
198 p_cnt = min(p_cnt, cnt); 198 p_cnt = min(p_cnt, cnt);
@@ -241,18 +241,18 @@ static void tifm_sd_bounce_block(struct tifm_sd *host, struct mmc_data *r_data)
241 } 241 }
242 off = sg[host->sg_pos].offset + host->block_pos; 242 off = sg[host->sg_pos].offset + host->block_pos;
243 243
244 pg = nth_page(sg[host->sg_pos].page, off >> PAGE_SHIFT); 244 pg = nth_page(sg_page(&sg[host->sg_pos]), off >> PAGE_SHIFT);
245 p_off = offset_in_page(off); 245 p_off = offset_in_page(off);
246 p_cnt = PAGE_SIZE - p_off; 246 p_cnt = PAGE_SIZE - p_off;
247 p_cnt = min(p_cnt, cnt); 247 p_cnt = min(p_cnt, cnt);
248 p_cnt = min(p_cnt, t_size); 248 p_cnt = min(p_cnt, t_size);
249 249
250 if (r_data->flags & MMC_DATA_WRITE) 250 if (r_data->flags & MMC_DATA_WRITE)
251 tifm_sd_copy_page(host->bounce_buf.page, 251 tifm_sd_copy_page(sg_page(&host->bounce_buf),
252 r_data->blksz - t_size, 252 r_data->blksz - t_size,
253 pg, p_off, p_cnt); 253 pg, p_off, p_cnt);
254 else if (r_data->flags & MMC_DATA_READ) 254 else if (r_data->flags & MMC_DATA_READ)
255 tifm_sd_copy_page(pg, p_off, host->bounce_buf.page, 255 tifm_sd_copy_page(pg, p_off, sg_page(&host->bounce_buf),
256 r_data->blksz - t_size, p_cnt); 256 r_data->blksz - t_size, p_cnt);
257 257
258 t_size -= p_cnt; 258 t_size -= p_cnt;
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index 80db11c05f..fa4c8c53cc 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -269,7 +269,7 @@ static inline int wbsd_next_sg(struct wbsd_host *host)
269 269
270static inline char *wbsd_sg_to_buffer(struct wbsd_host *host) 270static inline char *wbsd_sg_to_buffer(struct wbsd_host *host)
271{ 271{
272 return page_address(host->cur_sg->page) + host->cur_sg->offset; 272 return sg_virt(host->cur_sg);
273} 273}
274 274
275static inline void wbsd_sg_to_dma(struct wbsd_host *host, struct mmc_data *data) 275static inline void wbsd_sg_to_dma(struct wbsd_host *host, struct mmc_data *data)
@@ -283,7 +283,7 @@ static inline void wbsd_sg_to_dma(struct wbsd_host *host, struct mmc_data *data)
283 len = data->sg_len; 283 len = data->sg_len;
284 284
285 for (i = 0; i < len; i++) { 285 for (i = 0; i < len; i++) {
286 sgbuf = page_address(sg[i].page) + sg[i].offset; 286 sgbuf = sg_virt(&sg[i]);
287 memcpy(dmabuf, sgbuf, sg[i].length); 287 memcpy(dmabuf, sgbuf, sg[i].length);
288 dmabuf += sg[i].length; 288 dmabuf += sg[i].length;
289 } 289 }
@@ -300,7 +300,7 @@ static inline void wbsd_dma_to_sg(struct wbsd_host *host, struct mmc_data *data)
300 len = data->sg_len; 300 len = data->sg_len;
301 301
302 for (i = 0; i < len; i++) { 302 for (i = 0; i < len; i++) {
303 sgbuf = page_address(sg[i].page) + sg[i].offset; 303 sgbuf = sg_virt(&sg[i]);
304 memcpy(sgbuf, dmabuf, sg[i].length); 304 memcpy(sgbuf, dmabuf, sg[i].length);
305 dmabuf += sg[i].length; 305 dmabuf += sg[i].length;
306 } 306 }
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 3aa3dca56a..a9eb1c5162 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -85,6 +85,7 @@ static int cfi_intelext_point (struct mtd_info *mtd, loff_t from, size_t len,
85static void cfi_intelext_unpoint (struct mtd_info *mtd, u_char *addr, loff_t from, 85static void cfi_intelext_unpoint (struct mtd_info *mtd, u_char *addr, loff_t from,
86 size_t len); 86 size_t len);
87 87
88static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long adr, int mode);
88static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode); 89static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode);
89static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr); 90static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr);
90#include "fwh_lock.h" 91#include "fwh_lock.h"
@@ -641,73 +642,13 @@ static int cfi_intelext_partition_fixup(struct mtd_info *mtd,
641/* 642/*
642 * *********** CHIP ACCESS FUNCTIONS *********** 643 * *********** CHIP ACCESS FUNCTIONS ***********
643 */ 644 */
644 645static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long adr, int mode)
645static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode)
646{ 646{
647 DECLARE_WAITQUEUE(wait, current); 647 DECLARE_WAITQUEUE(wait, current);
648 struct cfi_private *cfi = map->fldrv_priv; 648 struct cfi_private *cfi = map->fldrv_priv;
649 map_word status, status_OK = CMD(0x80), status_PWS = CMD(0x01); 649 map_word status, status_OK = CMD(0x80), status_PWS = CMD(0x01);
650 unsigned long timeo;
651 struct cfi_pri_intelext *cfip = cfi->cmdset_priv; 650 struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
652 651 unsigned long timeo = jiffies + HZ;
653 resettime:
654 timeo = jiffies + HZ;
655 retry:
656 if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE || mode == FL_SHUTDOWN)) {
657 /*
658 * OK. We have possibility for contension on the write/erase
659 * operations which are global to the real chip and not per
660 * partition. So let's fight it over in the partition which
661 * currently has authority on the operation.
662 *
663 * The rules are as follows:
664 *
665 * - any write operation must own shared->writing.
666 *
667 * - any erase operation must own _both_ shared->writing and
668 * shared->erasing.
669 *
670 * - contension arbitration is handled in the owner's context.
671 *
672 * The 'shared' struct can be read and/or written only when
673 * its lock is taken.
674 */
675 struct flchip_shared *shared = chip->priv;
676 struct flchip *contender;
677 spin_lock(&shared->lock);
678 contender = shared->writing;
679 if (contender && contender != chip) {
680 /*
681 * The engine to perform desired operation on this
682 * partition is already in use by someone else.
683 * Let's fight over it in the context of the chip
684 * currently using it. If it is possible to suspend,
685 * that other partition will do just that, otherwise
686 * it'll happily send us to sleep. In any case, when
687 * get_chip returns success we're clear to go ahead.
688 */
689 int ret = spin_trylock(contender->mutex);
690 spin_unlock(&shared->lock);
691 if (!ret)
692 goto retry;
693 spin_unlock(chip->mutex);
694 ret = get_chip(map, contender, contender->start, mode);
695 spin_lock(chip->mutex);
696 if (ret) {
697 spin_unlock(contender->mutex);
698 return ret;
699 }
700 timeo = jiffies + HZ;
701 spin_lock(&shared->lock);
702 spin_unlock(contender->mutex);
703 }
704
705 /* We now own it */
706 shared->writing = chip;
707 if (mode == FL_ERASING)
708 shared->erasing = chip;
709 spin_unlock(&shared->lock);
710 }
711 652
712 switch (chip->state) { 653 switch (chip->state) {
713 654
@@ -722,16 +663,11 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
722 if (chip->priv && map_word_andequal(map, status, status_PWS, status_PWS)) 663 if (chip->priv && map_word_andequal(map, status, status_PWS, status_PWS))
723 break; 664 break;
724 665
725 if (time_after(jiffies, timeo)) {
726 printk(KERN_ERR "%s: Waiting for chip to be ready timed out. Status %lx\n",
727 map->name, status.x[0]);
728 return -EIO;
729 }
730 spin_unlock(chip->mutex); 666 spin_unlock(chip->mutex);
731 cfi_udelay(1); 667 cfi_udelay(1);
732 spin_lock(chip->mutex); 668 spin_lock(chip->mutex);
733 /* Someone else might have been playing with it. */ 669 /* Someone else might have been playing with it. */
734 goto retry; 670 return -EAGAIN;
735 } 671 }
736 672
737 case FL_READY: 673 case FL_READY:
@@ -809,10 +745,82 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
809 schedule(); 745 schedule();
810 remove_wait_queue(&chip->wq, &wait); 746 remove_wait_queue(&chip->wq, &wait);
811 spin_lock(chip->mutex); 747 spin_lock(chip->mutex);
812 goto resettime; 748 return -EAGAIN;
813 } 749 }
814} 750}
815 751
752static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode)
753{
754 int ret;
755
756 retry:
757 if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING
758 || mode == FL_OTP_WRITE || mode == FL_SHUTDOWN)) {
759 /*
760 * OK. We have possibility for contention on the write/erase
761 * operations which are global to the real chip and not per
762 * partition. So let's fight it over in the partition which
763 * currently has authority on the operation.
764 *
765 * The rules are as follows:
766 *
767 * - any write operation must own shared->writing.
768 *
769 * - any erase operation must own _both_ shared->writing and
770 * shared->erasing.
771 *
772 * - contention arbitration is handled in the owner's context.
773 *
774 * The 'shared' struct can be read and/or written only when
775 * its lock is taken.
776 */
777 struct flchip_shared *shared = chip->priv;
778 struct flchip *contender;
779 spin_lock(&shared->lock);
780 contender = shared->writing;
781 if (contender && contender != chip) {
782 /*
783 * The engine to perform desired operation on this
784 * partition is already in use by someone else.
785 * Let's fight over it in the context of the chip
786 * currently using it. If it is possible to suspend,
787 * that other partition will do just that, otherwise
788 * it'll happily send us to sleep. In any case, when
789 * get_chip returns success we're clear to go ahead.
790 */
791 ret = spin_trylock(contender->mutex);
792 spin_unlock(&shared->lock);
793 if (!ret)
794 goto retry;
795 spin_unlock(chip->mutex);
796 ret = chip_ready(map, contender, contender->start, mode);
797 spin_lock(chip->mutex);
798
799 if (ret == -EAGAIN) {
800 spin_unlock(contender->mutex);
801 goto retry;
802 }
803 if (ret) {
804 spin_unlock(contender->mutex);
805 return ret;
806 }
807 spin_lock(&shared->lock);
808 spin_unlock(contender->mutex);
809 }
810
811 /* We now own it */
812 shared->writing = chip;
813 if (mode == FL_ERASING)
814 shared->erasing = chip;
815 spin_unlock(&shared->lock);
816 }
817 ret = chip_ready(map, chip, adr, mode);
818 if (ret == -EAGAIN)
819 goto retry;
820
821 return ret;
822}
823
816static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr) 824static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr)
817{ 825{
818 struct cfi_private *cfi = map->fldrv_priv; 826 struct cfi_private *cfi = map->fldrv_priv;
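[Editor's note: the refactor above splits the old self-looping get_chip() into a chip_ready() that merely reports -EAGAIN and a get_chip() wrapper that owns the retry. The control-flow skeleton, reduced to its essentials with hypothetical names and types:]

#include <linux/errno.h>

struct resource_state { int busy; };	/* hypothetical stand-in for flchip */

/* The inner check never loops itself; it reports "not yet" with -EAGAIN. */
static int sketch_ready(struct resource_state *st)
{
	if (st->busy)
		return -EAGAIN;
	return 0;
}

static int sketch_acquire(struct resource_state *st)
{
	int ret;

	do {
		ret = sketch_ready(st);	/* may wait or sleep internally */
	} while (ret == -EAGAIN);	/* the caller owns the retry */
	return ret;			/* 0 or a real error */
}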
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 8f9c3baeb3..246d4512f6 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -300,7 +300,7 @@ config MTD_NAND_PLATFORM
300 via platform_data. 300 via platform_data.
301 301
302config MTD_ALAUDA 302config MTD_ALAUDA
303 tristate "MTD driver for Olympus MAUSB-10 and Fijufilm DPC-R1" 303 tristate "MTD driver for Olympus MAUSB-10 and Fujifilm DPC-R1"
304 depends on MTD_NAND && USB 304 depends on MTD_NAND && USB
305 help 305 help
306 These two (and possibly other) Alauda-based cardreaders for 306 These two (and possibly other) Alauda-based cardreaders for
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index ab9f5c5db3..0e72153b32 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -220,7 +220,7 @@ static int doc_ecc_decode(struct rs_control *rs, uint8_t *data, uint8_t *ecc)
220 } 220 }
221 } 221 }
222 /* If the parity is wrong, no rescue possible */ 222 /* If the parity is wrong, no rescue possible */
223 return parity ? -1 : nerr; 223 return parity ? -EBADMSG : nerr;
224} 224}
225 225
226static void DoC_Delay(struct doc_priv *doc, unsigned short cycles) 226static void DoC_Delay(struct doc_priv *doc, unsigned short cycles)
@@ -1034,7 +1034,7 @@ static int doc200x_correct_data(struct mtd_info *mtd, u_char *dat,
1034 WriteDOC(DOC_ECC_DIS, docptr, Mplus_ECCConf); 1034 WriteDOC(DOC_ECC_DIS, docptr, Mplus_ECCConf);
1035 else 1035 else
1036 WriteDOC(DOC_ECC_DIS, docptr, ECCConf); 1036 WriteDOC(DOC_ECC_DIS, docptr, ECCConf);
1037 if (no_ecc_failures && (ret == -1)) { 1037 if (no_ecc_failures && (ret == -EBADMSG)) {
1038 printk(KERN_ERR "suppressing ECC failure\n"); 1038 printk(KERN_ERR "suppressing ECC failure\n");
1039 ret = 0; 1039 ret = 0;
1040 } 1040 }
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b4e0e77238..e29c1da7f5 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -789,7 +789,7 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
789 int stat; 789 int stat;
790 790
791 stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]); 791 stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
792 if (stat == -1) 792 if (stat < 0)
793 mtd->ecc_stats.failed++; 793 mtd->ecc_stats.failed++;
794 else 794 else
795 mtd->ecc_stats.corrected += stat; 795 mtd->ecc_stats.corrected += stat;
@@ -833,7 +833,7 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
833 int stat; 833 int stat;
834 834
835 stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]); 835 stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
836 if (stat == -1) 836 if (stat < 0)
837 mtd->ecc_stats.failed++; 837 mtd->ecc_stats.failed++;
838 else 838 else
839 mtd->ecc_stats.corrected += stat; 839 mtd->ecc_stats.corrected += stat;
@@ -874,7 +874,7 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
874 chip->read_buf(mtd, oob, eccbytes); 874 chip->read_buf(mtd, oob, eccbytes);
875 stat = chip->ecc.correct(mtd, p, oob, NULL); 875 stat = chip->ecc.correct(mtd, p, oob, NULL);
876 876
877 if (stat == -1) 877 if (stat < 0)
878 mtd->ecc_stats.failed++; 878 mtd->ecc_stats.failed++;
879 else 879 else
880 mtd->ecc_stats.corrected += stat; 880 mtd->ecc_stats.corrected += stat;
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index fde593e5e6..9003a135e0 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -189,7 +189,7 @@ int nand_correct_data(struct mtd_info *mtd, u_char *dat,
189 if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1) 189 if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1)
190 return 1; 190 return 1;
191 191
192 return -1; 192 return -EBADMSG;
193} 193}
194EXPORT_SYMBOL(nand_correct_data); 194EXPORT_SYMBOL(nand_correct_data);
195 195
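[Editor's note: with the correction routines returning -EBADMSG instead of a bare -1, callers can test for any negative errno, which is what the nand_base.c hunks above switch to. A minimal caller sketch, assuming the nand_correct_data() calling convention:]

#include <linux/errno.h>

/* Hypothetical caller: any negative return is an uncorrectable error,
 * a non-negative return is the number of corrected bit flips. */
static void sketch_account_ecc(int stat, unsigned *failed, unsigned *corrected)
{
	if (stat < 0)		/* e.g. -EBADMSG from nand_correct_data() */
		(*failed)++;
	else
		*corrected += stat;
}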
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index a7574807dc..10490b48d9 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -511,7 +511,7 @@ static int init_nandsim(struct mtd_info *mtd)
511 } 511 }
512 512
513 if (ns->options & OPT_SMALLPAGE) { 513 if (ns->options & OPT_SMALLPAGE) {
514 if (ns->geom.totsz < (64 << 20)) { 514 if (ns->geom.totsz < (32 << 20)) {
515 ns->geom.pgaddrbytes = 3; 515 ns->geom.pgaddrbytes = 3;
516 ns->geom.secaddrbytes = 2; 516 ns->geom.secaddrbytes = 2;
517 } else { 517 } else {
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index 21b921dd6a..66f76e9618 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -488,12 +488,24 @@ static void s3c2410_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
488 readsb(this->IO_ADDR_R, buf, len); 488 readsb(this->IO_ADDR_R, buf, len);
489} 489}
490 490
491static void s3c2440_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
492{
493 struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
494 readsl(info->regs + S3C2440_NFDATA, buf, len / 4);
495}
496
491static void s3c2410_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len) 497static void s3c2410_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
492{ 498{
493 struct nand_chip *this = mtd->priv; 499 struct nand_chip *this = mtd->priv;
494 writesb(this->IO_ADDR_W, buf, len); 500 writesb(this->IO_ADDR_W, buf, len);
495} 501}
496 502
503static void s3c2440_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
504{
505 struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
506 writesl(info->regs + S3C2440_NFDATA, buf, len / 4);
507}
508
497/* device management functions */ 509/* device management functions */
498 510
499static int s3c2410_nand_remove(struct platform_device *pdev) 511static int s3c2410_nand_remove(struct platform_device *pdev)
@@ -604,6 +616,8 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
604 info->sel_bit = S3C2440_NFCONT_nFCE; 616 info->sel_bit = S3C2440_NFCONT_nFCE;
605 chip->cmd_ctrl = s3c2440_nand_hwcontrol; 617 chip->cmd_ctrl = s3c2440_nand_hwcontrol;
606 chip->dev_ready = s3c2440_nand_devready; 618 chip->dev_ready = s3c2440_nand_devready;
619 chip->read_buf = s3c2440_nand_read_buf;
620 chip->write_buf = s3c2440_nand_write_buf;
607 break; 621 break;
608 622
609 case TYPE_S3C2412: 623 case TYPE_S3C2412:
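[Editor's note: the new S3C2440 buffer helpers use 32-bit FIFO accesses; the len / 4 word count assumes len is a multiple of four, which holds for NAND page and OOB transfer sizes. The same idea as a minimal sketch, with a hypothetical register base rather than the driver's:]

#include <linux/types.h>
#include <asm/io.h>

/* Read 'len' bytes from a 32-bit wide data register into 'buf'.
 * Assumes len % 4 == 0, as NAND page/OOB transfers are. */
static void sketch_read_fifo(void __iomem *data_reg, u_char *buf, int len)
{
	readsl(data_reg, buf, len / 4);	/* word count, not byte count */
}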
diff --git a/drivers/mtd/onenand/onenand_sim.c b/drivers/mtd/onenand/onenand_sim.c
index 0d89ad5776..d64200b7c9 100644
--- a/drivers/mtd/onenand/onenand_sim.c
+++ b/drivers/mtd/onenand/onenand_sim.c
@@ -88,11 +88,11 @@ do { \
88 88
89/** 89/**
90 * onenand_lock_handle - Handle Lock scheme 90 * onenand_lock_handle - Handle Lock scheme
91 * @param this OneNAND device structure 91 * @this: OneNAND device structure
92 * @param cmd The command to be sent 92 * @cmd: The command to be sent
93 * 93 *
94 * Send lock command to OneNAND device. 94 * Send lock command to OneNAND device.
95 * The lock scheme is depends on chip type. 95 * The lock scheme depends on chip type.
96 */ 96 */
97static void onenand_lock_handle(struct onenand_chip *this, int cmd) 97static void onenand_lock_handle(struct onenand_chip *this, int cmd)
98{ 98{
@@ -131,8 +131,8 @@ static void onenand_lock_handle(struct onenand_chip *this, int cmd)
131 131
132/** 132/**
133 * onenand_bootram_handle - Handle BootRAM area 133 * onenand_bootram_handle - Handle BootRAM area
134 * @param this OneNAND device structure 134 * @this: OneNAND device structure
135 * @param cmd The command to be sent 135 * @cmd: The command to be sent
136 * 136 *
137 * Emulate BootRAM area. It is possible to do basic operation using BootRAM. 137 * Emulate BootRAM area. It is possible to do basic operation using BootRAM.
138 */ 138 */
@@ -153,10 +153,10 @@ static void onenand_bootram_handle(struct onenand_chip *this, int cmd)
153 153
154/** 154/**
155 * onenand_update_interrupt - Set interrupt register 155 * onenand_update_interrupt - Set interrupt register
156 * @param this OneNAND device structure 156 * @this: OneNAND device structure
157 * @param cmd The command to be sent 157 * @cmd: The command to be sent
158 * 158 *
159 * Update interrupt register. The status is depends on command. 159 * Update interrupt register. The status depends on command.
160 */ 160 */
161static void onenand_update_interrupt(struct onenand_chip *this, int cmd) 161static void onenand_update_interrupt(struct onenand_chip *this, int cmd)
162{ 162{
@@ -189,11 +189,12 @@ static void onenand_update_interrupt(struct onenand_chip *this, int cmd)
189} 189}
190 190
191/** 191/**
192 * onenand_check_overwrite - Check over-write if happend 192 * onenand_check_overwrite - Check if over-write happened
193 * @param dest The destination pointer 193 * @dest: The destination pointer
194 * @param src The source pointer 194 * @src: The source pointer
195 * @param count The length to be check 195 * @count: The length to be checked
196 * @return 0 on same, otherwise 1 196 *
197 * Returns: 0 on same, otherwise 1
197 * 198 *
198 * Compare the source with destination 199 * Compare the source with destination
199 */ 200 */
@@ -213,10 +214,10 @@ static int onenand_check_overwrite(void *dest, void *src, size_t count)
213 214
214/** 215/**
215 * onenand_data_handle - Handle OneNAND Core and DataRAM 216 * onenand_data_handle - Handle OneNAND Core and DataRAM
216 * @param this OneNAND device structure 217 * @this: OneNAND device structure
217 * @param cmd The command to be sent 218 * @cmd: The command to be sent
218 * @param dataram Which dataram used 219 * @dataram: Which dataram used
219 * @param offset The offset to OneNAND Core 220 * @offset: The offset to OneNAND Core
220 * 221 *
221 * Copy data from OneNAND Core to DataRAM (read) 222 * Copy data from OneNAND Core to DataRAM (read)
222 * Copy data from DataRAM to OneNAND Core (write) 223 * Copy data from DataRAM to OneNAND Core (write)
@@ -295,8 +296,8 @@ static void onenand_data_handle(struct onenand_chip *this, int cmd,
295 296
296/** 297/**
297 * onenand_command_handle - Handle command 298 * onenand_command_handle - Handle command
298 * @param this OneNAND device structure 299 * @this: OneNAND device structure
299 * @param cmd The command to be sent 300 * @cmd: The command to be sent
300 * 301 *
301 * Emulate OneNAND command. 302 * Emulate OneNAND command.
302 */ 303 */
@@ -350,8 +351,8 @@ static void onenand_command_handle(struct onenand_chip *this, int cmd)
350 351
351/** 352/**
352 * onenand_writew - [OneNAND Interface] Emulate write operation 353 * onenand_writew - [OneNAND Interface] Emulate write operation
353 * @param value value to write 354 * @value: value to write
354 * @param addr address to write 355 * @addr: address to write
355 * 356 *
356 * Write OneNAND register with value 357 * Write OneNAND register with value
357 */ 358 */
@@ -373,7 +374,7 @@ static void onenand_writew(unsigned short value, void __iomem * addr)
373 374
374/** 375/**
375 * flash_init - Initialize OneNAND simulator 376 * flash_init - Initialize OneNAND simulator
376 * @param flash OneNAND simulaotr data strucutres 377 * @flash: OneNAND simulator data structures
377 * 378 *
378 * Initialize OneNAND simulator. 379 * Initialize OneNAND simulator.
379 */ 380 */
@@ -416,7 +417,7 @@ static int __init flash_init(struct onenand_flash *flash)
416 417
417/** 418/**
418 * flash_exit - Clean up OneNAND simulator 419 * flash_exit - Clean up OneNAND simulator
419 * @param flash OneNAND simulaotr data strucutres 420 * @flash: OneNAND simulator data structures
420 * 421 *
421 * Clean up OneNAND simulator. 422 * Clean up OneNAND simulator.
422 */ 423 */
@@ -424,7 +425,6 @@ static void flash_exit(struct onenand_flash *flash)
424{ 425{
425 vfree(ONENAND_CORE(flash)); 426 vfree(ONENAND_CORE(flash));
426 kfree(flash->base); 427 kfree(flash->base);
427 kfree(flash);
428} 428}
429 429
430static int __init onenand_sim_init(void) 430static int __init onenand_sim_init(void)
@@ -449,7 +449,7 @@ static int __init onenand_sim_init(void)
449 info->onenand.write_word = onenand_writew; 449 info->onenand.write_word = onenand_writew;
450 450
451 if (flash_init(&info->flash)) { 451 if (flash_init(&info->flash)) {
452 printk(KERN_ERR "Unable to allocat flash.\n"); 452 printk(KERN_ERR "Unable to allocate flash.\n");
453 kfree(ffchars); 453 kfree(ffchars);
454 kfree(info); 454 kfree(info);
455 return -ENOMEM; 455 return -ENOMEM;
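[Editor's note: the comment churn above is a conversion from the old "@param name description" form to standard kernel-doc. For reference, the target shape — a hypothetical example of the layout, not a block from the patch:]

/**
 * example_handle - one-line summary of the helper
 * @this: OneNAND device structure
 * @cmd: the command to be sent
 *
 * Longer description goes here, after a blank line.
 *
 * Returns: 0 on success, otherwise a negative errno.
 */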
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index eb75773a9e..86b8641b46 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -3103,4 +3103,10 @@ config NETPOLL_TRAP
3103config NET_POLL_CONTROLLER 3103config NET_POLL_CONTROLLER
3104 def_bool NETPOLL 3104 def_bool NETPOLL
3105 3105
3106config VIRTIO_NET
3107 tristate "Virtio network driver (EXPERIMENTAL)"
3108 depends on EXPERIMENTAL && VIRTIO
3109 ---help---
3110 This is the virtual network driver for lguest. Say Y or M.
3111
3106endif # NETDEVICES 3112endif # NETDEVICES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 22f78cbd12..593262065c 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -183,7 +183,6 @@ obj-$(CONFIG_ZORRO8390) += zorro8390.o
183obj-$(CONFIG_HPLANCE) += hplance.o 7990.o 183obj-$(CONFIG_HPLANCE) += hplance.o 7990.o
184obj-$(CONFIG_MVME147_NET) += mvme147.o 7990.o 184obj-$(CONFIG_MVME147_NET) += mvme147.o 7990.o
185obj-$(CONFIG_EQUALIZER) += eql.o 185obj-$(CONFIG_EQUALIZER) += eql.o
186obj-$(CONFIG_LGUEST_NET) += lguest_net.o
187obj-$(CONFIG_MIPS_JAZZ_SONIC) += jazzsonic.o 186obj-$(CONFIG_MIPS_JAZZ_SONIC) += jazzsonic.o
188obj-$(CONFIG_MIPS_AU1X00_ENET) += au1000_eth.o 187obj-$(CONFIG_MIPS_AU1X00_ENET) += au1000_eth.o
189obj-$(CONFIG_MIPS_SIM_NET) += mipsnet.o 188obj-$(CONFIG_MIPS_SIM_NET) += mipsnet.o
@@ -243,3 +242,4 @@ obj-$(CONFIG_FS_ENET) += fs_enet/
243 242
244obj-$(CONFIG_NETXEN_NIC) += netxen/ 243obj-$(CONFIG_NETXEN_NIC) += netxen/
245obj-$(CONFIG_NIU) += niu.o 244obj-$(CONFIG_NIU) += niu.o
245obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index ed53aaab4c..ae41973615 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -471,7 +471,7 @@ static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
471 } 471 }
472 472
473 len = max(skb->len, ETH_ZLEN); 473 len = max(skb->len, ETH_ZLEN);
474 queue = skb->queue_mapping; 474 queue = skb_get_queue_mapping(skb);
475#ifdef CONFIG_NETDEVICES_MULTIQUEUE 475#ifdef CONFIG_NETDEVICES_MULTIQUEUE
476 netif_stop_subqueue(dev, queue); 476 netif_stop_subqueue(dev, queue);
477#else 477#else
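[Editor's note: replacing the direct field read with skb_get_queue_mapping() keeps the driver correct whether or not the kernel is built with multiqueue support, since the accessor evaluates to 0 when queue_mapping is compiled out. The call site, sketched in isolation:]

#include <linux/skbuff.h>

/* Prefer the accessor over skb->queue_mapping: it needs no #ifdef
 * in the driver because it degrades to 0 without multiqueue. */
static u16 sketch_pick_queue(struct sk_buff *skb)
{
	return skb_get_queue_mapping(skb);
}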
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 2b5782056d..0fbf1bbbae 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -751,13 +751,11 @@ mii_queue(struct net_device *dev, int regval, void (*func)(uint, struct net_devi
751 if (mii_head) { 751 if (mii_head) {
752 mii_tail->mii_next = mip; 752 mii_tail->mii_next = mip;
753 mii_tail = mip; 753 mii_tail = mip;
754 } 754 } else {
755 else {
756 mii_head = mii_tail = mip; 755 mii_head = mii_tail = mip;
757 fep->hwp->fec_mii_data = regval; 756 fep->hwp->fec_mii_data = regval;
758 } 757 }
759 } 758 } else {
760 else {
761 retval = 1; 759 retval = 1;
762 } 760 }
763 761
@@ -768,14 +766,11 @@ mii_queue(struct net_device *dev, int regval, void (*func)(uint, struct net_devi
768 766
769static void mii_do_cmd(struct net_device *dev, const phy_cmd_t *c) 767static void mii_do_cmd(struct net_device *dev, const phy_cmd_t *c)
770{ 768{
771 int k;
772
773 if(!c) 769 if(!c)
774 return; 770 return;
775 771
776 for(k = 0; (c+k)->mii_data != mk_mii_end; k++) { 772 for (; c->mii_data != mk_mii_end; c++)
777 mii_queue(dev, (c+k)->mii_data, (c+k)->funct); 773 mii_queue(dev, c->mii_data, c->funct);
778 }
779} 774}
780 775
781static void mii_parse_sr(uint mii_reg, struct net_device *dev) 776static void mii_parse_sr(uint mii_reg, struct net_device *dev)
@@ -792,7 +787,6 @@ static void mii_parse_sr(uint mii_reg, struct net_device *dev)
792 status |= PHY_STAT_FAULT; 787 status |= PHY_STAT_FAULT;
793 if (mii_reg & 0x0020) 788 if (mii_reg & 0x0020)
794 status |= PHY_STAT_ANC; 789 status |= PHY_STAT_ANC;
795
796 *s = status; 790 *s = status;
797} 791}
798 792
@@ -1239,7 +1233,6 @@ mii_link_interrupt(int irq, void * dev_id);
1239#endif 1233#endif
1240 1234
1241#if defined(CONFIG_M5272) 1235#if defined(CONFIG_M5272)
1242
1243/* 1236/*
1244 * Code specific to Coldfire 5272 setup. 1237 * Code specific to Coldfire 5272 setup.
1245 */ 1238 */
@@ -2020,8 +2013,7 @@ static void mii_relink(struct work_struct *work)
2020 & (PHY_STAT_100FDX | PHY_STAT_10FDX)) 2013 & (PHY_STAT_100FDX | PHY_STAT_10FDX))
2021 duplex = 1; 2014 duplex = 1;
2022 fec_restart(dev, duplex); 2015 fec_restart(dev, duplex);
2023 } 2016 } else
2024 else
2025 fec_stop(dev); 2017 fec_stop(dev);
2026 2018
2027#if 0 2019#if 0
@@ -2119,8 +2111,7 @@ mii_discover_phy(uint mii_reg, struct net_device *dev)
2119 fep->phy_id = phytype << 16; 2111 fep->phy_id = phytype << 16;
2120 mii_queue(dev, mk_mii_read(MII_REG_PHYIR2), 2112 mii_queue(dev, mk_mii_read(MII_REG_PHYIR2),
2121 mii_discover_phy3); 2113 mii_discover_phy3);
2122 } 2114 } else {
2123 else {
2124 fep->phy_addr++; 2115 fep->phy_addr++;
2125 mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), 2116 mii_queue(dev, mk_mii_read(MII_REG_PHYIR1),
2126 mii_discover_phy); 2117 mii_discover_phy);
@@ -2574,8 +2565,7 @@ fec_restart(struct net_device *dev, int duplex)
2574 if (duplex) { 2565 if (duplex) {
2575 fecp->fec_r_cntrl = OPT_FRAME_SIZE | 0x04;/* MII enable */ 2566 fecp->fec_r_cntrl = OPT_FRAME_SIZE | 0x04;/* MII enable */
2576 fecp->fec_x_cntrl = 0x04; /* FD enable */ 2567 fecp->fec_x_cntrl = 0x04; /* FD enable */
2577 } 2568 } else {
2578 else {
2579 /* MII enable|No Rcv on Xmit */ 2569 /* MII enable|No Rcv on Xmit */
2580 fecp->fec_r_cntrl = OPT_FRAME_SIZE | 0x06; 2570 fecp->fec_r_cntrl = OPT_FRAME_SIZE | 0x06;
2581 fecp->fec_x_cntrl = 0x00; 2571 fecp->fec_x_cntrl = 0x00;
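[Editor's note: the mii_do_cmd() change above drops the index variable and walks the command table by pointer until the mk_mii_end sentinel. The generic shape of that idiom, with hypothetical types and a stand-in sentinel:]

struct cmd {
	int data;
	void (*fn)(int);
};

#define CMD_END 0	/* hypothetical sentinel, standing in for mk_mii_end */

static void sketch_run_cmds(const struct cmd *c)
{
	if (!c)
		return;
	/* Advance the pointer itself; no separate index needed. */
	for (; c->data != CMD_END; c++)
		c->fn(c->data);
}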
diff --git a/drivers/net/lguest_net.c b/drivers/net/lguest_net.c
deleted file mode 100644
index abce2ee843..0000000000
--- a/drivers/net/lguest_net.c
+++ /dev/null
@@ -1,555 +0,0 @@
1/*D:500
2 * The Guest network driver.
3 *
4 * This is a very simple virtual network driver, and our last Guest driver.
5 * The only trick is that it can talk directly to multiple other recipients
6 * (ie. other Guests on the same network). It can also be used with only the
7 * Host on the network.
8 :*/
9
10/* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26//#define DEBUG
27#include <linux/netdevice.h>
28#include <linux/etherdevice.h>
29#include <linux/module.h>
30#include <linux/mm_types.h>
31#include <linux/io.h>
32#include <linux/lguest_bus.h>
33
34#define SHARED_SIZE PAGE_SIZE
35#define MAX_LANS 4
36#define NUM_SKBS 8
37
38/*M:011 Network code master Jeff Garzik points out numerous shortcomings in
39 * this driver if it aspires to greatness.
40 *
41 * Firstly, it doesn't use "NAPI": the networking's New API, and is poorer for
42 * it. As he says "NAPI means system-wide load leveling, across multiple
43 * network interfaces. Lack of NAPI can mean competition at higher loads."
44 *
45 * He also points out that we don't implement set_mac_address, so users cannot
46 * change the device's hardware address. When I asked why one would want to:
47 * "Bonding, and situations where you /do/ want the MAC address to "leak" out
48 * of the host onto the wider net."
49 *
50 * Finally, he would like module unloading: "It is not unrealistic to think of
51 * [un|re|]loading the net support module in an lguest guest. And, adding
52 * module support makes the programmer more responsible, because they now have
53 * to learn to clean up after themselves. Any driver that cannot clean up
54 * after itself is an incomplete driver in my book."
55 :*/
56
57/*D:530 The "struct lguestnet_info" contains all the information we need to
58 * know about the network device. */
59struct lguestnet_info
60{
61 /* The mapped device page(s) (an array of "struct lguest_net"). */
62 struct lguest_net *peer;
63 /* The physical address of the device page(s) */
64 unsigned long peer_phys;
65 /* The size of the device page(s). */
66 unsigned long mapsize;
67
68 /* The lguest_device I come from */
69 struct lguest_device *lgdev;
70
71 /* My peerid (ie. my slot in the array). */
72 unsigned int me;
73
74 /* Receive queue: the network packets waiting to be filled. */
75 struct sk_buff *skb[NUM_SKBS];
76 struct lguest_dma dma[NUM_SKBS];
77};
78/*:*/
79
80/* How many bytes left in this page. */
81static unsigned int rest_of_page(void *data)
82{
83 return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE);
84}
85
86/*D:570 Each peer (ie. Guest or Host) on the network binds their receive
87 * buffers to a different key: we simply use the physical address of the
88 * device's memory page plus the peer number. The Host insists that all keys
89 * be a multiple of 4, so we multiply the peer number by 4. */
90static unsigned long peer_key(struct lguestnet_info *info, unsigned peernum)
91{
92 return info->peer_phys + 4 * peernum;
93}
94
95/* This is the routine which sets up a "struct lguest_dma" to point to a
96 * network packet, similar to req_to_dma() in lguest_blk.c. The structure of a
97 * "struct sk_buff" has grown complex over the years: it consists of a "head"
98 * linear section pointed to by "skb->data", and possibly an array of
99 * "fragments" in the case of a non-linear packet.
100 *
101 * Our receive buffers don't use fragments at all but outgoing skbs might, so
102 * we handle it. */
103static void skb_to_dma(const struct sk_buff *skb, unsigned int headlen,
104 struct lguest_dma *dma)
105{
106 unsigned int i, seg;
107
108 /* First, we put the linear region into the "struct lguest_dma". Each
109 * entry can't go over a page boundary, so even though all our packets
110 * are 1514 bytes or less, we might need to use two entries here: */
111 for (i = seg = 0; i < headlen; seg++, i += rest_of_page(skb->data+i)) {
112 dma->addr[seg] = virt_to_phys(skb->data + i);
113 dma->len[seg] = min((unsigned)(headlen - i),
114 rest_of_page(skb->data + i));
115 }
116
117 /* Now we handle the fragments: at least they're guaranteed not to go
118 * over a page. skb_shinfo(skb) returns a pointer to the structure
119 * which tells us about the number of fragments and the fragment
120 * array. */
121 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, seg++) {
122 const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
123 /* Should not happen with MTU less than 64k - 2 * PAGE_SIZE. */
124 if (seg == LGUEST_MAX_DMA_SECTIONS) {
125 /* We will end up sending a truncated packet should
126 * this ever happen. Plus, a cool log message! */
127 printk("Woah dude! Megapacket!\n");
128 break;
129 }
130 dma->addr[seg] = page_to_phys(f->page) + f->page_offset;
131 dma->len[seg] = f->size;
132 }
133
134 /* If after all that we didn't use the entire "struct lguest_dma"
135 * array, we terminate it with a 0 length. */
136 if (seg < LGUEST_MAX_DMA_SECTIONS)
137 dma->len[seg] = 0;
138}
139
140/*
141 * Packet transmission.
142 *
143 * Our packet transmission is a little unusual. A real network card would just
144 * send out the packet and leave the receivers to decide if they're interested.
145 * Instead, we look through the network device memory page and see if any of
146 * the ethernet addresses match the packet destination, and if so we send it to
147 * that Guest.
148 *
149 * This is made a little more complicated in two cases. The first case is
150 * broadcast packets: for that we send the packet to all Guests on the network,
151 * one at a time. The second case is "promiscuous" mode, where a Guest wants
152 * to see all the packets on the network. We need a way for the Guest to tell
153 * us it wants to see all packets, so it sets the "multicast" bit on its
154 * published MAC address, which is never valid in a real ethernet address.
155 */
156#define PROMISC_BIT 0x01
157
158/* This is the callback which is summoned whenever the network device's
159 * multicast or promiscuous state changes. If the card is in promiscuous mode,
160 * we advertise that in our ethernet address in the device's memory. We do the
161 * same if Linux wants any or all multicast traffic. */
162static void lguestnet_set_multicast(struct net_device *dev)
163{
164 struct lguestnet_info *info = netdev_priv(dev);
165
166 if ((dev->flags & (IFF_PROMISC|IFF_ALLMULTI)) || dev->mc_count)
167 info->peer[info->me].mac[0] |= PROMISC_BIT;
168 else
169 info->peer[info->me].mac[0] &= ~PROMISC_BIT;
170}

/* A simple test function to see if a peer wants to see all packets. */
static int promisc(struct lguestnet_info *info, unsigned int peer)
{
	return info->peer[peer].mac[0] & PROMISC_BIT;
}

/* Another simple function to see if a peer's advertised ethernet address
 * matches a packet's destination ethernet address. */
static int mac_eq(const unsigned char mac[ETH_ALEN],
		  struct lguestnet_info *info, unsigned int peer)
{
	/* Ignore multicast bit, which peer turns on to mean promisc. */
	if ((info->peer[peer].mac[0] & (~PROMISC_BIT)) != mac[0])
		return 0;
	return memcmp(mac+1, info->peer[peer].mac+1, ETH_ALEN-1) == 0;
}

/* This is the function which actually sends a packet once we've decided a
 * peer wants it: */
static void transfer_packet(struct net_device *dev,
			    struct sk_buff *skb,
			    unsigned int peernum)
{
	struct lguestnet_info *info = netdev_priv(dev);
	struct lguest_dma dma;

	/* We use our handy "struct lguest_dma" packing function to prepare
	 * the skb for sending. */
	skb_to_dma(skb, skb_headlen(skb), &dma);
	pr_debug("xfer length %04x (%u)\n", htons(skb->len), skb->len);

	/* This is the actual send call which copies the packet. */
	lguest_send_dma(peer_key(info, peernum), &dma);

	/* Check that the entire packet was transmitted.  If not, it could mean
	 * that the other Guest registered a short receive buffer, but this
	 * driver should never do that.  More likely, the peer is dead. */
	if (dma.used_len != skb->len) {
		dev->stats.tx_carrier_errors++;
		pr_debug("Bad xfer to peer %i: %i of %i (dma %p/%i)\n",
			 peernum, dma.used_len, skb->len,
			 (void *)dma.addr[0], dma.len[0]);
	} else {
		/* On success we update the stats. */
		dev->stats.tx_bytes += skb->len;
		dev->stats.tx_packets++;
	}
}

/* Another helper function to tell us if a slot in the device memory is unused.
 * Since we always set the Local Assignment bit in the ethernet address, the
 * first byte can never be 0. */
static int unused_peer(const struct lguest_net peer[], unsigned int num)
{
	return peer[num].mac[0] == 0;
}

/* Finally, here is the routine which handles an outgoing packet.  It's called
 * "start_xmit" for traditional reasons. */
static int lguestnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned int i;
	int broadcast;
	struct lguestnet_info *info = netdev_priv(dev);
	/* Extract the destination ethernet address from the packet. */
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
	DECLARE_MAC_BUF(mac);

	pr_debug("%s: xmit %s\n", dev->name, print_mac(mac, dest));

	/* If it's a multicast packet, we broadcast to everyone.  That's not
	 * very efficient, but there are very few applications which actually
	 * use multicast, which is a shame really.
	 *
	 * As etherdevice.h points out: "By definition the broadcast address is
	 * also a multicast address."  So we don't have to test for broadcast
	 * packets separately. */
	broadcast = is_multicast_ether_addr(dest);

	/* Look through all the published ethernet addresses to see if we
	 * should send this packet. */
	for (i = 0; i < info->mapsize/sizeof(struct lguest_net); i++) {
		/* We don't send to ourselves (we actually can't SEND_DMA to
		 * ourselves anyway), and don't send to unused slots. */
		if (i == info->me || unused_peer(info->peer, i))
			continue;

		/* If it's broadcast we send it.  If they want every packet we
		 * send it.  If the destination matches their address we send
		 * it.  Otherwise we go to the next peer. */
		if (!broadcast && !promisc(info, i) && !mac_eq(dest, info, i))
			continue;

		pr_debug("lguestnet %s: sending from %i to %i\n",
			 dev->name, info->me, i);
		/* Our routine which actually does the transfer. */
		transfer_packet(dev, skb, i);
	}

	/* An xmit routine is expected to dispose of the packet, so we do. */
	dev_kfree_skb(skb);

	/* As per kernel convention, 0 means success.  This is why I love
	 * networking: even if we never sent to anyone, that's still
	 * success! */
	return 0;
}
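
/* For reference, each slot we just walked is a "struct lguest_net", declared
 * near the top of this file; roughly:
 *
 *	struct lguest_net {
 *		unsigned char mac[6];	// the peer's published MAC
 *	};
 *
 * so info->mapsize/sizeof(struct lguest_net) is simply how many peer slots
 * fit in the shared device memory, and the scan above costs O(slots) per
 * packet.  Fine for a toy network, not for a big one. */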

/*D:560
 * Packet receiving.
 *
 * First, here's a helper routine which fills one of our array of receive
 * buffers: */
static int fill_slot(struct net_device *dev, unsigned int slot)
{
	struct lguestnet_info *info = netdev_priv(dev);

	/* We can receive ETH_DATA_LEN (1500) byte packets, plus a standard
	 * ethernet header of ETH_HLEN (14) bytes. */
	info->skb[slot] = netdev_alloc_skb(dev, ETH_HLEN + ETH_DATA_LEN);
	if (!info->skb[slot]) {
		printk("%s: could not fill slot %i\n", dev->name, slot);
		return -ENOMEM;
	}

	/* skb_to_dma() is a helper which sets up the "struct lguest_dma" to
	 * point to the data in the skb: we also use it for sending out a
	 * packet. */
	skb_to_dma(info->skb[slot], ETH_HLEN + ETH_DATA_LEN, &info->dma[slot]);

	/* This is a Write Memory Barrier: it ensures that the entry in the
	 * receive buffer array is written *before* we set the "used_len" entry
	 * to 0.  If the Host were looking at the receive buffer array from a
	 * different CPU, it could potentially see "used_len = 0" and not see
	 * the updated receive buffer information.  This would be a horribly
	 * nasty bug, so make sure the compiler and CPU know this has to happen
	 * first. */
	wmb();
	/* Writing 0 to "used_len" tells the Host it can use this receive
	 * buffer now. */
	info->dma[slot].used_len = 0;
	return 0;
}
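
/* Barriers only work in pairs: the wmb() above orders our two writes, but the
 * reader needs a matching read barrier between its two reads or it can still
 * observe them reversed.  Conceptually the consuming (Host) side must do
 * something like this (a sketch, not the actual Host code):
 *
 *	if (dma->used_len == 0) {	// buffer on offer?
 *		rmb();			// read addr[]/len[] only after used_len
 *		copy_packet_into(dma);
 *		dma->used_len = length;	// hand it back to the Guest
 *	}
 */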

/* This is the actual receive routine.  When we receive an interrupt from the
 * Host to tell us a packet has been delivered, we arrive here: */
static irqreturn_t lguestnet_rcv(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	struct lguestnet_info *info = netdev_priv(dev);
	unsigned int i, done = 0;

	/* Look through our entire receive array for an entry which has data
	 * in it. */
	for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
		unsigned int length;
		struct sk_buff *skb;

		length = info->dma[i].used_len;
		if (length == 0)
			continue;

		/* We've found one!  Remember the skb (we grabbed the length
		 * above), and immediately refill the slot we've taken it
		 * from. */
		done++;
		skb = info->skb[i];
		fill_slot(dev, i);

		/* This shouldn't happen: micropackets could be sent by a
		 * badly-behaved Guest on the network, but the Host will never
		 * stuff more data in the buffer than the buffer length. */
		if (length < ETH_HLEN || length > ETH_HLEN + ETH_DATA_LEN) {
			pr_debug("%s: unbelievable skb len: %i\n",
				 dev->name, length);
			dev_kfree_skb(skb);
			continue;
		}

		/* skb_put(), what a great function!  I've ranted about this
		 * function before (http://lkml.org/lkml/1999/9/26/24).  You
		 * call it after you've added data to the end of an skb (in
		 * this case, it was the Host which wrote the data). */
		skb_put(skb, length);

		/* The ethernet header contains a protocol field: we use the
		 * standard helper to extract it, and place the result in
		 * skb->protocol.  The helper also sets up skb->pkt_type and
		 * eats up the ethernet header from the front of the packet. */
		skb->protocol = eth_type_trans(skb, dev);

		/* If this device doesn't need checksums for sending, we also
		 * don't need to check the packets when they come in. */
		if (dev->features & NETIF_F_NO_CSUM)
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		/* As a last resort for debugging the driver or the lguest I/O
		 * subsystem, you can uncomment the "#define DEBUG" at the top
		 * of this file, which turns all the pr_debug() into printk()
		 * and floods the logs. */
		pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
			 ntohs(skb->protocol), skb->len, skb->pkt_type);

		/* Update the packet and byte counts (visible from ifconfig,
		 * and good for debugging). */
		dev->stats.rx_bytes += skb->len;
		dev->stats.rx_packets++;

		/* Hand our fresh network packet into the stack's "network
		 * interface receive" routine.  That will free the packet
		 * itself when it's finished. */
		netif_rx(skb);
	}

	/* If we found any packets, we assume the interrupt was for us. */
	return done ? IRQ_HANDLED : IRQ_NONE;
}
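
/* Returning IRQ_NONE when no slot had data matters because our irq line may
 * be shared: it lets the kernel offer the interrupt to the other handlers on
 * the line, and spot a genuinely stuck interrupt source. */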

/*D:550 This is where we start: when the device is brought up by dhcpd or
 * ifconfig.  At this point we advertise our MAC address to the rest of the
 * network, and register receive buffers ready for incoming packets. */
static int lguestnet_open(struct net_device *dev)
{
	int i;
	struct lguestnet_info *info = netdev_priv(dev);

	/* Copy our MAC address into the device page, so others on the network
	 * can find us. */
	memcpy(info->peer[info->me].mac, dev->dev_addr, ETH_ALEN);

	/* We might already be in promisc mode (dev->flags & IFF_PROMISC).  Our
	 * set_multicast callback handles this already, so we call it now. */
	lguestnet_set_multicast(dev);

	/* Allocate packets and put them into our "struct lguest_dma" array.
	 * If we fail to allocate all the packets we could still limp along,
	 * but it's a sign of real stress so we should probably give up now. */
	for (i = 0; i < ARRAY_SIZE(info->dma); i++) {
		if (fill_slot(dev, i) != 0)
			goto cleanup;
	}

	/* Finally we tell the Host where our array of "struct lguest_dma"
	 * receive buffers is, binding it to the key corresponding to the
	 * device's physical memory plus our peerid. */
	if (lguest_bind_dma(peer_key(info, info->me), info->dma,
			    NUM_SKBS, lgdev_irq(info->lgdev)) != 0)
		goto cleanup;
	return 0;

cleanup:
	while (--i >= 0)
		dev_kfree_skb(info->skb[i]);
	return -ENOMEM;
}
/*:*/

/* The close routine is called when the device is no longer in use: we clean up
 * elegantly. */
static int lguestnet_close(struct net_device *dev)
{
	unsigned int i;
	struct lguestnet_info *info = netdev_priv(dev);

	/* Clear all trace of our existence out of the device memory by setting
	 * the slot which held our MAC address to 0 (unused). */
	memset(&info->peer[info->me], 0, sizeof(info->peer[info->me]));

	/* Unregister our array of receive buffers */
	lguest_unbind_dma(peer_key(info, info->me), info->dma);
	for (i = 0; i < ARRAY_SIZE(info->dma); i++)
		dev_kfree_skb(info->skb[i]);
	return 0;
}

/*D:510 The network device probe function is basically a standard ethernet
 * device setup.  It reads the "struct lguest_device_desc" and sets the "struct
 * net_device".  Oh, the line-by-line excitement!  Let's skip over it. :*/
static int lguestnet_probe(struct lguest_device *lgdev)
{
	int err, irqf = IRQF_SHARED;
	struct net_device *dev;
	struct lguestnet_info *info;
	struct lguest_device_desc *desc = &lguest_devices[lgdev->index];

	pr_debug("lguest_net: probing for device %i\n", lgdev->index);

	dev = alloc_etherdev(sizeof(struct lguestnet_info));
	if (!dev)
		return -ENOMEM;

	/* Ethernet defaults with some changes */
	ether_setup(dev);
	dev->set_mac_address = NULL;

	dev->dev_addr[0] = 0x02; /* set local assignment bit (IEEE802) */
	dev->dev_addr[1] = 0x00;
	memcpy(&dev->dev_addr[2], &lguest_data.guestid, 2);
	dev->dev_addr[4] = 0x00;
	dev->dev_addr[5] = 0x00;
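	/* The result is a locally-assigned MAC of the form 02:00:xx:xx:00:00,
	 * where the two middle bytes are the raw guestid as it sits in memory
	 * (so their order depends on endianness).  Each Guest on the lguest
	 * network thus gets a distinct, stable address. */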

	dev->open = lguestnet_open;
	dev->stop = lguestnet_close;
	dev->hard_start_xmit = lguestnet_start_xmit;

	/* We don't actually support multicast yet, but turning on/off
	 * promisc also calls dev->set_multicast_list. */
	dev->set_multicast_list = lguestnet_set_multicast;
	SET_NETDEV_DEV(dev, &lgdev->dev);

	/* The network code complains if you have "scatter-gather" capability
	 * if you don't also handle checksums (it seems that would be
	 * "illogical").  So we use a lie of omission and don't tell it that we
	 * can handle scattered packets unless we also don't want checksums,
	 * even though to us they're completely independent. */
	if (desc->features & LGUEST_NET_F_NOCSUM)
		dev->features = NETIF_F_SG|NETIF_F_NO_CSUM;

	info = netdev_priv(dev);
	info->mapsize = PAGE_SIZE * desc->num_pages;
	info->peer_phys = ((unsigned long)desc->pfn << PAGE_SHIFT);
	info->lgdev = lgdev;
	info->peer = lguest_map(info->peer_phys, desc->num_pages);
	if (!info->peer) {
		err = -ENOMEM;
		goto free;
	}

	/* This stores our peerid (upper bits reserved for future). */
	info->me = (desc->features & (info->mapsize-1));

	err = register_netdev(dev);
	if (err) {
		pr_debug("lguestnet: registering device failed\n");
		goto unmap;
	}

	if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS)
		irqf |= IRQF_SAMPLE_RANDOM;
	/* Keep request_irq()'s error so the cleanup path below returns it. */
	err = request_irq(lgdev_irq(lgdev), lguestnet_rcv, irqf, "lguestnet",
			  dev);
	if (err) {
		pr_debug("lguestnet: cannot get irq %i\n", lgdev_irq(lgdev));
		goto unregister;
	}

	pr_debug("lguestnet: registered device %s\n", dev->name);
	/* Finally, we put the "struct net_device" in the generic "struct
	 * lguest_device"s private pointer.  Again, it's not necessary, but
	 * makes sure the cool kernel kids don't tease us. */
	lgdev->private = dev;
	return 0;

unregister:
	unregister_netdev(dev);
unmap:
	lguest_unmap(info->peer);
free:
	free_netdev(dev);
	return err;
}

static struct lguest_driver lguestnet_drv = {
	.name = "lguestnet",
	.owner = THIS_MODULE,
	.device_type = LGUEST_DEVICE_T_NET,
	.probe = lguestnet_probe,
};

static __init int lguestnet_init(void)
{
	return register_lguest_driver(&lguestnet_drv);
}
module_init(lguestnet_init);

MODULE_DESCRIPTION("Lguest network driver");
MODULE_LICENSE("GPL");

/*D:580
 * This is the last of the Drivers, and with this we have covered the many and
 * wondrous and fine (and boring) details of the Guest.
 *
 * "make Launcher" beckons, where we answer questions like "Where do Guests
 * come from?", and "What do you do when someone asks for optimization?"
 */
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 6471d33afb..50648738d6 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -736,7 +736,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
 	MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
 
-	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 1000);
+	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000);
 
 	if (err)
 		mlx4_err(dev, "INIT_HCA returns %d\n", err);
diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c
index 4b3c109d5e..887633b207 100644
--- a/drivers/net/mlx4/icm.c
+++ b/drivers/net/mlx4/icm.c
@@ -60,7 +60,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu
 				 PCI_DMA_BIDIRECTIONAL);
 
 	for (i = 0; i < chunk->npages; ++i)
-		__free_pages(chunk->mem[i].page,
+		__free_pages(sg_page(&chunk->mem[i]),
 			     get_order(chunk->mem[i].length));
 }
 
@@ -70,7 +70,7 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *
 
 	for (i = 0; i < chunk->npages; ++i)
 		dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
-				  lowmem_page_address(chunk->mem[i].page),
+				  lowmem_page_address(sg_page(&chunk->mem[i])),
 				  sg_dma_address(&chunk->mem[i]));
 }
 
@@ -95,10 +95,13 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
 
 static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
 {
-	mem->page = alloc_pages(gfp_mask, order);
-	if (!mem->page)
+	struct page *page;
+
+	page = alloc_pages(gfp_mask, order);
+	if (!page)
 		return -ENOMEM;
 
+	sg_set_page(mem, page);
 	mem->length = PAGE_SIZE << order;
 	mem->offset = 0;
 	return 0;
@@ -145,6 +148,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 		if (!chunk)
 			goto fail;
 
+		sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);
 		chunk->npages = 0;
 		chunk->nsg    = 0;
 		list_add_tail(&chunk->list, &icm->chunk_list);
@@ -334,7 +338,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_han
 		 * been assigned to.
 		 */
 		if (chunk->mem[i].length > offset) {
-			page = chunk->mem[i].page;
+			page = sg_page(&chunk->mem[i]);
 			goto out;
 		}
 		offset -= chunk->mem[i].length;
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 5b41e8bdd6..651c2699d5 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -3274,6 +3274,7 @@ static const struct ethtool_ops mv643xx_ethtool_ops = {
 	.get_drvinfo		= mv643xx_get_drvinfo,
 	.get_link		= mv643xx_eth_get_link,
 	.set_sg			= ethtool_op_set_sg,
+	.get_sset_count		= mv643xx_get_sset_count,
 	.get_ethtool_stats	= mv643xx_get_ethtool_stats,
 	.get_strings		= mv643xx_get_strings,
 	.nway_reset		= mv643xx_eth_nway_restart,
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index ed1f9bbb2a..112ab079ce 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -3103,31 +3103,12 @@ static int niu_alloc_tx_ring_info(struct niu *np,
 
 static void niu_size_rbr(struct niu *np, struct rx_ring_info *rp)
 {
-	u16 bs;
+	u16 bss;
 
-	switch (PAGE_SIZE) {
-	case 4 * 1024:
-	case 8 * 1024:
-	case 16 * 1024:
-	case 32 * 1024:
-		rp->rbr_block_size = PAGE_SIZE;
-		rp->rbr_blocks_per_page = 1;
-		break;
+	bss = min(PAGE_SHIFT, 15);
 
-	default:
-		if (PAGE_SIZE % (32 * 1024) == 0)
-			bs = 32 * 1024;
-		else if (PAGE_SIZE % (16 * 1024) == 0)
-			bs = 16 * 1024;
-		else if (PAGE_SIZE % (8 * 1024) == 0)
-			bs = 8 * 1024;
-		else if (PAGE_SIZE % (4 * 1024) == 0)
-			bs = 4 * 1024;
-		else
-			BUG();
-		rp->rbr_block_size = bs;
-		rp->rbr_blocks_per_page = PAGE_SIZE / bs;
-	}
+	rp->rbr_block_size = 1 << bss;
+	rp->rbr_blocks_per_page = 1 << (PAGE_SHIFT-bss);
 
 	rp->rbr_sizes[0] = 256;
 	rp->rbr_sizes[1] = 1024;
@@ -7902,12 +7883,7 @@ static int __init niu_init(void)
 {
 	int err = 0;
 
-	BUILD_BUG_ON((PAGE_SIZE < 4 * 1024) ||
-		     ((PAGE_SIZE > 32 * 1024) &&
-		      ((PAGE_SIZE % (32 * 1024)) != 0 &&
-		       (PAGE_SIZE % (16 * 1024)) != 0 &&
-		       (PAGE_SIZE % (8 * 1024)) != 0 &&
-		       (PAGE_SIZE % (4 * 1024)) != 0)));
+	BUILD_BUG_ON(PAGE_SIZE < 4 * 1024);
 
 	niu_debug = netif_msg_init(debug, NIU_MSG_DEFAULT);
 
diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c
index c0b6d19d14..bcb0885011 100644
--- a/drivers/net/ppp_mppe.c
+++ b/drivers/net/ppp_mppe.c
@@ -55,7 +55,7 @@
 #include <linux/mm.h>
 #include <linux/ppp_defs.h>
 #include <linux/ppp-comp.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 
 #include "ppp_mppe.h"
 
@@ -68,9 +68,7 @@ MODULE_VERSION("1.0.2");
 static unsigned int
 setup_sg(struct scatterlist *sg, const void *address, unsigned int length)
 {
-	sg[0].page = virt_to_page(address);
-	sg[0].offset = offset_in_page(address);
-	sg[0].length = length;
+	sg_init_one(sg, address, length);
 	return length;
 }
 
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 419c00cbe6..e8960f294a 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -44,7 +44,8 @@
 		printk( "Assertion failed! %s,%s,%s,line=%d\n",	\
 		#expr,__FILE__,__FUNCTION__,__LINE__);		\
 	}
-#define dprintk(fmt, args...)	do { printk(PFX fmt, ## args); } while (0)
+#define dprintk(fmt, args...) \
+	do { printk(KERN_DEBUG PFX fmt, ## args); } while (0)
 #else
 #define assert(expr) do {} while (0)
 #define dprintk(fmt, args...)	do {} while (0)
@@ -111,19 +112,15 @@ enum mac_version {
 	RTL_GIGA_MAC_VER_05 = 0x05, // 8110SCd
 	RTL_GIGA_MAC_VER_06 = 0x06, // 8110SCe
 	RTL_GIGA_MAC_VER_11 = 0x0b, // 8168Bb
-	RTL_GIGA_MAC_VER_12 = 0x0c, // 8168Be 8168Bf
-	RTL_GIGA_MAC_VER_13 = 0x0d, // 8101Eb 8101Ec
-	RTL_GIGA_MAC_VER_14 = 0x0e, // 8101
-	RTL_GIGA_MAC_VER_15 = 0x0f  // 8101
-};
-
-enum phy_version {
-	RTL_GIGA_PHY_VER_C = 0x03, /* PHY Reg 0x03 bit0-3 == 0x0000 */
-	RTL_GIGA_PHY_VER_D = 0x04, /* PHY Reg 0x03 bit0-3 == 0x0000 */
-	RTL_GIGA_PHY_VER_E = 0x05, /* PHY Reg 0x03 bit0-3 == 0x0000 */
-	RTL_GIGA_PHY_VER_F = 0x06, /* PHY Reg 0x03 bit0-3 == 0x0001 */
-	RTL_GIGA_PHY_VER_G = 0x07, /* PHY Reg 0x03 bit0-3 == 0x0002 */
-	RTL_GIGA_PHY_VER_H = 0x08, /* PHY Reg 0x03 bit0-3 == 0x0003 */
+	RTL_GIGA_MAC_VER_12 = 0x0c, // 8168Be
+	RTL_GIGA_MAC_VER_13 = 0x0d, // 8101Eb
+	RTL_GIGA_MAC_VER_14 = 0x0e, // 8101 ?
+	RTL_GIGA_MAC_VER_15 = 0x0f, // 8101 ?
+	RTL_GIGA_MAC_VER_16 = 0x11, // 8101Ec
+	RTL_GIGA_MAC_VER_17 = 0x10, // 8168Bf
+	RTL_GIGA_MAC_VER_18 = 0x12, // 8168CP
+	RTL_GIGA_MAC_VER_19 = 0x13, // 8168C
+	RTL_GIGA_MAC_VER_20 = 0x14  // 8168C
 };
 
 #define _R(NAME,MAC,MASK) \
@@ -144,7 +141,12 @@ static const struct {
 	_R("RTL8168b/8111b",	RTL_GIGA_MAC_VER_12, 0xff7e1880), // PCI-E
 	_R("RTL8101e",		RTL_GIGA_MAC_VER_13, 0xff7e1880), // PCI-E 8139
 	_R("RTL8100e",		RTL_GIGA_MAC_VER_14, 0xff7e1880), // PCI-E 8139
-	_R("RTL8100e",		RTL_GIGA_MAC_VER_15, 0xff7e1880)  // PCI-E 8139
+	_R("RTL8100e",		RTL_GIGA_MAC_VER_15, 0xff7e1880), // PCI-E 8139
+	_R("RTL8168b/8111b",	RTL_GIGA_MAC_VER_17, 0xff7e1880), // PCI-E
+	_R("RTL8101e",		RTL_GIGA_MAC_VER_16, 0xff7e1880), // PCI-E
+	_R("RTL8168cp/8111cp",	RTL_GIGA_MAC_VER_18, 0xff7e1880), // PCI-E
+	_R("RTL8168c/8111c",	RTL_GIGA_MAC_VER_19, 0xff7e1880), // PCI-E
+	_R("RTL8168c/8111c",	RTL_GIGA_MAC_VER_20, 0xff7e1880)  // PCI-E
 };
 #undef _R
 
@@ -165,7 +167,7 @@ static struct pci_device_id rtl8169_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8168), 0, 0, RTL_CFG_1 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8169), 0, 0, RTL_CFG_0 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_DLINK,	0x4300), 0, 0, RTL_CFG_0 },
-	{ PCI_DEVICE(0x1259,			0xc107), 0, 0, RTL_CFG_0 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AT,		0xc107), 0, 0, RTL_CFG_0 },
 	{ PCI_DEVICE(0x16ec,			0x0116), 0, 0, RTL_CFG_0 },
 	{ PCI_VENDOR_ID_LINKSYS,		0x1032,
 		PCI_ANY_ID, 0x0024, 0, 0, RTL_CFG_0 },
@@ -277,6 +279,7 @@ enum rtl_register_content {
 	TxDMAShift = 8,	/* DMA burst value (0-7) is shift this many bits */
 
 	/* Config1 register p.24 */
+	MSIEnable	= (1 << 5),	/* Enable Message Signaled Interrupt */
 	PMEnable	= (1 << 0),	/* Power Management Enable */
 
 	/* Config2 register p. 25 */
@@ -380,17 +383,20 @@ struct ring_info {
 	u8		__pad[sizeof(void *) - sizeof(u32)];
 };
 
+enum features {
+	RTL_FEATURE_WOL	= (1 << 0),
+	RTL_FEATURE_MSI	= (1 << 1),
+};
+
 struct rtl8169_private {
 	void __iomem *mmio_addr;	/* memory map physical address */
 	struct pci_dev *pci_dev;	/* Index of PCI device */
 	struct net_device *dev;
 	struct napi_struct napi;
-	struct net_device_stats stats;	/* statistics of net device */
 	spinlock_t lock;		/* spin lock flag */
 	u32 msg_enable;
 	int chipset;
 	int mac_version;
-	int phy_version;
 	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
 	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
 	u32 dirty_rx;
@@ -420,7 +426,7 @@ struct rtl8169_private {
 	unsigned int (*phy_reset_pending)(void __iomem *);
 	unsigned int (*link_ok)(void __iomem *);
 	struct delayed_work task;
-	unsigned wol_enabled : 1;
+	unsigned features;
 };
 
 MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@vger.kernel.org>");
@@ -626,7 +632,10 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
 	RTL_W8(Cfg9346, Cfg9346_Lock);
 
-	tp->wol_enabled = (wol->wolopts) ? 1 : 0;
+	if (wol->wolopts)
+		tp->features |= RTL_FEATURE_WOL;
+	else
+		tp->features &= ~RTL_FEATURE_WOL;
 
 	spin_unlock_irq(&tp->lock);
 
@@ -707,7 +716,8 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
 
 		/* This tweak comes straight from Realtek's driver. */
 		if ((speed == SPEED_100) && (duplex == DUPLEX_HALF) &&
-		    (tp->mac_version == RTL_GIGA_MAC_VER_13)) {
+		    ((tp->mac_version == RTL_GIGA_MAC_VER_13) ||
+		     (tp->mac_version == RTL_GIGA_MAC_VER_16))) {
 			auto_nego = ADVERTISE_100HALF | ADVERTISE_CSMA;
 		}
 	}
@@ -715,7 +725,8 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
 	/* The 8100e/8101e do Fast Ethernet only. */
 	if ((tp->mac_version == RTL_GIGA_MAC_VER_13) ||
 	    (tp->mac_version == RTL_GIGA_MAC_VER_14) ||
-	    (tp->mac_version == RTL_GIGA_MAC_VER_15)) {
+	    (tp->mac_version == RTL_GIGA_MAC_VER_15) ||
+	    (tp->mac_version == RTL_GIGA_MAC_VER_16)) {
 		if ((giga_ctrl & (ADVERTISE_1000FULL | ADVERTISE_1000HALF)) &&
 		    netif_msg_link(tp)) {
 			printk(KERN_INFO "%s: PHY does not support 1000Mbps.\n",
@@ -726,7 +737,8 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
 
 	auto_nego |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
 
-	if (tp->mac_version == RTL_GIGA_MAC_VER_12) {
+	if ((tp->mac_version == RTL_GIGA_MAC_VER_12) ||
+	    (tp->mac_version == RTL_GIGA_MAC_VER_17)) {
 		/* Vendor specific (0x1f) and reserved (0x0e) MII registers. */
 		mdio_write(ioaddr, 0x1f, 0x0000);
 		mdio_write(ioaddr, 0x0e, 0x0000);
@@ -1104,26 +1116,51 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
 	 */
 	const struct {
 		u32 mask;
+		u32 val;
 		int mac_version;
 	} mac_info[] = {
-		{ 0x38800000,	RTL_GIGA_MAC_VER_15 },
-		{ 0x38000000,	RTL_GIGA_MAC_VER_12 },
-		{ 0x34000000,	RTL_GIGA_MAC_VER_13 },
-		{ 0x30800000,	RTL_GIGA_MAC_VER_14 },
-		{ 0x30000000,	RTL_GIGA_MAC_VER_11 },
-		{ 0x98000000,	RTL_GIGA_MAC_VER_06 },
-		{ 0x18000000,	RTL_GIGA_MAC_VER_05 },
-		{ 0x10000000,	RTL_GIGA_MAC_VER_04 },
-		{ 0x04000000,	RTL_GIGA_MAC_VER_03 },
-		{ 0x00800000,	RTL_GIGA_MAC_VER_02 },
-		{ 0x00000000,	RTL_GIGA_MAC_VER_01 }	/* Catch-all */
+		/* 8168B family. */
+		{ 0x7c800000, 0x3c800000,	RTL_GIGA_MAC_VER_18 },
+		{ 0x7cf00000, 0x3c000000,	RTL_GIGA_MAC_VER_19 },
+		{ 0x7cf00000, 0x3c200000,	RTL_GIGA_MAC_VER_20 },
+		{ 0x7c800000, 0x3c000000,	RTL_GIGA_MAC_VER_20 },
+
+		/* 8168B family. */
+		{ 0x7cf00000, 0x38000000,	RTL_GIGA_MAC_VER_12 },
+		{ 0x7cf00000, 0x38500000,	RTL_GIGA_MAC_VER_17 },
+		{ 0x7c800000, 0x38000000,	RTL_GIGA_MAC_VER_17 },
+		{ 0x7c800000, 0x30000000,	RTL_GIGA_MAC_VER_11 },
+
+		/* 8101 family. */
+		{ 0x7cf00000, 0x34000000,	RTL_GIGA_MAC_VER_13 },
+		{ 0x7cf00000, 0x34200000,	RTL_GIGA_MAC_VER_16 },
+		{ 0x7c800000, 0x34000000,	RTL_GIGA_MAC_VER_16 },
+		/* FIXME: where did these entries come from ? -- FR */
+		{ 0xfc800000, 0x38800000,	RTL_GIGA_MAC_VER_15 },
+		{ 0xfc800000, 0x30800000,	RTL_GIGA_MAC_VER_14 },
+
+		/* 8110 family. */
+		{ 0xfc800000, 0x98000000,	RTL_GIGA_MAC_VER_06 },
+		{ 0xfc800000, 0x18000000,	RTL_GIGA_MAC_VER_05 },
+		{ 0xfc800000, 0x10000000,	RTL_GIGA_MAC_VER_04 },
+		{ 0xfc800000, 0x04000000,	RTL_GIGA_MAC_VER_03 },
+		{ 0xfc800000, 0x00800000,	RTL_GIGA_MAC_VER_02 },
+		{ 0xfc800000, 0x00000000,	RTL_GIGA_MAC_VER_01 },
+
+		{ 0x00000000, 0x00000000,	RTL_GIGA_MAC_VER_01 }	/* Catch-all */
 	}, *p = mac_info;
 	u32 reg;
 
-	reg = RTL_R32(TxConfig) & 0xfc800000;
-	while ((reg & p->mask) != p->mask)
+	reg = RTL_R32(TxConfig);
+	while ((reg & p->mask) != p->val)
 		p++;
 	tp->mac_version = p->mac_version;
+
+	if (p->mask == 0x00000000) {
+		struct pci_dev *pdev = tp->pci_dev;
+
+		dev_info(&pdev->dev, "unknown MAC (%08x)\n", reg);
+	}
 }
 
 static void rtl8169_print_mac_version(struct rtl8169_private *tp)
@@ -1131,54 +1168,21 @@ static void rtl8169_print_mac_version(struct rtl8169_private *tp)
 	dprintk("mac_version = 0x%02x\n", tp->mac_version);
 }
 
-static void rtl8169_get_phy_version(struct rtl8169_private *tp,
-				    void __iomem *ioaddr)
-{
-	const struct {
-		u16 mask;
-		u16 set;
-		int phy_version;
-	} phy_info[] = {
-		{ 0x000f, 0x0002, RTL_GIGA_PHY_VER_G },
-		{ 0x000f, 0x0001, RTL_GIGA_PHY_VER_F },
-		{ 0x000f, 0x0000, RTL_GIGA_PHY_VER_E },
-		{ 0x0000, 0x0000, RTL_GIGA_PHY_VER_D } /* Catch-all */
-	}, *p = phy_info;
+struct phy_reg {
 	u16 reg;
+	u16 val;
+};
 
-	reg = mdio_read(ioaddr, MII_PHYSID2) & 0xffff;
-	while ((reg & p->mask) != p->set)
-		p++;
-	tp->phy_version = p->phy_version;
-}
-
-static void rtl8169_print_phy_version(struct rtl8169_private *tp)
+static void rtl_phy_write(void __iomem *ioaddr, struct phy_reg *regs, int len)
 {
-	struct {
-		int version;
-		char *msg;
-		u32 reg;
-	} phy_print[] = {
-		{ RTL_GIGA_PHY_VER_G, "RTL_GIGA_PHY_VER_G", 0x0002 },
-		{ RTL_GIGA_PHY_VER_F, "RTL_GIGA_PHY_VER_F", 0x0001 },
-		{ RTL_GIGA_PHY_VER_E, "RTL_GIGA_PHY_VER_E", 0x0000 },
-		{ RTL_GIGA_PHY_VER_D, "RTL_GIGA_PHY_VER_D", 0x0000 },
-		{ 0, NULL, 0x0000 }
-	}, *p;
-
-	for (p = phy_print; p->msg; p++) {
-		if (tp->phy_version == p->version) {
-			dprintk("phy_version == %s (%04x)\n", p->msg, p->reg);
-			return;
-		}
+	while (len-- > 0) {
+		mdio_write(ioaddr, regs->reg, regs->val);
+		regs++;
 	}
-	dprintk("phy_version == Unknown\n");
 }
 
-static void rtl8169_hw_phy_config(struct net_device *dev)
+static void rtl8169s_hw_phy_config(void __iomem *ioaddr)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct {
 		u16 regs[5]; /* Beware of bit-sign propagation */
 	} phy_magic[5] = { {
@@ -1211,33 +1215,9 @@ static void rtl8169_hw_phy_config(struct net_device *dev)
 	}, *p = phy_magic;
 	unsigned int i;
 
-	rtl8169_print_mac_version(tp);
-	rtl8169_print_phy_version(tp);
-
-	if (tp->mac_version <= RTL_GIGA_MAC_VER_01)
-		return;
-	if (tp->phy_version >= RTL_GIGA_PHY_VER_H)
-		return;
-
-	dprintk("MAC version != 0 && PHY version == 0 or 1\n");
-	dprintk("Do final_reg2.cfg\n");
-
-	/* Shazam ! */
-
-	if (tp->mac_version == RTL_GIGA_MAC_VER_04) {
-		mdio_write(ioaddr, 31, 0x0002);
-		mdio_write(ioaddr,  1, 0x90d0);
-		mdio_write(ioaddr, 31, 0x0000);
-		return;
-	}
-
-	if ((tp->mac_version != RTL_GIGA_MAC_VER_02) &&
-	    (tp->mac_version != RTL_GIGA_MAC_VER_03))
-		return;
-
-	mdio_write(ioaddr, 31, 0x0001);			//w 31 2 0 1
-	mdio_write(ioaddr, 21, 0x1000);			//w 21 15 0 1000
-	mdio_write(ioaddr, 24, 0x65c7);			//w 24 15 0 65c7
+	mdio_write(ioaddr, 0x1f, 0x0001);		//w 31 2 0 1
+	mdio_write(ioaddr, 0x15, 0x1000);		//w 21 15 0 1000
+	mdio_write(ioaddr, 0x18, 0x65c7);		//w 24 15 0 65c7
 	rtl8169_write_gmii_reg_bit(ioaddr, 4, 11, 0);	//w 4 11 11 0
 
 	for (i = 0; i < ARRAY_SIZE(phy_magic); i++, p++) {
@@ -1250,7 +1230,115 @@ static void rtl8169_hw_phy_config(struct net_device *dev)
 		rtl8169_write_gmii_reg_bit(ioaddr, 4, 11, 1);	//w 4 11 11 1
 		rtl8169_write_gmii_reg_bit(ioaddr, 4, 11, 0);	//w 4 11 11 0
 	}
-	mdio_write(ioaddr, 31, 0x0000); //w 31 2 0 0
+	mdio_write(ioaddr, 0x1f, 0x0000); //w 31 2 0 0
+}
+
+static void rtl8169sb_hw_phy_config(void __iomem *ioaddr)
+{
+	struct phy_reg phy_reg_init[] = {
+		{ 0x1f, 0x0002 },
+		{ 0x01, 0x90d0 },
+		{ 0x1f, 0x0000 }
+	};
+
+	rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+static void rtl8168b_hw_phy_config(void __iomem *ioaddr)
+{
+	struct phy_reg phy_reg_init[] = {
+		{ 0x1f, 0x0000 },
+		{ 0x10, 0xf41b },
+		{ 0x1f, 0x0000 }
+	};
+
+	rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+
+static void rtl8168cp_hw_phy_config(void __iomem *ioaddr)
+{
+	struct phy_reg phy_reg_init[] = {
+		{ 0x1f, 0x0000 },
+		{ 0x1d, 0x0f00 },
+		{ 0x1f, 0x0002 },
+		{ 0x0c, 0x1ec8 },
+		{ 0x1f, 0x0000 }
+	};
+
+	rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+
+static void rtl8168c_hw_phy_config(void __iomem *ioaddr)
+{
+	struct phy_reg phy_reg_init[] = {
+		{ 0x1f, 0x0001 },
+		{ 0x12, 0x2300 },
+		{ 0x1f, 0x0002 },
+		{ 0x00, 0x88d4 },
+		{ 0x01, 0x82b1 },
+		{ 0x03, 0x7002 },
+		{ 0x08, 0x9e30 },
+		{ 0x09, 0x01f0 },
+		{ 0x0a, 0x5500 },
+		{ 0x0c, 0x00c8 },
+		{ 0x1f, 0x0003 },
+		{ 0x12, 0xc096 },
+		{ 0x16, 0x000a },
+		{ 0x1f, 0x0000 }
+	};
+
+	rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+
+static void rtl8168cx_hw_phy_config(void __iomem *ioaddr)
+{
+	struct phy_reg phy_reg_init[] = {
+		{ 0x1f, 0x0000 },
+		{ 0x12, 0x2300 },
+		{ 0x1f, 0x0003 },
+		{ 0x16, 0x0f0a },
+		{ 0x1f, 0x0000 },
+		{ 0x1f, 0x0002 },
+		{ 0x0c, 0x7eb8 },
+		{ 0x1f, 0x0000 }
+	};
+
+	rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+
+static void rtl_hw_phy_config(struct net_device *dev)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	rtl8169_print_mac_version(tp);
+
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_01:
+		break;
+	case RTL_GIGA_MAC_VER_02:
+	case RTL_GIGA_MAC_VER_03:
+		rtl8169s_hw_phy_config(ioaddr);
+		break;
+	case RTL_GIGA_MAC_VER_04:
+		rtl8169sb_hw_phy_config(ioaddr);
+		break;
+	case RTL_GIGA_MAC_VER_11:
+	case RTL_GIGA_MAC_VER_12:
+	case RTL_GIGA_MAC_VER_17:
+		rtl8168b_hw_phy_config(ioaddr);
+		break;
+	case RTL_GIGA_MAC_VER_18:
+		rtl8168cp_hw_phy_config(ioaddr);
+		break;
+	case RTL_GIGA_MAC_VER_19:
+		rtl8168c_hw_phy_config(ioaddr);
+		break;
+	case RTL_GIGA_MAC_VER_20:
+		rtl8168cx_hw_phy_config(ioaddr);
+		break;
+	default:
+		break;
+	}
 }
 
 static void rtl8169_phy_timer(unsigned long __opaque)
@@ -1262,7 +1350,6 @@ static void rtl8169_phy_timer(unsigned long __opaque)
 	unsigned long timeout = RTL8169_PHY_TIMEOUT;
 
 	assert(tp->mac_version > RTL_GIGA_MAC_VER_01);
-	assert(tp->phy_version < RTL_GIGA_PHY_VER_H);
 
 	if (!(tp->phy_1000_ctrl_reg & ADVERTISE_1000FULL))
 		return;
@@ -1297,8 +1384,7 @@ static inline void rtl8169_delete_timer(struct net_device *dev)
 	struct rtl8169_private *tp = netdev_priv(dev);
 	struct timer_list *timer = &tp->timer;
 
-	if ((tp->mac_version <= RTL_GIGA_MAC_VER_01) ||
-	    (tp->phy_version >= RTL_GIGA_PHY_VER_H))
+	if (tp->mac_version <= RTL_GIGA_MAC_VER_01)
 		return;
 
 	del_timer_sync(timer);
@@ -1309,8 +1395,7 @@ static inline void rtl8169_request_timer(struct net_device *dev)
 	struct rtl8169_private *tp = netdev_priv(dev);
 	struct timer_list *timer = &tp->timer;
 
-	if ((tp->mac_version <= RTL_GIGA_MAC_VER_01) ||
-	    (tp->phy_version >= RTL_GIGA_PHY_VER_H))
+	if (tp->mac_version <= RTL_GIGA_MAC_VER_01)
 		return;
 
 	mod_timer(timer, jiffies + RTL8169_PHY_TIMEOUT);
@@ -1362,7 +1447,7 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
 
-	rtl8169_hw_phy_config(dev);
+	rtl_hw_phy_config(dev);
 
 	dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
 	RTL_W8(0x82, 0x01);
@@ -1457,6 +1542,7 @@ static const struct rtl_cfg_info {
 	unsigned int align;
 	u16 intr_event;
 	u16 napi_event;
+	unsigned msi;
 } rtl_cfg_infos [] = {
 	[RTL_CFG_0] = {
 		.hw_start = rtl_hw_start_8169,
@@ -1464,7 +1550,8 @@ static const struct rtl_cfg_info {
 		.align = 0,
 		.intr_event = SYSErr | LinkChg | RxOverflow |
 			      RxFIFOOver | TxErr | TxOK | RxOK | RxErr,
-		.napi_event = RxFIFOOver | TxErr | TxOK | RxOK | RxOverflow
+		.napi_event = RxFIFOOver | TxErr | TxOK | RxOK | RxOverflow,
+		.msi = 0
 	},
 	[RTL_CFG_1] = {
 		.hw_start = rtl_hw_start_8168,
@@ -1472,7 +1559,8 @@ static const struct rtl_cfg_info {
 		.align = 8,
 		.intr_event = SYSErr | LinkChg | RxOverflow |
 			      TxErr | TxOK | RxOK | RxErr,
-		.napi_event = TxErr | TxOK | RxOK | RxOverflow
+		.napi_event = TxErr | TxOK | RxOK | RxOverflow,
+		.msi = RTL_FEATURE_MSI
 	},
 	[RTL_CFG_2] = {
 		.hw_start = rtl_hw_start_8101,
@@ -1480,10 +1568,39 @@ static const struct rtl_cfg_info {
 		.align = 8,
 		.intr_event = SYSErr | LinkChg | RxOverflow | PCSTimeout |
 			      RxFIFOOver | TxErr | TxOK | RxOK | RxErr,
-		.napi_event = RxFIFOOver | TxErr | TxOK | RxOK | RxOverflow
+		.napi_event = RxFIFOOver | TxErr | TxOK | RxOK | RxOverflow,
+		.msi = RTL_FEATURE_MSI
 	}
 };
 
+/* Cfg9346_Unlock assumed. */
+static unsigned rtl_try_msi(struct pci_dev *pdev, void __iomem *ioaddr,
+			    const struct rtl_cfg_info *cfg)
+{
+	unsigned msi = 0;
+	u8 cfg2;
+
+	cfg2 = RTL_R8(Config2) & ~MSIEnable;
+	if (cfg->msi) {
+		if (pci_enable_msi(pdev)) {
+			dev_info(&pdev->dev, "no MSI. Back to INTx.\n");
+		} else {
+			cfg2 |= MSIEnable;
+			msi = RTL_FEATURE_MSI;
+		}
+	}
+	RTL_W8(Config2, cfg2);
+	return msi;
+}
+
+static void rtl_disable_msi(struct pci_dev *pdev, struct rtl8169_private *tp)
+{
+	if (tp->features & RTL_FEATURE_MSI) {
+		pci_disable_msi(pdev);
+		tp->features &= ~RTL_FEATURE_MSI;
+	}
+}
+
 static int __devinit
 rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
@@ -1596,10 +1713,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* Identify chip attached to board */
 	rtl8169_get_mac_version(tp, ioaddr);
-	rtl8169_get_phy_version(tp, ioaddr);
 
 	rtl8169_print_mac_version(tp);
-	rtl8169_print_phy_version(tp);
 
 	for (i = ARRAY_SIZE(rtl_chip_info) - 1; i >= 0; i--) {
 		if (tp->mac_version == rtl_chip_info[i].mac_version)
@@ -1619,6 +1734,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	RTL_W8(Cfg9346, Cfg9346_Unlock);
 	RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
 	RTL_W8(Config5, RTL_R8(Config5) & PMEStatus);
+	tp->features |= rtl_try_msi(pdev, ioaddr, cfg);
 	RTL_W8(Cfg9346, Cfg9346_Lock);
 
 	if (RTL_R8(PHYstatus) & TBI_Enable) {
@@ -1686,7 +1802,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	rc = register_netdev(dev);
 	if (rc < 0)
-		goto err_out_unmap_5;
+		goto err_out_msi_5;
 
 	pci_set_drvdata(pdev, dev);
 
@@ -1709,7 +1825,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 out:
 	return rc;
 
-err_out_unmap_5:
+err_out_msi_5:
+	rtl_disable_msi(pdev, tp);
 	iounmap(ioaddr);
 err_out_free_res_4:
 	pci_release_regions(pdev);
@@ -1730,6 +1847,7 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
 	flush_scheduled_work();
 
 	unregister_netdev(dev);
+	rtl_disable_msi(pdev, tp);
 	rtl8169_release_board(pdev, dev, tp->mmio_addr);
 	pci_set_drvdata(pdev, NULL);
 }
@@ -1773,7 +1891,8 @@ static int rtl8169_open(struct net_device *dev)
 
 	smp_mb();
 
-	retval = request_irq(dev->irq, rtl8169_interrupt, IRQF_SHARED,
+	retval = request_irq(dev->irq, rtl8169_interrupt,
+			     (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
 			     dev->name, dev);
 	if (retval < 0)
 		goto err_release_ring_2;
@@ -1933,7 +2052,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
 
 	if ((tp->mac_version == RTL_GIGA_MAC_VER_02) ||
 	    (tp->mac_version == RTL_GIGA_MAC_VER_03)) {
-		dprintk(KERN_INFO PFX "Set MAC Reg C+CR Offset 0xE0. "
+		dprintk("Set MAC Reg C+CR Offset 0xE0. "
 			"Bit-3 and bit-14 MUST be 1\n");
 		tp->cp_cmd |= (1 << 14);
 	}
@@ -2029,7 +2148,8 @@ static void rtl_hw_start_8101(struct net_device *dev)
 	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
-	if (tp->mac_version == RTL_GIGA_MAC_VER_13) {
+	if ((tp->mac_version == RTL_GIGA_MAC_VER_13) ||
+	    (tp->mac_version == RTL_GIGA_MAC_VER_16)) {
 		pci_write_config_word(pdev, 0x68, 0x00);
 		pci_write_config_word(pdev, 0x69, 0x08);
 	}
@@ -2259,7 +2379,7 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
 				dev_kfree_skb(skb);
 				tx_skb->skb = NULL;
 			}
-			tp->stats.tx_dropped++;
+			tp->dev->stats.tx_dropped++;
 		}
 	}
 	tp->cur_tx = tp->dirty_tx = 0;
@@ -2310,7 +2430,7 @@ static void rtl8169_reinit_task(struct work_struct *work)
 	ret = rtl8169_open(dev);
 	if (unlikely(ret < 0)) {
 		if (net_ratelimit() && netif_msg_drv(tp)) {
-			printk(PFX KERN_ERR "%s: reinit failure (status = %d)."
+			printk(KERN_ERR PFX "%s: reinit failure (status = %d)."
 			       " Rescheduling.\n", dev->name, ret);
 		}
 		rtl8169_schedule_work(dev, rtl8169_reinit_task);
@@ -2340,9 +2460,10 @@ static void rtl8169_reset_task(struct work_struct *work)
 		rtl8169_init_ring_indexes(tp);
 		rtl_hw_start(dev);
 		netif_wake_queue(dev);
+		rtl8169_check_link_status(dev, tp, tp->mmio_addr);
 	} else {
 		if (net_ratelimit() && netif_msg_intr(tp)) {
-			printk(PFX KERN_EMERG "%s: Rx buffers shortage\n",
+			printk(KERN_EMERG PFX "%s: Rx buffers shortage\n",
 			       dev->name);
 		}
 		rtl8169_schedule_work(dev, rtl8169_reset_task);
@@ -2496,7 +2617,7 @@ err_stop:
 	netif_stop_queue(dev);
 	ret = NETDEV_TX_BUSY;
 err_update_stats:
-	tp->stats.tx_dropped++;
+	dev->stats.tx_dropped++;
 	goto out;
 }
 
@@ -2571,8 +2692,8 @@ static void rtl8169_tx_interrupt(struct net_device *dev,
 		if (status & DescOwn)
 			break;
 
-		tp->stats.tx_bytes += len;
-		tp->stats.tx_packets++;
+		dev->stats.tx_bytes += len;
+		dev->stats.tx_packets++;
 
 		rtl8169_unmap_tx_skb(tp->pci_dev, tx_skb, tp->TxDescArray + entry);
 
@@ -2672,14 +2793,14 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 					"%s: Rx ERROR. status = %08x\n",
 					dev->name, status);
 			}
-			tp->stats.rx_errors++;
+			dev->stats.rx_errors++;
 			if (status & (RxRWT | RxRUNT))
-				tp->stats.rx_length_errors++;
+				dev->stats.rx_length_errors++;
 			if (status & RxCRC)
-				tp->stats.rx_crc_errors++;
+				dev->stats.rx_crc_errors++;
 			if (status & RxFOVF) {
 				rtl8169_schedule_work(dev, rtl8169_reset_task);
-				tp->stats.rx_fifo_errors++;
+				dev->stats.rx_fifo_errors++;
 			}
 			rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
 		} else {
@@ -2694,8 +2815,8 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 			 * sized frames.
 			 */
 			if (unlikely(rtl8169_fragmented_frame(status))) {
-				tp->stats.rx_dropped++;
-				tp->stats.rx_length_errors++;
+				dev->stats.rx_dropped++;
+				dev->stats.rx_length_errors++;
 				rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
 				continue;
 			}
@@ -2719,8 +2840,8 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 				rtl8169_rx_skb(skb);
 
 			dev->last_rx = jiffies;
-			tp->stats.rx_bytes += pkt_size;
-			tp->stats.rx_packets++;
+			dev->stats.rx_bytes += pkt_size;
+			dev->stats.rx_packets++;
 		}
 
 		/* Work around for AMD plateform. */
@@ -2881,7 +3002,7 @@ core_down:
 	rtl8169_asic_down(ioaddr);
 
 	/* Update the error counts. */
-	tp->stats.rx_missed_errors += RTL_R32(RxMissed);
+	dev->stats.rx_missed_errors += RTL_R32(RxMissed);
 	RTL_W32(RxMissed, 0);
 
 	spin_unlock_irq(&tp->lock);
@@ -2984,7 +3105,9 @@ static void rtl_set_rx_mode(struct net_device *dev)
 	    (tp->mac_version == RTL_GIGA_MAC_VER_12) ||
 	    (tp->mac_version == RTL_GIGA_MAC_VER_13) ||
 	    (tp->mac_version == RTL_GIGA_MAC_VER_14) ||
-	    (tp->mac_version == RTL_GIGA_MAC_VER_15)) {
+	    (tp->mac_version == RTL_GIGA_MAC_VER_15) ||
+	    (tp->mac_version == RTL_GIGA_MAC_VER_16) ||
+	    (tp->mac_version == RTL_GIGA_MAC_VER_17)) {
 		mc_filter[0] = 0xffffffff;
 		mc_filter[1] = 0xffffffff;
 	}
@@ -3011,12 +3134,12 @@ static struct net_device_stats *rtl8169_get_stats(struct net_device *dev)
 
 	if (netif_running(dev)) {
 		spin_lock_irqsave(&tp->lock, flags);
-		tp->stats.rx_missed_errors += RTL_R32(RxMissed);
+		dev->stats.rx_missed_errors += RTL_R32(RxMissed);
 		RTL_W32(RxMissed, 0);
 		spin_unlock_irqrestore(&tp->lock, flags);
 	}
 
-	return &tp->stats;
+	return &dev->stats;
 }
 
 #ifdef CONFIG_PM
@@ -3037,14 +3160,15 @@ static int rtl8169_suspend(struct pci_dev *pdev, pm_message_t state)
 
 	rtl8169_asic_down(ioaddr);
 
-	tp->stats.rx_missed_errors += RTL_R32(RxMissed);
+	dev->stats.rx_missed_errors += RTL_R32(RxMissed);
 	RTL_W32(RxMissed, 0);
 
 	spin_unlock_irq(&tp->lock);
 
 out_pci_suspend:
 	pci_save_state(pdev);
-	pci_enable_wake(pdev, pci_choose_state(pdev, state), tp->wol_enabled);
+	pci_enable_wake(pdev, pci_choose_state(pdev, state),
+		(tp->features & RTL_FEATURE_WOL) ? 1 : 0);
 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
 
 	return 0;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 014dc2cfe4..09440d783e 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -64,8 +64,8 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"3.84"
-#define DRV_MODULE_RELDATE	"October 12, 2007"
+#define DRV_MODULE_VERSION	"3.85"
+#define DRV_MODULE_RELDATE	"October 18, 2007"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
@@ -200,6 +200,7 @@ static struct pci_device_id tg3_pci_tbl[] = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5906M)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5784)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5764)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5723)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5761)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5761E)},
 	{PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)},
@@ -5028,10 +5029,7 @@ static int tg3_poll_fw(struct tg3 *tp)
 /* Save PCI command register before chip reset */
 static void tg3_save_pci_state(struct tg3 *tp)
 {
-	u32 val;
-
-	pci_read_config_dword(tp->pdev, TG3PCI_COMMAND, &val);
-	tp->pci_cmd = val;
+	pci_read_config_word(tp->pdev, PCI_COMMAND, &tp->pci_cmd);
 }
 
 /* Restore PCI state after chip reset */
@@ -5054,7 +5052,7 @@ static void tg3_restore_pci_state(struct tg3 *tp)
5054 PCISTATE_ALLOW_APE_SHMEM_WR; 5052 PCISTATE_ALLOW_APE_SHMEM_WR;
5055 pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, val); 5053 pci_write_config_dword(tp->pdev, TG3PCI_PCISTATE, val);
5056 5054
5057 pci_write_config_dword(tp->pdev, TG3PCI_COMMAND, tp->pci_cmd); 5055 pci_write_config_word(tp->pdev, PCI_COMMAND, tp->pci_cmd);
5058 5056
5059 if (!(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) { 5057 if (!(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) {
5060 pci_write_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, 5058 pci_write_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE,
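
TG3PCI_COMMAND aliases the standard command register at config offset 4, so the old 32-bit access presumably read back, and later rewrote, the status word sharing that dword as well; the fix narrows the save/restore to the 16-bit command register proper. The pairing, as a sketch assuming kernel context:

u16 cmd;

pci_read_config_word(pdev, PCI_COMMAND, &cmd);	/* save before chip reset */
/* ... reset the chip ... */
pci_write_config_word(pdev, PCI_COMMAND, cmd);	/* restore only the command bits */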
@@ -10820,9 +10818,24 @@ out_not_found:
10820 strcpy(tp->board_part_number, "none"); 10818 strcpy(tp->board_part_number, "none");
10821} 10819}
10822 10820
10821static int __devinit tg3_fw_img_is_valid(struct tg3 *tp, u32 offset)
10822{
10823 u32 val;
10824
10825 if (tg3_nvram_read_swab(tp, offset, &val) ||
10826 (val & 0xfc000000) != 0x0c000000 ||
10827 tg3_nvram_read_swab(tp, offset + 4, &val) ||
10828 val != 0)
10829 return 0;
10830
10831 return 1;
10832}
10833
10823static void __devinit tg3_read_fw_ver(struct tg3 *tp) 10834static void __devinit tg3_read_fw_ver(struct tg3 *tp)
10824{ 10835{
10825 u32 val, offset, start; 10836 u32 val, offset, start;
10837 u32 ver_offset;
10838 int i, bcnt;
10826 10839
10827 if (tg3_nvram_read_swab(tp, 0, &val)) 10840 if (tg3_nvram_read_swab(tp, 0, &val))
10828 return; 10841 return;
@@ -10835,29 +10848,71 @@ static void __devinit tg3_read_fw_ver(struct tg3 *tp)
10835 return; 10848 return;
10836 10849
10837 offset = tg3_nvram_logical_addr(tp, offset); 10850 offset = tg3_nvram_logical_addr(tp, offset);
10838 if (tg3_nvram_read_swab(tp, offset, &val)) 10851
10852 if (!tg3_fw_img_is_valid(tp, offset) ||
10853 tg3_nvram_read_swab(tp, offset + 8, &ver_offset))
10839 return; 10854 return;
10840 10855
10841 if ((val & 0xfc000000) == 0x0c000000) { 10856 offset = offset + ver_offset - start;
10842 u32 ver_offset, addr; 10857 for (i = 0; i < 16; i += 4) {
10843 int i; 10858 if (tg3_nvram_read(tp, offset + i, &val))
10859 return;
10844 10860
10845 if (tg3_nvram_read_swab(tp, offset + 4, &val) || 10861 val = le32_to_cpu(val);
10846 tg3_nvram_read_swab(tp, offset + 8, &ver_offset)) 10862 memcpy(tp->fw_ver + i, &val, 4);
10863 }
10864
10865 if (!(tp->tg3_flags & TG3_FLAG_ENABLE_ASF) ||
10866 (tp->tg3_flags & TG3_FLG3_ENABLE_APE))
10867 return;
10868
10869 for (offset = TG3_NVM_DIR_START;
10870 offset < TG3_NVM_DIR_END;
10871 offset += TG3_NVM_DIRENT_SIZE) {
10872 if (tg3_nvram_read_swab(tp, offset, &val))
10847 return; 10873 return;
10848 10874
10849 if (val != 0) 10875 if ((val >> TG3_NVM_DIRTYPE_SHIFT) == TG3_NVM_DIRTYPE_ASFINI)
10876 break;
10877 }
10878
10879 if (offset == TG3_NVM_DIR_END)
10880 return;
10881
10882 if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS))
10883 start = 0x08000000;
10884 else if (tg3_nvram_read_swab(tp, offset - 4, &start))
10885 return;
10886
10887 if (tg3_nvram_read_swab(tp, offset + 4, &offset) ||
10888 !tg3_fw_img_is_valid(tp, offset) ||
10889 tg3_nvram_read_swab(tp, offset + 8, &val))
10890 return;
10891
10892 offset += val - start;
10893
10894 bcnt = strlen(tp->fw_ver);
10895
10896 tp->fw_ver[bcnt++] = ',';
10897 tp->fw_ver[bcnt++] = ' ';
10898
10899 for (i = 0; i < 4; i++) {
10900 if (tg3_nvram_read(tp, offset, &val))
10850 return; 10901 return;
10851 10902
10852 addr = offset + ver_offset - start; 10903 val = le32_to_cpu(val);
10853 for (i = 0; i < 16; i += 4) { 10904 offset += sizeof(val);
10854 if (tg3_nvram_read(tp, addr + i, &val))
10855 return;
10856 10905
10857 val = cpu_to_le32(val); 10906 if (bcnt > TG3_VER_SIZE - sizeof(val)) {
10858 memcpy(tp->fw_ver + i, &val, 4); 10907 memcpy(&tp->fw_ver[bcnt], &val, TG3_VER_SIZE - bcnt);
10908 break;
10859 } 10909 }
10910
10911 memcpy(&tp->fw_ver[bcnt], &val, sizeof(val));
10912 bcnt += sizeof(val);
10860 } 10913 }
10914
10915 tp->fw_ver[TG3_VER_SIZE - 1] = 0;
10861} 10916}
10862 10917
10863static struct pci_dev * __devinit tg3_find_peer(struct tg3 *); 10918static struct pci_dev * __devinit tg3_find_peer(struct tg3 *);
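
The rewritten version-string assembly above appends 4-byte NVRAM words to tp->fw_ver and clamps at TG3_VER_SIZE so the longer ASF firmware suffix cannot overrun the buffer. A runnable userspace sketch of just that bounds logic (the sample words stand in for tg3_nvram_read() results):

#include <stdio.h>
#include <string.h>

#define TG3_VER_SIZE 32

int main(void)
{
	char fw_ver[TG3_VER_SIZE] = "5906-v1.04";	/* sample base version */
	unsigned int words[4] = {			/* stand-in NVRAM data */
		0x76302e31, 0x3061626d, 0x64656667, 0x68696a6b };
	int bcnt = strlen(fw_ver);
	unsigned int i;

	fw_ver[bcnt++] = ',';
	fw_ver[bcnt++] = ' ';
	for (i = 0; i < 4; i++) {
		unsigned int val = words[i];

		if (bcnt > TG3_VER_SIZE - (int)sizeof(val)) {
			/* copy only what still fits, then stop */
			memcpy(&fw_ver[bcnt], &val, TG3_VER_SIZE - bcnt);
			break;
		}
		memcpy(&fw_ver[bcnt], &val, sizeof(val));
		bcnt += sizeof(val);
	}
	fw_ver[TG3_VER_SIZE - 1] = 0;	/* always NUL-terminated */
	printf("%s\n", fw_ver);
	return 0;
}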
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 6dbdad2b8f..1d5b2a3dd2 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -1540,6 +1540,12 @@
1540#define TG3_EEPROM_MAGIC_HW 0xabcd 1540#define TG3_EEPROM_MAGIC_HW 0xabcd
1541#define TG3_EEPROM_MAGIC_HW_MSK 0xffff 1541#define TG3_EEPROM_MAGIC_HW_MSK 0xffff
1542 1542
1543#define TG3_NVM_DIR_START 0x18
1544#define TG3_NVM_DIR_END 0x78
1545#define TG3_NVM_DIRENT_SIZE 0xc
1546#define TG3_NVM_DIRTYPE_SHIFT 24
1547#define TG3_NVM_DIRTYPE_ASFINI 1
1548
1543/* 32K Window into NIC internal memory */ 1549/* 32K Window into NIC internal memory */
1544#define NIC_SRAM_WIN_BASE 0x00008000 1550#define NIC_SRAM_WIN_BASE 0x00008000
1545 1551
@@ -2415,10 +2421,11 @@ struct tg3 {
2415#define PHY_REV_BCM5411_X0 0x1 /* Found on Netgear GA302T */ 2421#define PHY_REV_BCM5411_X0 0x1 /* Found on Netgear GA302T */
2416 2422
2417 u32 led_ctrl; 2423 u32 led_ctrl;
2418 u32 pci_cmd; 2424 u16 pci_cmd;
2419 2425
2420 char board_part_number[24]; 2426 char board_part_number[24];
2421 char fw_ver[16]; 2427#define TG3_VER_SIZE 32
2428 char fw_ver[TG3_VER_SIZE];
2422 u32 nic_sram_data_cfg; 2429 u32 nic_sram_data_cfg;
2423 u32 pci_clock_ctrl; 2430 u32 pci_clock_ctrl;
2424 struct pci_dev *pdev_peer; 2431 struct pci_dev *pdev_peer;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
new file mode 100644
index 0000000000..e396c9d2af
--- /dev/null
+++ b/drivers/net/virtio_net.c
@@ -0,0 +1,435 @@
1/* A simple network driver using virtio.
2 *
3 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19//#define DEBUG
20#include <linux/netdevice.h>
21#include <linux/etherdevice.h>
22#include <linux/module.h>
23#include <linux/virtio.h>
24#include <linux/virtio_net.h>
25#include <linux/scatterlist.h>
26
27/* FIXME: MTU in config. */
28#define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)
29
30struct virtnet_info
31{
32 struct virtio_device *vdev;
33 struct virtqueue *rvq, *svq;
34 struct net_device *dev;
35 struct napi_struct napi;
36
37 /* Number of input buffers, and max we've ever had. */
38 unsigned int num, max;
39
40 /* Receive & send queues. */
41 struct sk_buff_head recv;
42 struct sk_buff_head send;
43};
44
45static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
46{
47 return (struct virtio_net_hdr *)skb->cb;
48}
49
50static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
51{
52 sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
53}
54
55static bool skb_xmit_done(struct virtqueue *svq)
56{
57 struct virtnet_info *vi = svq->vdev->priv;
58
59 /* In case we were waiting for output buffers. */
60 netif_wake_queue(vi->dev);
61 return true;
62}
63
64static void receive_skb(struct net_device *dev, struct sk_buff *skb,
65 unsigned len)
66{
67 struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
68
69 if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
70 pr_debug("%s: short packet %i\n", dev->name, len);
71 dev->stats.rx_length_errors++;
72 goto drop;
73 }
74 len -= sizeof(struct virtio_net_hdr);
75 BUG_ON(len > MAX_PACKET_LEN);
76
77 skb_trim(skb, len);
78 skb->protocol = eth_type_trans(skb, dev);
79 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
80 ntohs(skb->protocol), skb->len, skb->pkt_type);
81 dev->stats.rx_bytes += skb->len;
82 dev->stats.rx_packets++;
83
84 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
85 pr_debug("Needs csum!\n");
86 skb->ip_summed = CHECKSUM_PARTIAL;
87 skb->csum_start = hdr->csum_start;
88 skb->csum_offset = hdr->csum_offset;
89 if (skb->csum_start > skb->len - 2
90 || skb->csum_offset > skb->len - 2) {
91 if (net_ratelimit())
92 printk(KERN_WARNING "%s: csum=%u/%u len=%u\n",
93 dev->name, skb->csum_start,
94 skb->csum_offset, skb->len);
95 goto frame_err;
96 }
97 }
98
99 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
100 pr_debug("GSO!\n");
101 switch (hdr->gso_type) {
102 case VIRTIO_NET_HDR_GSO_TCPV4:
103 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
104 break;
105 case VIRTIO_NET_HDR_GSO_TCPV4_ECN:
106 skb_shinfo(skb)->gso_type = SKB_GSO_TCP_ECN;
107 break;
108 case VIRTIO_NET_HDR_GSO_UDP:
109 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
110 break;
111 case VIRTIO_NET_HDR_GSO_TCPV6:
112 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
113 break;
114 default:
115 if (net_ratelimit())
116 printk(KERN_WARNING "%s: bad gso type %u.\n",
117 dev->name, hdr->gso_type);
118 goto frame_err;
119 }
120
121 skb_shinfo(skb)->gso_size = hdr->gso_size;
122 if (skb_shinfo(skb)->gso_size == 0) {
123 if (net_ratelimit())
124 printk(KERN_WARNING "%s: zero gso size.\n",
125 dev->name);
126 goto frame_err;
127 }
128
129 /* Header must be checked, and gso_segs computed. */
130 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
131 skb_shinfo(skb)->gso_segs = 0;
132 }
133
134 netif_receive_skb(skb);
135 return;
136
137frame_err:
138 dev->stats.rx_frame_errors++;
139drop:
140 dev_kfree_skb(skb);
141}
142
143static void try_fill_recv(struct virtnet_info *vi)
144{
145 struct sk_buff *skb;
146 struct scatterlist sg[1+MAX_SKB_FRAGS];
147 int num, err;
148
149 for (;;) {
150 skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
151 if (unlikely(!skb))
152 break;
153
154 skb_put(skb, MAX_PACKET_LEN);
155 vnet_hdr_to_sg(sg, skb);
156 num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
157 skb_queue_head(&vi->recv, skb);
158
159 err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
160 if (err) {
161 skb_unlink(skb, &vi->recv);
162 kfree_skb(skb);
163 break;
164 }
165 vi->num++;
166 }
167 if (unlikely(vi->num > vi->max))
168 vi->max = vi->num;
169 vi->rvq->vq_ops->kick(vi->rvq);
170}
171
172static bool skb_recv_done(struct virtqueue *rvq)
173{
174 struct virtnet_info *vi = rvq->vdev->priv;
175 netif_rx_schedule(vi->dev, &vi->napi);
176 /* Suppress further interrupts. */
177 return false;
178}
179
180static int virtnet_poll(struct napi_struct *napi, int budget)
181{
182 struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
183 struct sk_buff *skb = NULL;
184 unsigned int len, received = 0;
185
186again:
187 while (received < budget &&
188 (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
189 __skb_unlink(skb, &vi->recv);
190 receive_skb(vi->dev, skb, len);
191 vi->num--;
192 received++;
193 }
194
195 /* FIXME: If we oom and completely run out of inbufs, we need
196 * to start a timer trying to fill more. */
197 if (vi->num < vi->max / 2)
198 try_fill_recv(vi);
199
200 /* All done? */
201 if (!skb) {
202 netif_rx_complete(vi->dev, napi);
203 if (unlikely(!vi->rvq->vq_ops->restart(vi->rvq))
204 && netif_rx_reschedule(vi->dev, napi))
205 goto again;
206 }
207
208 return received;
209}
210
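The poll loop above ends with the standard NAPI race closure: after draining, it completes NAPI and re-enables virtqueue callbacks with restart(); as used here, restart() returns false when buffers arrived in the unarmed window, in which case the driver reschedules itself rather than lose the notification. Schematically (a control-flow sketch, not additional driver code):

/* drain up to budget ... then: */
if (!skb) {					/* queue ran dry */
	netif_rx_complete(vi->dev, napi);	/* stop polling */
	if (!vq->vq_ops->restart(vq) &&		/* false: work slipped in */
	    netif_rx_reschedule(vi->dev, napi))
		goto again;			/* resume; no event lost */
}
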
211static void free_old_xmit_skbs(struct virtnet_info *vi)
212{
213 struct sk_buff *skb;
214 unsigned int len;
215
216 while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
217 pr_debug("Sent skb %p\n", skb);
218 __skb_unlink(skb, &vi->send);
219 vi->dev->stats.tx_bytes += len;
220 vi->dev->stats.tx_packets++;
221 kfree_skb(skb);
222 }
223}
224
225static int start_xmit(struct sk_buff *skb, struct net_device *dev)
226{
227 struct virtnet_info *vi = netdev_priv(dev);
228 int num, err;
229 struct scatterlist sg[1+MAX_SKB_FRAGS];
230 struct virtio_net_hdr *hdr;
231 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
232 DECLARE_MAC_BUF(mac);
233
234 pr_debug("%s: xmit %p %s\n", dev->name, skb, print_mac(mac, dest));
235
236 free_old_xmit_skbs(vi);
237
238 /* Encode metadata header at front. */
239 hdr = skb_vnet_hdr(skb);
240 if (skb->ip_summed == CHECKSUM_PARTIAL) {
241 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
242 hdr->csum_start = skb->csum_start - skb_headroom(skb);
243 hdr->csum_offset = skb->csum_offset;
244 } else {
245 hdr->flags = 0;
246 hdr->csum_offset = hdr->csum_start = 0;
247 }
248
249 if (skb_is_gso(skb)) {
250 hdr->gso_size = skb_shinfo(skb)->gso_size;
251 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
252 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4_ECN;
253 else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
254 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
255 else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
256 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
257 else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
258 hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
259 else
260 BUG();
261 } else {
262 hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
263 hdr->gso_size = 0;
264 }
265
266 vnet_hdr_to_sg(sg, skb);
267 num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
268 __skb_queue_head(&vi->send, skb);
269 err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
270 if (err) {
271 pr_debug("%s: virtio not prepared to send\n", dev->name);
272 skb_unlink(skb, &vi->send);
273 netif_stop_queue(dev);
274 return NETDEV_TX_BUSY;
275 }
276 vi->svq->vq_ops->kick(vi->svq);
277
278 return 0;
279}
280
281static int virtnet_open(struct net_device *dev)
282{
283 struct virtnet_info *vi = netdev_priv(dev);
284
285 try_fill_recv(vi);
286
287 /* If we didn't even get one input buffer, we're useless. */
288 if (vi->num == 0)
289 return -ENOMEM;
290
291 napi_enable(&vi->napi);
292 return 0;
293}
294
295static int virtnet_close(struct net_device *dev)
296{
297 struct virtnet_info *vi = netdev_priv(dev);
298 struct sk_buff *skb;
299
300 napi_disable(&vi->napi);
301
302 /* networking core has neutered skb_xmit_done/skb_recv_done, so don't
303 * worry about races vs. get(). */
304 vi->rvq->vq_ops->shutdown(vi->rvq);
305 while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
306 kfree_skb(skb);
307 vi->num--;
308 }
309 vi->svq->vq_ops->shutdown(vi->svq);
310 while ((skb = __skb_dequeue(&vi->send)) != NULL)
311 kfree_skb(skb);
312
313 BUG_ON(vi->num != 0);
314 return 0;
315}
316
317static int virtnet_probe(struct virtio_device *vdev)
318{
319 int err;
320 unsigned int len;
321 struct net_device *dev;
322 struct virtnet_info *vi;
323 void *token;
324
325 /* Allocate ourselves a network device with room for our info */
326 dev = alloc_etherdev(sizeof(struct virtnet_info));
327 if (!dev)
328 return -ENOMEM;
329
330 /* Set up network device as normal. */
331 ether_setup(dev);
332 dev->open = virtnet_open;
333 dev->stop = virtnet_close;
334 dev->hard_start_xmit = start_xmit;
335 dev->features = NETIF_F_HIGHDMA;
336 SET_NETDEV_DEV(dev, &vdev->dev);
337
338 /* Do we support "hardware" checksums? */
339 token = vdev->config->find(vdev, VIRTIO_CONFIG_NET_F, &len);
340 if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_NO_CSUM)) {
341 /* This opens up the world of extra features. */
342 dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
343 if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO4))
344 dev->features |= NETIF_F_TSO;
345 if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_UFO))
346 dev->features |= NETIF_F_UFO;
347 if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO4_ECN))
348 dev->features |= NETIF_F_TSO_ECN;
349 if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO6))
350 dev->features |= NETIF_F_TSO6;
351 }
352
353 /* Configuration may specify what MAC to use. Otherwise random. */
354 token = vdev->config->find(vdev, VIRTIO_CONFIG_NET_MAC_F, &len);
355 if (token) {
356 dev->addr_len = len;
357 vdev->config->get(vdev, token, dev->dev_addr, len);
358 } else
359 random_ether_addr(dev->dev_addr);
360
361 /* Set up our device-specific information */
362 vi = netdev_priv(dev);
363 netif_napi_add(dev, &vi->napi, virtnet_poll, 16);
364 vi->dev = dev;
365 vi->vdev = vdev;
366
367 /* We expect two virtqueues, receive then send. */
368 vi->rvq = vdev->config->find_vq(vdev, skb_recv_done);
369 if (IS_ERR(vi->rvq)) {
370 err = PTR_ERR(vi->rvq);
371 goto free;
372 }
373
374 vi->svq = vdev->config->find_vq(vdev, skb_xmit_done);
375 if (IS_ERR(vi->svq)) {
376 err = PTR_ERR(vi->svq);
377 goto free_recv;
378 }
379
380 /* Initialize our empty receive and send queues. */
381 skb_queue_head_init(&vi->recv);
382 skb_queue_head_init(&vi->send);
383
384 err = register_netdev(dev);
385 if (err) {
386 pr_debug("virtio_net: registering device failed\n");
387 goto free_send;
388 }
389 pr_debug("virtnet: registered device %s\n", dev->name);
390 vdev->priv = vi;
391 return 0;
392
393free_send:
394 vdev->config->del_vq(vi->svq);
395free_recv:
396 vdev->config->del_vq(vi->rvq);
397free:
398 free_netdev(dev);
399 return err;
400}
401
402static void virtnet_remove(struct virtio_device *vdev)
403{
404 unregister_netdev(vdev->priv);
405 free_netdev(vdev->priv);
406}
407
408static struct virtio_device_id id_table[] = {
409 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
410 { 0 },
411};
412
413static struct virtio_driver virtio_net = {
414 .driver.name = KBUILD_MODNAME,
415 .driver.owner = THIS_MODULE,
416 .id_table = id_table,
417 .probe = virtnet_probe,
418 .remove = __devexit_p(virtnet_remove),
419};
420
421static int __init init(void)
422{
423 return register_virtio_driver(&virtio_net);
424}
425
426static void __exit fini(void)
427{
428 unregister_virtio_driver(&virtio_net);
429}
430module_init(init);
431module_exit(fini);
432
433MODULE_DEVICE_TABLE(virtio, id_table);
434MODULE_DESCRIPTION("Virtio network driver");
435MODULE_LICENSE("GPL");
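
Everything in this file leans on one virtqueue contract, visible through the vq_ops calls: post a scatter-gather buffer with add_buf() (out entries first, then in entries), notify the host with kick(), and reclaim completed buffers with get_buf(). One receive cycle, compressed into a sketch against the same 2007-era vq_ops interface the driver uses:

struct scatterlist sg[1 + MAX_SKB_FRAGS];
unsigned int len;
int num;

vnet_hdr_to_sg(sg, skb);			/* slot 0: the virtio_net_hdr */
num = skb_to_sgvec(skb, sg + 1, 0, skb->len) + 1;
vq->vq_ops->add_buf(vq, sg, 0, num, skb);	/* 0 out, num in: device writes */
vq->vq_ops->kick(vq);				/* tell the host */
/* ... later, from skb_recv_done()/virtnet_poll() ... */
skb = vq->vq_ops->get_buf(vq, &len);		/* len = bytes the device wrote */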
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b3c4dbff26..7c60cbd85d 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -42,6 +42,7 @@
42#include <linux/reboot.h> 42#include <linux/reboot.h>
43#include <linux/proc_fs.h> 43#include <linux/proc_fs.h>
44#include <linux/seq_file.h> 44#include <linux/seq_file.h>
45#include <linux/scatterlist.h>
45 46
46#include <asm/byteorder.h> 47#include <asm/byteorder.h>
47#include <asm/cache.h> /* for L1_CACHE_BYTES */ 48#include <asm/cache.h> /* for L1_CACHE_BYTES */
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index 5b86ee5c1e..5eace9e66e 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -557,44 +557,6 @@ lba_bios_init(void)
557#ifdef CONFIG_64BIT 557#ifdef CONFIG_64BIT
558 558
559/* 559/*
560** Determine if a device is already configured.
561** If so, reserve it resources.
562**
563** Read PCI cfg command register and see if I/O or MMIO is enabled.
564** PAT has to enable the devices it's using.
565**
566** Note: resources are fixed up before we try to claim them.
567*/
568static void
569lba_claim_dev_resources(struct pci_dev *dev)
570{
571 u16 cmd;
572 int i, srch_flags;
573
574 (void) pci_read_config_word(dev, PCI_COMMAND, &cmd);
575
576 srch_flags = (cmd & PCI_COMMAND_IO) ? IORESOURCE_IO : 0;
577 if (cmd & PCI_COMMAND_MEMORY)
578 srch_flags |= IORESOURCE_MEM;
579
580 if (!srch_flags)
581 return;
582
583 for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
584 if (dev->resource[i].flags & srch_flags) {
585 pci_claim_resource(dev, i);
586 DBG(" claimed %s %d [%lx,%lx]/%lx\n",
587 pci_name(dev), i,
588 dev->resource[i].start,
589 dev->resource[i].end,
590 dev->resource[i].flags
591 );
592 }
593 }
594}
595
596
597/*
598 * truncate_pat_collision: Deal with overlaps or outright collisions 560 * truncate_pat_collision: Deal with overlaps or outright collisions
599 * between PAT PDC reported ranges. 561 * between PAT PDC reported ranges.
600 * 562 *
@@ -653,7 +615,6 @@ truncate_pat_collision(struct resource *root, struct resource *new)
653} 615}
654 616
655#else 617#else
656#define lba_claim_dev_resources(dev) do { } while (0)
657#define truncate_pat_collision(r,n) (0) 618#define truncate_pat_collision(r,n) (0)
658#endif 619#endif
659 620
@@ -684,8 +645,12 @@ lba_fixup_bus(struct pci_bus *bus)
684 ** pci_alloc_primary_bus() mangles this. 645 ** pci_alloc_primary_bus() mangles this.
685 */ 646 */
686 if (bus->self) { 647 if (bus->self) {
648 int i;
687 /* PCI-PCI Bridge */ 649 /* PCI-PCI Bridge */
688 pci_read_bridge_bases(bus); 650 pci_read_bridge_bases(bus);
651 for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
652 pci_claim_resource(bus->self, i);
653 }
689 } else { 654 } else {
690 /* Host-PCI Bridge */ 655 /* Host-PCI Bridge */
691 int err, i; 656 int err, i;
@@ -803,6 +768,9 @@ lba_fixup_bus(struct pci_bus *bus)
803 DBG("lba_fixup_bus() WTF? 0x%lx [%lx/%lx] XXX", 768 DBG("lba_fixup_bus() WTF? 0x%lx [%lx/%lx] XXX",
804 res->flags, res->start, res->end); 769 res->flags, res->start, res->end);
805 } 770 }
771 if ((i != PCI_ROM_RESOURCE) ||
772 (res->flags & IORESOURCE_ROM_ENABLE))
773 pci_claim_resource(dev, i);
806 } 774 }
807 775
808#ifdef FBB_SUPPORT 776#ifdef FBB_SUPPORT
@@ -814,11 +782,6 @@ lba_fixup_bus(struct pci_bus *bus)
814 bus->bridge_ctl &= ~(status & PCI_STATUS_FAST_BACK); 782 bus->bridge_ctl &= ~(status & PCI_STATUS_FAST_BACK);
815#endif 783#endif
816 784
817 if (is_pdc_pat()) {
818 /* Claim resources for PDC's devices */
819 lba_claim_dev_resources(dev);
820 }
821
822 /* 785 /*
823 ** P2PB's have no IRQs. ignore them. 786 ** P2PB's have no IRQs. ignore them.
824 */ 787 */
diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c
index fc4bde259d..ebb09e98d2 100644
--- a/drivers/parisc/pdc_stable.c
+++ b/drivers/parisc/pdc_stable.c
@@ -282,6 +282,7 @@ pdcspath_hwpath_write(struct pdcspath_entry *entry, const char *buf, size_t coun
282 unsigned short i; 282 unsigned short i;
283 char in[count+1], *temp; 283 char in[count+1], *temp;
284 struct device *dev; 284 struct device *dev;
285 int ret;
285 286
286 if (!entry || !buf || !count) 287 if (!entry || !buf || !count)
287 return -EINVAL; 288 return -EINVAL;
@@ -333,7 +334,9 @@ pdcspath_hwpath_write(struct pdcspath_entry *entry, const char *buf, size_t coun
333 334
334 /* Update the symlink to the real device */ 335 /* Update the symlink to the real device */
335 sysfs_remove_link(&entry->kobj, "device"); 336 sysfs_remove_link(&entry->kobj, "device");
336 sysfs_create_link(&entry->kobj, &entry->dev->kobj, "device"); 337 ret = sysfs_create_link(&entry->kobj, &entry->dev->kobj, "device");
338 WARN_ON(ret);
339
337 write_unlock(&entry->rw_lock); 340 write_unlock(&entry->rw_lock);
338 341
339 printk(KERN_INFO PDCS_PREFIX ": changed \"%s\" path to \"%s\"\n", 342 printk(KERN_INFO PDCS_PREFIX ": changed \"%s\" path to \"%s\"\n",
@@ -1003,8 +1006,10 @@ pdcs_register_pathentries(void)
1003 entry->ready = 2; 1006 entry->ready = 2;
1004 1007
1005 /* Add a nice symlink to the real device */ 1008 /* Add a nice symlink to the real device */
1006 if (entry->dev) 1009 if (entry->dev) {
1007 sysfs_create_link(&entry->kobj, &entry->dev->kobj, "device"); 1010 err = sysfs_create_link(&entry->kobj, &entry->dev->kobj, "device");
1011 WARN_ON(err);
1012 }
1008 1013
1009 write_unlock(&entry->rw_lock); 1014 write_unlock(&entry->rw_lock);
1010 } 1015 }
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index d044c48323..e527a0e1d6 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -28,6 +28,7 @@
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/string.h> 29#include <linux/string.h>
30#include <linux/pci.h> 30#include <linux/pci.h>
31#include <linux/scatterlist.h>
31 32
32#include <asm/byteorder.h> 33#include <asm/byteorder.h>
33#include <asm/io.h> 34#include <asm/io.h>
@@ -1909,8 +1910,8 @@ sba_driver_callback(struct parisc_device *dev)
1909 global_ioc_cnt *= 2; 1910 global_ioc_cnt *= 2;
1910 } 1911 }
1911 1912
1912 printk(KERN_INFO "%s found %s at 0x%lx\n", 1913 printk(KERN_INFO "%s found %s at 0x%llx\n",
1913 MODULE_NAME, version, dev->hpa.start); 1914 MODULE_NAME, version, (unsigned long long)dev->hpa.start);
1914 1915
1915 sba_dev = kzalloc(sizeof(struct sba_device), GFP_KERNEL); 1916 sba_dev = kzalloc(sizeof(struct sba_device), GFP_KERNEL);
1916 if (!sba_dev) { 1917 if (!sba_dev) {
diff --git a/drivers/parisc/superio.c b/drivers/parisc/superio.c
index 38cdf9fa36..1e8d2d17f0 100644
--- a/drivers/parisc/superio.c
+++ b/drivers/parisc/superio.c
@@ -155,6 +155,7 @@ superio_init(struct pci_dev *pcidev)
155 struct superio_device *sio = &sio_dev; 155 struct superio_device *sio = &sio_dev;
156 struct pci_dev *pdev = sio->lio_pdev; 156 struct pci_dev *pdev = sio->lio_pdev;
157 u16 word; 157 u16 word;
158 int ret;
158 159
159 if (sio->suckyio_irq_enabled) 160 if (sio->suckyio_irq_enabled)
160 return; 161 return;
@@ -200,7 +201,8 @@ superio_init(struct pci_dev *pcidev)
200 pci_write_config_word (pdev, PCI_COMMAND, word); 201 pci_write_config_word (pdev, PCI_COMMAND, word);
201 202
202 pci_set_master (pdev); 203 pci_set_master (pdev);
203 pci_enable_device(pdev); 204 ret = pci_enable_device(pdev);
205 BUG_ON(ret < 0); /* not too much we can do about this... */
204 206
205 /* 207 /*
206 * Next project is programming the onboard interrupt controllers. 208 * Next project is programming the onboard interrupt controllers.
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 006054a409..5550556507 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -20,6 +20,9 @@ obj-$(CONFIG_PCI_MSI) += msi.o
20# Build the Hypertransport interrupt support 20# Build the Hypertransport interrupt support
21obj-$(CONFIG_HT_IRQ) += htirq.o 21obj-$(CONFIG_HT_IRQ) += htirq.o
22 22
23# Build Intel IOMMU support
24obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
25
23# 26#
24# Some architectures use the generic PCI setup functions 27# Some architectures use the generic PCI setup functions
25# 28#
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
new file mode 100644
index 0000000000..5dfdfdac92
--- /dev/null
+++ b/drivers/pci/dmar.c
@@ -0,0 +1,329 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
20 *
21 * This file implements early detection/parsing of DMA Remapping Devices
22 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
23 * tables.
24 */
25
26#include <linux/pci.h>
27#include <linux/dmar.h>
28
29#undef PREFIX
30#define PREFIX "DMAR:"
31
32/* No locks are needed as DMA remapping hardware unit
33 * list is constructed at boot time and hotplug of
 34 * these units is not supported by the architecture.
35 */
36LIST_HEAD(dmar_drhd_units);
37LIST_HEAD(dmar_rmrr_units);
38
39static struct acpi_table_header * __initdata dmar_tbl;
40
41static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
42{
43 /*
 44 * add INCLUDE_ALL at the tail, so scanning the list will find it at
45 * the very end.
46 */
47 if (drhd->include_all)
48 list_add_tail(&drhd->list, &dmar_drhd_units);
49 else
50 list_add(&drhd->list, &dmar_drhd_units);
51}
52
53static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
54{
55 list_add(&rmrr->list, &dmar_rmrr_units);
56}
57
58static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
59 struct pci_dev **dev, u16 segment)
60{
61 struct pci_bus *bus;
62 struct pci_dev *pdev = NULL;
63 struct acpi_dmar_pci_path *path;
64 int count;
65
66 bus = pci_find_bus(segment, scope->bus);
67 path = (struct acpi_dmar_pci_path *)(scope + 1);
68 count = (scope->length - sizeof(struct acpi_dmar_device_scope))
69 / sizeof(struct acpi_dmar_pci_path);
70
71 while (count) {
72 if (pdev)
73 pci_dev_put(pdev);
74 /*
 75 * Some BIOSes list non-existent devices in the DMAR table; just
 76 * ignore them
77 */
78 if (!bus) {
79 printk(KERN_WARNING
80 PREFIX "Device scope bus [%d] not found\n",
81 scope->bus);
82 break;
83 }
84 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
85 if (!pdev) {
86 printk(KERN_WARNING PREFIX
87 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
88 segment, bus->number, path->dev, path->fn);
89 break;
90 }
 91 path++;
 92 count--;
93 bus = pdev->subordinate;
94 }
95 if (!pdev) {
96 printk(KERN_WARNING PREFIX
97 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
98 segment, scope->bus, path->dev, path->fn);
99 *dev = NULL;
100 return 0;
101 }
 102 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
 103 pdev->subordinate) || (scope->entry_type ==
 104 ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
105 pci_dev_put(pdev);
106 printk(KERN_WARNING PREFIX
107 "Device scope type does not match for %s\n",
108 pci_name(pdev));
109 return -EINVAL;
110 }
111 *dev = pdev;
112 return 0;
113}
114
115static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
116 struct pci_dev ***devices, u16 segment)
117{
118 struct acpi_dmar_device_scope *scope;
119 void * tmp = start;
120 int index;
121 int ret;
122
123 *cnt = 0;
124 while (start < end) {
125 scope = start;
126 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
127 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
128 (*cnt)++;
129 else
130 printk(KERN_WARNING PREFIX
131 "Unsupported device scope\n");
132 start += scope->length;
133 }
134 if (*cnt == 0)
135 return 0;
136
137 *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
138 if (!*devices)
139 return -ENOMEM;
140
141 start = tmp;
142 index = 0;
143 while (start < end) {
144 scope = start;
145 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
146 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
147 ret = dmar_parse_one_dev_scope(scope,
148 &(*devices)[index], segment);
149 if (ret) {
150 kfree(*devices);
151 return ret;
152 }
 153 index++;
154 }
155 start += scope->length;
156 }
157
158 return 0;
159}
160
161/**
162 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
163 * structure which uniquely represent one DMA remapping hardware unit
164 * present in the platform
165 */
166static int __init
167dmar_parse_one_drhd(struct acpi_dmar_header *header)
168{
169 struct acpi_dmar_hardware_unit *drhd;
170 struct dmar_drhd_unit *dmaru;
171 int ret = 0;
172 static int include_all;
173
174 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
175 if (!dmaru)
176 return -ENOMEM;
177
178 drhd = (struct acpi_dmar_hardware_unit *)header;
179 dmaru->reg_base_addr = drhd->address;
180 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
181
182 if (!dmaru->include_all)
183 ret = dmar_parse_dev_scope((void *)(drhd + 1),
184 ((void *)drhd) + header->length,
185 &dmaru->devices_cnt, &dmaru->devices,
186 drhd->segment);
187 else {
188 /* Only allow one INCLUDE_ALL */
189 if (include_all) {
190 printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL "
191 "device scope is allowed\n");
192 ret = -EINVAL;
193 }
194 include_all = 1;
195 }
196
197 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all))
198 kfree(dmaru);
199 else
200 dmar_register_drhd_unit(dmaru);
201 return ret;
202}
203
204static int __init
205dmar_parse_one_rmrr(struct acpi_dmar_header *header)
206{
207 struct acpi_dmar_reserved_memory *rmrr;
208 struct dmar_rmrr_unit *rmrru;
209 int ret = 0;
210
211 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
212 if (!rmrru)
213 return -ENOMEM;
214
215 rmrr = (struct acpi_dmar_reserved_memory *)header;
216 rmrru->base_address = rmrr->base_address;
217 rmrru->end_address = rmrr->end_address;
218 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
219 ((void *)rmrr) + header->length,
220 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
221
222 if (ret || (rmrru->devices_cnt == 0))
223 kfree(rmrru);
224 else
225 dmar_register_rmrr_unit(rmrru);
226 return ret;
227}
228
229static void __init
230dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
231{
232 struct acpi_dmar_hardware_unit *drhd;
233 struct acpi_dmar_reserved_memory *rmrr;
234
235 switch (header->type) {
236 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
237 drhd = (struct acpi_dmar_hardware_unit *)header;
238 printk (KERN_INFO PREFIX
239 "DRHD (flags: 0x%08x)base: 0x%016Lx\n",
240 drhd->flags, drhd->address);
241 break;
242 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
243 rmrr = (struct acpi_dmar_reserved_memory *)header;
244
245 printk (KERN_INFO PREFIX
246 "RMRR base: 0x%016Lx end: 0x%016Lx\n",
247 rmrr->base_address, rmrr->end_address);
248 break;
249 }
250}
251
252/**
253 * parse_dmar_table - parses the DMA reporting table
254 */
255static int __init
256parse_dmar_table(void)
257{
258 struct acpi_table_dmar *dmar;
259 struct acpi_dmar_header *entry_header;
260 int ret = 0;
261
262 dmar = (struct acpi_table_dmar *)dmar_tbl;
263 if (!dmar)
264 return -ENODEV;
265
266 if (!dmar->width) {
 267 printk (KERN_WARNING PREFIX "Invalid DMAR: zero host address width\n");
268 return -EINVAL;
269 }
270
271 printk (KERN_INFO PREFIX "Host address width %d\n",
272 dmar->width + 1);
273
274 entry_header = (struct acpi_dmar_header *)(dmar + 1);
275 while (((unsigned long)entry_header) <
276 (((unsigned long)dmar) + dmar_tbl->length)) {
277 dmar_table_print_dmar_entry(entry_header);
278
279 switch (entry_header->type) {
280 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
281 ret = dmar_parse_one_drhd(entry_header);
282 break;
283 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
284 ret = dmar_parse_one_rmrr(entry_header);
285 break;
286 default:
287 printk(KERN_WARNING PREFIX
288 "Unknown DMAR structure type\n");
289 ret = 0; /* for forward compatibility */
290 break;
291 }
292 if (ret)
293 break;
294
295 entry_header = ((void *)entry_header + entry_header->length);
296 }
297 return ret;
298}
299
300
301int __init dmar_table_init(void)
302{
303
304 parse_dmar_table();
305 if (list_empty(&dmar_drhd_units)) {
306 printk(KERN_INFO PREFIX "No DMAR devices found\n");
307 return -ENODEV;
308 }
309 return 0;
310}
311
312/**
313 * early_dmar_detect - checks to see if the platform supports DMAR devices
314 */
315int __init early_dmar_detect(void)
316{
317 acpi_status status = AE_OK;
318
319 /* if we could find DMAR table, then there are DMAR devices */
320 status = acpi_get_table(ACPI_SIG_DMAR, 0,
321 (struct acpi_table_header **)&dmar_tbl);
322
323 if (ACPI_SUCCESS(status) && !dmar_tbl) {
324 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
325 status = AE_NOT_FOUND;
326 }
327
328 return (ACPI_SUCCESS(status) ? 1 : 0);
329}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
new file mode 100644
index 0000000000..0c4ab3b072
--- /dev/null
+++ b/drivers/pci/intel-iommu.c
@@ -0,0 +1,2271 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
20 */
21
22#include <linux/init.h>
23#include <linux/bitmap.h>
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
27#include <linux/sysdev.h>
28#include <linux/spinlock.h>
29#include <linux/pci.h>
30#include <linux/dmar.h>
31#include <linux/dma-mapping.h>
32#include <linux/mempool.h>
33#include "iova.h"
34#include "intel-iommu.h"
35#include <asm/proto.h> /* force_iommu in this header in x86-64*/
36#include <asm/cacheflush.h>
37#include <asm/iommu.h>
38#include "pci.h"
39
40#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
41#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
42
43#define IOAPIC_RANGE_START (0xfee00000)
44#define IOAPIC_RANGE_END (0xfeefffff)
45#define IOVA_START_ADDR (0x1000)
46
47#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
48
49#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
50
51#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
52
53static void domain_remove_dev_info(struct dmar_domain *domain);
54
55static int dmar_disabled;
56static int __initdata dmar_map_gfx = 1;
57static int dmar_forcedac;
58
59#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
60static DEFINE_SPINLOCK(device_domain_lock);
61static LIST_HEAD(device_domain_list);
62
63static int __init intel_iommu_setup(char *str)
64{
65 if (!str)
66 return -EINVAL;
67 while (*str) {
68 if (!strncmp(str, "off", 3)) {
69 dmar_disabled = 1;
70 printk(KERN_INFO"Intel-IOMMU: disabled\n");
71 } else if (!strncmp(str, "igfx_off", 8)) {
72 dmar_map_gfx = 0;
73 printk(KERN_INFO
74 "Intel-IOMMU: disable GFX device mapping\n");
75 } else if (!strncmp(str, "forcedac", 8)) {
76 printk (KERN_INFO
77 "Intel-IOMMU: Forcing DAC for PCI devices\n");
78 dmar_forcedac = 1;
79 }
80
81 str += strcspn(str, ",");
82 while (*str == ',')
83 str++;
84 }
85 return 0;
86}
87__setup("intel_iommu=", intel_iommu_setup);
88
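intel_iommu= takes a comma-separated option list; the parser above accepts any mix of the three tokens. Example kernel command-line settings, using only the options this function defines:

    intel_iommu=off                  disable DMA remapping entirely
    intel_iommu=igfx_off             leave graphics devices unmapped
    intel_iommu=igfx_off,forcedac    ...and also force 64-bit DMA addressing
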
89static struct kmem_cache *iommu_domain_cache;
90static struct kmem_cache *iommu_devinfo_cache;
91static struct kmem_cache *iommu_iova_cache;
92
93static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
94{
95 unsigned int flags;
96 void *vaddr;
97
98 /* trying to avoid low memory issues */
99 flags = current->flags & PF_MEMALLOC;
100 current->flags |= PF_MEMALLOC;
101 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
102 current->flags &= (~PF_MEMALLOC | flags);
103 return vaddr;
104}
105
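The PF_MEMALLOC dance above saves the caller's existing flag bit, sets it around the allocation, then restores it with flags &= (~PF_MEMALLOC | saved), so a caller that was already running with the bit set keeps it. A runnable userspace check of that bit arithmetic (the PF_MEMALLOC value is the 2.6-era one, used purely for illustration):

#include <assert.h>
#include <stdio.h>

#define PF_MEMALLOC 0x00000800	/* illustrative value */

static unsigned int restore(unsigned int cur, unsigned int saved)
{
	return cur & (~PF_MEMALLOC | saved);
}

int main(void)
{
	unsigned int f, saved;

	f = 0x1;			/* caller did not have the bit */
	saved = f & PF_MEMALLOC;
	f |= PF_MEMALLOC;
	assert(restore(f, saved) == 0x1);		/* cleared again */

	f = 0x1 | PF_MEMALLOC;		/* caller already had the bit */
	saved = f & PF_MEMALLOC;
	f |= PF_MEMALLOC;
	assert(restore(f, saved) == (0x1 | PF_MEMALLOC));	/* kept */

	puts("PF_MEMALLOC save/restore holds");
	return 0;
}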
106
107static inline void *alloc_pgtable_page(void)
108{
109 unsigned int flags;
110 void *vaddr;
111
112 /* trying to avoid low memory issues */
113 flags = current->flags & PF_MEMALLOC;
114 current->flags |= PF_MEMALLOC;
115 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
116 current->flags &= (~PF_MEMALLOC | flags);
117 return vaddr;
118}
119
120static inline void free_pgtable_page(void *vaddr)
121{
122 free_page((unsigned long)vaddr);
123}
124
125static inline void *alloc_domain_mem(void)
126{
127 return iommu_kmem_cache_alloc(iommu_domain_cache);
128}
129
130static inline void free_domain_mem(void *vaddr)
131{
132 kmem_cache_free(iommu_domain_cache, vaddr);
133}
134
135static inline void * alloc_devinfo_mem(void)
136{
137 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
138}
139
140static inline void free_devinfo_mem(void *vaddr)
141{
142 kmem_cache_free(iommu_devinfo_cache, vaddr);
143}
144
145struct iova *alloc_iova_mem(void)
146{
147 return iommu_kmem_cache_alloc(iommu_iova_cache);
148}
149
150void free_iova_mem(struct iova *iova)
151{
152 kmem_cache_free(iommu_iova_cache, iova);
153}
154
155static inline void __iommu_flush_cache(
156 struct intel_iommu *iommu, void *addr, int size)
157{
158 if (!ecap_coherent(iommu->ecap))
159 clflush_cache_range(addr, size);
160}
161
162/* Gets context entry for a given bus and devfn */
163static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
164 u8 bus, u8 devfn)
165{
166 struct root_entry *root;
167 struct context_entry *context;
168 unsigned long phy_addr;
169 unsigned long flags;
170
171 spin_lock_irqsave(&iommu->lock, flags);
172 root = &iommu->root_entry[bus];
173 context = get_context_addr_from_root(root);
174 if (!context) {
175 context = (struct context_entry *)alloc_pgtable_page();
176 if (!context) {
177 spin_unlock_irqrestore(&iommu->lock, flags);
178 return NULL;
179 }
180 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
181 phy_addr = virt_to_phys((void *)context);
182 set_root_value(root, phy_addr);
183 set_root_present(root);
184 __iommu_flush_cache(iommu, root, sizeof(*root));
185 }
186 spin_unlock_irqrestore(&iommu->lock, flags);
187 return &context[devfn];
188}
189
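The lookup above is a fixed two-level walk: a 256-entry root table indexed by bus number, each present entry pointing at a 256-entry context table indexed by devfn. A runnable userspace sketch of the index arithmetic (PCI_DEVFN as in the kernel's pci.h):

#include <stdio.h>

#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))

int main(void)
{
	unsigned int bus = 0x02, slot = 0x1f, func = 3;
	unsigned int devfn = PCI_DEVFN(slot, func);

	/* root_entry[bus] -> context table; &context[devfn] is the entry */
	printf("root index %u, context index %u (0x%02x)\n", bus, devfn, devfn);
	return 0;	/* prints: root index 2, context index 251 (0xfb) */
}
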
190static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
191{
192 struct root_entry *root;
193 struct context_entry *context;
194 int ret;
195 unsigned long flags;
196
197 spin_lock_irqsave(&iommu->lock, flags);
198 root = &iommu->root_entry[bus];
199 context = get_context_addr_from_root(root);
200 if (!context) {
201 ret = 0;
202 goto out;
203 }
204 ret = context_present(context[devfn]);
205out:
206 spin_unlock_irqrestore(&iommu->lock, flags);
207 return ret;
208}
209
210static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
211{
212 struct root_entry *root;
213 struct context_entry *context;
214 unsigned long flags;
215
216 spin_lock_irqsave(&iommu->lock, flags);
217 root = &iommu->root_entry[bus];
218 context = get_context_addr_from_root(root);
219 if (context) {
220 context_clear_entry(context[devfn]);
 221 __iommu_flush_cache(iommu, &context[devfn],
 222 sizeof(*context));
223 }
224 spin_unlock_irqrestore(&iommu->lock, flags);
225}
226
227static void free_context_table(struct intel_iommu *iommu)
228{
229 struct root_entry *root;
230 int i;
231 unsigned long flags;
232 struct context_entry *context;
233
234 spin_lock_irqsave(&iommu->lock, flags);
235 if (!iommu->root_entry) {
236 goto out;
237 }
238 for (i = 0; i < ROOT_ENTRY_NR; i++) {
239 root = &iommu->root_entry[i];
240 context = get_context_addr_from_root(root);
241 if (context)
242 free_pgtable_page(context);
243 }
244 free_pgtable_page(iommu->root_entry);
245 iommu->root_entry = NULL;
246out:
247 spin_unlock_irqrestore(&iommu->lock, flags);
248}
249
250/* page table handling */
251#define LEVEL_STRIDE (9)
252#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
253
254static inline int agaw_to_level(int agaw)
255{
256 return agaw + 2;
257}
258
259static inline int agaw_to_width(int agaw)
260{
261 return 30 + agaw * LEVEL_STRIDE;
262
263}
264
265static inline int width_to_agaw(int width)
266{
267 return (width - 30) / LEVEL_STRIDE;
268}
269
270static inline unsigned int level_to_offset_bits(int level)
271{
272 return (12 + (level - 1) * LEVEL_STRIDE);
273}
274
275static inline int address_level_offset(u64 addr, int level)
276{
277 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
278}
279
280static inline u64 level_mask(int level)
281{
282 return ((u64)-1 << level_to_offset_bits(level));
283}
284
285static inline u64 level_size(int level)
286{
287 return ((u64)1 << level_to_offset_bits(level));
288}
289
290static inline u64 align_to_level(u64 addr, int level)
291{
292 return ((addr + level_size(level) - 1) & level_mask(level));
293}
294
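With these helpers, the default 48-bit domain works out to agaw = (48 - 30) / 9 = 2, i.e. a 4-level table in which each level decodes 9 address bits down to 4K pages. A runnable userspace check of the arithmetic, with the helpers copied from the definitions above:

#include <stdio.h>

#define LEVEL_STRIDE 9

static int width_to_agaw(int width) { return (width - 30) / LEVEL_STRIDE; }
static int agaw_to_level(int agaw) { return agaw + 2; }
static unsigned int level_to_offset_bits(int level)
{
	return 12 + (level - 1) * LEVEL_STRIDE;
}
static int address_level_offset(unsigned long long addr, int level)
{
	return (addr >> level_to_offset_bits(level)) &
		((1ULL << LEVEL_STRIDE) - 1);
}

int main(void)
{
	int agaw = width_to_agaw(48);		/* 2 */
	int level = agaw_to_level(agaw);	/* 4-level table */
	unsigned long long addr = 0x0000123456789000ULL;

	printf("agaw=%d, levels=%d\n", agaw, level);
	for (; level > 0; level--)	/* same indices addr_to_dma_pte uses */
		printf("level %d: offset bits %u, index %d\n", level,
		       level_to_offset_bits(level),
		       address_level_offset(addr, level));
	return 0;
}
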
295static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
296{
297 int addr_width = agaw_to_width(domain->agaw);
298 struct dma_pte *parent, *pte = NULL;
299 int level = agaw_to_level(domain->agaw);
300 int offset;
301 unsigned long flags;
302
303 BUG_ON(!domain->pgd);
304
305 addr &= (((u64)1) << addr_width) - 1;
306 parent = domain->pgd;
307
308 spin_lock_irqsave(&domain->mapping_lock, flags);
309 while (level > 0) {
310 void *tmp_page;
311
312 offset = address_level_offset(addr, level);
313 pte = &parent[offset];
314 if (level == 1)
315 break;
316
317 if (!dma_pte_present(*pte)) {
318 tmp_page = alloc_pgtable_page();
319
320 if (!tmp_page) {
321 spin_unlock_irqrestore(&domain->mapping_lock,
322 flags);
323 return NULL;
324 }
325 __iommu_flush_cache(domain->iommu, tmp_page,
326 PAGE_SIZE_4K);
327 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
328 /*
 329 * higher-level tables always set r/w; the last-level page
 330 * table controls read/write access
331 */
332 dma_set_pte_readable(*pte);
333 dma_set_pte_writable(*pte);
334 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
335 }
336 parent = phys_to_virt(dma_pte_addr(*pte));
337 level--;
338 }
339
340 spin_unlock_irqrestore(&domain->mapping_lock, flags);
341 return pte;
342}
343
344/* return address's pte at specific level */
345static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
346 int level)
347{
348 struct dma_pte *parent, *pte = NULL;
349 int total = agaw_to_level(domain->agaw);
350 int offset;
351
352 parent = domain->pgd;
353 while (level <= total) {
354 offset = address_level_offset(addr, total);
355 pte = &parent[offset];
356 if (level == total)
357 return pte;
358
359 if (!dma_pte_present(*pte))
360 break;
361 parent = phys_to_virt(dma_pte_addr(*pte));
362 total--;
363 }
364 return NULL;
365}
366
367/* clear one page's page table */
368static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
369{
370 struct dma_pte *pte = NULL;
371
372 /* get last level pte */
373 pte = dma_addr_level_pte(domain, addr, 1);
374
375 if (pte) {
376 dma_clear_pte(*pte);
377 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
378 }
379}
380
381/* clear last level pte, a tlb flush should be followed */
382static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
383{
384 int addr_width = agaw_to_width(domain->agaw);
385
386 start &= (((u64)1) << addr_width) - 1;
387 end &= (((u64)1) << addr_width) - 1;
388 /* in case it's partial page */
389 start = PAGE_ALIGN_4K(start);
390 end &= PAGE_MASK_4K;
391
392 /* we don't need lock here, nobody else touches the iova range */
393 while (start < end) {
394 dma_pte_clear_one(domain, start);
395 start += PAGE_SIZE_4K;
396 }
397}
398
399/* free page table pages. last level pte should already be cleared */
400static void dma_pte_free_pagetable(struct dmar_domain *domain,
401 u64 start, u64 end)
402{
403 int addr_width = agaw_to_width(domain->agaw);
404 struct dma_pte *pte;
405 int total = agaw_to_level(domain->agaw);
406 int level;
407 u64 tmp;
408
409 start &= (((u64)1) << addr_width) - 1;
410 end &= (((u64)1) << addr_width) - 1;
411
412 /* we don't need lock here, nobody else touches the iova range */
413 level = 2;
414 while (level <= total) {
415 tmp = align_to_level(start, level);
416 if (tmp >= end || (tmp + level_size(level) > end))
417 return;
418
419 while (tmp < end) {
420 pte = dma_addr_level_pte(domain, tmp, level);
421 if (pte) {
422 free_pgtable_page(
423 phys_to_virt(dma_pte_addr(*pte)));
424 dma_clear_pte(*pte);
425 __iommu_flush_cache(domain->iommu,
426 pte, sizeof(*pte));
427 }
428 tmp += level_size(level);
429 }
430 level++;
431 }
432 /* free pgd */
433 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
434 free_pgtable_page(domain->pgd);
435 domain->pgd = NULL;
436 }
437}
438
439/* iommu handling */
440static int iommu_alloc_root_entry(struct intel_iommu *iommu)
441{
442 struct root_entry *root;
443 unsigned long flags;
444
445 root = (struct root_entry *)alloc_pgtable_page();
446 if (!root)
447 return -ENOMEM;
448
449 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
450
451 spin_lock_irqsave(&iommu->lock, flags);
452 iommu->root_entry = root;
453 spin_unlock_irqrestore(&iommu->lock, flags);
454
455 return 0;
456}
457
458#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
459{\
460 unsigned long start_time = jiffies;\
461 while (1) {\
462 sts = op (iommu->reg + offset);\
463 if (cond)\
464 break;\
465 if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
466 panic("DMAR hardware is malfunctioning\n");\
467 cpu_relax();\
468 }\
469}
470
471static void iommu_set_root_entry(struct intel_iommu *iommu)
472{
473 void *addr;
474 u32 cmd, sts;
475 unsigned long flag;
476
477 addr = iommu->root_entry;
478
479 spin_lock_irqsave(&iommu->register_lock, flag);
480 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
481
482 cmd = iommu->gcmd | DMA_GCMD_SRTP;
483 writel(cmd, iommu->reg + DMAR_GCMD_REG);
484
485 /* Make sure hardware complete it */
486 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
487 readl, (sts & DMA_GSTS_RTPS), sts);
488
489 spin_unlock_irqrestore(&iommu->register_lock, flag);
490}
491
492static void iommu_flush_write_buffer(struct intel_iommu *iommu)
493{
494 u32 val;
495 unsigned long flag;
496
497 if (!cap_rwbf(iommu->cap))
498 return;
499 val = iommu->gcmd | DMA_GCMD_WBF;
500
501 spin_lock_irqsave(&iommu->register_lock, flag);
502 writel(val, iommu->reg + DMAR_GCMD_REG);
503
504 /* Make sure hardware complete it */
505 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
506 readl, (!(val & DMA_GSTS_WBFS)), val);
507
508 spin_unlock_irqrestore(&iommu->register_lock, flag);
509}
510
511/* return value determines whether we need a write buffer flush */
512static int __iommu_flush_context(struct intel_iommu *iommu,
513 u16 did, u16 source_id, u8 function_mask, u64 type,
514 int non_present_entry_flush)
515{
516 u64 val = 0;
517 unsigned long flag;
518
519 /*
 520 * In the non-present entry flush case: if the hardware doesn't cache
 521 * non-present entries, there is nothing to do; if it does, we flush
 522 * the entries of domain 0 (that domain id is used to cache any
 523 * non-present entries)
524 */
525 if (non_present_entry_flush) {
526 if (!cap_caching_mode(iommu->cap))
527 return 1;
528 else
529 did = 0;
530 }
531
532 switch (type) {
533 case DMA_CCMD_GLOBAL_INVL:
534 val = DMA_CCMD_GLOBAL_INVL;
535 break;
536 case DMA_CCMD_DOMAIN_INVL:
537 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
538 break;
539 case DMA_CCMD_DEVICE_INVL:
540 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
541 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
542 break;
543 default:
544 BUG();
545 }
546 val |= DMA_CCMD_ICC;
547
548 spin_lock_irqsave(&iommu->register_lock, flag);
549 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
550
551 /* Make sure hardware complete it */
552 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
553 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
554
555 spin_unlock_irqrestore(&iommu->register_lock, flag);
556
 557 /* flushing a context entry will implicitly flush the write buffer */
558 return 0;
559}
560
561static inline int iommu_flush_context_global(struct intel_iommu *iommu,
562 int non_present_entry_flush)
563{
564 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
565 non_present_entry_flush);
566}
567
568static inline int iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
569 int non_present_entry_flush)
570{
571 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
572 non_present_entry_flush);
573}
574
575static inline int iommu_flush_context_device(struct intel_iommu *iommu,
576 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
577{
578 return __iommu_flush_context(iommu, did, source_id, function_mask,
579 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
580}
581
582/* return value determines whether we need a write buffer flush */
583static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
584 u64 addr, unsigned int size_order, u64 type,
585 int non_present_entry_flush)
586{
587 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
588 u64 val = 0, val_iva = 0;
589 unsigned long flag;
590
591 /*
 592 * In the non-present entry flush case: if the hardware doesn't cache
 593 * non-present entries, there is nothing to do; if it does, we flush
 594 * the entries of domain 0 (that domain id is used to cache any
 595 * non-present entries)
596 */
597 if (non_present_entry_flush) {
598 if (!cap_caching_mode(iommu->cap))
599 return 1;
600 else
601 did = 0;
602 }
603
604 switch (type) {
605 case DMA_TLB_GLOBAL_FLUSH:
 606 /* a global flush doesn't need IVA_REG set */
607 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
608 break;
609 case DMA_TLB_DSI_FLUSH:
610 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
611 break;
612 case DMA_TLB_PSI_FLUSH:
613 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
614 /* Note: always flush non-leaf currently */
615 val_iva = size_order | addr;
616 break;
617 default:
618 BUG();
619 }
620 /* Note: set drain read/write */
621#if 0
622 /*
 623 * This is probably meant to be extra safe. It looks like we can
 624 * ignore it without any impact.
625 */
626 if (cap_read_drain(iommu->cap))
627 val |= DMA_TLB_READ_DRAIN;
628#endif
629 if (cap_write_drain(iommu->cap))
630 val |= DMA_TLB_WRITE_DRAIN;
631
632 spin_lock_irqsave(&iommu->register_lock, flag);
633 /* Note: Only uses first TLB reg currently */
634 if (val_iva)
635 dmar_writeq(iommu->reg + tlb_offset, val_iva);
636 dmar_writeq(iommu->reg + tlb_offset + 8, val);
637
638 /* Make sure hardware complete it */
639 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
640 dmar_readq, (!(val & DMA_TLB_IVT)), val);
641
642 spin_unlock_irqrestore(&iommu->register_lock, flag);
643
644 /* check IOTLB invalidation granularity */
645 if (DMA_TLB_IAIG(val) == 0)
646 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
647 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
648 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
649 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
 650 /* flushing the IOTLB will implicitly flush the write buffer */
651 return 0;
652}
653
654static inline int iommu_flush_iotlb_global(struct intel_iommu *iommu,
655 int non_present_entry_flush)
656{
657 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
658 non_present_entry_flush);
659}
660
661static inline int iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
662 int non_present_entry_flush)
663{
664 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
665 non_present_entry_flush);
666}
667
668static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
669 u64 addr, unsigned int pages, int non_present_entry_flush)
670{
671 unsigned int mask;
672
673 BUG_ON(addr & (~PAGE_MASK_4K));
674 BUG_ON(pages == 0);
675
676 /* Fallback to domain selective flush if no PSI support */
677 if (!cap_pgsel_inv(iommu->cap))
678 return iommu_flush_iotlb_dsi(iommu, did,
679 non_present_entry_flush);
680
681 /*
 682 * PSI requires the page count to be a power of two, with the base
 683 * address naturally aligned to that size
684 */
685 mask = ilog2(__roundup_pow_of_two(pages));
686 /* Fallback to domain selective flush if size is too big */
687 if (mask > cap_max_amask_val(iommu->cap))
688 return iommu_flush_iotlb_dsi(iommu, did,
689 non_present_entry_flush);
690
691 return __iommu_flush_iotlb(iommu, did, addr, mask,
692 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
693}
694
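The PSI mask above is just the log2 of the page count rounded up to a power of two; hardware then invalidates 2^mask pages, naturally aligned at the base address. A runnable userspace version of the calculation (__roundup_pow_of_two and ilog2 re-implemented here only for illustration):

#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int n)
{
	unsigned int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

static unsigned int ilog2_u(unsigned int n)
{
	unsigned int log = 0;

	while (n >>= 1)
		log++;
	return log;
}

int main(void)
{
	unsigned int pages;

	/* e.g. pages=5 rounds up to 8, so mask=3 and 8 pages are flushed */
	for (pages = 1; pages <= 9; pages++)
		printf("pages=%u -> mask=%u (flushes %u pages)\n", pages,
		       ilog2_u(roundup_pow_of_two(pages)),
		       roundup_pow_of_two(pages));
	return 0;
}
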
695static int iommu_enable_translation(struct intel_iommu *iommu)
696{
697 u32 sts;
698 unsigned long flags;
699
700 spin_lock_irqsave(&iommu->register_lock, flags);
701 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
702
703 /* Make sure hardware complete it */
704 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
705 readl, (sts & DMA_GSTS_TES), sts);
706
707 iommu->gcmd |= DMA_GCMD_TE;
708 spin_unlock_irqrestore(&iommu->register_lock, flags);
709 return 0;
710}
711
712static int iommu_disable_translation(struct intel_iommu *iommu)
713{
714 u32 sts;
715 unsigned long flag;
716
717 spin_lock_irqsave(&iommu->register_lock, flag);
718 iommu->gcmd &= ~DMA_GCMD_TE;
719 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
720
721 /* Make sure hardware complete it */
722 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
723 readl, (!(sts & DMA_GSTS_TES)), sts);
724
725 spin_unlock_irqrestore(&iommu->register_lock, flag);
726 return 0;
727}
728
729/* iommu interrupt handling. Most of it is MSI-like. */
730
731static char *fault_reason_strings[] =
732{
733 "Software",
734 "Present bit in root entry is clear",
735 "Present bit in context entry is clear",
736 "Invalid context entry",
737 "Access beyond MGAW",
738 "PTE Write access is not set",
739 "PTE Read access is not set",
740 "Next page table ptr is invalid",
741 "Root table address invalid",
742 "Context table ptr is invalid",
743 "non-zero reserved fields in RTP",
744 "non-zero reserved fields in CTP",
745 "non-zero reserved fields in PTE",
746 "Unknown"
747};
748#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
749
750char *dmar_get_fault_reason(u8 fault_reason)
751{
752 if (fault_reason > MAX_FAULT_REASON_IDX)
753 return fault_reason_strings[MAX_FAULT_REASON_IDX];
754 else
755 return fault_reason_strings[fault_reason];
756}
757
758void dmar_msi_unmask(unsigned int irq)
759{
760 struct intel_iommu *iommu = get_irq_data(irq);
761 unsigned long flag;
762
763 /* unmask it */
764 spin_lock_irqsave(&iommu->register_lock, flag);
765 writel(0, iommu->reg + DMAR_FECTL_REG);
766 /* Read a reg to force flush the post write */
767 readl(iommu->reg + DMAR_FECTL_REG);
768 spin_unlock_irqrestore(&iommu->register_lock, flag);
769}
770
771void dmar_msi_mask(unsigned int irq)
772{
773 unsigned long flag;
774 struct intel_iommu *iommu = get_irq_data(irq);
775
776 /* mask it */
777 spin_lock_irqsave(&iommu->register_lock, flag);
778 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
779 /* Read a reg to force flush the post write */
780 readl(iommu->reg + DMAR_FECTL_REG);
781 spin_unlock_irqrestore(&iommu->register_lock, flag);
782}
783
784void dmar_msi_write(int irq, struct msi_msg *msg)
785{
786 struct intel_iommu *iommu = get_irq_data(irq);
787 unsigned long flag;
788
789 spin_lock_irqsave(&iommu->register_lock, flag);
790 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
791 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
792 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
793 spin_unlock_irqrestore(&iommu->register_lock, flag);
794}
795
796void dmar_msi_read(int irq, struct msi_msg *msg)
797{
798 struct intel_iommu *iommu = get_irq_data(irq);
799 unsigned long flag;
800
801 spin_lock_irqsave(&iommu->register_lock, flag);
802 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
803 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
804 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
805 spin_unlock_irqrestore(&iommu->register_lock, flag);
806}
807
808static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
809 u8 fault_reason, u16 source_id, u64 addr)
810{
811 char *reason;
812
813 reason = dmar_get_fault_reason(fault_reason);
814
815 printk(KERN_ERR
816 "DMAR:[%s] Request device [%02x:%02x.%d] "
817		"fault addr %llx\n"
818 "DMAR:[fault reason %02d] %s\n",
819 (type ? "DMA Read" : "DMA Write"),
820 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
821 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
822 return 0;
823}
824
825#define PRIMARY_FAULT_REG_LEN (16)
826static irqreturn_t iommu_page_fault(int irq, void *dev_id)
827{
828 struct intel_iommu *iommu = dev_id;
829 int reg, fault_index;
830 u32 fault_status;
831 unsigned long flag;
832
833 spin_lock_irqsave(&iommu->register_lock, flag);
834 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
835
836	/* TBD: the advanced fault log is currently ignored */
837 if (!(fault_status & DMA_FSTS_PPF))
838 goto clear_overflow;
839
840 fault_index = dma_fsts_fault_record_index(fault_status);
841 reg = cap_fault_reg_offset(iommu->cap);
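	/*
	 * Each fault recording register is 128 bits: qword 0 holds the
	 * faulting page address, dword 2 the source-id, and dword 3 the
	 * fault bit, request type and fault reason (see the dma_frcd_*
	 * macros in intel-iommu.h).
	 */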
842 while (1) {
843 u8 fault_reason;
844 u16 source_id;
845 u64 guest_addr;
846 int type;
847 u32 data;
848
849 /* highest 32 bits */
850 data = readl(iommu->reg + reg +
851 fault_index * PRIMARY_FAULT_REG_LEN + 12);
852 if (!(data & DMA_FRCD_F))
853 break;
854
855 fault_reason = dma_frcd_fault_reason(data);
856 type = dma_frcd_type(data);
857
858 data = readl(iommu->reg + reg +
859 fault_index * PRIMARY_FAULT_REG_LEN + 8);
860 source_id = dma_frcd_source_id(data);
861
862 guest_addr = dmar_readq(iommu->reg + reg +
863 fault_index * PRIMARY_FAULT_REG_LEN);
864 guest_addr = dma_frcd_page_addr(guest_addr);
865 /* clear the fault */
866 writel(DMA_FRCD_F, iommu->reg + reg +
867 fault_index * PRIMARY_FAULT_REG_LEN + 12);
868
869 spin_unlock_irqrestore(&iommu->register_lock, flag);
870
871 iommu_page_fault_do_one(iommu, type, fault_reason,
872 source_id, guest_addr);
873
874 fault_index++;
875		if (fault_index >= cap_num_fault_regs(iommu->cap))
876 fault_index = 0;
877 spin_lock_irqsave(&iommu->register_lock, flag);
878 }
879clear_overflow:
880 /* clear primary fault overflow */
881 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
882 if (fault_status & DMA_FSTS_PFO)
883 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
884
885 spin_unlock_irqrestore(&iommu->register_lock, flag);
886 return IRQ_HANDLED;
887}
888
889int dmar_set_interrupt(struct intel_iommu *iommu)
890{
891 int irq, ret;
892
893 irq = create_irq();
894 if (!irq) {
895 printk(KERN_ERR "IOMMU: no free vectors\n");
896 return -EINVAL;
897 }
898
899 set_irq_data(irq, iommu);
900 iommu->irq = irq;
901
902 ret = arch_setup_dmar_msi(irq);
903 if (ret) {
904 set_irq_data(irq, NULL);
905 iommu->irq = 0;
906 destroy_irq(irq);
907		return ret;
908 }
909
910	/* Clear any faults already pending before requesting the irq */
911 iommu_page_fault(irq, iommu);
912
913 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
914 if (ret)
915 printk(KERN_ERR "IOMMU: can't request irq\n");
916 return ret;
917}
918
919static int iommu_init_domains(struct intel_iommu *iommu)
920{
921 unsigned long ndomains;
922 unsigned long nlongs;
923
924 ndomains = cap_ndoms(iommu->cap);
925	pr_debug("Number of Domains supported <%ld>\n", ndomains);
926 nlongs = BITS_TO_LONGS(ndomains);
927
928	/* TBD: there might be 64K domains,
929	 * consider a different allocation scheme for future chips
930	 */
931 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
932 if (!iommu->domain_ids) {
933 printk(KERN_ERR "Allocating domain id array failed\n");
934 return -ENOMEM;
935 }
936 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
937 GFP_KERNEL);
938 if (!iommu->domains) {
939 printk(KERN_ERR "Allocating domain array failed\n");
940 kfree(iommu->domain_ids);
941 return -ENOMEM;
942 }
943
944 /*
945 * if Caching mode is set, then invalid translations are tagged
946 * with domainid 0. Hence we need to pre-allocate it.
947 */
948 if (cap_caching_mode(iommu->cap))
949 set_bit(0, iommu->domain_ids);
950 return 0;
951}
952
953static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
954{
955 struct intel_iommu *iommu;
956 int ret;
957 int map_size;
958 u32 ver;
959
960 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
961 if (!iommu)
962 return NULL;
963 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
964 if (!iommu->reg) {
965 printk(KERN_ERR "IOMMU: can't map the region\n");
966 goto error;
967 }
968 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
969 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
970
971 /* the registers might be more than one page */
972 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
973 cap_max_fault_reg_offset(iommu->cap));
974 map_size = PAGE_ALIGN_4K(map_size);
975 if (map_size > PAGE_SIZE_4K) {
976 iounmap(iommu->reg);
977 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
978 if (!iommu->reg) {
979 printk(KERN_ERR "IOMMU: can't map the region\n");
980 goto error;
981 }
982 }
983
984 ver = readl(iommu->reg + DMAR_VER_REG);
985 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
986 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
987 iommu->cap, iommu->ecap);
988 ret = iommu_init_domains(iommu);
989 if (ret)
990 goto error_unmap;
991 spin_lock_init(&iommu->lock);
992 spin_lock_init(&iommu->register_lock);
993
994 drhd->iommu = iommu;
995 return iommu;
996error_unmap:
997 iounmap(iommu->reg);
998	iommu->reg = NULL;
999error:
1000 kfree(iommu);
1001 return NULL;
1002}
1003
1004static void domain_exit(struct dmar_domain *domain);
1005static void free_iommu(struct intel_iommu *iommu)
1006{
1007 struct dmar_domain *domain;
1008 int i;
1009
1010 if (!iommu)
1011 return;
1012
1013 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1014 for (; i < cap_ndoms(iommu->cap); ) {
1015 domain = iommu->domains[i];
1016 clear_bit(i, iommu->domain_ids);
1017 domain_exit(domain);
1018 i = find_next_bit(iommu->domain_ids,
1019 cap_ndoms(iommu->cap), i+1);
1020 }
1021
1022 if (iommu->gcmd & DMA_GCMD_TE)
1023 iommu_disable_translation(iommu);
1024
1025 if (iommu->irq) {
1026 set_irq_data(iommu->irq, NULL);
1027 /* This will mask the irq */
1028 free_irq(iommu->irq, iommu);
1029 destroy_irq(iommu->irq);
1030 }
1031
1032 kfree(iommu->domains);
1033 kfree(iommu->domain_ids);
1034
1035 /* free context mapping */
1036 free_context_table(iommu);
1037
1038 if (iommu->reg)
1039 iounmap(iommu->reg);
1040 kfree(iommu);
1041}
1042
1043static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1044{
1045 unsigned long num;
1046 unsigned long ndomains;
1047 struct dmar_domain *domain;
1048 unsigned long flags;
1049
1050 domain = alloc_domain_mem();
1051 if (!domain)
1052 return NULL;
1053
1054 ndomains = cap_ndoms(iommu->cap);
1055
1056 spin_lock_irqsave(&iommu->lock, flags);
1057 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1058 if (num >= ndomains) {
1059 spin_unlock_irqrestore(&iommu->lock, flags);
1060 free_domain_mem(domain);
1061 printk(KERN_ERR "IOMMU: no free domain ids\n");
1062 return NULL;
1063 }
1064
1065 set_bit(num, iommu->domain_ids);
1066 domain->id = num;
1067 domain->iommu = iommu;
1068 iommu->domains[num] = domain;
1069 spin_unlock_irqrestore(&iommu->lock, flags);
1070
1071 return domain;
1072}
1073
1074static void iommu_free_domain(struct dmar_domain *domain)
1075{
1076 unsigned long flags;
1077
1078 spin_lock_irqsave(&domain->iommu->lock, flags);
1079 clear_bit(domain->id, domain->iommu->domain_ids);
1080 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1081}
1082
1083static struct iova_domain reserved_iova_list;
1084
1085static void dmar_init_reserved_ranges(void)
1086{
1087 struct pci_dev *pdev = NULL;
1088 struct iova *iova;
1089 int i;
1090 u64 addr, size;
1091
1092 init_iova_domain(&reserved_iova_list);
1093
1094 /* IOAPIC ranges shouldn't be accessed by DMA */
1095 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1096 IOVA_PFN(IOAPIC_RANGE_END));
1097 if (!iova)
1098 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1099
1100 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1101 for_each_pci_dev(pdev) {
1102 struct resource *r;
1103
1104 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1105 r = &pdev->resource[i];
1106 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1107 continue;
1108 addr = r->start;
1109 addr &= PAGE_MASK_4K;
1110 size = r->end - addr;
1111 size = PAGE_ALIGN_4K(size);
1112 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1113 IOVA_PFN(size + addr) - 1);
1114 if (!iova)
1115 printk(KERN_ERR "Reserve iova failed\n");
1116 }
1117 }
1118
1119}
1120
1121static void domain_reserve_special_ranges(struct dmar_domain *domain)
1122{
1123 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1124}
1125
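/*
 * Each page-table level resolves OFFSET_STRIDE (9) address bits above the
 * 12-bit page offset, so the adjusted width is gaw rounded up to 12 + 9 * n,
 * capped at 64: e.g. a 39-bit width (3 levels) stays 39, 40 becomes 48.
 */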
1126static inline int guestwidth_to_adjustwidth(int gaw)
1127{
1128 int agaw;
1129 int r = (gaw - 12) % 9;
1130
1131 if (r == 0)
1132 agaw = gaw;
1133 else
1134 agaw = gaw + 9 - r;
1135 if (agaw > 64)
1136 agaw = 64;
1137 return agaw;
1138}
1139
1140static int domain_init(struct dmar_domain *domain, int guest_width)
1141{
1142 struct intel_iommu *iommu;
1143 int adjust_width, agaw;
1144 unsigned long sagaw;
1145
1146 init_iova_domain(&domain->iovad);
1147 spin_lock_init(&domain->mapping_lock);
1148
1149 domain_reserve_special_ranges(domain);
1150
1151 /* calculate AGAW */
1152 iommu = domain->iommu;
1153 if (guest_width > cap_mgaw(iommu->cap))
1154 guest_width = cap_mgaw(iommu->cap);
1155 domain->gaw = guest_width;
1156 adjust_width = guestwidth_to_adjustwidth(guest_width);
1157 agaw = width_to_agaw(adjust_width);
1158 sagaw = cap_sagaw(iommu->cap);
1159 if (!test_bit(agaw, &sagaw)) {
1160 /* hardware doesn't support it, choose a bigger one */
1161 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1162 agaw = find_next_bit(&sagaw, 5, agaw);
1163 if (agaw >= 5)
1164 return -ENODEV;
1165 }
1166 domain->agaw = agaw;
1167 INIT_LIST_HEAD(&domain->devices);
1168
1169 /* always allocate the top pgd */
1170 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1171 if (!domain->pgd)
1172 return -ENOMEM;
1173 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1174 return 0;
1175}
1176
1177static void domain_exit(struct dmar_domain *domain)
1178{
1179 u64 end;
1180
1181	/* Domain 0 is reserved, so don't process it */
1182 if (!domain)
1183 return;
1184
1185 domain_remove_dev_info(domain);
1186 /* destroy iovas */
1187 put_iova_domain(&domain->iovad);
1188 end = DOMAIN_MAX_ADDR(domain->gaw);
1189 end = end & (~PAGE_MASK_4K);
1190
1191 /* clear ptes */
1192 dma_pte_clear_range(domain, 0, end);
1193
1194 /* free page tables */
1195 dma_pte_free_pagetable(domain, 0, end);
1196
1197 iommu_free_domain(domain);
1198 free_domain_mem(domain);
1199}
1200
1201static int domain_context_mapping_one(struct dmar_domain *domain,
1202 u8 bus, u8 devfn)
1203{
1204 struct context_entry *context;
1205 struct intel_iommu *iommu = domain->iommu;
1206 unsigned long flags;
1207
1208 pr_debug("Set context mapping for %02x:%02x.%d\n",
1209 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1210 BUG_ON(!domain->pgd);
1211 context = device_to_context_entry(iommu, bus, devfn);
1212 if (!context)
1213 return -ENOMEM;
1214 spin_lock_irqsave(&iommu->lock, flags);
1215 if (context_present(*context)) {
1216 spin_unlock_irqrestore(&iommu->lock, flags);
1217 return 0;
1218 }
1219
1220 context_set_domain_id(*context, domain->id);
1221 context_set_address_width(*context, domain->agaw);
1222 context_set_address_root(*context, virt_to_phys(domain->pgd));
1223 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1224 context_set_fault_enable(*context);
1225 context_set_present(*context);
1226 __iommu_flush_cache(iommu, context, sizeof(*context));
1227
1228 /* it's a non-present to present mapping */
1229 if (iommu_flush_context_device(iommu, domain->id,
1230 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1231 iommu_flush_write_buffer(iommu);
1232 else
1233 iommu_flush_iotlb_dsi(iommu, 0, 0);
1234 spin_unlock_irqrestore(&iommu->lock, flags);
1235 return 0;
1236}
1237
1238static int
1239domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1240{
1241 int ret;
1242 struct pci_dev *tmp, *parent;
1243
1244 ret = domain_context_mapping_one(domain, pdev->bus->number,
1245 pdev->devfn);
1246 if (ret)
1247 return ret;
1248
1249 /* dependent device mapping */
1250 tmp = pci_find_upstream_pcie_bridge(pdev);
1251 if (!tmp)
1252 return 0;
1253 /* Secondary interface's bus number and devfn 0 */
1254 parent = pdev->bus->self;
1255 while (parent != tmp) {
1256 ret = domain_context_mapping_one(domain, parent->bus->number,
1257 parent->devfn);
1258 if (ret)
1259 return ret;
1260 parent = parent->bus->self;
1261 }
1262 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1263 return domain_context_mapping_one(domain,
1264 tmp->subordinate->number, 0);
1265 else /* this is a legacy PCI bridge */
1266 return domain_context_mapping_one(domain,
1267 tmp->bus->number, tmp->devfn);
1268}
1269
1270static int domain_context_mapped(struct dmar_domain *domain,
1271 struct pci_dev *pdev)
1272{
1273 int ret;
1274 struct pci_dev *tmp, *parent;
1275
1276 ret = device_context_mapped(domain->iommu,
1277 pdev->bus->number, pdev->devfn);
1278 if (!ret)
1279 return ret;
1280 /* dependent device mapping */
1281 tmp = pci_find_upstream_pcie_bridge(pdev);
1282 if (!tmp)
1283 return ret;
1284 /* Secondary interface's bus number and devfn 0 */
1285 parent = pdev->bus->self;
1286 while (parent != tmp) {
1287 ret = device_context_mapped(domain->iommu, parent->bus->number,
1288 parent->devfn);
1289 if (!ret)
1290 return ret;
1291 parent = parent->bus->self;
1292 }
1293 if (tmp->is_pcie)
1294 return device_context_mapped(domain->iommu,
1295 tmp->subordinate->number, 0);
1296 else
1297 return device_context_mapped(domain->iommu,
1298 tmp->bus->number, tmp->devfn);
1299}
1300
1301static int
1302domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1303 u64 hpa, size_t size, int prot)
1304{
1305 u64 start_pfn, end_pfn;
1306 struct dma_pte *pte;
1307 int index;
1308
1309 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1310 return -EINVAL;
1311 iova &= PAGE_MASK_4K;
1312 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1313 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1314 index = 0;
1315 while (start_pfn < end_pfn) {
1316 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1317 if (!pte)
1318 return -ENOMEM;
1319 /* We don't need lock here, nobody else
1320 * touches the iova range
1321 */
1322 BUG_ON(dma_pte_addr(*pte));
1323 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1324 dma_set_pte_prot(*pte, prot);
1325 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1326 start_pfn++;
1327 index++;
1328 }
1329 return 0;
1330}
1331
1332static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1333{
1334 clear_context_table(domain->iommu, bus, devfn);
1335 iommu_flush_context_global(domain->iommu, 0);
1336 iommu_flush_iotlb_global(domain->iommu, 0);
1337}
1338
1339static void domain_remove_dev_info(struct dmar_domain *domain)
1340{
1341 struct device_domain_info *info;
1342 unsigned long flags;
1343
1344 spin_lock_irqsave(&device_domain_lock, flags);
1345 while (!list_empty(&domain->devices)) {
1346 info = list_entry(domain->devices.next,
1347 struct device_domain_info, link);
1348 list_del(&info->link);
1349 list_del(&info->global);
1350 if (info->dev)
1351 info->dev->dev.archdata.iommu = NULL;
1352 spin_unlock_irqrestore(&device_domain_lock, flags);
1353
1354 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1355 free_devinfo_mem(info);
1356
1357 spin_lock_irqsave(&device_domain_lock, flags);
1358 }
1359 spin_unlock_irqrestore(&device_domain_lock, flags);
1360}
1361
1362/*
1363 * find_domain
1364 * Note: struct pci_dev->dev.archdata.iommu stores the domain info
1365 */
1366struct dmar_domain *
1367find_domain(struct pci_dev *pdev)
1368{
1369 struct device_domain_info *info;
1370
1371 /* No lock here, assumes no domain exit in normal case */
1372 info = pdev->dev.archdata.iommu;
1373 if (info)
1374 return info->domain;
1375 return NULL;
1376}
1377
1378static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1379 struct pci_dev *dev)
1380{
1381 int index;
1382
1383 while (dev) {
1384		for (index = 0; index < cnt; index++)
1385 if (dev == devices[index])
1386 return 1;
1387
1388 /* Check our parent */
1389 dev = dev->bus->self;
1390 }
1391
1392 return 0;
1393}
1394
1395static struct dmar_drhd_unit *
1396dmar_find_matched_drhd_unit(struct pci_dev *dev)
1397{
1398 struct dmar_drhd_unit *drhd = NULL;
1399
1400 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1401 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1402 drhd->devices_cnt, dev))
1403 return drhd;
1404 }
1405
1406 return NULL;
1407}
1408
1409/* domain is initialized */
1410static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1411{
1412 struct dmar_domain *domain, *found = NULL;
1413 struct intel_iommu *iommu;
1414 struct dmar_drhd_unit *drhd;
1415 struct device_domain_info *info, *tmp;
1416 struct pci_dev *dev_tmp;
1417 unsigned long flags;
1418 int bus = 0, devfn = 0;
1419
1420 domain = find_domain(pdev);
1421 if (domain)
1422 return domain;
1423
1424 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1425 if (dev_tmp) {
1426 if (dev_tmp->is_pcie) {
1427 bus = dev_tmp->subordinate->number;
1428 devfn = 0;
1429 } else {
1430 bus = dev_tmp->bus->number;
1431 devfn = dev_tmp->devfn;
1432 }
1433 spin_lock_irqsave(&device_domain_lock, flags);
1434 list_for_each_entry(info, &device_domain_list, global) {
1435 if (info->bus == bus && info->devfn == devfn) {
1436 found = info->domain;
1437 break;
1438 }
1439 }
1440 spin_unlock_irqrestore(&device_domain_lock, flags);
1441		/* the PCIe-to-PCI bridge already has a domain; use it */
1442 if (found) {
1443 domain = found;
1444 goto found_domain;
1445 }
1446 }
1447
1448 /* Allocate new domain for the device */
1449 drhd = dmar_find_matched_drhd_unit(pdev);
1450 if (!drhd) {
1451 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1452 pci_name(pdev));
1453 return NULL;
1454 }
1455 iommu = drhd->iommu;
1456
1457 domain = iommu_alloc_domain(iommu);
1458 if (!domain)
1459 goto error;
1460
1461 if (domain_init(domain, gaw)) {
1462 domain_exit(domain);
1463 goto error;
1464 }
1465
1466 /* register pcie-to-pci device */
1467 if (dev_tmp) {
1468 info = alloc_devinfo_mem();
1469 if (!info) {
1470 domain_exit(domain);
1471 goto error;
1472 }
1473 info->bus = bus;
1474 info->devfn = devfn;
1475 info->dev = NULL;
1476 info->domain = domain;
1477 /* This domain is shared by devices under p2p bridge */
1478 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1479
1480		/* the PCIe-to-PCI bridge may already have a domain; use it */
1481 found = NULL;
1482 spin_lock_irqsave(&device_domain_lock, flags);
1483 list_for_each_entry(tmp, &device_domain_list, global) {
1484 if (tmp->bus == bus && tmp->devfn == devfn) {
1485 found = tmp->domain;
1486 break;
1487 }
1488 }
1489 if (found) {
1490 free_devinfo_mem(info);
1491 domain_exit(domain);
1492 domain = found;
1493 } else {
1494 list_add(&info->link, &domain->devices);
1495 list_add(&info->global, &device_domain_list);
1496 }
1497 spin_unlock_irqrestore(&device_domain_lock, flags);
1498 }
1499
1500found_domain:
1501 info = alloc_devinfo_mem();
1502 if (!info)
1503 goto error;
1504 info->bus = pdev->bus->number;
1505 info->devfn = pdev->devfn;
1506 info->dev = pdev;
1507 info->domain = domain;
1508 spin_lock_irqsave(&device_domain_lock, flags);
1509	/* somebody else may have beaten us to it */
1510 found = find_domain(pdev);
1511 if (found != NULL) {
1512 spin_unlock_irqrestore(&device_domain_lock, flags);
1513 if (found != domain) {
1514 domain_exit(domain);
1515 domain = found;
1516 }
1517 free_devinfo_mem(info);
1518 return domain;
1519 }
1520 list_add(&info->link, &domain->devices);
1521 list_add(&info->global, &device_domain_list);
1522 pdev->dev.archdata.iommu = info;
1523 spin_unlock_irqrestore(&device_domain_lock, flags);
1524 return domain;
1525error:
1526	/* recheck here; somebody else may have set it */
1527 return find_domain(pdev);
1528}
1529
1530static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1531{
1532 struct dmar_domain *domain;
1533 unsigned long size;
1534 u64 base;
1535 int ret;
1536
1537 printk(KERN_INFO
1538 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1539 pci_name(pdev), start, end);
1540 /* page table init */
1541 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1542 if (!domain)
1543 return -ENOMEM;
1544
1545 /* The address might not be aligned */
1546 base = start & PAGE_MASK_4K;
1547 size = end - base;
1548 size = PAGE_ALIGN_4K(size);
1549 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1550 IOVA_PFN(base + size) - 1)) {
1551 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1552 ret = -ENOMEM;
1553 goto error;
1554 }
1555
1556 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1557 size, base, pci_name(pdev));
1558 /*
1559	 * The RMRR range might overlap the physical memory range,
1560	 * so clear it first
1561 */
1562 dma_pte_clear_range(domain, base, base + size);
1563
1564 ret = domain_page_mapping(domain, base, base, size,
1565 DMA_PTE_READ|DMA_PTE_WRITE);
1566 if (ret)
1567 goto error;
1568
1569 /* context entry init */
1570 ret = domain_context_mapping(domain, pdev);
1571 if (!ret)
1572 return 0;
1573error:
1574 domain_exit(domain);
1575 return ret;
1576
1577}
1578
1579static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1580 struct pci_dev *pdev)
1581{
1582 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1583 return 0;
1584 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1585 rmrr->end_address + 1);
1586}
1587
1588#ifdef CONFIG_DMAR_GFX_WA
1589extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);
1590static void __init iommu_prepare_gfx_mapping(void)
1591{
1592 struct pci_dev *pdev = NULL;
1593 u64 base, size;
1594 int slot;
1595 int ret;
1596
1597 for_each_pci_dev(pdev) {
1598 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1599 !IS_GFX_DEVICE(pdev))
1600 continue;
1601 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1602 pci_name(pdev));
1603 slot = arch_get_ram_range(0, &base, &size);
1604 while (slot >= 0) {
1605 ret = iommu_prepare_identity_map(pdev,
1606 base, base + size);
1607 if (ret)
1608 goto error;
1609 slot = arch_get_ram_range(slot, &base, &size);
1610 }
1611 continue;
1612error:
1613 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1614 }
1615}
1616#endif
1617
1618#ifdef CONFIG_DMAR_FLOPPY_WA
1619static inline void iommu_prepare_isa(void)
1620{
1621 struct pci_dev *pdev;
1622 int ret;
1623
1624 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1625 if (!pdev)
1626 return;
1627
1628 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1629 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1630
1631 if (ret)
1632		printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1633 "floppy might not work\n");
1634
1635}
1636#else
1637static inline void iommu_prepare_isa(void)
1638{
1639 return;
1640}
1641#endif /* !CONFIG_DMAR_FLOPPY_WA */
1642
1643int __init init_dmars(void)
1644{
1645 struct dmar_drhd_unit *drhd;
1646 struct dmar_rmrr_unit *rmrr;
1647 struct pci_dev *pdev;
1648 struct intel_iommu *iommu;
1649 int ret, unit = 0;
1650
1651 /*
1652 * for each drhd
1653 * allocate root
1654 * initialize and program root entry to not present
1655 * endfor
1656 */
1657 for_each_drhd_unit(drhd) {
1658 if (drhd->ignored)
1659 continue;
1660 iommu = alloc_iommu(drhd);
1661 if (!iommu) {
1662 ret = -ENOMEM;
1663 goto error;
1664 }
1665
1666 /*
1667 * TBD:
1668 * we could share the same root & context tables
1669	 * among all IOMMUs; need to split this out later.
1670 */
1671 ret = iommu_alloc_root_entry(iommu);
1672 if (ret) {
1673 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1674 goto error;
1675 }
1676 }
1677
1678 /*
1679 * For each rmrr
1680 * for each dev attached to rmrr
1681 * do
1682 * locate drhd for dev, alloc domain for dev
1683 * allocate free domain
1684 * allocate page table entries for rmrr
1685 * if context not allocated for bus
1686 * allocate and init context
1687 * set present in root table for this bus
1688 * init context with domain, translation etc
1689 * endfor
1690 * endfor
1691 */
1692 for_each_rmrr_units(rmrr) {
1693 int i;
1694 for (i = 0; i < rmrr->devices_cnt; i++) {
1695 pdev = rmrr->devices[i];
1696			/* some BIOSes list non-existent devices in the DMAR table */
1697 if (!pdev)
1698 continue;
1699 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1700 if (ret)
1701 printk(KERN_ERR
1702 "IOMMU: mapping reserved region failed\n");
1703 }
1704 }
1705
1706 iommu_prepare_gfx_mapping();
1707
1708 iommu_prepare_isa();
1709
1710 /*
1711 * for each drhd
1712 * enable fault log
1713 * global invalidate context cache
1714 * global invalidate iotlb
1715 * enable translation
1716 */
1717 for_each_drhd_unit(drhd) {
1718 if (drhd->ignored)
1719 continue;
1720 iommu = drhd->iommu;
1721		sprintf(iommu->name, "dmar%d", unit++);
1722
1723 iommu_flush_write_buffer(iommu);
1724
1725 ret = dmar_set_interrupt(iommu);
1726 if (ret)
1727 goto error;
1728
1729 iommu_set_root_entry(iommu);
1730
1731 iommu_flush_context_global(iommu, 0);
1732 iommu_flush_iotlb_global(iommu, 0);
1733
1734 ret = iommu_enable_translation(iommu);
1735 if (ret)
1736 goto error;
1737 }
1738
1739 return 0;
1740error:
1741 for_each_drhd_unit(drhd) {
1742 if (drhd->ignored)
1743 continue;
1744 iommu = drhd->iommu;
1745 free_iommu(iommu);
1746 }
1747 return ret;
1748}
1749
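/*
 * Size of the 4K-aligned region needed to cover [host_addr, host_addr + size):
 * e.g. host_addr = 0x1234, size = 0x100 needs one page, so this returns 0x1000.
 */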
1750static inline u64 aligned_size(u64 host_addr, size_t size)
1751{
1752 u64 addr;
1753 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1754 return PAGE_ALIGN_4K(addr);
1755}
1756
1757struct iova *
1758iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1759{
1760 struct iova *piova;
1761
1762 /* Make sure it's in range */
1763 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1764 if (!size || (IOVA_START_ADDR + size > end))
1765 return NULL;
1766
1767 piova = alloc_iova(&domain->iovad,
1768 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1769 return piova;
1770}
1771
1772static struct iova *
1773__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1774 size_t size)
1775{
1776 struct pci_dev *pdev = to_pci_dev(dev);
1777 struct iova *iova = NULL;
1778
1779 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1780 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1781 } else {
1782 /*
1783 * First try to allocate an io virtual address in
1784 * DMA_32BIT_MASK and if that fails then try allocating
1785		 * from the higher range
1786 */
1787 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1788 if (!iova)
1789 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1790 }
1791
1792 if (!iova) {
1793		printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
1794 return NULL;
1795 }
1796
1797 return iova;
1798}
1799
1800static struct dmar_domain *
1801get_valid_domain_for_dev(struct pci_dev *pdev)
1802{
1803 struct dmar_domain *domain;
1804 int ret;
1805
1806 domain = get_domain_for_dev(pdev,
1807 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1808 if (!domain) {
1809 printk(KERN_ERR
1810			"Allocating domain for %s failed\n", pci_name(pdev));
1811		return NULL;
1812 }
1813
1814 /* make sure context mapping is ok */
1815 if (unlikely(!domain_context_mapped(domain, pdev))) {
1816 ret = domain_context_mapping(domain, pdev);
1817 if (ret) {
1818 printk(KERN_ERR
1819				"Domain context map for %s failed\n",
1820				pci_name(pdev));
1821			return NULL;
1822 }
1823 }
1824
1825 return domain;
1826}
1827
1828static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1829 size_t size, int dir)
1830{
1831 struct pci_dev *pdev = to_pci_dev(hwdev);
1832 int ret;
1833 struct dmar_domain *domain;
1834 unsigned long start_addr;
1835 struct iova *iova;
1836 int prot = 0;
1837
1838 BUG_ON(dir == DMA_NONE);
1839 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1840 return virt_to_bus(addr);
1841
1842 domain = get_valid_domain_for_dev(pdev);
1843 if (!domain)
1844 return 0;
1845
1846 addr = (void *)virt_to_phys(addr);
1847 size = aligned_size((u64)addr, size);
1848
1849 iova = __intel_alloc_iova(hwdev, domain, size);
1850 if (!iova)
1851 goto error;
1852
1853 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1854
1855 /*
1856	 * Check if DMAR supports zero-length reads on write-only
1857	 * mappings.
1858 */
1859	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
1860 !cap_zlr(domain->iommu->cap))
1861 prot |= DMA_PTE_READ;
1862 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1863 prot |= DMA_PTE_WRITE;
1864 /*
1865 * addr - (addr + size) might be partial page, we should map the whole
1866 * page. Note: if two part of one page are separately mapped, we
1867 * might have two guest_addr mapping to the same host addr, but this
1868 * is not a big problem
1869 */
1870 ret = domain_page_mapping(domain, start_addr,
1871 ((u64)addr) & PAGE_MASK_4K, size, prot);
1872 if (ret)
1873 goto error;
1874
1875 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1876 pci_name(pdev), size, (u64)addr,
1877 size, (u64)start_addr, dir);
1878
1879 /* it's a non-present to present mapping */
1880 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1881 start_addr, size >> PAGE_SHIFT_4K, 1);
1882 if (ret)
1883 iommu_flush_write_buffer(domain->iommu);
1884
1885 return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
1886
1887error:
1888 if (iova)
1889 __free_iova(&domain->iovad, iova);
1890	printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
1891 pci_name(pdev), size, (u64)addr, dir);
1892 return 0;
1893}
1894
1895static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1896 size_t size, int dir)
1897{
1898 struct pci_dev *pdev = to_pci_dev(dev);
1899 struct dmar_domain *domain;
1900 unsigned long start_addr;
1901 struct iova *iova;
1902
1903 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1904 return;
1905 domain = find_domain(pdev);
1906 BUG_ON(!domain);
1907
1908 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1909 if (!iova)
1910 return;
1911
1912 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1913 size = aligned_size((u64)dev_addr, size);
1914
1915 pr_debug("Device %s unmapping: %lx@%llx\n",
1916 pci_name(pdev), size, (u64)start_addr);
1917
1918 /* clear the whole page */
1919 dma_pte_clear_range(domain, start_addr, start_addr + size);
1920 /* free page tables */
1921 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1922
1923 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1924 size >> PAGE_SHIFT_4K, 0))
1925 iommu_flush_write_buffer(domain->iommu);
1926
1927 /* free iova */
1928 __free_iova(&domain->iovad, iova);
1929}
1930
1931static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1932 dma_addr_t *dma_handle, gfp_t flags)
1933{
1934 void *vaddr;
1935 int order;
1936
1937 size = PAGE_ALIGN_4K(size);
1938 order = get_order(size);
1939 flags &= ~(GFP_DMA | GFP_DMA32);
1940
1941 vaddr = (void *)__get_free_pages(flags, order);
1942 if (!vaddr)
1943 return NULL;
1944 memset(vaddr, 0, size);
1945
1946 *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1947 if (*dma_handle)
1948 return vaddr;
1949 free_pages((unsigned long)vaddr, order);
1950 return NULL;
1951}
1952
1953static void intel_free_coherent(struct device *hwdev, size_t size,
1954 void *vaddr, dma_addr_t dma_handle)
1955{
1956 int order;
1957
1958 size = PAGE_ALIGN_4K(size);
1959 order = get_order(size);
1960
1961 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1962 free_pages((unsigned long)vaddr, order);
1963}
1964
1965#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
1966static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
1967 int nelems, int dir)
1968{
1969 int i;
1970 struct pci_dev *pdev = to_pci_dev(hwdev);
1971 struct dmar_domain *domain;
1972 unsigned long start_addr;
1973 struct iova *iova;
1974 size_t size = 0;
1975 void *addr;
1976 struct scatterlist *sg;
1977
1978 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1979 return;
1980
1981 domain = find_domain(pdev);
1982
1983 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
1984 if (!iova)
1985 return;
1986 for_each_sg(sglist, sg, nelems, i) {
1987 addr = SG_ENT_VIRT_ADDRESS(sg);
1988 size += aligned_size((u64)addr, sg->length);
1989 }
1990
1991 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1992
1993 /* clear the whole page */
1994 dma_pte_clear_range(domain, start_addr, start_addr + size);
1995 /* free page tables */
1996 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1997
1998 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1999 size >> PAGE_SHIFT_4K, 0))
2000 iommu_flush_write_buffer(domain->iommu);
2001
2002 /* free iova */
2003 __free_iova(&domain->iovad, iova);
2004}
2005
2006static int intel_nontranslate_map_sg(struct device *hwdev,
2007 struct scatterlist *sglist, int nelems, int dir)
2008{
2009 int i;
2010 struct scatterlist *sg;
2011
2012 for_each_sg(sglist, sg, nelems, i) {
2013 BUG_ON(!sg_page(sg));
2014 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2015 sg->dma_length = sg->length;
2016 }
2017 return nelems;
2018}
2019
2020static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2021 int nelems, int dir)
2022{
2023 void *addr;
2024 int i;
2025 struct pci_dev *pdev = to_pci_dev(hwdev);
2026 struct dmar_domain *domain;
2027 size_t size = 0;
2028 int prot = 0;
2029 size_t offset = 0;
2030 struct iova *iova = NULL;
2031 int ret;
2032 struct scatterlist *sg;
2033 unsigned long start_addr;
2034
2035 BUG_ON(dir == DMA_NONE);
2036 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2037 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2038
2039 domain = get_valid_domain_for_dev(pdev);
2040 if (!domain)
2041 return 0;
2042
2043 for_each_sg(sglist, sg, nelems, i) {
2044 addr = SG_ENT_VIRT_ADDRESS(sg);
2045 addr = (void *)virt_to_phys(addr);
2046 size += aligned_size((u64)addr, sg->length);
2047 }
2048
2049 iova = __intel_alloc_iova(hwdev, domain, size);
2050 if (!iova) {
2051 sglist->dma_length = 0;
2052 return 0;
2053 }
2054
2055 /*
2056	 * Check if DMAR supports zero-length reads on write-only
2057	 * mappings.
2058 */
2059	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2060 !cap_zlr(domain->iommu->cap))
2061 prot |= DMA_PTE_READ;
2062 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2063 prot |= DMA_PTE_WRITE;
2064
2065 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2066 offset = 0;
2067 for_each_sg(sglist, sg, nelems, i) {
2068 addr = SG_ENT_VIRT_ADDRESS(sg);
2069 addr = (void *)virt_to_phys(addr);
2070 size = aligned_size((u64)addr, sg->length);
2071 ret = domain_page_mapping(domain, start_addr + offset,
2072 ((u64)addr) & PAGE_MASK_4K,
2073 size, prot);
2074 if (ret) {
2075 /* clear the page */
2076 dma_pte_clear_range(domain, start_addr,
2077 start_addr + offset);
2078 /* free page tables */
2079 dma_pte_free_pagetable(domain, start_addr,
2080 start_addr + offset);
2081 /* free iova */
2082 __free_iova(&domain->iovad, iova);
2083 return 0;
2084 }
2085 sg->dma_address = start_addr + offset +
2086 ((u64)addr & (~PAGE_MASK_4K));
2087 sg->dma_length = sg->length;
2088 offset += size;
2089 }
2090
2091 /* it's a non-present to present mapping */
2092 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2093 start_addr, offset >> PAGE_SHIFT_4K, 1))
2094 iommu_flush_write_buffer(domain->iommu);
2095 return nelems;
2096}
2097
2098static struct dma_mapping_ops intel_dma_ops = {
2099 .alloc_coherent = intel_alloc_coherent,
2100 .free_coherent = intel_free_coherent,
2101 .map_single = intel_map_single,
2102 .unmap_single = intel_unmap_single,
2103 .map_sg = intel_map_sg,
2104 .unmap_sg = intel_unmap_sg,
2105};
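/* Installed as the global dma_ops by intel_iommu_init() below. */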
2106
2107static inline int iommu_domain_cache_init(void)
2108{
2109 int ret = 0;
2110
2111 iommu_domain_cache = kmem_cache_create("iommu_domain",
2112 sizeof(struct dmar_domain),
2113 0,
2114 SLAB_HWCACHE_ALIGN,
2116 NULL);
2117 if (!iommu_domain_cache) {
2118 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2119 ret = -ENOMEM;
2120 }
2121
2122 return ret;
2123}
2124
2125static inline int iommu_devinfo_cache_init(void)
2126{
2127 int ret = 0;
2128
2129 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2130 sizeof(struct device_domain_info),
2131 0,
2132 SLAB_HWCACHE_ALIGN,
2134 NULL);
2135 if (!iommu_devinfo_cache) {
2136 printk(KERN_ERR "Couldn't create devinfo cache\n");
2137 ret = -ENOMEM;
2138 }
2139
2140 return ret;
2141}
2142
2143static inline int iommu_iova_cache_init(void)
2144{
2145 int ret = 0;
2146
2147 iommu_iova_cache = kmem_cache_create("iommu_iova",
2148 sizeof(struct iova),
2149 0,
2150 SLAB_HWCACHE_ALIGN,
2152 NULL);
2153 if (!iommu_iova_cache) {
2154 printk(KERN_ERR "Couldn't create iova cache\n");
2155 ret = -ENOMEM;
2156 }
2157
2158 return ret;
2159}
2160
2161static int __init iommu_init_mempool(void)
2162{
2163 int ret;
2164 ret = iommu_iova_cache_init();
2165 if (ret)
2166 return ret;
2167
2168 ret = iommu_domain_cache_init();
2169 if (ret)
2170 goto domain_error;
2171
2172 ret = iommu_devinfo_cache_init();
2173 if (!ret)
2174 return ret;
2175
2176 kmem_cache_destroy(iommu_domain_cache);
2177domain_error:
2178 kmem_cache_destroy(iommu_iova_cache);
2179
2180 return -ENOMEM;
2181}
2182
2183static void __init iommu_exit_mempool(void)
2184{
2185 kmem_cache_destroy(iommu_devinfo_cache);
2186 kmem_cache_destroy(iommu_domain_cache);
2187 kmem_cache_destroy(iommu_iova_cache);
2188
2189}
2190
2191void __init detect_intel_iommu(void)
2192{
2193 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2194 return;
2195 if (early_dmar_detect()) {
2196 iommu_detected = 1;
2197 }
2198}
2199
2200static void __init init_no_remapping_devices(void)
2201{
2202 struct dmar_drhd_unit *drhd;
2203
2204 for_each_drhd_unit(drhd) {
2205 if (!drhd->include_all) {
2206 int i;
2207 for (i = 0; i < drhd->devices_cnt; i++)
2208 if (drhd->devices[i] != NULL)
2209 break;
2210 /* ignore DMAR unit if no pci devices exist */
2211 if (i == drhd->devices_cnt)
2212 drhd->ignored = 1;
2213 }
2214 }
2215
2216 if (dmar_map_gfx)
2217 return;
2218
2219 for_each_drhd_unit(drhd) {
2220 int i;
2221 if (drhd->ignored || drhd->include_all)
2222 continue;
2223
2224 for (i = 0; i < drhd->devices_cnt; i++)
2225 if (drhd->devices[i] &&
2226 !IS_GFX_DEVICE(drhd->devices[i]))
2227 break;
2228
2229 if (i < drhd->devices_cnt)
2230 continue;
2231
2232 /* bypass IOMMU if it is just for gfx devices */
2233 drhd->ignored = 1;
2234 for (i = 0; i < drhd->devices_cnt; i++) {
2235 if (!drhd->devices[i])
2236 continue;
2237 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2238 }
2239 }
2240}
2241
2242int __init intel_iommu_init(void)
2243{
2244 int ret = 0;
2245
2246 if (no_iommu || swiotlb || dmar_disabled)
2247 return -ENODEV;
2248
2249 if (dmar_table_init())
2250 return -ENODEV;
2251
2252 iommu_init_mempool();
2253 dmar_init_reserved_ranges();
2254
2255 init_no_remapping_devices();
2256
2257 ret = init_dmars();
2258 if (ret) {
2259 printk(KERN_ERR "IOMMU: dmar init failed\n");
2260 put_iova_domain(&reserved_iova_list);
2261 iommu_exit_mempool();
2262 return ret;
2263 }
2264 printk(KERN_INFO
2265 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2266
2267 force_iommu = 1;
2268 dma_ops = &intel_dma_ops;
2269 return 0;
2270}
2271
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
new file mode 100644
index 0000000000..ee88dd2400
--- /dev/null
+++ b/drivers/pci/intel-iommu.h
@@ -0,0 +1,325 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
19 */
20
21#ifndef _INTEL_IOMMU_H_
22#define _INTEL_IOMMU_H_
23
24#include <linux/types.h>
25#include <linux/msi.h>
26#include "iova.h"
27#include <linux/io.h>
28
29/*
30 * Intel IOMMU register specification per version 1.0 public spec.
31 */
32
33#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */
34#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */
35#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */
36#define DMAR_GCMD_REG 0x18 /* Global command register */
37#define DMAR_GSTS_REG 0x1c /* Global status register */
38#define DMAR_RTADDR_REG 0x20 /* Root entry table */
39#define DMAR_CCMD_REG 0x28 /* Context command reg */
40#define DMAR_FSTS_REG 0x34 /* Fault Status register */
41#define DMAR_FECTL_REG 0x38 /* Fault control register */
42#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */
43#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */
44#define DMAR_FEUADDR_REG 0x44 /* Upper address register */
45#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */
46#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */
47#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */
48#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
49#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
50#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
51
52#define OFFSET_STRIDE (9)
53/*
54#define dmar_readl(dmar, reg) readl(dmar + reg)
55#define dmar_readq(dmar, reg) ({ \
56 u32 lo, hi; \
57 lo = readl(dmar + reg); \
58 hi = readl(dmar + reg + 4); \
59 (((u64) hi) << 32) + lo; })
60*/
61static inline u64 dmar_readq(void *addr)
62{
63 u32 lo, hi;
64 lo = readl(addr);
65 hi = readl(addr + 4);
66 return (((u64) hi) << 32) + lo;
67}
68
69static inline void dmar_writeq(void __iomem *addr, u64 val)
70{
71 writel((u32)val, addr);
72 writel((u32)(val >> 32), addr + 4);
73}
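/*
 * Note: the 64-bit registers are accessed as two 32-bit halves here; this
 * assumes split accesses are tolerated, which the VT-d spec permits for its
 * software-visible 64-bit registers.
 */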
74
75#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4)
76#define DMAR_VER_MINOR(v) ((v) & 0x0f)
77
78/*
79 * Decoding Capability Register
80 */
81#define cap_read_drain(c) (((c) >> 55) & 1)
82#define cap_write_drain(c) (((c) >> 54) & 1)
83#define cap_max_amask_val(c) (((c) >> 48) & 0x3f)
84#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1)
85#define cap_pgsel_inv(c) (((c) >> 39) & 1)
86
87#define cap_super_page_val(c) (((c) >> 34) & 0xf)
88#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \
89 * OFFSET_STRIDE) + 21)
90
91#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16)
92#define cap_max_fault_reg_offset(c) \
93 (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
94
95#define cap_zlr(c) (((c) >> 22) & 1)
96#define cap_isoch(c) (((c) >> 23) & 1)
97#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1)
98#define cap_sagaw(c) (((c) >> 8) & 0x1f)
99#define cap_caching_mode(c) (((c) >> 7) & 1)
100#define cap_phmr(c) (((c) >> 6) & 1)
101#define cap_plmr(c) (((c) >> 5) & 1)
102#define cap_rwbf(c) (((c) >> 4) & 1)
103#define cap_afl(c) (((c) >> 3) & 1)
104#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
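/* e.g. an ND field of 0 decodes to 16 domain-ids, 6 to 65536 */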
105/*
106 * Extended Capability Register
107 */
108
109#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1)
110#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
111#define ecap_max_iotlb_offset(e) \
112 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
113#define ecap_coherent(e) ((e) & 0x1)
114
115
116/* IOTLB_REG */
117#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
118#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
119#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
120#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
121#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
122#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
123#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
124#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32)
125#define DMA_TLB_IVT (((u64)1) << 63)
126#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
127#define DMA_TLB_MAX_SIZE (0x3f)
128
129/* GCMD_REG */
130#define DMA_GCMD_TE (((u32)1) << 31)
131#define DMA_GCMD_SRTP (((u32)1) << 30)
132#define DMA_GCMD_SFL (((u32)1) << 29)
133#define DMA_GCMD_EAFL (((u32)1) << 28)
134#define DMA_GCMD_WBF (((u32)1) << 27)
135
136/* GSTS_REG */
137#define DMA_GSTS_TES (((u32)1) << 31)
138#define DMA_GSTS_RTPS (((u32)1) << 30)
139#define DMA_GSTS_FLS (((u32)1) << 29)
140#define DMA_GSTS_AFLS (((u32)1) << 28)
141#define DMA_GSTS_WBFS (((u32)1) << 27)
142
143/* CCMD_REG */
144#define DMA_CCMD_ICC (((u64)1) << 63)
145#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
146#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
147#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
148#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
149#define DMA_CCMD_MASK_NOBIT 0
150#define DMA_CCMD_MASK_1BIT 1
151#define DMA_CCMD_MASK_2BIT 2
152#define DMA_CCMD_MASK_3BIT 3
153#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
154#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
155
156/* FECTL_REG */
157#define DMA_FECTL_IM (((u32)1) << 31)
158
159/* FSTS_REG */
160#define DMA_FSTS_PPF ((u32)2)
161#define DMA_FSTS_PFO ((u32)1)
162#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
163
164/* FRCD_REG, 32 bits access */
165#define DMA_FRCD_F (((u32)1) << 31)
166#define dma_frcd_type(d) ((d >> 30) & 1)
167#define dma_frcd_fault_reason(c) (c & 0xff)
168#define dma_frcd_source_id(c) (c & 0xffff)
169#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bits */
170
171/*
172 * 0: Present
173 * 1-11: Reserved
174 * 12-63: Context Ptr (12 - (haw-1))
175 * 64-127: Reserved
176 */
177struct root_entry {
178 u64 val;
179 u64 rsvd1;
180};
181#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
182static inline bool root_present(struct root_entry *root)
183{
184 return (root->val & 1);
185}
186static inline void set_root_present(struct root_entry *root)
187{
188 root->val |= 1;
189}
190static inline void set_root_value(struct root_entry *root, unsigned long value)
191{
192 root->val |= value & PAGE_MASK_4K;
193}
194
195struct context_entry;
196static inline struct context_entry *
197get_context_addr_from_root(struct root_entry *root)
198{
199 return (struct context_entry *)
200 (root_present(root)?phys_to_virt(
201 root->val & PAGE_MASK_4K):
202 NULL);
203}
204
205/*
206 * low 64 bits:
207 * 0: present
208 * 1: fault processing disable
209 * 2-3: translation type
210 * 12-63: address space root
211 * high 64 bits:
212 * 0-2: address width
213 * 3-6: avail
214 * 8-23: domain id
215 */
216struct context_entry {
217 u64 lo;
218 u64 hi;
219};
220#define context_present(c) ((c).lo & 1)
221#define context_fault_disable(c) (((c).lo >> 1) & 1)
222#define context_translation_type(c) (((c).lo >> 2) & 3)
223#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
224#define context_address_width(c) ((c).hi & 7)
225#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
226
227#define context_set_present(c) do {(c).lo |= 1;} while (0)
228#define context_set_fault_enable(c) \
229 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
230#define context_set_translation_type(c, val) \
231 do { \
232 (c).lo &= (((u64)-1) << 4) | 3; \
233 (c).lo |= ((val) & 3) << 2; \
234 } while (0)
235#define CONTEXT_TT_MULTI_LEVEL 0
236#define context_set_address_root(c, val) \
237 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
238#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
239#define context_set_domain_id(c, val) \
240 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
241#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
242
243/*
244 * 0: readable
245 * 1: writable
246 * 2-6: reserved
247 * 7: super page
248 * 8-11: available
249 * 12-63: Host physical address
250 */
251struct dma_pte {
252 u64 val;
253};
254#define dma_clear_pte(p) do {(p).val = 0;} while (0)
255
256#define DMA_PTE_READ (1)
257#define DMA_PTE_WRITE (2)
258
259#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
260#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
261#define dma_set_pte_prot(p, prot) \
262 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
263#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
264#define dma_set_pte_addr(p, addr) do {\
265 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
266#define dma_pte_present(p) (((p).val & 3) != 0)
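/* e.g. a read/write mapping of host page 0x12345000 has val 0x12345003 */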
267
268struct intel_iommu;
269
270struct dmar_domain {
271 int id; /* domain id */
272 struct intel_iommu *iommu; /* back pointer to owning iommu */
273
274 struct list_head devices; /* all devices' list */
275 struct iova_domain iovad; /* iova's that belong to this domain */
276
277 struct dma_pte *pgd; /* virtual address */
278 spinlock_t mapping_lock; /* page table lock */
279 int gaw; /* max guest address width */
280
281 /* adjusted guest address width, 0 is level 2 30-bit */
282 int agaw;
283
284#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
285 int flags;
286};
287
288/* PCI domain-device relationship */
289struct device_domain_info {
290 struct list_head link; /* link to domain siblings */
291 struct list_head global; /* link to global list */
292	u8 bus; /* PCI bus number */
293 u8 devfn; /* PCI devfn number */
294 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
295 struct dmar_domain *domain; /* pointer to domain */
296};
297
298extern int init_dmars(void);
299
300struct intel_iommu {
301 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
302 u64 cap;
303 u64 ecap;
304 unsigned long *domain_ids; /* bitmap of domains */
305 struct dmar_domain **domains; /* ptr to domains */
306 int seg;
307 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
308 spinlock_t lock; /* protect context, domain ids */
309 spinlock_t register_lock; /* protect register handling */
310 struct root_entry *root_entry; /* virtual address */
311
312 unsigned int irq;
313 unsigned char name[7]; /* Device Name */
314 struct msi_msg saved_msg;
315 struct sys_device sysdev;
316};
317
318#ifndef CONFIG_DMAR_GFX_WA
319static inline void iommu_prepare_gfx_mapping(void)
320{
321 return;
322}
323#endif /* !CONFIG_DMAR_GFX_WA */
324
325#endif
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
new file mode 100644
index 0000000000..a84571c293
--- /dev/null
+++ b/drivers/pci/iova.c
@@ -0,0 +1,394 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This file is released under the GPLv2.
5 *
6 * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
7 */
8
9#include "iova.h"
10
11void
12init_iova_domain(struct iova_domain *iovad)
13{
14 spin_lock_init(&iovad->iova_alloc_lock);
15 spin_lock_init(&iovad->iova_rbtree_lock);
16 iovad->rbroot = RB_ROOT;
17 iovad->cached32_node = NULL;
18
19}
20
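/*
 * cached32_node remembers the most recent 32-bit allocation so that
 * DMA_32BIT_PFN-limited searches can resume just below it instead of
 * walking down from the rightmost node of the tree every time.
 */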
21static struct rb_node *
22__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
23{
24 if ((*limit_pfn != DMA_32BIT_PFN) ||
25 (iovad->cached32_node == NULL))
26 return rb_last(&iovad->rbroot);
27 else {
28 struct rb_node *prev_node = rb_prev(iovad->cached32_node);
29 struct iova *curr_iova =
30 container_of(iovad->cached32_node, struct iova, node);
31 *limit_pfn = curr_iova->pfn_lo - 1;
32 return prev_node;
33 }
34}
35
36static void
37__cached_rbnode_insert_update(struct iova_domain *iovad,
38 unsigned long limit_pfn, struct iova *new)
39{
40 if (limit_pfn != DMA_32BIT_PFN)
41 return;
42 iovad->cached32_node = &new->node;
43}
44
45static void
46__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
47{
48 struct iova *cached_iova;
49 struct rb_node *curr;
50
51 if (!iovad->cached32_node)
52 return;
53 curr = iovad->cached32_node;
54 cached_iova = container_of(curr, struct iova, node);
55
56 if (free->pfn_lo >= cached_iova->pfn_lo)
57 iovad->cached32_node = rb_next(&free->node);
58}
59
60/* Computes the padding size required to make the
61 * start address naturally aligned on its size
62 */
63static int
64iova_get_pad_size(int size, unsigned int limit_pfn)
65{
66 unsigned int pad_size = 0;
67 unsigned int order = ilog2(size);
68
69 if (order)
70 pad_size = (limit_pfn + 1) % (1 << order);
71
72 return pad_size;
73}
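/*
 * e.g. size = 8, limit_pfn = 0x3d: pad_size = 0x3e % 8 = 6, so
 * __alloc_iova_range() below ends the allocation 6 pfns under the limit
 * and pfn_lo = 0x3d - (8 + 6) + 1 = 0x30, which is 8-aligned.
 */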
74
75static int __alloc_iova_range(struct iova_domain *iovad, unsigned long size,
76 unsigned long limit_pfn, struct iova *new, bool size_aligned)
77{
78 struct rb_node *curr = NULL;
79 unsigned long flags;
80 unsigned long saved_pfn;
81 unsigned int pad_size = 0;
82
83 /* Walk the tree backwards */
84 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
85 saved_pfn = limit_pfn;
86 curr = __get_cached_rbnode(iovad, &limit_pfn);
87 while (curr) {
88 struct iova *curr_iova = container_of(curr, struct iova, node);
89 if (limit_pfn < curr_iova->pfn_lo)
90 goto move_left;
91 else if (limit_pfn < curr_iova->pfn_hi)
92 goto adjust_limit_pfn;
93 else {
94 if (size_aligned)
95 pad_size = iova_get_pad_size(size, limit_pfn);
96 if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn)
97 break; /* found a free slot */
98 }
99adjust_limit_pfn:
100 limit_pfn = curr_iova->pfn_lo - 1;
101move_left:
102 curr = rb_prev(curr);
103 }
104
105 if (!curr) {
106 if (size_aligned)
107 pad_size = iova_get_pad_size(size, limit_pfn);
108 if ((IOVA_START_PFN + size + pad_size) > limit_pfn) {
109 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
110 return -ENOMEM;
111 }
112 }
113
114 /* pfn_lo will point to size aligned address if size_aligned is set */
115 new->pfn_lo = limit_pfn - (size + pad_size) + 1;
116 new->pfn_hi = new->pfn_lo + size - 1;
117
118 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
119 return 0;
120}
121
122static void
123iova_insert_rbtree(struct rb_root *root, struct iova *iova)
124{
125 struct rb_node **new = &(root->rb_node), *parent = NULL;
126 /* Figure out where to put new node */
127 while (*new) {
128 struct iova *this = container_of(*new, struct iova, node);
129 parent = *new;
130
131 if (iova->pfn_lo < this->pfn_lo)
132 new = &((*new)->rb_left);
133 else if (iova->pfn_lo > this->pfn_lo)
134 new = &((*new)->rb_right);
135 else
136 BUG(); /* this should not happen */
137 }
138 /* Add new node and rebalance tree. */
139 rb_link_node(&iova->node, parent, new);
140 rb_insert_color(&iova->node, root);
141}
142
143/**
144 * alloc_iova - allocates an iova
145 * @iovad - iova domain in question
146 * @size - size of page frames to allocate
147 * @limit_pfn - max limit address
148 * @size_aligned - set if size_aligned address range is required
149 * This function allocates an iova in the range IOVA_START_PFN to limit_pfn,
150 * searching down from limit_pfn. If the size_aligned
151 * flag is set then the allocated address iova->pfn_lo will be naturally
152 * aligned on roundup_power_of_two(size).
153 */
154struct iova *
155alloc_iova(struct iova_domain *iovad, unsigned long size,
156 unsigned long limit_pfn,
157 bool size_aligned)
158{
159 unsigned long flags;
160 struct iova *new_iova;
161 int ret;
162
163 new_iova = alloc_iova_mem();
164 if (!new_iova)
165 return NULL;
166
167	/* If size_aligned is set then round the size up to
168	 * the next power of two.
169 */
170 if (size_aligned)
171 size = __roundup_pow_of_two(size);
172
173 spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
174 ret = __alloc_iova_range(iovad, size, limit_pfn, new_iova,
175 size_aligned);
176
177 if (ret) {
178 spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
179 free_iova_mem(new_iova);
180 return NULL;
181 }
182
183 /* Insert the new_iova into domain rbtree by holding writer lock */
184 spin_lock(&iovad->iova_rbtree_lock);
185 iova_insert_rbtree(&iovad->rbroot, new_iova);
186 __cached_rbnode_insert_update(iovad, limit_pfn, new_iova);
187 spin_unlock(&iovad->iova_rbtree_lock);
188
189 spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
190
191 return new_iova;
192}
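/*
 * A minimal usage sketch, not part of the original file; the caller
 * name and the mapping step are assumptions for illustration only.
 */
static void example_dma_alloc(struct iova_domain *iovad)
{
	struct iova *iova;

	/* request 8 naturally aligned pfns below the 32-bit boundary */
	iova = alloc_iova(iovad, 8, DMA_32BIT_PFN, true);
	if (!iova)
		return;		/* address space exhausted */

	/* ... map iova->pfn_lo .. iova->pfn_hi in the IOMMU page tables ... */

	__free_iova(iovad, iova);	/* hand the range back to the domain */
}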
193
194/**
195 * find_iova - finds an iova for a given pfn
196 * @iovad: iova domain in question.
197 * @pfn: page frame number
198 * This function finds and returns an iova belonging to the
199 * given domain which matches the given pfn.
200 */
201struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
202{
203 unsigned long flags;
204 struct rb_node *node;
205
206 /* Take the lock so that no other thread is manipulating the rbtree */
207 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
208 node = iovad->rbroot.rb_node;
209 while (node) {
210 struct iova *iova = container_of(node, struct iova, node);
211
212 /* If pfn falls within iova's range, return iova */
213 if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
214 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
215 /* We are not holding the lock while this iova
216 * is referenced by the caller, as the same thread
217 * which called this function also calls __free_iova(),
218 * and it is by design that only one thread can possibly
219 * reference a particular iova, hence no conflict.
220 */
221 return iova;
222 }
223
224 if (pfn < iova->pfn_lo)
225 node = node->rb_left;
226 else if (pfn > iova->pfn_lo)
227 node = node->rb_right;
228 }
229
230 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
231 return NULL;
232}
233
234/**
235 * __free_iova - frees the given iova
236 * @iovad: iova domain in question.
237 * @iova: iova in question.
238 * Frees the given iova belonging to the given domain
239 */
240void
241__free_iova(struct iova_domain *iovad, struct iova *iova)
242{
243 unsigned long flags;
244
245 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
246 __cached_rbnode_delete_update(iovad, iova);
247 rb_erase(&iova->node, &iovad->rbroot);
248 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
249 free_iova_mem(iova);
250}
251
252/**
253 * free_iova - finds and frees the iova for a given pfn
254 * @iovad: iova domain in question.
255 * @pfn: pfn that was allocated previously
256 * This function finds the iova for a given pfn and then
257 * frees the iova from that domain.
258 */
259void
260free_iova(struct iova_domain *iovad, unsigned long pfn)
261{
262 struct iova *iova = find_iova(iovad, pfn);
263 if (iova)
264 __free_iova(iovad, iova);
265
266}
267
268/**
269 * put_iova_domain - destroys the iova domain
270 * @iovad: iova domain in question.
271 * All the iovas in that domain are destroyed.
272 */
273void put_iova_domain(struct iova_domain *iovad)
274{
275 struct rb_node *node;
276 unsigned long flags;
277
278 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
279 node = rb_first(&iovad->rbroot);
280 while (node) {
281 struct iova *iova = container_of(node, struct iova, node);
282 rb_erase(node, &iovad->rbroot);
283 free_iova_mem(iova);
284 node = rb_first(&iovad->rbroot);
285 }
286 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
287}
288
289static int
290__is_range_overlap(struct rb_node *node,
291 unsigned long pfn_lo, unsigned long pfn_hi)
292{
293 struct iova *iova = container_of(node, struct iova, node);
294
295 if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
296 return 1;
297 return 0;
298}
299
300static struct iova *
301__insert_new_range(struct iova_domain *iovad,
302 unsigned long pfn_lo, unsigned long pfn_hi)
303{
304 struct iova *iova;
305
306 iova = alloc_iova_mem();
307 if (!iova)
308 return iova;
309
310 iova->pfn_hi = pfn_hi;
311 iova->pfn_lo = pfn_lo;
312 iova_insert_rbtree(&iovad->rbroot, iova);
313 return iova;
314}
315
316static void
317__adjust_overlap_range(struct iova *iova,
318 unsigned long *pfn_lo, unsigned long *pfn_hi)
319{
320 if (*pfn_lo < iova->pfn_lo)
321 iova->pfn_lo = *pfn_lo;
322 if (*pfn_hi > iova->pfn_hi)
323 *pfn_lo = iova->pfn_hi + 1;
324}
325
326/**
327 * reserve_iova - reserves an iova in the given range
328 * @iovad: iova domain pointer
329 * @pfn_lo: lower page frame address
330 * @pfn_hi: higher pfn address
331 * This function reserves the address range from pfn_lo to pfn_hi so
332 * that this range is not dished out as part of alloc_iova.
333 */
334struct iova *
335reserve_iova(struct iova_domain *iovad,
336 unsigned long pfn_lo, unsigned long pfn_hi)
337{
338 struct rb_node *node;
339 unsigned long flags;
340 struct iova *iova;
341 unsigned int overlap = 0;
342
343 spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
344 spin_lock(&iovad->iova_rbtree_lock);
345 for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
346 if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
347 iova = container_of(node, struct iova, node);
348 __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
349 if ((pfn_lo >= iova->pfn_lo) &&
350 (pfn_hi <= iova->pfn_hi))
351 goto finish;
352 overlap = 1;
353
354 } else if (overlap)
355 break;
356 }
357
358 /* We are here either because this is the first reserved node
359 * or we need to insert the remaining non-overlapping addr range
360 */
361 iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
362finish:
363
364 spin_unlock(&iovad->iova_rbtree_lock);
365 spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);
366 return iova;
367}
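/*
 * Illustrative sketch, an assumption rather than part of the original
 * source: carving a platform MMIO window out of the allocator so that
 * alloc_iova() can never dish it out.
 */
static void example_reserve(struct iova_domain *iovad)
{
	/* e.g. keep the 0xfee00000-0xfeefffff MSI window off limits */
	if (!reserve_iova(iovad, IOVA_PFN(0xfee00000), IOVA_PFN(0xfeefffff)))
		printk(KERN_ERR "iova: reserving the MSI window failed\n");
}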
368
369/**
370 * copy_reserved_iova - copies the reserved iovas between domains
371 * @from: source domain from where to copy
372 * @to: destination domain where to copy
373 * This function copies reserved iovas from one domain to
374 * the other.
375 */
376void
377copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
378{
379 unsigned long flags;
380 struct rb_node *node;
381
382 spin_lock_irqsave(&from->iova_alloc_lock, flags);
383 spin_lock(&from->iova_rbtree_lock);
384 for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
385 struct iova *iova = container_of(node, struct iova, node);
386 struct iova *new_iova;
387 new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
388 if (!new_iova)
389 printk(KERN_ERR "Reserve iova range %lx-%lx failed\n",
390 iova->pfn_lo, iova->pfn_hi);
391 }
392 spin_unlock(&from->iova_rbtree_lock);
393 spin_unlock_irqrestore(&from->iova_alloc_lock, flags);
394}
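Taken together, a domain follows a simple init/alloc/free/destroy lifecycle.
The sketch below is illustrative only (the helper name and error handling are
assumptions, not part of this patch), using the API declared in iova.h:

	static int example_domain_lifecycle(void)
	{
		struct iova_domain domain;
		struct iova *iova;

		init_iova_domain(&domain);

		iova = alloc_iova(&domain, 4, DMA_32BIT_PFN, false);
		if (!iova) {
			put_iova_domain(&domain);
			return -ENOMEM;
		}

		free_iova(&domain, iova->pfn_lo);	/* look up by pfn, then free */
		put_iova_domain(&domain);		/* destroys anything left over */
		return 0;
	}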
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h
new file mode 100644
index 0000000000..ae3028d5a9
--- /dev/null
+++ b/drivers/pci/iova.h
@@ -0,0 +1,63 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This file is released under the GPLv2.
5 *
6 * Copyright (C) 2006 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
7 *
8 */
9
10#ifndef _IOVA_H_
11#define _IOVA_H_
12
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/rbtree.h>
16#include <linux/dma-mapping.h>
17
18/*
19 * We need a fixed PAGE_SIZE of 4K irrespective of
20 * arch PAGE_SIZE for IOMMU page tables.
21 */
22#define PAGE_SHIFT_4K (12)
23#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
24#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
25#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
26
27/* IO virtual address start page frame number */
28#define IOVA_START_PFN (1)
29
30#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
31#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
32#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
33
34/* iova structure */
35struct iova {
36 struct rb_node node;
37 unsigned long pfn_hi; /* IOMMU dish out addr hi */
38 unsigned long pfn_lo; /* IOMMU dish out addr lo */
39};
40
41/* holds all the iova translations for a domain */
42struct iova_domain {
43 spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */
44 spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */
45 struct rb_root rbroot; /* iova domain rbtree root */
46 struct rb_node *cached32_node; /* Saves last allocated node */
47};
48
49struct iova *alloc_iova_mem(void);
50void free_iova_mem(struct iova *iova);
51void free_iova(struct iova_domain *iovad, unsigned long pfn);
52void __free_iova(struct iova_domain *iovad, struct iova *iova);
53struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
54 unsigned long limit_pfn,
55 bool size_aligned);
56struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
57 unsigned long pfn_hi);
58void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
59void init_iova_domain(struct iova_domain *iovad);
60struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
61void put_iova_domain(struct iova_domain *iovad);
62
63#endif
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 6fda33de84..fc87e14b50 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -90,3 +90,4 @@ pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
90 return NULL; 90 return NULL;
91} 91}
92 92
93struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 5db6b6690b..463a5a9d58 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -837,6 +837,19 @@ static void pci_release_dev(struct device *dev)
837 kfree(pci_dev); 837 kfree(pci_dev);
838} 838}
839 839
840static void set_pcie_port_type(struct pci_dev *pdev)
841{
842 int pos;
843 u16 reg16;
844
845 pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
846 if (!pos)
847 return;
848 pdev->is_pcie = 1;
849 pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
850 pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
851}
852
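/*
 * Illustrative note, not part of the patch: per the PCI Express spec,
 * bits 7:4 of the Express Capabilities register encode the device/port
 * type (e.g. PCI_EXP_TYPE_ENDPOINT, PCI_EXP_TYPE_ROOT_PORT,
 * PCI_EXP_TYPE_PCI_BRIDGE), which is why set_pcie_port_type() masks
 * with PCI_EXP_FLAGS_TYPE and shifts right by four.
 */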
840/** 853/**
841 * pci_cfg_space_size - get the configuration space size of the PCI device. 854 * pci_cfg_space_size - get the configuration space size of the PCI device.
842 * @dev: PCI device 855 * @dev: PCI device
@@ -951,6 +964,7 @@ pci_scan_device(struct pci_bus *bus, int devfn)
951 dev->device = (l >> 16) & 0xffff; 964 dev->device = (l >> 16) & 0xffff;
952 dev->cfg_size = pci_cfg_space_size(dev); 965 dev->cfg_size = pci_cfg_space_size(dev);
953 dev->error_state = pci_channel_io_normal; 966 dev->error_state = pci_channel_io_normal;
967 set_pcie_port_type(dev);
954 968
955 /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) 969 /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
956 set this higher, assuming the system even supports it. */ 970 set this higher, assuming the system even supports it. */
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index c6e79d01ce..b001b5922e 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -14,6 +14,40 @@
14#include "pci.h" 14#include "pci.h"
15 15
16DECLARE_RWSEM(pci_bus_sem); 16DECLARE_RWSEM(pci_bus_sem);
17/*
18 * find the upstream PCIE-to-PCI bridge of a PCI device
19 * if the device is PCIE, return NULL
20 * if the device isn't connected to a PCIE bridge (that is, its parent is a
21 * legacy PCI bridge and the bridge is directly connected to bus 0), return its
22 * parent
23 */
24struct pci_dev *
25pci_find_upstream_pcie_bridge(struct pci_dev *pdev)
26{
27 struct pci_dev *tmp = NULL;
28
29 if (pdev->is_pcie)
30 return NULL;
31 while (1) {
32 if (!pdev->bus->self)
33 break;
34 pdev = pdev->bus->self;
35 /* a p2p bridge */
36 if (!pdev->is_pcie) {
37 tmp = pdev;
38 continue;
39 }
40 /* PCI device should connect to a PCIE bridge */
41 if (pdev->pcie_type != PCI_EXP_TYPE_PCI_BRIDGE) {
42 /* Busted hardware? */
43 WARN_ON_ONCE(1);
44 return NULL;
45 }
46 return pdev;
47 }
48
49 return tmp;
50}
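/*
 * Illustrative usage, an assumption rather than part of the patch: an
 * IOMMU driver mapping on behalf of a legacy PCI device would program
 * the context of the upstream PCIe-to-PCI bridge instead, since that
 * bridge's requester ID is what the IOMMU sees on the bus:
 *
 *	struct pci_dev *bridge = pci_find_upstream_pcie_bridge(pdev);
 *
 *	if (bridge)
 *		program_context(bridge);	(hypothetical helper)
 *	else
 *		program_context(pdev);		(PCIe device, or legacy bus 0)
 */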
17 51
18static struct pci_bus *pci_do_find_bus(struct pci_bus *bus, unsigned char busnr) 52static struct pci_bus *pci_do_find_bus(struct pci_bus *bus, unsigned char busnr)
19{ 53{
diff --git a/drivers/power/apm_power.c b/drivers/power/apm_power.c
index 39a90a6f0f..bbf3ee10da 100644
--- a/drivers/power/apm_power.c
+++ b/drivers/power/apm_power.c
@@ -26,65 +26,124 @@ static struct power_supply *main_battery;
26static void find_main_battery(void) 26static void find_main_battery(void)
27{ 27{
28 struct device *dev; 28 struct device *dev;
29 struct power_supply *bat, *batm; 29 struct power_supply *bat = NULL;
30 struct power_supply *max_charge_bat = NULL;
31 struct power_supply *max_energy_bat = NULL;
30 union power_supply_propval full; 32 union power_supply_propval full;
31 int max_charge = 0; 33 int max_charge = 0;
34 int max_energy = 0;
32 35
33 main_battery = NULL; 36 main_battery = NULL;
34 batm = NULL; 37
35 list_for_each_entry(dev, &power_supply_class->devices, node) { 38 list_for_each_entry(dev, &power_supply_class->devices, node) {
36 bat = dev_get_drvdata(dev); 39 bat = dev_get_drvdata(dev);
37 /* If none of battery devices cantains 'use_for_apm' flag, 40
38 choice one with maximum design charge */ 41 if (bat->use_for_apm) {
39 if (!PSY_PROP(bat, CHARGE_FULL_DESIGN, &full)) { 42 /* nice, we explicitly asked to report this battery. */
43 main_battery = bat;
44 return;
45 }
46
47 if (!PSY_PROP(bat, CHARGE_FULL_DESIGN, &full) ||
48 !PSY_PROP(bat, CHARGE_FULL, &full)) {
40 if (full.intval > max_charge) { 49 if (full.intval > max_charge) {
41 batm = bat; 50 max_charge_bat = bat;
42 max_charge = full.intval; 51 max_charge = full.intval;
43 } 52 }
53 } else if (!PSY_PROP(bat, ENERGY_FULL_DESIGN, &full) ||
54 !PSY_PROP(bat, ENERGY_FULL, &full)) {
55 if (full.intval > max_energy) {
56 max_energy_bat = bat;
57 max_energy = full.intval;
58 }
44 } 59 }
60 }
45 61
46 if (bat->use_for_apm) 62 if ((max_energy_bat && max_charge_bat) &&
47 main_battery = bat; 63 (max_energy_bat != max_charge_bat)) {
64 /* try to guess the battery with more capacity */
65 if (!PSY_PROP(max_charge_bat, VOLTAGE_MAX_DESIGN, &full)) {
66 if (max_energy > max_charge * full.intval)
67 main_battery = max_energy_bat;
68 else
69 main_battery = max_charge_bat;
70 } else if (!PSY_PROP(max_energy_bat, VOLTAGE_MAX_DESIGN,
71 &full)) {
72 if (max_charge > max_energy / full.intval)
73 main_battery = max_charge_bat;
74 else
75 main_battery = max_energy_bat;
76 } else {
77 /* give up, choose any */
78 main_battery = max_energy_bat;
79 }
80 } else if (max_charge_bat) {
81 main_battery = max_charge_bat;
82 } else if (max_energy_bat) {
83 main_battery = max_energy_bat;
84 } else {
85 /* give up, try the last if any */
86 main_battery = bat;
48 } 87 }
49 if (!main_battery)
50 main_battery = batm;
51} 88}
52 89
53static int calculate_time(int status) 90static int calculate_time(int status, int using_charge)
54{ 91{
55 union power_supply_propval charge_full, charge_empty; 92 union power_supply_propval full;
56 union power_supply_propval charge, I; 93 union power_supply_propval empty;
94 union power_supply_propval cur;
95 union power_supply_propval I;
96 enum power_supply_property full_prop;
97 enum power_supply_property full_design_prop;
98 enum power_supply_property empty_prop;
99 enum power_supply_property empty_design_prop;
100 enum power_supply_property cur_avg_prop;
101 enum power_supply_property cur_now_prop;
57 102
58 if (MPSY_PROP(CHARGE_FULL, &charge_full)) { 103 if (MPSY_PROP(CURRENT_AVG, &I)) {
59 /* if battery can't report this property, use design value */ 104 /* if battery can't report average value, use momentary */
60 if (MPSY_PROP(CHARGE_FULL_DESIGN, &charge_full)) 105 if (MPSY_PROP(CURRENT_NOW, &I))
61 return -1; 106 return -1;
62 } 107 }
63 108
64 if (MPSY_PROP(CHARGE_EMPTY, &charge_empty)) { 109 if (using_charge) {
65 /* if battery can't report this property, use design value */ 110 full_prop = POWER_SUPPLY_PROP_CHARGE_FULL;
66 if (MPSY_PROP(CHARGE_EMPTY_DESIGN, &charge_empty)) 111 full_design_prop = POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN;
67 charge_empty.intval = 0; 112 empty_prop = POWER_SUPPLY_PROP_CHARGE_EMPTY;
113 empty_design_prop = POWER_SUPPLY_PROP_CHARGE_EMPTY_DESIGN;
114 cur_avg_prop = POWER_SUPPLY_PROP_CHARGE_AVG;
115 cur_now_prop = POWER_SUPPLY_PROP_CHARGE_NOW;
116 } else {
117 full_prop = POWER_SUPPLY_PROP_ENERGY_FULL;
118 full_design_prop = POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN;
119 empty_prop = POWER_SUPPLY_PROP_ENERGY_EMPTY;
120 empty_design_prop = POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN;
121 cur_avg_prop = POWER_SUPPLY_PROP_ENERGY_AVG;
122 cur_now_prop = POWER_SUPPLY_PROP_ENERGY_NOW;
68 } 123 }
69 124
70 if (MPSY_PROP(CHARGE_AVG, &charge)) { 125 if (_MPSY_PROP(full_prop, &full)) {
71 /* if battery can't report average value, use momentary */ 126 /* if battery can't report this property, use design value */
72 if (MPSY_PROP(CHARGE_NOW, &charge)) 127 if (_MPSY_PROP(full_design_prop, &full))
73 return -1; 128 return -1;
74 } 129 }
75 130
76 if (MPSY_PROP(CURRENT_AVG, &I)) { 131 if (_MPSY_PROP(empty_prop, &empty)) {
132 /* if battery can't report this property, use design value */
133 if (_MPSY_PROP(empty_design_prop, &empty))
134 empty.intval = 0;
135 }
136
137 if (_MPSY_PROP(cur_avg_prop, &cur)) {
77 /* if battery can't report average value, use momentary */ 138 /* if battery can't report average value, use momentary */
78 if (MPSY_PROP(CURRENT_NOW, &I)) 139 if (_MPSY_PROP(cur_now_prop, &cur))
79 return -1; 140 return -1;
80 } 141 }
81 142
82 if (status == POWER_SUPPLY_STATUS_CHARGING) 143 if (status == POWER_SUPPLY_STATUS_CHARGING)
83 return ((charge.intval - charge_full.intval) * 60L) / 144 return ((cur.intval - full.intval) * 60L) / I.intval;
84 I.intval;
85 else 145 else
86 return -((charge.intval - charge_empty.intval) * 60L) / 146 return -((cur.intval - empty.intval) * 60L) / I.intval;
87 I.intval;
88} 147}
89 148
90static int calculate_capacity(int using_charge) 149static int calculate_capacity(int using_charge)
@@ -200,18 +259,22 @@ static void apm_battery_apm_get_power_status(struct apm_power_info *info)
200 info->units = APM_UNITS_MINS; 259 info->units = APM_UNITS_MINS;
201 260
202 if (status.intval == POWER_SUPPLY_STATUS_CHARGING) { 261 if (status.intval == POWER_SUPPLY_STATUS_CHARGING) {
203 if (MPSY_PROP(TIME_TO_FULL_AVG, &time_to_full)) { 262 if (!MPSY_PROP(TIME_TO_FULL_AVG, &time_to_full) ||
204 if (MPSY_PROP(TIME_TO_FULL_NOW, &time_to_full)) 263 !MPSY_PROP(TIME_TO_FULL_NOW, &time_to_full)) {
205 info->time = calculate_time(status.intval); 264 info->time = time_to_full.intval / 60;
206 else 265 } else {
207 info->time = time_to_full.intval / 60; 266 info->time = calculate_time(status.intval, 0);
267 if (info->time == -1)
268 info->time = calculate_time(status.intval, 1);
208 } 269 }
209 } else { 270 } else {
210 if (MPSY_PROP(TIME_TO_EMPTY_AVG, &time_to_empty)) { 271 if (!MPSY_PROP(TIME_TO_EMPTY_AVG, &time_to_empty) ||
211 if (MPSY_PROP(TIME_TO_EMPTY_NOW, &time_to_empty)) 272 !MPSY_PROP(TIME_TO_EMPTY_NOW, &time_to_empty)) {
212 info->time = calculate_time(status.intval); 273 info->time = time_to_empty.intval / 60;
213 else 274 } else {
214 info->time = time_to_empty.intval / 60; 275 info->time = calculate_time(status.intval, 0);
276 if (info->time == -1)
277 info->time = calculate_time(status.intval, 1);
215 } 278 }
216 } 279 }
217 280
diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index 2edd5fb6d3..8d1c64a24d 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c
@@ -48,8 +48,8 @@ struct raw3270 {
48 struct timer_list timer; /* Device timer. */ 48 struct timer_list timer; /* Device timer. */
49 49
50 unsigned char *ascebc; /* ascii -> ebcdic table */ 50 unsigned char *ascebc; /* ascii -> ebcdic table */
51 struct class_device *clttydev; /* 3270-class tty device ptr */ 51 struct device *clttydev; /* 3270-class tty device ptr */
52 struct class_device *cltubdev; /* 3270-class tub device ptr */ 52 struct device *cltubdev; /* 3270-class tub device ptr */
53 53
54 struct raw3270_request init_request; 54 struct raw3270_request init_request;
55 unsigned char init_data[256]; 55 unsigned char init_data[256];
@@ -1107,11 +1107,9 @@ raw3270_delete_device(struct raw3270 *rp)
1107 /* Remove from device chain. */ 1107 /* Remove from device chain. */
1108 mutex_lock(&raw3270_mutex); 1108 mutex_lock(&raw3270_mutex);
1109 if (rp->clttydev && !IS_ERR(rp->clttydev)) 1109 if (rp->clttydev && !IS_ERR(rp->clttydev))
1110 class_device_destroy(class3270, 1110 device_destroy(class3270, MKDEV(IBM_TTY3270_MAJOR, rp->minor));
1111 MKDEV(IBM_TTY3270_MAJOR, rp->minor));
1112 if (rp->cltubdev && !IS_ERR(rp->cltubdev)) 1111 if (rp->cltubdev && !IS_ERR(rp->cltubdev))
1113 class_device_destroy(class3270, 1112 device_destroy(class3270, MKDEV(IBM_FS3270_MAJOR, rp->minor));
1114 MKDEV(IBM_FS3270_MAJOR, rp->minor));
1115 list_del_init(&rp->list); 1113 list_del_init(&rp->list);
1116 mutex_unlock(&raw3270_mutex); 1114 mutex_unlock(&raw3270_mutex);
1117 1115
@@ -1181,24 +1179,22 @@ static int raw3270_create_attributes(struct raw3270 *rp)
1181 if (rc) 1179 if (rc)
1182 goto out; 1180 goto out;
1183 1181
1184 rp->clttydev = class_device_create(class3270, NULL, 1182 rp->clttydev = device_create(class3270, &rp->cdev->dev,
1185 MKDEV(IBM_TTY3270_MAJOR, rp->minor), 1183 MKDEV(IBM_TTY3270_MAJOR, rp->minor),
1186 &rp->cdev->dev, "tty%s", 1184 "tty%s", rp->cdev->dev.bus_id);
1187 rp->cdev->dev.bus_id);
1188 if (IS_ERR(rp->clttydev)) { 1185 if (IS_ERR(rp->clttydev)) {
1189 rc = PTR_ERR(rp->clttydev); 1186 rc = PTR_ERR(rp->clttydev);
1190 goto out_ttydev; 1187 goto out_ttydev;
1191 } 1188 }
1192 1189
1193 rp->cltubdev = class_device_create(class3270, NULL, 1190 rp->cltubdev = device_create(class3270, &rp->cdev->dev,
1194 MKDEV(IBM_FS3270_MAJOR, rp->minor), 1191 MKDEV(IBM_FS3270_MAJOR, rp->minor),
1195 &rp->cdev->dev, "tub%s", 1192 "tub%s", rp->cdev->dev.bus_id);
1196 rp->cdev->dev.bus_id);
1197 if (!IS_ERR(rp->cltubdev)) 1193 if (!IS_ERR(rp->cltubdev))
1198 goto out; 1194 goto out;
1199 1195
1200 rc = PTR_ERR(rp->cltubdev); 1196 rc = PTR_ERR(rp->cltubdev);
1201 class_device_destroy(class3270, MKDEV(IBM_TTY3270_MAJOR, rp->minor)); 1197 device_destroy(class3270, MKDEV(IBM_TTY3270_MAJOR, rp->minor));
1202 1198
1203out_ttydev: 1199out_ttydev:
1204 sysfs_remove_group(&rp->cdev->dev.kobj, &raw3270_attr_group); 1200 sysfs_remove_group(&rp->cdev->dev.kobj, &raw3270_attr_group);
diff --git a/drivers/s390/char/tape_class.c b/drivers/s390/char/tape_class.c
index 2e0d29730b..aa7f166f40 100644
--- a/drivers/s390/char/tape_class.c
+++ b/drivers/s390/char/tape_class.c
@@ -69,12 +69,9 @@ struct tape_class_device *register_tape_dev(
69 if (rc) 69 if (rc)
70 goto fail_with_cdev; 70 goto fail_with_cdev;
71 71
72 tcd->class_device = class_device_create( 72 tcd->class_device = device_create(tape_class, device,
73 tape_class, 73 tcd->char_device->dev,
74 NULL, 74 "%s", tcd->device_name
75 tcd->char_device->dev,
76 device,
77 "%s", tcd->device_name
78 ); 75 );
79 rc = IS_ERR(tcd->class_device) ? PTR_ERR(tcd->class_device) : 0; 76 rc = IS_ERR(tcd->class_device) ? PTR_ERR(tcd->class_device) : 0;
80 if (rc) 77 if (rc)
@@ -90,7 +87,7 @@ struct tape_class_device *register_tape_dev(
90 return tcd; 87 return tcd;
91 88
92fail_with_class_device: 89fail_with_class_device:
93 class_device_destroy(tape_class, tcd->char_device->dev); 90 device_destroy(tape_class, tcd->char_device->dev);
94 91
95fail_with_cdev: 92fail_with_cdev:
96 cdev_del(tcd->char_device); 93 cdev_del(tcd->char_device);
@@ -105,11 +102,9 @@ EXPORT_SYMBOL(register_tape_dev);
105void unregister_tape_dev(struct tape_class_device *tcd) 102void unregister_tape_dev(struct tape_class_device *tcd)
106{ 103{
107 if (tcd != NULL && !IS_ERR(tcd)) { 104 if (tcd != NULL && !IS_ERR(tcd)) {
108 sysfs_remove_link( 105 sysfs_remove_link(&tcd->class_device->kobj,
109 &tcd->class_device->dev->kobj, 106 tcd->mode_name);
110 tcd->mode_name 107 device_destroy(tape_class, tcd->char_device->dev);
111 );
112 class_device_destroy(tape_class, tcd->char_device->dev);
113 cdev_del(tcd->char_device); 108 cdev_del(tcd->char_device);
114 kfree(tcd); 109 kfree(tcd);
115 } 110 }
diff --git a/drivers/s390/char/tape_class.h b/drivers/s390/char/tape_class.h
index a8bd9b47fa..e2b5ac918a 100644
--- a/drivers/s390/char/tape_class.h
+++ b/drivers/s390/char/tape_class.h
@@ -24,8 +24,8 @@
24#define TAPECLASS_NAME_LEN 32 24#define TAPECLASS_NAME_LEN 32
25 25
26struct tape_class_device { 26struct tape_class_device {
27 struct cdev * char_device; 27 struct cdev *char_device;
28 struct class_device * class_device; 28 struct device *class_device;
29 char device_name[TAPECLASS_NAME_LEN]; 29 char device_name[TAPECLASS_NAME_LEN];
30 char mode_name[TAPECLASS_NAME_LEN]; 30 char mode_name[TAPECLASS_NAME_LEN];
31}; 31};
diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index 12f7a4ce82..e0c4c508e1 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -74,7 +74,7 @@ struct vmlogrdr_priv_t {
74 int dev_in_use; /* 1: already opened, 0: not opened*/ 74 int dev_in_use; /* 1: already opened, 0: not opened*/
75 spinlock_t priv_lock; 75 spinlock_t priv_lock;
76 struct device *device; 76 struct device *device;
77 struct class_device *class_device; 77 struct device *class_device;
78 int autorecording; 78 int autorecording;
79 int autopurge; 79 int autopurge;
80}; 80};
@@ -762,12 +762,10 @@ static int vmlogrdr_register_device(struct vmlogrdr_priv_t *priv)
762 device_unregister(dev); 762 device_unregister(dev);
763 return ret; 763 return ret;
764 } 764 }
765 priv->class_device = class_device_create( 765 priv->class_device = device_create(vmlogrdr_class, dev,
766 vmlogrdr_class, 766 MKDEV(vmlogrdr_major,
767 NULL, 767 priv->minor_num),
768 MKDEV(vmlogrdr_major, priv->minor_num), 768 "%s", dev->bus_id);
769 dev,
770 "%s", dev->bus_id );
771 if (IS_ERR(priv->class_device)) { 769 if (IS_ERR(priv->class_device)) {
772 ret = PTR_ERR(priv->class_device); 770 ret = PTR_ERR(priv->class_device);
773 priv->class_device=NULL; 771 priv->class_device=NULL;
@@ -783,8 +781,7 @@ static int vmlogrdr_register_device(struct vmlogrdr_priv_t *priv)
783 781
784static int vmlogrdr_unregister_device(struct vmlogrdr_priv_t *priv) 782static int vmlogrdr_unregister_device(struct vmlogrdr_priv_t *priv)
785{ 783{
786 class_device_destroy(vmlogrdr_class, 784 device_destroy(vmlogrdr_class, MKDEV(vmlogrdr_major, priv->minor_num));
787 MKDEV(vmlogrdr_major, priv->minor_num));
788 if (priv->device != NULL) { 785 if (priv->device != NULL) {
789 sysfs_remove_group(&priv->device->kobj, &vmlogrdr_attr_group); 786 sysfs_remove_group(&priv->device->kobj, &vmlogrdr_attr_group);
790 device_unregister(priv->device); 787 device_unregister(priv->device);
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 42c1f4659a..297cdceb0c 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -246,7 +246,7 @@ int chp_add_cmg_attr(struct channel_path *chp)
246static ssize_t chp_status_show(struct device *dev, 246static ssize_t chp_status_show(struct device *dev,
247 struct device_attribute *attr, char *buf) 247 struct device_attribute *attr, char *buf)
248{ 248{
249 struct channel_path *chp = container_of(dev, struct channel_path, dev); 249 struct channel_path *chp = to_channelpath(dev);
250 250
251 if (!chp) 251 if (!chp)
252 return 0; 252 return 0;
@@ -258,7 +258,7 @@ static ssize_t chp_status_write(struct device *dev,
258 struct device_attribute *attr, 258 struct device_attribute *attr,
259 const char *buf, size_t count) 259 const char *buf, size_t count)
260{ 260{
261 struct channel_path *cp = container_of(dev, struct channel_path, dev); 261 struct channel_path *cp = to_channelpath(dev);
262 char cmd[10]; 262 char cmd[10];
263 int num_args; 263 int num_args;
264 int error; 264 int error;
@@ -286,7 +286,7 @@ static ssize_t chp_configure_show(struct device *dev,
286 struct channel_path *cp; 286 struct channel_path *cp;
287 int status; 287 int status;
288 288
289 cp = container_of(dev, struct channel_path, dev); 289 cp = to_channelpath(dev);
290 status = chp_info_get_status(cp->chpid); 290 status = chp_info_get_status(cp->chpid);
291 if (status < 0) 291 if (status < 0)
292 return status; 292 return status;
@@ -308,7 +308,7 @@ static ssize_t chp_configure_write(struct device *dev,
308 return -EINVAL; 308 return -EINVAL;
309 if (val != 0 && val != 1) 309 if (val != 0 && val != 1)
310 return -EINVAL; 310 return -EINVAL;
311 cp = container_of(dev, struct channel_path, dev); 311 cp = to_channelpath(dev);
312 chp_cfg_schedule(cp->chpid, val); 312 chp_cfg_schedule(cp->chpid, val);
313 cfg_wait_idle(); 313 cfg_wait_idle();
314 314
@@ -320,7 +320,7 @@ static DEVICE_ATTR(configure, 0644, chp_configure_show, chp_configure_write);
320static ssize_t chp_type_show(struct device *dev, struct device_attribute *attr, 320static ssize_t chp_type_show(struct device *dev, struct device_attribute *attr,
321 char *buf) 321 char *buf)
322{ 322{
323 struct channel_path *chp = container_of(dev, struct channel_path, dev); 323 struct channel_path *chp = to_channelpath(dev);
324 324
325 if (!chp) 325 if (!chp)
326 return 0; 326 return 0;
@@ -374,7 +374,7 @@ static void chp_release(struct device *dev)
374{ 374{
375 struct channel_path *cp; 375 struct channel_path *cp;
376 376
377 cp = container_of(dev, struct channel_path, dev); 377 cp = to_channelpath(dev);
378 kfree(cp); 378 kfree(cp);
379} 379}
380 380
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 5d83dd4714..838f7ac0dc 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -182,6 +182,15 @@ static int css_register_subchannel(struct subchannel *sch)
182 sch->dev.bus = &css_bus_type; 182 sch->dev.bus = &css_bus_type;
183 sch->dev.release = &css_subchannel_release; 183 sch->dev.release = &css_subchannel_release;
184 sch->dev.groups = subch_attr_groups; 184 sch->dev.groups = subch_attr_groups;
185 /*
186 * We don't want to generate uevents for I/O subchannels that don't
187 * have a working ccw device behind them since they will be
188 * unregistered before they can be used anyway, so we delay the add
189 * uevent until after device recognition was successful.
190 */
191 if (!cio_is_console(sch->schid))
192 /* Console is special, no need to suppress. */
193 sch->dev.uevent_suppress = 1;
185 css_update_ssd_info(sch); 194 css_update_ssd_info(sch);
186 /* make it known to the system */ 195 /* make it known to the system */
187 ret = css_sch_device_register(sch); 196 ret = css_sch_device_register(sch);
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 7507067351..fd5d0c1570 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -559,6 +559,7 @@ zfcp_sg_list_alloc(struct zfcp_sg_list *sg_list, size_t size)
559 retval = -ENOMEM; 559 retval = -ENOMEM;
560 goto out; 560 goto out;
561 } 561 }
562 sg_init_table(sg_list->sg, sg_list->count);
562 563
563 for (i = 0, sg = sg_list->sg; i < sg_list->count; i++, sg++) { 564 for (i = 0, sg = sg_list->sg; i < sg_list->count; i++, sg++) {
564 sg->length = min(size, PAGE_SIZE); 565 sg->length = min(size, PAGE_SIZE);
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 57cac7008e..326e7ee232 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -63,7 +63,7 @@
63static inline void * 63static inline void *
64zfcp_sg_to_address(struct scatterlist *list) 64zfcp_sg_to_address(struct scatterlist *list)
65{ 65{
66 return (void *) (page_address(list->page) + list->offset); 66 return sg_virt(list);
67} 67}
68 68
69/** 69/**
@@ -74,7 +74,7 @@ zfcp_sg_to_address(struct scatterlist *list)
74static inline void 74static inline void
75zfcp_address_to_sg(void *address, struct scatterlist *list) 75zfcp_address_to_sg(void *address, struct scatterlist *list)
76{ 76{
77 list->page = virt_to_page(address); 77 sg_set_page(list, virt_to_page(address));
78 list->offset = ((unsigned long) address) & (PAGE_SIZE - 1); 78 list->offset = ((unsigned long) address) & (PAGE_SIZE - 1);
79} 79}
80 80
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index a6475a2bb8..9438d0b287 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -308,13 +308,15 @@ zfcp_erp_adisc(struct zfcp_port *port)
308 if (send_els == NULL) 308 if (send_els == NULL)
309 goto nomem; 309 goto nomem;
310 310
311 send_els->req = kzalloc(sizeof(struct scatterlist), GFP_ATOMIC); 311 send_els->req = kmalloc(sizeof(struct scatterlist), GFP_ATOMIC);
312 if (send_els->req == NULL) 312 if (send_els->req == NULL)
313 goto nomem; 313 goto nomem;
314 sg_init_table(send_els->req, 1);
314 315
315 send_els->resp = kzalloc(sizeof(struct scatterlist), GFP_ATOMIC); 316 send_els->resp = kmalloc(sizeof(struct scatterlist), GFP_ATOMIC);
316 if (send_els->resp == NULL) 317 if (send_els->resp == NULL)
317 goto nomem; 318 goto nomem;
319 sg_init_table(send_els->resp, 1);
318 320
319 address = (void *) get_zeroed_page(GFP_ATOMIC); 321 address = (void *) get_zeroed_page(GFP_ATOMIC);
320 if (address == NULL) 322 if (address == NULL)
@@ -363,7 +365,7 @@ zfcp_erp_adisc(struct zfcp_port *port)
363 retval = -ENOMEM; 365 retval = -ENOMEM;
364 freemem: 366 freemem:
365 if (address != NULL) 367 if (address != NULL)
366 __free_pages(send_els->req->page, 0); 368 __free_pages(sg_page(send_els->req), 0);
367 if (send_els != NULL) { 369 if (send_els != NULL) {
368 kfree(send_els->req); 370 kfree(send_els->req);
369 kfree(send_els->resp); 371 kfree(send_els->resp);
@@ -437,7 +439,7 @@ zfcp_erp_adisc_handler(unsigned long data)
437 439
438 out: 440 out:
439 zfcp_port_put(port); 441 zfcp_port_put(port);
440 __free_pages(send_els->req->page, 0); 442 __free_pages(sg_page(send_els->req), 0);
441 kfree(send_els->req); 443 kfree(send_els->req);
442 kfree(send_els->resp); 444 kfree(send_els->resp);
443 kfree(send_els); 445 kfree(send_els);
diff --git a/drivers/sbus/char/vfc_dev.c b/drivers/sbus/char/vfc_dev.c
index e7a1642b2a..d4f8fcded5 100644
--- a/drivers/sbus/char/vfc_dev.c
+++ b/drivers/sbus/char/vfc_dev.c
@@ -134,7 +134,7 @@ int init_vfc_hw(struct vfc_dev *dev)
134int init_vfc_devstruct(struct vfc_dev *dev, int instance) 134int init_vfc_devstruct(struct vfc_dev *dev, int instance)
135{ 135{
136 dev->instance=instance; 136 dev->instance=instance;
137 init_MUTEX(&dev->device_lock_sem); 137 mutex_init(&dev->device_lock_mtx);
138 dev->control_reg=0; 138 dev->control_reg=0;
139 dev->busy=0; 139 dev->busy=0;
140 return 0; 140 return 0;
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index fb14014ee1..afb262b4be 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -1840,7 +1840,7 @@ static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
1840 (scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) { 1840 (scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) {
1841 if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) { 1841 if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) {
1842 struct scatterlist *sg = scsi_sglist(srb); 1842 struct scatterlist *sg = scsi_sglist(srb);
1843 char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; 1843 char *buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
1844 memcpy(tw_dev->generic_buffer_virt[request_id], buf, sg->length); 1844 memcpy(tw_dev->generic_buffer_virt[request_id], buf, sg->length);
1845 kunmap_atomic(buf - sg->offset, KM_IRQ0); 1845 kunmap_atomic(buf - sg->offset, KM_IRQ0);
1846 } 1846 }
@@ -1919,7 +1919,7 @@ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int re
1919 char *buf; 1919 char *buf;
1920 unsigned long flags = 0; 1920 unsigned long flags = 0;
1921 local_irq_save(flags); 1921 local_irq_save(flags);
1922 buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; 1922 buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
1923 memcpy(buf, tw_dev->generic_buffer_virt[request_id], sg->length); 1923 memcpy(buf, tw_dev->generic_buffer_virt[request_id], sg->length);
1924 kunmap_atomic(buf - sg->offset, KM_IRQ0); 1924 kunmap_atomic(buf - sg->offset, KM_IRQ0);
1925 local_irq_restore(flags); 1925 local_irq_restore(flags);
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index a64153b960..59716ebeb1 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -1469,7 +1469,7 @@ static void tw_transfer_internal(TW_Device_Extension *tw_dev, int request_id,
1469 struct scatterlist *sg = scsi_sglist(cmd); 1469 struct scatterlist *sg = scsi_sglist(cmd);
1470 1470
1471 local_irq_save(flags); 1471 local_irq_save(flags);
1472 buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; 1472 buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
1473 transfer_len = min(sg->length, len); 1473 transfer_len = min(sg->length, len);
1474 1474
1475 memcpy(buf, data, transfer_len); 1475 memcpy(buf, data, transfer_len);
diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index 988f0bc5ed..2597209183 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -298,8 +298,7 @@ static __inline__ void initialize_SCp(Scsi_Cmnd * cmd)
298 if (cmd->use_sg) { 298 if (cmd->use_sg) {
299 cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer; 299 cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
300 cmd->SCp.buffers_residual = cmd->use_sg - 1; 300 cmd->SCp.buffers_residual = cmd->use_sg - 1;
301 cmd->SCp.ptr = page_address(cmd->SCp.buffer->page)+ 301 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
302 cmd->SCp.buffer->offset;
303 cmd->SCp.this_residual = cmd->SCp.buffer->length; 302 cmd->SCp.this_residual = cmd->SCp.buffer->length;
304 } else { 303 } else {
305 cmd->SCp.buffer = NULL; 304 cmd->SCp.buffer = NULL;
@@ -2143,8 +2142,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) {
2143 ++cmd->SCp.buffer; 2142 ++cmd->SCp.buffer;
2144 --cmd->SCp.buffers_residual; 2143 --cmd->SCp.buffers_residual;
2145 cmd->SCp.this_residual = cmd->SCp.buffer->length; 2144 cmd->SCp.this_residual = cmd->SCp.buffer->length;
2146 cmd->SCp.ptr = page_address(cmd->SCp.buffer->page)+ 2145 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
2147 cmd->SCp.buffer->offset;
2148 dprintk(NDEBUG_INFORMATION, ("scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual)); 2146 dprintk(NDEBUG_INFORMATION, ("scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual));
2149 } 2147 }
2150 /* 2148 /*
diff --git a/drivers/scsi/NCR53C9x.c b/drivers/scsi/NCR53C9x.c
index 96e8e29aa0..5b0efc9039 100644
--- a/drivers/scsi/NCR53C9x.c
+++ b/drivers/scsi/NCR53C9x.c
@@ -927,7 +927,7 @@ static void esp_get_dmabufs(struct NCR_ESP *esp, Scsi_Cmnd *sp)
927 esp->dma_mmu_get_scsi_sgl(esp, sp); 927 esp->dma_mmu_get_scsi_sgl(esp, sp);
928 else 928 else
929 sp->SCp.ptr = 929 sp->SCp.ptr =
930 (char *) virt_to_phys((page_address(sp->SCp.buffer->page) + sp->SCp.buffer->offset)); 930 (char *) virt_to_phys(sg_virt(sp->SCp.buffer));
931 } 931 }
932} 932}
933 933
@@ -1748,7 +1748,7 @@ static inline void advance_sg(struct NCR_ESP *esp, Scsi_Cmnd *sp)
1748 if (esp->dma_advance_sg) 1748 if (esp->dma_advance_sg)
1749 esp->dma_advance_sg (sp); 1749 esp->dma_advance_sg (sp);
1750 else 1750 else
1751 sp->SCp.ptr = (char *) virt_to_phys((page_address(sp->SCp.buffer->page) + sp->SCp.buffer->offset)); 1751 sp->SCp.ptr = (char *) virt_to_phys(sg_virt(sp->SCp.buffer));
1752 1752
1753} 1753}
1754 1754
diff --git a/drivers/scsi/NCR53c406a.c b/drivers/scsi/NCR53c406a.c
index 3168a17948..137d065db3 100644
--- a/drivers/scsi/NCR53c406a.c
+++ b/drivers/scsi/NCR53c406a.c
@@ -875,8 +875,7 @@ static void NCR53c406a_intr(void *dev_id)
875 outb(TRANSFER_INFO | DMA_OP, CMD_REG); 875 outb(TRANSFER_INFO | DMA_OP, CMD_REG);
876#if USE_PIO 876#if USE_PIO
877 scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) { 877 scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
878 NCR53c406a_pio_write(page_address(sg->page) + sg->offset, 878 NCR53c406a_pio_write(sg_virt(sg), sg->length);
879 sg->length);
880 } 879 }
881 REG0; 880 REG0;
882#endif /* USE_PIO */ 881#endif /* USE_PIO */
@@ -897,8 +896,7 @@ static void NCR53c406a_intr(void *dev_id)
897 outb(TRANSFER_INFO | DMA_OP, CMD_REG); 896 outb(TRANSFER_INFO | DMA_OP, CMD_REG);
898#if USE_PIO 897#if USE_PIO
899 scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) { 898 scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
900 NCR53c406a_pio_read(page_address(sg->page) + sg->offset, 899 NCR53c406a_pio_read(sg_virt(sg), sg->length);
901 sg->length);
902 } 900 }
903 REG0; 901 REG0;
904#endif /* USE_PIO */ 902#endif /* USE_PIO */
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 80e448d0f3..a77ab8d693 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -356,7 +356,7 @@ static void aac_internal_transfer(struct scsi_cmnd *scsicmd, void *data, unsigne
356 int transfer_len; 356 int transfer_len;
357 struct scatterlist *sg = scsi_sglist(scsicmd); 357 struct scatterlist *sg = scsi_sglist(scsicmd);
358 358
359 buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; 359 buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
360 transfer_len = min(sg->length, len + offset); 360 transfer_len = min(sg->length, len + offset);
361 361
362 transfer_len -= offset; 362 transfer_len -= offset;
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index a58c265dc8..ea8c699476 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -613,7 +613,7 @@ struct aha152x_scdata {
613#define SCNEXT(SCpnt) SCDATA(SCpnt)->next 613#define SCNEXT(SCpnt) SCDATA(SCpnt)->next
614#define SCSEM(SCpnt) SCDATA(SCpnt)->done 614#define SCSEM(SCpnt) SCDATA(SCpnt)->done
615 615
616#define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset)) 616#define SG_ADDRESS(buffer) ((char *) sg_virt((buffer)))
617 617
618/* state handling */ 618/* state handling */
619static void seldi_run(struct Scsi_Host *shpnt); 619static void seldi_run(struct Scsi_Host *shpnt);
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 961a1882cb..bbcc2c52d7 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -49,7 +49,7 @@
49#include "aha1542.h" 49#include "aha1542.h"
50 50
51#define SCSI_BUF_PA(address) isa_virt_to_bus(address) 51#define SCSI_BUF_PA(address) isa_virt_to_bus(address)
52#define SCSI_SG_PA(sgent) (isa_page_to_bus((sgent)->page) + (sgent)->offset) 52#define SCSI_SG_PA(sgent) (isa_page_to_bus(sg_page((sgent))) + (sgent)->offset)
53 53
54static void BAD_DMA(void *address, unsigned int length) 54static void BAD_DMA(void *address, unsigned int length)
55{ 55{
@@ -66,8 +66,7 @@ static void BAD_SG_DMA(Scsi_Cmnd * SCpnt,
66 int badseg) 66 int badseg)
67{ 67{
68 printk(KERN_CRIT "sgpnt[%d:%d] page %p/0x%llx length %u\n", 68 printk(KERN_CRIT "sgpnt[%d:%d] page %p/0x%llx length %u\n",
69 badseg, nseg, 69 badseg, nseg, sg_virt(sgp),
70 page_address(sgp->page) + sgp->offset,
71 (unsigned long long)SCSI_SG_PA(sgp), 70 (unsigned long long)SCSI_SG_PA(sgp),
72 sgp->length); 71 sgp->length);
73 72
@@ -712,8 +711,7 @@ static int aha1542_queuecommand(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
712 printk(KERN_CRIT "Bad segment list supplied to aha1542.c (%d, %d)\n", SCpnt->use_sg, i); 711 printk(KERN_CRIT "Bad segment list supplied to aha1542.c (%d, %d)\n", SCpnt->use_sg, i);
713 scsi_for_each_sg(SCpnt, sg, SCpnt->use_sg, i) { 712 scsi_for_each_sg(SCpnt, sg, SCpnt->use_sg, i) {
714 printk(KERN_CRIT "%d: %p %d\n", i, 713 printk(KERN_CRIT "%d: %p %d\n", i,
715 (page_address(sg->page) + 714 sg_virt(sg), sg->length);
716 sg->offset), sg->length);
717 }; 715 };
718 printk(KERN_CRIT "cptr %x: ", (unsigned int) cptr); 716 printk(KERN_CRIT "cptr %x: ", (unsigned int) cptr);
719 ptr = (unsigned char *) &cptr[i]; 717 ptr = (unsigned char *) &cptr[i];
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index f81777586b..f7a252885a 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -1343,7 +1343,7 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
1343 /* 4 bytes: Areca io control code */ 1343 /* 4 bytes: Areca io control code */
1344 1344
1345 sg = scsi_sglist(cmd); 1345 sg = scsi_sglist(cmd);
1346 buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; 1346 buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
1347 if (scsi_sg_count(cmd) > 1) { 1347 if (scsi_sg_count(cmd) > 1) {
1348 retvalue = ARCMSR_MESSAGE_FAIL; 1348 retvalue = ARCMSR_MESSAGE_FAIL;
1349 goto message_out; 1349 goto message_out;
@@ -1593,7 +1593,7 @@ static void arcmsr_handle_virtual_command(struct AdapterControlBlock *acb,
1593 strncpy(&inqdata[32], "R001", 4); /* Product Revision */ 1593 strncpy(&inqdata[32], "R001", 4); /* Product Revision */
1594 1594
1595 sg = scsi_sglist(cmd); 1595 sg = scsi_sglist(cmd);
1596 buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; 1596 buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
1597 1597
1598 memcpy(buffer, inqdata, sizeof(inqdata)); 1598 memcpy(buffer, inqdata, sizeof(inqdata));
1599 sg = scsi_sglist(cmd); 1599 sg = scsi_sglist(cmd);
diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c
index 52d0b87e9a..d1780980fb 100644
--- a/drivers/scsi/atari_NCR5380.c
+++ b/drivers/scsi/atari_NCR5380.c
@@ -515,8 +515,7 @@ static inline void initialize_SCp(Scsi_Cmnd *cmd)
515 if (cmd->use_sg) { 515 if (cmd->use_sg) {
516 cmd->SCp.buffer = (struct scatterlist *)cmd->request_buffer; 516 cmd->SCp.buffer = (struct scatterlist *)cmd->request_buffer;
517 cmd->SCp.buffers_residual = cmd->use_sg - 1; 517 cmd->SCp.buffers_residual = cmd->use_sg - 1;
518 cmd->SCp.ptr = (char *)page_address(cmd->SCp.buffer->page) + 518 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
519 cmd->SCp.buffer->offset;
520 cmd->SCp.this_residual = cmd->SCp.buffer->length; 519 cmd->SCp.this_residual = cmd->SCp.buffer->length;
521 /* ++roman: Try to merge some scatter-buffers if they are at 520 /* ++roman: Try to merge some scatter-buffers if they are at
522 * contiguous physical addresses. 521 * contiguous physical addresses.
@@ -2054,8 +2053,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
2054 ++cmd->SCp.buffer; 2053 ++cmd->SCp.buffer;
2055 --cmd->SCp.buffers_residual; 2054 --cmd->SCp.buffers_residual;
2056 cmd->SCp.this_residual = cmd->SCp.buffer->length; 2055 cmd->SCp.this_residual = cmd->SCp.buffer->length;
2057 cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + 2056 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
2058 cmd->SCp.buffer->offset;
2059 /* ++roman: Try to merge some scatter-buffers if 2057 /* ++roman: Try to merge some scatter-buffers if
2060 * they are at contiguous physical addresses. 2058 * they are at contiguous physical addresses.
2061 */ 2059 */
diff --git a/drivers/scsi/eata_pio.c b/drivers/scsi/eata_pio.c
index 96180bb47e..982c5092be 100644
--- a/drivers/scsi/eata_pio.c
+++ b/drivers/scsi/eata_pio.c
@@ -172,7 +172,7 @@ static void IncStat(struct scsi_pointer *SCp, unsigned int Increment)
172 SCp->Status = 0; 172 SCp->Status = 0;
173 else { 173 else {
174 SCp->buffer++; 174 SCp->buffer++;
175 SCp->ptr = page_address(SCp->buffer->page) + SCp->buffer->offset; 175 SCp->ptr = sg_virt(SCp->buffer);
176 SCp->this_residual = SCp->buffer->length; 176 SCp->this_residual = SCp->buffer->length;
177 } 177 }
178 } 178 }
@@ -410,7 +410,7 @@ static int eata_pio_queue(struct scsi_cmnd *cmd,
410 } else { 410 } else {
411 cmd->SCp.buffer = cmd->request_buffer; 411 cmd->SCp.buffer = cmd->request_buffer;
412 cmd->SCp.buffers_residual = cmd->use_sg; 412 cmd->SCp.buffers_residual = cmd->use_sg;
413 cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset; 413 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
414 cmd->SCp.this_residual = cmd->SCp.buffer->length; 414 cmd->SCp.this_residual = cmd->SCp.buffer->length;
415 } 415 }
416 cmd->SCp.Status = (cmd->SCp.this_residual != 0); /* TRUE as long as bytes 416 cmd->SCp.Status = (cmd->SCp.this_residual != 0); /* TRUE as long as bytes
diff --git a/drivers/scsi/fd_mcs.c b/drivers/scsi/fd_mcs.c
index 668569e885..8335b608e5 100644
--- a/drivers/scsi/fd_mcs.c
+++ b/drivers/scsi/fd_mcs.c
@@ -973,7 +973,7 @@ static irqreturn_t fd_mcs_intr(int irq, void *dev_id)
973 if (current_SC->SCp.buffers_residual) { 973 if (current_SC->SCp.buffers_residual) {
974 --current_SC->SCp.buffers_residual; 974 --current_SC->SCp.buffers_residual;
975 ++current_SC->SCp.buffer; 975 ++current_SC->SCp.buffer;
976 current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; 976 current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
977 current_SC->SCp.this_residual = current_SC->SCp.buffer->length; 977 current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
978 } else 978 } else
979 break; 979 break;
@@ -1006,7 +1006,7 @@ static irqreturn_t fd_mcs_intr(int irq, void *dev_id)
1006 if (!current_SC->SCp.this_residual && current_SC->SCp.buffers_residual) { 1006 if (!current_SC->SCp.this_residual && current_SC->SCp.buffers_residual) {
1007 --current_SC->SCp.buffers_residual; 1007 --current_SC->SCp.buffers_residual;
1008 ++current_SC->SCp.buffer; 1008 ++current_SC->SCp.buffer;
1009 current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; 1009 current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
1010 current_SC->SCp.this_residual = current_SC->SCp.buffer->length; 1010 current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
1011 } 1011 }
1012 } 1012 }
@@ -1109,7 +1109,7 @@ static int fd_mcs_queue(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
1109 1109
1110 if (current_SC->use_sg) { 1110 if (current_SC->use_sg) {
1111 current_SC->SCp.buffer = (struct scatterlist *) current_SC->request_buffer; 1111 current_SC->SCp.buffer = (struct scatterlist *) current_SC->request_buffer;
1112 current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; 1112 current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
1113 current_SC->SCp.this_residual = current_SC->SCp.buffer->length; 1113 current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
1114 current_SC->SCp.buffers_residual = current_SC->use_sg - 1; 1114 current_SC->SCp.buffers_residual = current_SC->use_sg - 1;
1115 } else { 1115 } else {
diff --git a/drivers/scsi/fdomain.c b/drivers/scsi/fdomain.c
index 5d282e6a6a..2cd6b4959e 100644
--- a/drivers/scsi/fdomain.c
+++ b/drivers/scsi/fdomain.c
@@ -1321,7 +1321,7 @@ static irqreturn_t do_fdomain_16x0_intr(int irq, void *dev_id)
1321 if (current_SC->SCp.buffers_residual) { 1321 if (current_SC->SCp.buffers_residual) {
1322 --current_SC->SCp.buffers_residual; 1322 --current_SC->SCp.buffers_residual;
1323 ++current_SC->SCp.buffer; 1323 ++current_SC->SCp.buffer;
1324 current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; 1324 current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
1325 current_SC->SCp.this_residual = current_SC->SCp.buffer->length; 1325 current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
1326 } else 1326 } else
1327 break; 1327 break;
@@ -1354,7 +1354,7 @@ static irqreturn_t do_fdomain_16x0_intr(int irq, void *dev_id)
1354 && current_SC->SCp.buffers_residual) { 1354 && current_SC->SCp.buffers_residual) {
1355 --current_SC->SCp.buffers_residual; 1355 --current_SC->SCp.buffers_residual;
1356 ++current_SC->SCp.buffer; 1356 ++current_SC->SCp.buffer;
1357 current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; 1357 current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
1358 current_SC->SCp.this_residual = current_SC->SCp.buffer->length; 1358 current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
1359 } 1359 }
1360 } 1360 }
@@ -1439,8 +1439,7 @@ static int fdomain_16x0_queue(struct scsi_cmnd *SCpnt,
1439 1439
1440 if (scsi_sg_count(current_SC)) { 1440 if (scsi_sg_count(current_SC)) {
1441 current_SC->SCp.buffer = scsi_sglist(current_SC); 1441 current_SC->SCp.buffer = scsi_sglist(current_SC);
1442 current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) 1442 current_SC->SCp.ptr = sg_virt(current_SC->SCp.buffer);
1443 + current_SC->SCp.buffer->offset;
1444 current_SC->SCp.this_residual = current_SC->SCp.buffer->length; 1443 current_SC->SCp.this_residual = current_SC->SCp.buffer->length;
1445 current_SC->SCp.buffers_residual = scsi_sg_count(current_SC) - 1; 1444 current_SC->SCp.buffers_residual = scsi_sg_count(current_SC) - 1;
1446 } else { 1445 } else {
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 3ac080ee6e..5ab3ce7624 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -2374,18 +2374,18 @@ static void gdth_copy_internal_data(gdth_ha_str *ha, Scsi_Cmnd *scp,
2374 if (cpsum+cpnow > cpcount) 2374 if (cpsum+cpnow > cpcount)
2375 cpnow = cpcount - cpsum; 2375 cpnow = cpcount - cpsum;
2376 cpsum += cpnow; 2376 cpsum += cpnow;
2377 if (!sl->page) { 2377 if (!sg_page(sl)) {
2378 printk("GDT-HA %d: invalid sc/gt element in gdth_copy_internal_data()\n", 2378 printk("GDT-HA %d: invalid sc/gt element in gdth_copy_internal_data()\n",
2379 ha->hanum); 2379 ha->hanum);
2380 return; 2380 return;
2381 } 2381 }
2382 local_irq_save(flags); 2382 local_irq_save(flags);
2383 address = kmap_atomic(sl->page, KM_BIO_SRC_IRQ) + sl->offset; 2383 address = kmap_atomic(sg_page(sl), KM_BIO_SRC_IRQ) + sl->offset;
2384 if (to_buffer) 2384 if (to_buffer)
2385 memcpy(buffer, address, cpnow); 2385 memcpy(buffer, address, cpnow);
2386 else 2386 else
2387 memcpy(address, buffer, cpnow); 2387 memcpy(address, buffer, cpnow);
2388 flush_dcache_page(sl->page); 2388 flush_dcache_page(sg_page(sl));
2389 kunmap_atomic(address, KM_BIO_SRC_IRQ); 2389 kunmap_atomic(address, KM_BIO_SRC_IRQ);
2390 local_irq_restore(flags); 2390 local_irq_restore(flags);
2391 if (cpsum == cpcount) 2391 if (cpsum == cpcount)
diff --git a/drivers/scsi/ibmmca.c b/drivers/scsi/ibmmca.c
index 714e6273a7..db004a4507 100644
--- a/drivers/scsi/ibmmca.c
+++ b/drivers/scsi/ibmmca.c
@@ -1828,7 +1828,7 @@ static int ibmmca_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
1828 BUG_ON(scsi_sg_count(cmd) > 16); 1828 BUG_ON(scsi_sg_count(cmd) > 16);
1829 1829
1830 scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) { 1830 scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
1831 ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg->page) + sg->offset); 1831 ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg_page(sg)) + sg->offset);
1832 ld(shpnt)[ldn].sge[i].byte_length = sg->length; 1832 ld(shpnt)[ldn].sge[i].byte_length = sg->length;
1833 } 1833 }
1834 scb->enable |= IM_POINTER_TO_LIST; 1834 scb->enable |= IM_POINTER_TO_LIST;
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 252d180646..8d0244c2e7 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -175,18 +175,18 @@ static void idescsi_input_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsigne
175 175
176 while (bcount) { 176 while (bcount) {
177 count = min(pc->sg->length - pc->b_count, bcount); 177 count = min(pc->sg->length - pc->b_count, bcount);
178 if (PageHighMem(pc->sg->page)) { 178 if (PageHighMem(sg_page(pc->sg))) {
179 unsigned long flags; 179 unsigned long flags;
180 180
181 local_irq_save(flags); 181 local_irq_save(flags);
182 buf = kmap_atomic(pc->sg->page, KM_IRQ0) + 182 buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) +
183 pc->sg->offset; 183 pc->sg->offset;
184 drive->hwif->atapi_input_bytes(drive, 184 drive->hwif->atapi_input_bytes(drive,
185 buf + pc->b_count, count); 185 buf + pc->b_count, count);
186 kunmap_atomic(buf - pc->sg->offset, KM_IRQ0); 186 kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
187 local_irq_restore(flags); 187 local_irq_restore(flags);
188 } else { 188 } else {
189 buf = page_address(pc->sg->page) + pc->sg->offset; 189 buf = sg_virt(pc->sg);
190 drive->hwif->atapi_input_bytes(drive, 190 drive->hwif->atapi_input_bytes(drive,
191 buf + pc->b_count, count); 191 buf + pc->b_count, count);
192 } 192 }
@@ -212,18 +212,18 @@ static void idescsi_output_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsign
212 212
213 while (bcount) { 213 while (bcount) {
214 count = min(pc->sg->length - pc->b_count, bcount); 214 count = min(pc->sg->length - pc->b_count, bcount);
215 if (PageHighMem(pc->sg->page)) { 215 if (PageHighMem(sg_page(pc->sg))) {
216 unsigned long flags; 216 unsigned long flags;
217 217
218 local_irq_save(flags); 218 local_irq_save(flags);
219 buf = kmap_atomic(pc->sg->page, KM_IRQ0) + 219 buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) +
220 pc->sg->offset; 220 pc->sg->offset;
221 drive->hwif->atapi_output_bytes(drive, 221 drive->hwif->atapi_output_bytes(drive,
222 buf + pc->b_count, count); 222 buf + pc->b_count, count);
223 kunmap_atomic(buf - pc->sg->offset, KM_IRQ0); 223 kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
224 local_irq_restore(flags); 224 local_irq_restore(flags);
225 } else { 225 } else {
226 buf = page_address(pc->sg->page) + pc->sg->offset; 226 buf = sg_virt(pc->sg);
227 drive->hwif->atapi_output_bytes(drive, 227 drive->hwif->atapi_output_bytes(drive,
228 buf + pc->b_count, count); 228 buf + pc->b_count, count);
229 } 229 }
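The idescsi_input_buffers()/idescsi_output_buffers() hunks above keep the standard highmem-aware PIO pattern: a highmem page must be temporarily mapped with kmap_atomic() (with interrupts disabled to protect the KM_IRQ0 slot) before the CPU can address it, while a lowmem page is reachable directly through sg_virt(). A condensed sketch of the pattern, with a hypothetical copy helper standing in for the ATAPI transfer call:

	/* Sketch of the highmem-aware sg access pattern (hypothetical helper). */
	static void sg_copy_to_entry_sketch(struct scatterlist *sg,
					    const void *src, size_t count)
	{
		if (PageHighMem(sg_page(sg))) {
			unsigned long flags;
			char *buf;

			local_irq_save(flags);        /* protect the KM_IRQ0 slot */
			buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
			memcpy(buf, src, count);
			kunmap_atomic(buf - sg->offset, KM_IRQ0);
			local_irq_restore(flags);
		} else {
			memcpy(sg_virt(sg), src, count);  /* lowmem: direct map */
		}
	}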
diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c
index 74cdc1f0a7..a3d0c6b149 100644
--- a/drivers/scsi/imm.c
+++ b/drivers/scsi/imm.c
@@ -705,9 +705,7 @@ static int imm_completion(struct scsi_cmnd *cmd)
705 cmd->SCp.buffer++; 705 cmd->SCp.buffer++;
706 cmd->SCp.this_residual = 706 cmd->SCp.this_residual =
707 cmd->SCp.buffer->length; 707 cmd->SCp.buffer->length;
708 cmd->SCp.ptr = 708 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
709 page_address(cmd->SCp.buffer->page) +
710 cmd->SCp.buffer->offset;
711 709
712 /* 710 /*
713 * Make sure that we transfer even number of bytes 711 * Make sure that we transfer even number of bytes
@@ -844,9 +842,7 @@ static int imm_engine(imm_struct *dev, struct scsi_cmnd *cmd)
844 cmd->SCp.buffer = 842 cmd->SCp.buffer =
845 (struct scatterlist *) cmd->request_buffer; 843 (struct scatterlist *) cmd->request_buffer;
846 cmd->SCp.this_residual = cmd->SCp.buffer->length; 844 cmd->SCp.this_residual = cmd->SCp.buffer->length;
847 cmd->SCp.ptr = 845 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
848 page_address(cmd->SCp.buffer->page) +
849 cmd->SCp.buffer->offset;
850 } else { 846 } else {
851 /* else fill the only available buffer */ 847 /* else fill the only available buffer */
852 cmd->SCp.buffer = NULL; 848 cmd->SCp.buffer = NULL;
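sg_virt(), which replaces the open-coded page_address()-plus-offset expressions in imm.c and in most of the drivers that follow, is only valid for lowmem pages and expands to roughly:

	/* Approximate expansion of sg_virt() (lowmem pages only). */
	static inline void *sg_virt_sketch(struct scatterlist *sg)
	{
		return page_address(sg_page(sg)) + sg->offset;
	}

The conversion is therefore behavior-preserving; it just routes the page access through the accessor.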
diff --git a/drivers/scsi/in2000.c b/drivers/scsi/in2000.c
index ab7cbf3449..c8b452f287 100644
--- a/drivers/scsi/in2000.c
+++ b/drivers/scsi/in2000.c
@@ -372,7 +372,7 @@ static int in2000_queuecommand(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *))
372 if (cmd->use_sg) { 372 if (cmd->use_sg) {
373 cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer; 373 cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
374 cmd->SCp.buffers_residual = cmd->use_sg - 1; 374 cmd->SCp.buffers_residual = cmd->use_sg - 1;
375 cmd->SCp.ptr = (char *) page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset; 375 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
376 cmd->SCp.this_residual = cmd->SCp.buffer->length; 376 cmd->SCp.this_residual = cmd->SCp.buffer->length;
377 } else { 377 } else {
378 cmd->SCp.buffer = NULL; 378 cmd->SCp.buffer = NULL;
@@ -764,7 +764,7 @@ static void transfer_bytes(Scsi_Cmnd * cmd, int data_in_dir)
764 ++cmd->SCp.buffer; 764 ++cmd->SCp.buffer;
765 --cmd->SCp.buffers_residual; 765 --cmd->SCp.buffers_residual;
766 cmd->SCp.this_residual = cmd->SCp.buffer->length; 766 cmd->SCp.this_residual = cmd->SCp.buffer->length;
767 cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + cmd->SCp.buffer->offset; 767 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
768 } 768 }
769 769
770/* Set up hardware registers */ 770/* Set up hardware registers */
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index c316a0bcae..439b97a6a2 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -2872,6 +2872,7 @@ static struct ipr_sglist *ipr_alloc_ucode_buffer(int buf_len)
2872 } 2872 }
2873 2873
2874 scatterlist = sglist->scatterlist; 2874 scatterlist = sglist->scatterlist;
2875 sg_init_table(scatterlist, num_elem);
2875 2876
2876 sglist->order = order; 2877 sglist->order = order;
2877 sglist->num_sg = num_elem; 2878 sglist->num_sg = num_elem;
@@ -2884,12 +2885,12 @@ static struct ipr_sglist *ipr_alloc_ucode_buffer(int buf_len)
2884 2885
2885 /* Free up what we already allocated */ 2886 /* Free up what we already allocated */
2886 for (j = i - 1; j >= 0; j--) 2887 for (j = i - 1; j >= 0; j--)
2887 __free_pages(scatterlist[j].page, order); 2888 __free_pages(sg_page(&scatterlist[j]), order);
2888 kfree(sglist); 2889 kfree(sglist);
2889 return NULL; 2890 return NULL;
2890 } 2891 }
2891 2892
2892 scatterlist[i].page = page; 2893 sg_set_page(&scatterlist[i], page);
2893 } 2894 }
2894 2895
2895 return sglist; 2896 return sglist;
@@ -2910,7 +2911,7 @@ static void ipr_free_ucode_buffer(struct ipr_sglist *sglist)
2910 int i; 2911 int i;
2911 2912
2912 for (i = 0; i < sglist->num_sg; i++) 2913 for (i = 0; i < sglist->num_sg; i++)
2913 __free_pages(sglist->scatterlist[i].page, sglist->order); 2914 __free_pages(sg_page(&sglist->scatterlist[i]), sglist->order);
2914 2915
2915 kfree(sglist); 2916 kfree(sglist);
2916} 2917}
@@ -2940,9 +2941,11 @@ static int ipr_copy_ucode_buffer(struct ipr_sglist *sglist,
2940 scatterlist = sglist->scatterlist; 2941 scatterlist = sglist->scatterlist;
2941 2942
2942 for (i = 0; i < (len / bsize_elem); i++, buffer += bsize_elem) { 2943 for (i = 0; i < (len / bsize_elem); i++, buffer += bsize_elem) {
2943 kaddr = kmap(scatterlist[i].page); 2944 struct page *page = sg_page(&scatterlist[i]);
2945
2946 kaddr = kmap(page);
2944 memcpy(kaddr, buffer, bsize_elem); 2947 memcpy(kaddr, buffer, bsize_elem);
2945 kunmap(scatterlist[i].page); 2948 kunmap(page);
2946 2949
2947 scatterlist[i].length = bsize_elem; 2950 scatterlist[i].length = bsize_elem;
2948 2951
@@ -2953,9 +2956,11 @@ static int ipr_copy_ucode_buffer(struct ipr_sglist *sglist,
2953 } 2956 }
2954 2957
2955 if (len % bsize_elem) { 2958 if (len % bsize_elem) {
2956 kaddr = kmap(scatterlist[i].page); 2959 struct page *page = sg_page(&scatterlist[i]);
2960
2961 kaddr = kmap(page);
2957 memcpy(kaddr, buffer, len % bsize_elem); 2962 memcpy(kaddr, buffer, len % bsize_elem);
2958 kunmap(scatterlist[i].page); 2963 kunmap(page);
2959 2964
2960 scatterlist[i].length = len % bsize_elem; 2965 scatterlist[i].length = len % bsize_elem;
2961 } 2966 }
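The ipr.c hunks show the other half of the conversion: a table must now be initialized with sg_init_table() before sg_set_page() is used, so the marker bits start out cleared. A hedged sketch of the allocate/populate/unwind shape used in ipr_alloc_ucode_buffer(), with simplified error handling:

	/* Sketch of the sg_init_table()/sg_set_page() allocation pattern. */
	static int fill_sgl_sketch(struct scatterlist *sgl, int nents, int order)
	{
		int i;

		sg_init_table(sgl, nents);           /* zero table, clear markers */
		for (i = 0; i < nents; i++) {
			struct page *page = alloc_pages(GFP_KERNEL, order);

			if (!page) {
				while (--i >= 0)     /* unwind earlier allocations */
					__free_pages(sg_page(&sgl[i]), order);
				return -ENOMEM;
			}
			sg_set_page(&sgl[i], page);  /* 2.6.24-era two-argument form */
			sgl[i].length = PAGE_SIZE << order;
		}
		return 0;
	}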
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index edaac2714c..5c5a9b2628 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -1515,7 +1515,7 @@ static int ips_is_passthru(struct scsi_cmnd *SC)
1515 /* kmap_atomic() ensures addressability of the user buffer.*/ 1515 /* kmap_atomic() ensures addressability of the user buffer.*/
1516 /* local_irq_save() protects the KM_IRQ0 address slot. */ 1516 /* local_irq_save() protects the KM_IRQ0 address slot. */
1517 local_irq_save(flags); 1517 local_irq_save(flags);
1518 buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; 1518 buffer = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
1519 if (buffer && buffer[0] == 'C' && buffer[1] == 'O' && 1519 if (buffer && buffer[0] == 'C' && buffer[1] == 'O' &&
1520 buffer[2] == 'P' && buffer[3] == 'P') { 1520 buffer[2] == 'P' && buffer[3] == 'P') {
1521 kunmap_atomic(buffer - sg->offset, KM_IRQ0); 1521 kunmap_atomic(buffer - sg->offset, KM_IRQ0);
@@ -3523,7 +3523,7 @@ ips_scmd_buf_write(struct scsi_cmnd *scmd, void *data, unsigned int count)
3523 /* kmap_atomic() ensures addressability of the data buffer.*/ 3523 /* kmap_atomic() ensures addressability of the data buffer.*/
3524 /* local_irq_save() protects the KM_IRQ0 address slot. */ 3524 /* local_irq_save() protects the KM_IRQ0 address slot. */
3525 local_irq_save(flags); 3525 local_irq_save(flags);
3526 buffer = kmap_atomic(sg[i].page, KM_IRQ0) + sg[i].offset; 3526 buffer = kmap_atomic(sg_page(&sg[i]), KM_IRQ0) + sg[i].offset;
3527 memcpy(buffer, &cdata[xfer_cnt], min_cnt); 3527 memcpy(buffer, &cdata[xfer_cnt], min_cnt);
3528 kunmap_atomic(buffer - sg[i].offset, KM_IRQ0); 3528 kunmap_atomic(buffer - sg[i].offset, KM_IRQ0);
3529 local_irq_restore(flags); 3529 local_irq_restore(flags);
@@ -3556,7 +3556,7 @@ ips_scmd_buf_read(struct scsi_cmnd *scmd, void *data, unsigned int count)
3556 /* kmap_atomic() ensures addressability of the data buffer.*/ 3556 /* kmap_atomic() ensures addressability of the data buffer.*/
3557 /* local_irq_save() protects the KM_IRQ0 address slot. */ 3557 /* local_irq_save() protects the KM_IRQ0 address slot. */
3558 local_irq_save(flags); 3558 local_irq_save(flags);
3559 buffer = kmap_atomic(sg[i].page, KM_IRQ0) + sg[i].offset; 3559 buffer = kmap_atomic(sg_page(&sg[i]), KM_IRQ0) + sg[i].offset;
3560 memcpy(&cdata[xfer_cnt], buffer, min_cnt); 3560 memcpy(&cdata[xfer_cnt], buffer, min_cnt);
3561 kunmap_atomic(buffer - sg[i].offset, KM_IRQ0); 3561 kunmap_atomic(buffer - sg[i].offset, KM_IRQ0);
3562 local_irq_restore(flags); 3562 local_irq_restore(flags);
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index a21455d027..6ce4109efd 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -70,9 +70,7 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
70static inline void 70static inline void
71iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size) 71iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size)
72{ 72{
73 ibuf->sg.page = virt_to_page(vbuf); 73 sg_init_one(&ibuf->sg, vbuf, size);
74 ibuf->sg.offset = offset_in_page(vbuf);
75 ibuf->sg.length = size;
76 ibuf->sent = 0; 74 ibuf->sent = 0;
77 ibuf->use_sendmsg = 1; 75 ibuf->use_sendmsg = 1;
78} 76}
@@ -80,13 +78,14 @@ iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size)
80static inline void 78static inline void
81iscsi_buf_init_sg(struct iscsi_buf *ibuf, struct scatterlist *sg) 79iscsi_buf_init_sg(struct iscsi_buf *ibuf, struct scatterlist *sg)
82{ 80{
83 ibuf->sg.page = sg->page; 81 sg_init_table(&ibuf->sg, 1);
82 sg_set_page(&ibuf->sg, sg_page(sg));
84 ibuf->sg.offset = sg->offset; 83 ibuf->sg.offset = sg->offset;
85 ibuf->sg.length = sg->length; 84 ibuf->sg.length = sg->length;
86 /* 85 /*
87 * Fastpath: sg element fits into single page 86 * Fastpath: sg element fits into single page
88 */ 87 */
89 if (sg->length + sg->offset <= PAGE_SIZE && !PageSlab(sg->page)) 88 if (sg->length + sg->offset <= PAGE_SIZE && !PageSlab(sg_page(sg)))
90 ibuf->use_sendmsg = 0; 89 ibuf->use_sendmsg = 0;
91 else 90 else
92 ibuf->use_sendmsg = 1; 91 ibuf->use_sendmsg = 1;
@@ -716,7 +715,7 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
716 for (i = tcp_ctask->sg_count; i < scsi_sg_count(sc); i++) { 715 for (i = tcp_ctask->sg_count; i < scsi_sg_count(sc); i++) {
717 char *dest; 716 char *dest;
718 717
719 dest = kmap_atomic(sg[i].page, KM_SOFTIRQ0); 718 dest = kmap_atomic(sg_page(&sg[i]), KM_SOFTIRQ0);
720 rc = iscsi_ctask_copy(tcp_conn, ctask, dest + sg[i].offset, 719 rc = iscsi_ctask_copy(tcp_conn, ctask, dest + sg[i].offset,
721 sg[i].length, offset); 720 sg[i].length, offset);
722 kunmap_atomic(dest, KM_SOFTIRQ0); 721 kunmap_atomic(dest, KM_SOFTIRQ0);
@@ -1103,9 +1102,9 @@ iscsi_send(struct iscsi_conn *conn, struct iscsi_buf *buf, int size, int flags)
1103 * slab case. 1102 * slab case.
1104 */ 1103 */
1105 if (buf->use_sendmsg) 1104 if (buf->use_sendmsg)
1106 res = sock_no_sendpage(sk, buf->sg.page, offset, size, flags); 1105 res = sock_no_sendpage(sk, sg_page(&buf->sg), offset, size, flags);
1107 else 1106 else
1108 res = tcp_conn->sendpage(sk, buf->sg.page, offset, size, flags); 1107 res = tcp_conn->sendpage(sk, sg_page(&buf->sg), offset, size, flags);
1109 1108
1110 if (res >= 0) { 1109 if (res >= 0) {
1111 conn->txdata_octets += res; 1110 conn->txdata_octets += res;
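iscsi_buf_init_iov() above collapses three manual field assignments into sg_init_one(), which wraps the virt_to_page()/offset_in_page() computation and also terminates the one-entry table. Its effect is approximately:

	/* Approximate effect of sg_init_one() on a kernel virtual buffer. */
	static inline void sg_init_one_sketch(struct scatterlist *sg,
					      void *buf, unsigned int buflen)
	{
		sg_init_table(sg, 1);                 /* one entry, marked as last */
		sg_set_page(sg, virt_to_page(buf));
		sg->offset = offset_in_page(buf);
		sg->length = buflen;
	}

The same replacement appears again in tmscsim.c's dc390_sg_build_single() further down.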
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 10d1aff993..66c6520357 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -658,7 +658,7 @@ mega_build_cmd(adapter_t *adapter, Scsi_Cmnd *cmd, int *busy)
658 struct scatterlist *sg; 658 struct scatterlist *sg;
659 659
660 sg = scsi_sglist(cmd); 660 sg = scsi_sglist(cmd);
661 buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; 661 buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset;
662 662
663 memset(buf, 0, cmd->cmnd[4]); 663 memset(buf, 0, cmd->cmnd[4]);
664 kunmap_atomic(buf - sg->offset, KM_IRQ0); 664 kunmap_atomic(buf - sg->offset, KM_IRQ0);
@@ -1542,10 +1542,8 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int nstatus, int status)
1542 if( cmd->cmnd[0] == INQUIRY && !islogical ) { 1542 if( cmd->cmnd[0] == INQUIRY && !islogical ) {
1543 1543
1544 sgl = scsi_sglist(cmd); 1544 sgl = scsi_sglist(cmd);
1545 if( sgl->page ) { 1545 if( sg_page(sgl) ) {
1546 c = *(unsigned char *) 1546 c = *(unsigned char *) sg_virt(&sgl[0]);
1547 page_address((&sgl[0])->page) +
1548 (&sgl[0])->offset;
1549 } else { 1547 } else {
1550 printk(KERN_WARNING 1548 printk(KERN_WARNING
1551 "megaraid: invalid sg.\n"); 1549 "megaraid: invalid sg.\n");
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index 78779209ac..c892310818 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -1584,10 +1584,8 @@ megaraid_mbox_build_cmd(adapter_t *adapter, struct scsi_cmnd *scp, int *busy)
1584 caddr_t vaddr; 1584 caddr_t vaddr;
1585 1585
1586 sgl = scsi_sglist(scp); 1586 sgl = scsi_sglist(scp);
1587 if (sgl->page) { 1587 if (sg_page(sgl)) {
1588 vaddr = (caddr_t) 1588 vaddr = (caddr_t) sg_virt(&sgl[0]);
1589 (page_address((&sgl[0])->page)
1590 + (&sgl[0])->offset);
1591 1589
1592 memset(vaddr, 0, scp->cmnd[4]); 1590 memset(vaddr, 0, scp->cmnd[4]);
1593 } 1591 }
@@ -2328,10 +2326,8 @@ megaraid_mbox_dpc(unsigned long devp)
2328 && IS_RAID_CH(raid_dev, scb->dev_channel)) { 2326 && IS_RAID_CH(raid_dev, scb->dev_channel)) {
2329 2327
2330 sgl = scsi_sglist(scp); 2328 sgl = scsi_sglist(scp);
2331 if (sgl->page) { 2329 if (sg_page(sgl)) {
2332 c = *(unsigned char *) 2330 c = *(unsigned char *) sg_virt(&sgl[0]);
2333 (page_address((&sgl[0])->page) +
2334 (&sgl[0])->offset);
2335 } else { 2331 } else {
2336 con_log(CL_ANN, (KERN_WARNING 2332 con_log(CL_ANN, (KERN_WARNING
2337 "megaraid mailbox: invalid sg:%d\n", 2333 "megaraid mailbox: invalid sg:%d\n",
diff --git a/drivers/scsi/oktagon_esp.c b/drivers/scsi/oktagon_esp.c
index 26a6d55faf..8e5eadbd5c 100644
--- a/drivers/scsi/oktagon_esp.c
+++ b/drivers/scsi/oktagon_esp.c
@@ -550,8 +550,7 @@ void dma_mmu_get_scsi_one(struct NCR_ESP *esp, Scsi_Cmnd *sp)
550 550
551void dma_mmu_get_scsi_sgl(struct NCR_ESP *esp, Scsi_Cmnd *sp) 551void dma_mmu_get_scsi_sgl(struct NCR_ESP *esp, Scsi_Cmnd *sp)
552{ 552{
553 sp->SCp.ptr = page_address(sp->SCp.buffer->page)+ 553 sp->SCp.ptr = sg_virt(sp->SCp.buffer);
554 sp->SCp.buffer->offset;
555} 554}
556 555
557void dma_mmu_release_scsi_one(struct NCR_ESP *esp, Scsi_Cmnd *sp) 556void dma_mmu_release_scsi_one(struct NCR_ESP *esp, Scsi_Cmnd *sp)
@@ -564,8 +563,7 @@ void dma_mmu_release_scsi_sgl(struct NCR_ESP *esp, Scsi_Cmnd *sp)
564 563
565void dma_advance_sg(Scsi_Cmnd *sp) 564void dma_advance_sg(Scsi_Cmnd *sp)
566{ 565{
567 sp->SCp.ptr = page_address(sp->SCp.buffer->page)+ 566 sp->SCp.ptr = sg_virt(sp->SCp.buffer);
568 sp->SCp.buffer->offset;
569} 567}
570 568
571 569
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 331b789937..1c5c4b68f2 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -542,7 +542,7 @@ static int osst_verify_frame(struct osst_tape * STp, int frame_seq_number, int q
542 if (STp->raw) { 542 if (STp->raw) {
543 if (STp->buffer->syscall_result) { 543 if (STp->buffer->syscall_result) {
544 for (i=0; i < STp->buffer->sg_segs; i++) 544 for (i=0; i < STp->buffer->sg_segs; i++)
545 memset(page_address(STp->buffer->sg[i].page), 545 memset(page_address(sg_page(&STp->buffer->sg[i])),
546 0, STp->buffer->sg[i].length); 546 0, STp->buffer->sg[i].length);
547 strcpy(STp->buffer->b_data, "READ ERROR ON FRAME"); 547 strcpy(STp->buffer->b_data, "READ ERROR ON FRAME");
548 } else 548 } else
@@ -4437,7 +4437,7 @@ static int os_scsi_tape_open(struct inode * inode, struct file * filp)
4437 for (i = 0, b_size = 0; 4437 for (i = 0, b_size = 0;
4438 (i < STp->buffer->sg_segs) && ((b_size + STp->buffer->sg[i].length) <= OS_DATA_SIZE); 4438 (i < STp->buffer->sg_segs) && ((b_size + STp->buffer->sg[i].length) <= OS_DATA_SIZE);
4439 b_size += STp->buffer->sg[i++].length); 4439 b_size += STp->buffer->sg[i++].length);
4440 STp->buffer->aux = (os_aux_t *) (page_address(STp->buffer->sg[i].page) + OS_DATA_SIZE - b_size); 4440 STp->buffer->aux = (os_aux_t *) (page_address(sg_page(&STp->buffer->sg[i])) + OS_DATA_SIZE - b_size);
4441#if DEBUG 4441#if DEBUG
4442 printk(OSST_DEB_MSG "%s:D: b_data points to %p in segment 0 at %p\n", name, 4442 printk(OSST_DEB_MSG "%s:D: b_data points to %p in segment 0 at %p\n", name,
4443 STp->buffer->b_data, page_address(STp->buffer->sg[0].page)); 4443 STp->buffer->b_data, page_address(STp->buffer->sg[0].page));
@@ -5252,25 +5252,26 @@ static int enlarge_buffer(struct osst_buffer *STbuffer, int need_dma)
5252 /* Try to allocate the first segment up to OS_DATA_SIZE and the others 5252 /* Try to allocate the first segment up to OS_DATA_SIZE and the others
5253 big enough to reach the goal (code assumes no segments in place) */ 5253 big enough to reach the goal (code assumes no segments in place) */
5254 for (b_size = OS_DATA_SIZE, order = OSST_FIRST_ORDER; b_size >= PAGE_SIZE; order--, b_size /= 2) { 5254 for (b_size = OS_DATA_SIZE, order = OSST_FIRST_ORDER; b_size >= PAGE_SIZE; order--, b_size /= 2) {
5255 STbuffer->sg[0].page = alloc_pages(priority, order); 5255 struct page *page = alloc_pages(priority, order);
5256
5256 STbuffer->sg[0].offset = 0; 5257 STbuffer->sg[0].offset = 0;
5257 if (STbuffer->sg[0].page != NULL) { 5258 if (page != NULL) {
5259 sg_set_page(&STbuffer->sg[0], page);
5258 STbuffer->sg[0].length = b_size; 5260 STbuffer->sg[0].length = b_size;
5259 STbuffer->b_data = page_address(STbuffer->sg[0].page); 5261 STbuffer->b_data = page_address(page);
5260 break; 5262 break;
5261 } 5263 }
5262 } 5264 }
5263 if (STbuffer->sg[0].page == NULL) { 5265 if (sg_page(&STbuffer->sg[0]) == NULL) {
5264 printk(KERN_NOTICE "osst :I: Can't allocate tape buffer main segment.\n"); 5266 printk(KERN_NOTICE "osst :I: Can't allocate tape buffer main segment.\n");
5265 return 0; 5267 return 0;
5266 } 5268 }
5267 /* Got initial segment of 'bsize,order', continue with same size if possible, except for AUX */ 5269 /* Got initial segment of 'bsize,order', continue with same size if possible, except for AUX */
5268 for (segs=STbuffer->sg_segs=1, got=b_size; 5270 for (segs=STbuffer->sg_segs=1, got=b_size;
5269 segs < max_segs && got < OS_FRAME_SIZE; ) { 5271 segs < max_segs && got < OS_FRAME_SIZE; ) {
5270 STbuffer->sg[segs].page = 5272 struct page *page = alloc_pages(priority, (OS_FRAME_SIZE - got <= PAGE_SIZE) ? 0 : order);
5271 alloc_pages(priority, (OS_FRAME_SIZE - got <= PAGE_SIZE) ? 0 : order);
5272 STbuffer->sg[segs].offset = 0; 5273 STbuffer->sg[segs].offset = 0;
5273 if (STbuffer->sg[segs].page == NULL) { 5274 if (page == NULL) {
5274 if (OS_FRAME_SIZE - got <= (max_segs - segs) * b_size / 2 && order) { 5275 if (OS_FRAME_SIZE - got <= (max_segs - segs) * b_size / 2 && order) {
5275 b_size /= 2; /* Large enough for the rest of the buffers */ 5276 b_size /= 2; /* Large enough for the rest of the buffers */
5276 order--; 5277 order--;
@@ -5284,6 +5285,7 @@ static int enlarge_buffer(struct osst_buffer *STbuffer, int need_dma)
5284 normalize_buffer(STbuffer); 5285 normalize_buffer(STbuffer);
5285 return 0; 5286 return 0;
5286 } 5287 }
5288 sg_set_page(&STbuffer->sg[segs], page);
5287 STbuffer->sg[segs].length = (OS_FRAME_SIZE - got <= PAGE_SIZE / 2) ? (OS_FRAME_SIZE - got) : b_size; 5289 STbuffer->sg[segs].length = (OS_FRAME_SIZE - got <= PAGE_SIZE / 2) ? (OS_FRAME_SIZE - got) : b_size;
5288 got += STbuffer->sg[segs].length; 5290 got += STbuffer->sg[segs].length;
5289 STbuffer->buffer_size = got; 5291 STbuffer->buffer_size = got;
@@ -5316,7 +5318,7 @@ static void normalize_buffer(struct osst_buffer *STbuffer)
5316 b_size < STbuffer->sg[i].length; 5318 b_size < STbuffer->sg[i].length;
5317 b_size *= 2, order++); 5319 b_size *= 2, order++);
5318 5320
5319 __free_pages(STbuffer->sg[i].page, order); 5321 __free_pages(sg_page(&STbuffer->sg[i]), order);
5320 STbuffer->buffer_size -= STbuffer->sg[i].length; 5322 STbuffer->buffer_size -= STbuffer->sg[i].length;
5321 } 5323 }
5322#if DEBUG 5324#if DEBUG
@@ -5344,7 +5346,7 @@ static int append_to_buffer(const char __user *ubp, struct osst_buffer *st_bp, i
5344 for ( ; i < st_bp->sg_segs && do_count > 0; i++) { 5346 for ( ; i < st_bp->sg_segs && do_count > 0; i++) {
5345 cnt = st_bp->sg[i].length - offset < do_count ? 5347 cnt = st_bp->sg[i].length - offset < do_count ?
5346 st_bp->sg[i].length - offset : do_count; 5348 st_bp->sg[i].length - offset : do_count;
5347 res = copy_from_user(page_address(st_bp->sg[i].page) + offset, ubp, cnt); 5349 res = copy_from_user(page_address(sg_page(&st_bp->sg[i])) + offset, ubp, cnt);
5348 if (res) 5350 if (res)
5349 return (-EFAULT); 5351 return (-EFAULT);
5350 do_count -= cnt; 5352 do_count -= cnt;
@@ -5377,7 +5379,7 @@ static int from_buffer(struct osst_buffer *st_bp, char __user *ubp, int do_count
5377 for ( ; i < st_bp->sg_segs && do_count > 0; i++) { 5379 for ( ; i < st_bp->sg_segs && do_count > 0; i++) {
5378 cnt = st_bp->sg[i].length - offset < do_count ? 5380 cnt = st_bp->sg[i].length - offset < do_count ?
5379 st_bp->sg[i].length - offset : do_count; 5381 st_bp->sg[i].length - offset : do_count;
5380 res = copy_to_user(ubp, page_address(st_bp->sg[i].page) + offset, cnt); 5382 res = copy_to_user(ubp, page_address(sg_page(&st_bp->sg[i])) + offset, cnt);
5381 if (res) 5383 if (res)
5382 return (-EFAULT); 5384 return (-EFAULT);
5383 do_count -= cnt; 5385 do_count -= cnt;
@@ -5410,7 +5412,7 @@ static int osst_zero_buffer_tail(struct osst_buffer *st_bp)
5410 i < st_bp->sg_segs && do_count > 0; i++) { 5412 i < st_bp->sg_segs && do_count > 0; i++) {
5411 cnt = st_bp->sg[i].length - offset < do_count ? 5413 cnt = st_bp->sg[i].length - offset < do_count ?
5412 st_bp->sg[i].length - offset : do_count ; 5414 st_bp->sg[i].length - offset : do_count ;
5413 memset(page_address(st_bp->sg[i].page) + offset, 0, cnt); 5415 memset(page_address(sg_page(&st_bp->sg[i])) + offset, 0, cnt);
5414 do_count -= cnt; 5416 do_count -= cnt;
5415 offset = 0; 5417 offset = 0;
5416 } 5418 }
@@ -5430,7 +5432,7 @@ static int osst_copy_to_buffer(struct osst_buffer *st_bp, unsigned char *ptr)
5430 for (i = 0; i < st_bp->sg_segs && do_count > 0; i++) { 5432 for (i = 0; i < st_bp->sg_segs && do_count > 0; i++) {
5431 cnt = st_bp->sg[i].length < do_count ? 5433 cnt = st_bp->sg[i].length < do_count ?
5432 st_bp->sg[i].length : do_count ; 5434 st_bp->sg[i].length : do_count ;
5433 memcpy(page_address(st_bp->sg[i].page), ptr, cnt); 5435 memcpy(page_address(sg_page(&st_bp->sg[i])), ptr, cnt);
5434 do_count -= cnt; 5436 do_count -= cnt;
5435 ptr += cnt; 5437 ptr += cnt;
5436 } 5438 }
@@ -5451,7 +5453,7 @@ static int osst_copy_from_buffer(struct osst_buffer *st_bp, unsigned char *ptr)
5451 for (i = 0; i < st_bp->sg_segs && do_count > 0; i++) { 5453 for (i = 0; i < st_bp->sg_segs && do_count > 0; i++) {
5452 cnt = st_bp->sg[i].length < do_count ? 5454 cnt = st_bp->sg[i].length < do_count ?
5453 st_bp->sg[i].length : do_count ; 5455 st_bp->sg[i].length : do_count ;
5454 memcpy(ptr, page_address(st_bp->sg[i].page), cnt); 5456 memcpy(ptr, page_address(sg_page(&st_bp->sg[i])), cnt);
5455 do_count -= cnt; 5457 do_count -= cnt;
5456 ptr += cnt; 5458 ptr += cnt;
5457 } 5459 }
diff --git a/drivers/scsi/pcmcia/nsp_cs.h b/drivers/scsi/pcmcia/nsp_cs.h
index 98397559c5..7db28cd494 100644
--- a/drivers/scsi/pcmcia/nsp_cs.h
+++ b/drivers/scsi/pcmcia/nsp_cs.h
@@ -393,7 +393,7 @@ enum _burst_mode {
393#define MSG_EXT_SDTR 0x01 393#define MSG_EXT_SDTR 0x01
394 394
395/* scatter-gather table */ 395/* scatter-gather table */
396# define BUFFER_ADDR ((char *)((unsigned int)(SCpnt->SCp.buffer->page) + SCpnt->SCp.buffer->offset)) 396# define BUFFER_ADDR ((char *)((sg_virt(SCpnt->SCp.buffer))))
397 397
398#endif /*__nsp_cs__*/ 398#endif /*__nsp_cs__*/
399/* end */ 399/* end */
diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c
index 190e2a7d70..969b9387a0 100644
--- a/drivers/scsi/pcmcia/sym53c500_cs.c
+++ b/drivers/scsi/pcmcia/sym53c500_cs.c
@@ -443,8 +443,7 @@ SYM53C500_intr(int irq, void *dev_id)
443 443
444 scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) { 444 scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
445 SYM53C500_pio_write(fast_pio, port_base, 445 SYM53C500_pio_write(fast_pio, port_base,
446 page_address(sg->page) + sg->offset, 446 sg_virt(sg), sg->length);
447 sg->length);
448 } 447 }
449 REG0(port_base); 448 REG0(port_base);
450 } 449 }
@@ -463,8 +462,7 @@ SYM53C500_intr(int irq, void *dev_id)
463 462
464 scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) { 463 scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
465 SYM53C500_pio_read(fast_pio, port_base, 464 SYM53C500_pio_read(fast_pio, port_base,
466 page_address(sg->page) + sg->offset, 465 sg_virt(sg), sg->length);
467 sg->length);
468 } 466 }
469 REG0(port_base); 467 REG0(port_base);
470 } 468 }
diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c
index 67b6d76a6c..67ee51a3d7 100644
--- a/drivers/scsi/ppa.c
+++ b/drivers/scsi/ppa.c
@@ -608,9 +608,7 @@ static int ppa_completion(struct scsi_cmnd *cmd)
608 cmd->SCp.buffer++; 608 cmd->SCp.buffer++;
609 cmd->SCp.this_residual = 609 cmd->SCp.this_residual =
610 cmd->SCp.buffer->length; 610 cmd->SCp.buffer->length;
611 cmd->SCp.ptr = 611 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
612 page_address(cmd->SCp.buffer->page) +
613 cmd->SCp.buffer->offset;
614 } 612 }
615 } 613 }
616 /* Now check to see if the drive is ready to communicate */ 614 /* Now check to see if the drive is ready to communicate */
@@ -756,8 +754,7 @@ static int ppa_engine(ppa_struct *dev, struct scsi_cmnd *cmd)
756 /* if many buffers are available, start filling the first */ 754 /* if many buffers are available, start filling the first */
757 cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer; 755 cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
758 cmd->SCp.this_residual = cmd->SCp.buffer->length; 756 cmd->SCp.this_residual = cmd->SCp.buffer->length;
759 cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + 757 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
760 cmd->SCp.buffer->offset;
761 } else { 758 } else {
762 /* else fill the only available buffer */ 759 /* else fill the only available buffer */
763 cmd->SCp.buffer = NULL; 760 cmd->SCp.buffer = NULL;
diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c
index 0f43d1d046..17b4a7c461 100644
--- a/drivers/scsi/ps3rom.c
+++ b/drivers/scsi/ps3rom.c
@@ -111,14 +111,14 @@ static int fill_from_dev_buffer(struct scsi_cmnd *cmd, const void *buf)
111 req_len = act_len = 0; 111 req_len = act_len = 0;
112 scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) { 112 scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) {
113 if (active) { 113 if (active) {
114 kaddr = kmap_atomic(sgpnt->page, KM_IRQ0); 114 kaddr = kmap_atomic(sg_page(sgpnt), KM_IRQ0);
115 len = sgpnt->length; 115 len = sgpnt->length;
116 if ((req_len + len) > buflen) { 116 if ((req_len + len) > buflen) {
117 active = 0; 117 active = 0;
118 len = buflen - req_len; 118 len = buflen - req_len;
119 } 119 }
120 memcpy(kaddr + sgpnt->offset, buf + req_len, len); 120 memcpy(kaddr + sgpnt->offset, buf + req_len, len);
121 flush_kernel_dcache_page(sgpnt->page); 121 flush_kernel_dcache_page(sg_page(sgpnt));
122 kunmap_atomic(kaddr, KM_IRQ0); 122 kunmap_atomic(kaddr, KM_IRQ0);
123 act_len += len; 123 act_len += len;
124 } 124 }
@@ -147,7 +147,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd *cmd, void *buf)
147 147
148 req_len = fin = 0; 148 req_len = fin = 0;
149 scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) { 149 scsi_for_each_sg(cmd, sgpnt, scsi_sg_count(cmd), k) {
150 kaddr = kmap_atomic(sgpnt->page, KM_IRQ0); 150 kaddr = kmap_atomic(sg_page(sgpnt), KM_IRQ0);
151 len = sgpnt->length; 151 len = sgpnt->length;
152 if ((req_len + len) > buflen) { 152 if ((req_len + len) > buflen) {
153 len = buflen - req_len; 153 len = buflen - req_len;
diff --git a/drivers/scsi/qlogicfas408.c b/drivers/scsi/qlogicfas408.c
index 2bfbf26c00..de7b3bc2cb 100644
--- a/drivers/scsi/qlogicfas408.c
+++ b/drivers/scsi/qlogicfas408.c
@@ -317,7 +317,7 @@ static unsigned int ql_pcmd(struct scsi_cmnd *cmd)
317 return ((priv->qabort == 1 ? 317 return ((priv->qabort == 1 ?
318 DID_ABORT : DID_RESET) << 16); 318 DID_ABORT : DID_RESET) << 16);
319 } 319 }
320 buf = page_address(sg->page) + sg->offset; 320 buf = sg_virt(sg);
321 if (ql_pdma(priv, phase, buf, sg->length)) 321 if (ql_pdma(priv, phase, buf, sg->length))
322 break; 322 break;
323 } 323 }
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 72ee4c9cfb..46cae5a212 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -625,7 +625,7 @@ static int fill_from_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
625 scsi_for_each_sg(scp, sg, scp->use_sg, k) { 625 scsi_for_each_sg(scp, sg, scp->use_sg, k) {
626 if (active) { 626 if (active) {
627 kaddr = (unsigned char *) 627 kaddr = (unsigned char *)
628 kmap_atomic(sg->page, KM_USER0); 628 kmap_atomic(sg_page(sg), KM_USER0);
629 if (NULL == kaddr) 629 if (NULL == kaddr)
630 return (DID_ERROR << 16); 630 return (DID_ERROR << 16);
631 kaddr_off = (unsigned char *)kaddr + sg->offset; 631 kaddr_off = (unsigned char *)kaddr + sg->offset;
@@ -672,7 +672,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
672 sg = scsi_sglist(scp); 672 sg = scsi_sglist(scp);
673 req_len = fin = 0; 673 req_len = fin = 0;
674 for (k = 0; k < scp->use_sg; ++k, sg = sg_next(sg)) { 674 for (k = 0; k < scp->use_sg; ++k, sg = sg_next(sg)) {
675 kaddr = (unsigned char *)kmap_atomic(sg->page, KM_USER0); 675 kaddr = (unsigned char *)kmap_atomic(sg_page(sg), KM_USER0);
676 if (NULL == kaddr) 676 if (NULL == kaddr)
677 return -1; 677 return -1;
678 kaddr_off = (unsigned char *)kaddr + sg->offset; 678 kaddr_off = (unsigned char *)kaddr + sg->offset;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index aac8a02cbe..61fdaf02f2 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -295,7 +295,7 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
295 int i, err, nr_vecs = 0; 295 int i, err, nr_vecs = 0;
296 296
297 for_each_sg(sgl, sg, nsegs, i) { 297 for_each_sg(sgl, sg, nsegs, i) {
298 page = sg->page; 298 page = sg_page(sg);
299 off = sg->offset; 299 off = sg->offset;
300 len = sg->length; 300 len = sg->length;
301 data_len += len; 301 data_len += len;
@@ -764,7 +764,7 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
764 if (unlikely(!sgl)) 764 if (unlikely(!sgl))
765 goto enomem; 765 goto enomem;
766 766
767 memset(sgl, 0, sizeof(*sgl) * sgp->size); 767 sg_init_table(sgl, sgp->size);
768 768
769 /* 769 /*
770 * first loop through, set initial index and return value 770 * first loop through, set initial index and return value
@@ -781,6 +781,13 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
781 sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl); 781 sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
782 782
783 /* 783 /*
784 * if we have nothing left, mark the last segment as
785 * end-of-list
786 */
787 if (!left)
788 sg_mark_end(sgl, this);
789
790 /*
784 * don't allow subsequent mempool allocs to sleep, it would 791 * don't allow subsequent mempool allocs to sleep, it would
785 * violate the mempool principle. 792 * violate the mempool principle.
786 */ 793 */
@@ -2353,7 +2360,7 @@ void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count,
2353 *offset = *offset - len_complete + sg->offset; 2360 *offset = *offset - len_complete + sg->offset;
2354 2361
2355 /* Assumption: contiguous pages can be accessed as "page + i" */ 2362 /* Assumption: contiguous pages can be accessed as "page + i" */
2356 page = nth_page(sg->page, (*offset >> PAGE_SHIFT)); 2363 page = nth_page(sg_page(sg), (*offset >> PAGE_SHIFT));
2357 *offset &= ~PAGE_MASK; 2364 *offset &= ~PAGE_MASK;
2358 2365
2359 /* Bytes in this sg-entry from *offset to the end of the page */ 2366 /* Bytes in this sg-entry from *offset to the end of the page */
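The scsi_lib.c hunk is where chaining becomes visible: scsi_alloc_sgtable() builds large tables from fixed-size mempool segments, links each segment to the next with sg_chain(), and now flags the final in-use entry with sg_mark_end() so iterators such as for_each_sg() know where to stop. A schematic of the loop, with the pool bookkeeping simplified and a hypothetical pool handle:

	/* Schematic of chained sg allocation (pool details simplified). */
	struct scatterlist *first = NULL, *prev = NULL, *sgl;
	int left = nents;

	do {
		int this = left;

		if (this > SCSI_MAX_SG_SEGMENTS)
			this = SCSI_MAX_SG_SEGMENTS - 1;  /* keep a slot for the link */

		sgl = mempool_alloc(pool, gfp_mask);      /* one fixed-size segment */
		sg_init_table(sgl, SCSI_MAX_SG_SEGMENTS);
		left -= this;
		if (!first)
			first = sgl;
		if (prev)
			sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
		if (!left)
			sg_mark_end(sgl, this);           /* two-argument era form */
		prev = sgl;
	} while (left);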
diff --git a/drivers/scsi/seagate.c b/drivers/scsi/seagate.c
index ce80fa9ad8..b11324479b 100644
--- a/drivers/scsi/seagate.c
+++ b/drivers/scsi/seagate.c
@@ -999,14 +999,14 @@ connect_loop:
999 for (i = 0; i < nobuffs; ++i) 999 for (i = 0; i < nobuffs; ++i)
1000 printk("scsi%d : buffer %d address = %p length = %d\n", 1000 printk("scsi%d : buffer %d address = %p length = %d\n",
1001 hostno, i, 1001 hostno, i,
1002 page_address(buffer[i].page) + buffer[i].offset, 1002 sg_virt(&buffer[i]),
1003 buffer[i].length); 1003 buffer[i].length);
1004 } 1004 }
1005#endif 1005#endif
1006 1006
1007 buffer = (struct scatterlist *) SCint->request_buffer; 1007 buffer = (struct scatterlist *) SCint->request_buffer;
1008 len = buffer->length; 1008 len = buffer->length;
1009 data = page_address(buffer->page) + buffer->offset; 1009 data = sg_virt(buffer);
1010 } else { 1010 } else {
1011 DPRINTK (DEBUG_SG, "scsi%d : scatter gather not requested.\n", hostno); 1011 DPRINTK (DEBUG_SG, "scsi%d : scatter gather not requested.\n", hostno);
1012 buffer = NULL; 1012 buffer = NULL;
@@ -1239,7 +1239,7 @@ connect_loop:
1239 --nobuffs; 1239 --nobuffs;
1240 ++buffer; 1240 ++buffer;
1241 len = buffer->length; 1241 len = buffer->length;
1242 data = page_address(buffer->page) + buffer->offset; 1242 data = sg_virt(buffer);
1243 DPRINTK (DEBUG_SG, 1243 DPRINTK (DEBUG_SG,
1244 "scsi%d : next scatter-gather buffer len = %d address = %08x\n", 1244 "scsi%d : next scatter-gather buffer len = %d address = %08x\n",
1245 hostno, len, data); 1245 hostno, len, data);
@@ -1396,7 +1396,7 @@ connect_loop:
1396 --nobuffs; 1396 --nobuffs;
1397 ++buffer; 1397 ++buffer;
1398 len = buffer->length; 1398 len = buffer->length;
1399 data = page_address(buffer->page) + buffer->offset; 1399 data = sg_virt(buffer);
1400 DPRINTK (DEBUG_SG, "scsi%d : next scatter-gather buffer len = %d address = %08x\n", hostno, len, data); 1400 DPRINTK (DEBUG_SG, "scsi%d : next scatter-gather buffer len = %d address = %08x\n", hostno, len, data);
1401 } 1401 }
1402 break; 1402 break;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 7238b2dfc4..cc19710028 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1169,7 +1169,7 @@ sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type)
1169 len = vma->vm_end - sa; 1169 len = vma->vm_end - sa;
1170 len = (len < sg->length) ? len : sg->length; 1170 len = (len < sg->length) ? len : sg->length;
1171 if (offset < len) { 1171 if (offset < len) {
1172 page = virt_to_page(page_address(sg->page) + offset); 1172 page = virt_to_page(page_address(sg_page(sg)) + offset);
1173 get_page(page); /* increment page count */ 1173 get_page(page); /* increment page count */
1174 break; 1174 break;
1175 } 1175 }
@@ -1717,13 +1717,13 @@ st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
1717 goto out_unlock; */ 1717 goto out_unlock; */
1718 } 1718 }
1719 1719
1720 sgl[0].page = pages[0]; 1720 sg_set_page(sgl, pages[0]);
1721 sgl[0].offset = uaddr & ~PAGE_MASK; 1721 sgl[0].offset = uaddr & ~PAGE_MASK;
1722 if (nr_pages > 1) { 1722 if (nr_pages > 1) {
1723 sgl[0].length = PAGE_SIZE - sgl[0].offset; 1723 sgl[0].length = PAGE_SIZE - sgl[0].offset;
1724 count -= sgl[0].length; 1724 count -= sgl[0].length;
1725 for (i=1; i < nr_pages ; i++) { 1725 for (i=1; i < nr_pages ; i++) {
1726 sgl[i].page = pages[i]; 1726 sg_set_page(&sgl[i], pages[i]);
1727 sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE; 1727 sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE;
1728 count -= PAGE_SIZE; 1728 count -= PAGE_SIZE;
1729 } 1729 }
@@ -1754,7 +1754,7 @@ st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
1754 int i; 1754 int i;
1755 1755
1756 for (i=0; i < nr_pages; i++) { 1756 for (i=0; i < nr_pages; i++) {
1757 struct page *page = sgl[i].page; 1757 struct page *page = sg_page(&sgl[i]);
1758 1758
1759 if (dirtied) 1759 if (dirtied)
1760 SetPageDirty(page); 1760 SetPageDirty(page);
@@ -1854,7 +1854,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
1854 scatter_elem_sz_prev = ret_sz; 1854 scatter_elem_sz_prev = ret_sz;
1855 } 1855 }
1856 } 1856 }
1857 sg->page = p; 1857 sg_set_page(sg, p);
1858 sg->length = (ret_sz > num) ? num : ret_sz; 1858 sg->length = (ret_sz > num) ? num : ret_sz;
1859 1859
1860 SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, " 1860 SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, "
@@ -1907,14 +1907,14 @@ sg_write_xfer(Sg_request * srp)
1907 onum = 1; 1907 onum = 1;
1908 1908
1909 ksglen = sg->length; 1909 ksglen = sg->length;
1910 p = page_address(sg->page); 1910 p = page_address(sg_page(sg));
1911 for (j = 0, k = 0; j < onum; ++j) { 1911 for (j = 0, k = 0; j < onum; ++j) {
1912 res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up); 1912 res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up);
1913 if (res) 1913 if (res)
1914 return res; 1914 return res;
1915 1915
1916 for (; p; sg = sg_next(sg), ksglen = sg->length, 1916 for (; p; sg = sg_next(sg), ksglen = sg->length,
1917 p = page_address(sg->page)) { 1917 p = page_address(sg_page(sg))) {
1918 if (usglen <= 0) 1918 if (usglen <= 0)
1919 break; 1919 break;
1920 if (ksglen > usglen) { 1920 if (ksglen > usglen) {
@@ -1991,12 +1991,12 @@ sg_remove_scat(Sg_scatter_hold * schp)
1991 } else { 1991 } else {
1992 int k; 1992 int k;
1993 1993
1994 for (k = 0; (k < schp->k_use_sg) && sg->page; 1994 for (k = 0; (k < schp->k_use_sg) && sg_page(sg);
1995 ++k, sg = sg_next(sg)) { 1995 ++k, sg = sg_next(sg)) {
1996 SCSI_LOG_TIMEOUT(5, printk( 1996 SCSI_LOG_TIMEOUT(5, printk(
1997 "sg_remove_scat: k=%d, pg=0x%p, len=%d\n", 1997 "sg_remove_scat: k=%d, pg=0x%p, len=%d\n",
1998 k, sg->page, sg->length)); 1998 k, sg_page(sg), sg->length));
1999 sg_page_free(sg->page, sg->length); 1999 sg_page_free(sg_page(sg), sg->length);
2000 } 2000 }
2001 } 2001 }
2002 kfree(schp->buffer); 2002 kfree(schp->buffer);
@@ -2038,7 +2038,7 @@ sg_read_xfer(Sg_request * srp)
2038 } else 2038 } else
2039 onum = 1; 2039 onum = 1;
2040 2040
2041 p = page_address(sg->page); 2041 p = page_address(sg_page(sg));
2042 ksglen = sg->length; 2042 ksglen = sg->length;
2043 for (j = 0, k = 0; j < onum; ++j) { 2043 for (j = 0, k = 0; j < onum; ++j) {
2044 res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up); 2044 res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up);
@@ -2046,7 +2046,7 @@ sg_read_xfer(Sg_request * srp)
2046 return res; 2046 return res;
2047 2047
2048 for (; p; sg = sg_next(sg), ksglen = sg->length, 2048 for (; p; sg = sg_next(sg), ksglen = sg->length,
2049 p = page_address(sg->page)) { 2049 p = page_address(sg_page(sg))) {
2050 if (usglen <= 0) 2050 if (usglen <= 0)
2051 break; 2051 break;
2052 if (ksglen > usglen) { 2052 if (ksglen > usglen) {
@@ -2092,15 +2092,15 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer)
2092 if ((!outp) || (num_read_xfer <= 0)) 2092 if ((!outp) || (num_read_xfer <= 0))
2093 return 0; 2093 return 0;
2094 2094
2095 for (k = 0; (k < schp->k_use_sg) && sg->page; ++k, sg = sg_next(sg)) { 2095 for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) {
2096 num = sg->length; 2096 num = sg->length;
2097 if (num > num_read_xfer) { 2097 if (num > num_read_xfer) {
2098 if (__copy_to_user(outp, page_address(sg->page), 2098 if (__copy_to_user(outp, page_address(sg_page(sg)),
2099 num_read_xfer)) 2099 num_read_xfer))
2100 return -EFAULT; 2100 return -EFAULT;
2101 break; 2101 break;
2102 } else { 2102 } else {
2103 if (__copy_to_user(outp, page_address(sg->page), 2103 if (__copy_to_user(outp, page_address(sg_page(sg)),
2104 num)) 2104 num))
2105 return -EFAULT; 2105 return -EFAULT;
2106 num_read_xfer -= num; 2106 num_read_xfer -= num;
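st_map_user_pages() in sg.c (and sgl_map_user_pages() in st.c below) populate an sg table straight from pinned user pages: the first entry absorbs the sub-page offset of the user address, and the remaining entries are whole pages until the byte count runs out. A condensed sketch, assuming pages[] already came from get_user_pages():

	/* Sketch: build an sgl over pinned user pages. */
	static void sgl_from_user_pages_sketch(struct scatterlist *sgl,
					       struct page **pages, int nr_pages,
					       unsigned long uaddr, size_t count)
	{
		int i;

		sg_set_page(&sgl[0], pages[0]);
		sgl[0].offset = uaddr & ~PAGE_MASK;    /* sub-page start offset */
		sgl[0].length = nr_pages > 1 ? PAGE_SIZE - sgl[0].offset : count;
		count -= sgl[0].length;
		for (i = 1; i < nr_pages; i++) {
			sg_set_page(&sgl[i], pages[i]);
			sgl[i].offset = 0;
			sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE;
			count -= PAGE_SIZE;
		}
	}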
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 73c44cbdea..ce69b9efc1 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3797,7 +3797,7 @@ static void buf_to_sg(struct st_buffer *STbp, unsigned int length)
3797 sg = &(STbp->sg[0]); 3797 sg = &(STbp->sg[0]);
3798 frp = STbp->frp; 3798 frp = STbp->frp;
3799 for (i=count=0; count < length; i++) { 3799 for (i=count=0; count < length; i++) {
3800 sg[i].page = frp[i].page; 3800 sg_set_page(&sg[i], frp[i].page);
3801 if (length - count > frp[i].length) 3801 if (length - count > frp[i].length)
3802 sg[i].length = frp[i].length; 3802 sg[i].length = frp[i].length;
3803 else 3803 else
@@ -4446,14 +4446,14 @@ static int sgl_map_user_pages(struct scatterlist *sgl, const unsigned int max_pa
4446 } 4446 }
4447 4447
4448 /* Populate the scatter/gather list */ 4448 /* Populate the scatter/gather list */
4449 sgl[0].page = pages[0]; 4449 sg_set_page(&sgl[0], pages[0]);
4450 sgl[0].offset = uaddr & ~PAGE_MASK; 4450 sgl[0].offset = uaddr & ~PAGE_MASK;
4451 if (nr_pages > 1) { 4451 if (nr_pages > 1) {
4452 sgl[0].length = PAGE_SIZE - sgl[0].offset; 4452 sgl[0].length = PAGE_SIZE - sgl[0].offset;
4453 count -= sgl[0].length; 4453 count -= sgl[0].length;
4454 for (i=1; i < nr_pages ; i++) { 4454 for (i=1; i < nr_pages ; i++) {
4455 sg_set_page(&sgl[i], pages[i]);
4455 sgl[i].offset = 0; 4456 sgl[i].offset = 0;
4456 sgl[i].page = pages[i];
4457 sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE; 4457 sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE;
4458 count -= PAGE_SIZE; 4458 count -= PAGE_SIZE;
4459 } 4459 }
@@ -4483,7 +4483,7 @@ static int sgl_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_p
4483 int i; 4483 int i;
4484 4484
4485 for (i=0; i < nr_pages; i++) { 4485 for (i=0; i < nr_pages; i++) {
4486 struct page *page = sgl[i].page; 4486 struct page *page = sg_page(&sgl[i]);
4487 4487
4488 if (dirtied) 4488 if (dirtied)
4489 SetPageDirty(page); 4489 SetPageDirty(page);
diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c
index 4aafe89b55..2dcde373b2 100644
--- a/drivers/scsi/sun3_NCR5380.c
+++ b/drivers/scsi/sun3_NCR5380.c
@@ -272,8 +272,7 @@ static struct scsi_host_template *the_template = NULL;
272#define HOSTNO instance->host_no 272#define HOSTNO instance->host_no
273#define H_NO(cmd) (cmd)->device->host->host_no 273#define H_NO(cmd) (cmd)->device->host->host_no
274 274
275#define SGADDR(buffer) (void *)(((unsigned long)page_address((buffer)->page)) + \ 275#define SGADDR(buffer) (void *)(((unsigned long)sg_virt(((buffer)))))
276 (buffer)->offset)
277 276
278#ifdef SUPPORT_TAGS 277#ifdef SUPPORT_TAGS
279 278
diff --git a/drivers/scsi/sym53c416.c b/drivers/scsi/sym53c416.c
index 8befab7e98..90cee94d95 100644
--- a/drivers/scsi/sym53c416.c
+++ b/drivers/scsi/sym53c416.c
@@ -196,7 +196,7 @@ static unsigned int sym53c416_base_3[2] = {0,0};
196 196
197#define MAXHOSTS 4 197#define MAXHOSTS 4
198 198
199#define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset)) 199#define SG_ADDRESS(buffer) ((char *) sg_virt((buffer)))
200 200
201enum phases 201enum phases
202{ 202{
diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c
index 5c72ca31a4..44193049c4 100644
--- a/drivers/scsi/tmscsim.c
+++ b/drivers/scsi/tmscsim.c
@@ -430,10 +430,7 @@ static __inline__ void dc390_Going_remove (struct dc390_dcb* pDCB, struct dc390_
430 430
431static struct scatterlist* dc390_sg_build_single(struct scatterlist *sg, void *addr, unsigned int length) 431static struct scatterlist* dc390_sg_build_single(struct scatterlist *sg, void *addr, unsigned int length)
432{ 432{
433 memset(sg, 0, sizeof(struct scatterlist)); 433 sg_init_one(sg, addr, length);
434 sg->page = virt_to_page(addr);
435 sg->length = length;
436 sg->offset = (unsigned long)addr & ~PAGE_MASK;
437 return sg; 434 return sg;
438} 435}
439 436
diff --git a/drivers/scsi/ultrastor.c b/drivers/scsi/ultrastor.c
index ea72bbeb8f..6d1f0edd79 100644
--- a/drivers/scsi/ultrastor.c
+++ b/drivers/scsi/ultrastor.c
@@ -681,7 +681,7 @@ static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt)
681 681
682 max = scsi_sg_count(SCpnt); 682 max = scsi_sg_count(SCpnt);
683 scsi_for_each_sg(SCpnt, sg, max, i) { 683 scsi_for_each_sg(SCpnt, sg, max, i) {
684 mscp->sglist[i].address = isa_page_to_bus(sg->page) + sg->offset; 684 mscp->sglist[i].address = isa_page_to_bus(sg_page(sg)) + sg->offset;
685 mscp->sglist[i].num_bytes = sg->length; 685 mscp->sglist[i].num_bytes = sg->length;
686 transfer_length += sg->length; 686 transfer_length += sg->length;
687 } 687 }
diff --git a/drivers/scsi/wd33c93.c b/drivers/scsi/wd33c93.c
index 0e8e642fd3..fdbb92d1f7 100644
--- a/drivers/scsi/wd33c93.c
+++ b/drivers/scsi/wd33c93.c
@@ -410,8 +410,7 @@ wd33c93_queuecommand(struct scsi_cmnd *cmd,
410 if (cmd->use_sg) { 410 if (cmd->use_sg) {
411 cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer; 411 cmd->SCp.buffer = (struct scatterlist *) cmd->request_buffer;
412 cmd->SCp.buffers_residual = cmd->use_sg - 1; 412 cmd->SCp.buffers_residual = cmd->use_sg - 1;
413 cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + 413 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
414 cmd->SCp.buffer->offset;
415 cmd->SCp.this_residual = cmd->SCp.buffer->length; 414 cmd->SCp.this_residual = cmd->SCp.buffer->length;
416 } else { 415 } else {
417 cmd->SCp.buffer = NULL; 416 cmd->SCp.buffer = NULL;
@@ -745,8 +744,7 @@ transfer_bytes(const wd33c93_regs regs, struct scsi_cmnd *cmd,
745 ++cmd->SCp.buffer; 744 ++cmd->SCp.buffer;
746 --cmd->SCp.buffers_residual; 745 --cmd->SCp.buffers_residual;
747 cmd->SCp.this_residual = cmd->SCp.buffer->length; 746 cmd->SCp.this_residual = cmd->SCp.buffer->length;
748 cmd->SCp.ptr = page_address(cmd->SCp.buffer->page) + 747 cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
749 cmd->SCp.buffer->offset;
750 } 748 }
751 if (!cmd->SCp.this_residual) /* avoid bogus setups */ 749 if (!cmd->SCp.this_residual) /* avoid bogus setups */
752 return; 750 return;
diff --git a/drivers/scsi/wd7000.c b/drivers/scsi/wd7000.c
index 255c611e78..03cd44f231 100644
--- a/drivers/scsi/wd7000.c
+++ b/drivers/scsi/wd7000.c
@@ -1123,7 +1123,7 @@ static int wd7000_queuecommand(struct scsi_cmnd *SCpnt,
1123 any2scsi(scb->maxlen, nseg * sizeof(Sgb)); 1123 any2scsi(scb->maxlen, nseg * sizeof(Sgb));
1124 1124
1125 scsi_for_each_sg(SCpnt, sg, nseg, i) { 1125 scsi_for_each_sg(SCpnt, sg, nseg, i) {
1126 any2scsi(sgb[i].ptr, isa_page_to_bus(sg->page) + sg->offset); 1126 any2scsi(sgb[i].ptr, isa_page_to_bus(sg_page(sg)) + sg->offset);
1127 any2scsi(sgb[i].len, sg->length); 1127 any2scsi(sgb[i].len, sg->length);
1128 } 1128 }
1129 } else { 1129 } else {
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 87665d7df6..ed438bc7e9 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -624,7 +624,7 @@ choice
624 624
625config SERIAL_BFIN_DMA 625config SERIAL_BFIN_DMA
626 bool "DMA mode" 626 bool "DMA mode"
627 depends on DMA_UNCACHED_1M && !KGDB_UART 627 depends on !DMA_UNCACHED_NONE && !KGDB_UART
628 help 628 help
629 This driver works under DMA mode. If this option is selected, the 629 This driver works under DMA mode. If this option is selected, the
630 blackfin simple dma driver is also enabled. 630 blackfin simple dma driver is also enabled.
diff --git a/drivers/serial/mcf.c b/drivers/serial/mcf.c
new file mode 100644
index 0000000000..a7d4360ea7
--- /dev/null
+++ b/drivers/serial/mcf.c
@@ -0,0 +1,653 @@
1/****************************************************************************/
2
3/*
4 * mcf.c -- Freescale ColdFire UART driver
5 *
6 * (C) Copyright 2003-2007, Greg Ungerer <gerg@snapgear.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14/****************************************************************************/
15
16#include <linux/kernel.h>
17#include <linux/init.h>
18#include <linux/interrupt.h>
19#include <linux/module.h>
20#include <linux/console.h>
21#include <linux/tty.h>
22#include <linux/tty_flip.h>
23#include <linux/serial.h>
24#include <linux/serial_core.h>
25#include <linux/io.h>
26#include <asm/coldfire.h>
27#include <asm/mcfsim.h>
28#include <asm/mcfuart.h>
29#include <asm/nettel.h>
30
31/****************************************************************************/
32
33/*
34 * Some boards implement the DTR/DCD lines using GPIO lines, most
35 * don't. Dummy out the access macros for those that don't. Those
36 * that do should define these macros somewhere in their board
37 * specific include files.
38 */
39#if !defined(mcf_getppdcd)
40#define mcf_getppdcd(p) (1)
41#endif
42#if !defined(mcf_getppdtr)
43#define mcf_getppdtr(p) (1)
44#endif
45#if !defined(mcf_setppdtr)
46#define mcf_setppdtr(p, v) do { } while (0)
47#endif
48
49/****************************************************************************/
50
51/*
52 * Local per-uart structure.
53 */
54struct mcf_uart {
55 struct uart_port port;
56 unsigned int sigs; /* Local copy of line sigs */
57 unsigned char imr; /* Local IMR mirror */
58};
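/*
 * Background note: UIMR on these ColdFire UARTs shares its offset with
 * the read-only UISR and cannot be read back, so the driver mirrors the
 * current mask in pp->imr and rewrites it in full whenever an interrupt
 * source is enabled or disabled (see mcf_start_tx()/mcf_stop_tx() below).
 */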
59
60/****************************************************************************/
61
62static unsigned int mcf_tx_empty(struct uart_port *port)
63{
64 return (readb(port->membase + MCFUART_USR) & MCFUART_USR_TXEMPTY) ?
65 TIOCSER_TEMT : 0;
66}
67
68/****************************************************************************/
69
70static unsigned int mcf_get_mctrl(struct uart_port *port)
71{
72 struct mcf_uart *pp = (struct mcf_uart *) port;
73 unsigned long flags;
74 unsigned int sigs;
75
76 spin_lock_irqsave(&port->lock, flags);
77 sigs = (readb(port->membase + MCFUART_UIPR) & MCFUART_UIPR_CTS) ?
78 0 : TIOCM_CTS;
79 sigs |= (pp->sigs & TIOCM_RTS);
80 sigs |= (mcf_getppdcd(port->line) ? TIOCM_CD : 0);
81 sigs |= (mcf_getppdtr(port->line) ? TIOCM_DTR : 0);
82 spin_unlock_irqrestore(&port->lock, flags);
83 return sigs;
84}
85
86/****************************************************************************/
87
88static void mcf_set_mctrl(struct uart_port *port, unsigned int sigs)
89{
90 struct mcf_uart *pp = (struct mcf_uart *) port;
91 unsigned long flags;
92
93 spin_lock_irqsave(&port->lock, flags);
94 pp->sigs = sigs;
95 mcf_setppdtr(port->line, (sigs & TIOCM_DTR));
96 if (sigs & TIOCM_RTS)
97 writeb(MCFUART_UOP_RTS, port->membase + MCFUART_UOP1);
98 else
99 writeb(MCFUART_UOP_RTS, port->membase + MCFUART_UOP0);
100 spin_unlock_irqrestore(&port->lock, flags);
101}
102
103/****************************************************************************/
104
105static void mcf_start_tx(struct uart_port *port)
106{
107 struct mcf_uart *pp = (struct mcf_uart *) port;
108 unsigned long flags;
109
110 spin_lock_irqsave(&port->lock, flags);
111 pp->imr |= MCFUART_UIR_TXREADY;
112 writeb(pp->imr, port->membase + MCFUART_UIMR);
113 spin_unlock_irqrestore(&port->lock, flags);
114}
115
116/****************************************************************************/
117
118static void mcf_stop_tx(struct uart_port *port)
119{
120 struct mcf_uart *pp = (struct mcf_uart *) port;
121 unsigned long flags;
122
123 spin_lock_irqsave(&port->lock, flags);
124 pp->imr &= ~MCFUART_UIR_TXREADY;
125 writeb(pp->imr, port->membase + MCFUART_UIMR);
126 spin_unlock_irqrestore(&port->lock, flags);
127}
128
129/****************************************************************************/
130
131static void mcf_stop_rx(struct uart_port *port)
132{
133 struct mcf_uart *pp = (struct mcf_uart *) port;
134 unsigned long flags;
135
136 spin_lock_irqsave(&port->lock, flags);
137 pp->imr &= ~MCFUART_UIR_RXREADY;
138 writeb(pp->imr, port->membase + MCFUART_UIMR);
139 spin_unlock_irqrestore(&port->lock, flags);
140}
141
142/****************************************************************************/
143
144static void mcf_break_ctl(struct uart_port *port, int break_state)
145{
146 unsigned long flags;
147
148 spin_lock_irqsave(&port->lock, flags);
149 if (break_state == -1)
150 writeb(MCFUART_UCR_CMDBREAKSTART, port->membase + MCFUART_UCR);
151 else
152 writeb(MCFUART_UCR_CMDBREAKSTOP, port->membase + MCFUART_UCR);
153 spin_unlock_irqrestore(&port->lock, flags);
154}
155
156/****************************************************************************/
157
158static void mcf_enable_ms(struct uart_port *port)
159{
160}
161
162/****************************************************************************/
163
164static int mcf_startup(struct uart_port *port)
165{
166 struct mcf_uart *pp = (struct mcf_uart *) port;
167 unsigned long flags;
168
169 spin_lock_irqsave(&port->lock, flags);
170
171 /* Reset UART, get it into known state... */
172 writeb(MCFUART_UCR_CMDRESETRX, port->membase + MCFUART_UCR);
173 writeb(MCFUART_UCR_CMDRESETTX, port->membase + MCFUART_UCR);
174
175 /* Enable the UART transmitter and receiver */
176 writeb(MCFUART_UCR_RXENABLE | MCFUART_UCR_TXENABLE,
177 port->membase + MCFUART_UCR);
178
179 /* Enable RX interrupts now */
180 pp->imr = MCFUART_UIR_RXREADY;
181 writeb(pp->imr, port->membase + MCFUART_UIMR);
182
183 spin_unlock_irqrestore(&port->lock, flags);
184
185 return 0;
186}
187
188/****************************************************************************/
189
190static void mcf_shutdown(struct uart_port *port)
191{
192 struct mcf_uart *pp = (struct mcf_uart *) port;
193 unsigned long flags;
194
195 spin_lock_irqsave(&port->lock, flags);
196
197 /* Disable all interrupts now */
198 pp->imr = 0;
199 writeb(pp->imr, port->membase + MCFUART_UIMR);
200
201 /* Disable UART transmitter and receiver */
202 writeb(MCFUART_UCR_CMDRESETRX, port->membase + MCFUART_UCR);
203 writeb(MCFUART_UCR_CMDRESETTX, port->membase + MCFUART_UCR);
204
205 spin_unlock_irqrestore(&port->lock, flags);
206}
207
208/****************************************************************************/
209
210static void mcf_set_termios(struct uart_port *port, struct ktermios *termios,
211 struct ktermios *old)
212{
213 unsigned long flags;
214 unsigned int baud, baudclk;
215 unsigned char mr1, mr2;
216
217 baud = uart_get_baud_rate(port, termios, old, 0, 230400);
218 baudclk = ((MCF_BUSCLK / baud) + 16) / 32;
219
220 mr1 = MCFUART_MR1_RXIRQRDY | MCFUART_MR1_RXERRCHAR;
221 mr2 = 0;
222
223 switch (termios->c_cflag & CSIZE) {
224 case CS5: mr1 |= MCFUART_MR1_CS5; break;
225 case CS6: mr1 |= MCFUART_MR1_CS6; break;
226 case CS7: mr1 |= MCFUART_MR1_CS7; break;
227 case CS8:
228 default: mr1 |= MCFUART_MR1_CS8; break;
229 }
230
231 if (termios->c_cflag & PARENB) {
232 if (termios->c_cflag & CMSPAR) {
233 if (termios->c_cflag & PARODD)
234 mr1 |= MCFUART_MR1_PARITYMARK;
235 else
236 mr1 |= MCFUART_MR1_PARITYSPACE;
237 } else {
238 if (termios->c_cflag & PARODD)
239 mr1 |= MCFUART_MR1_PARITYODD;
240 else
241 mr1 |= MCFUART_MR1_PARITYEVEN;
242 }
243 } else {
244 mr1 |= MCFUART_MR1_PARITYNONE;
245 }
246
247 if (termios->c_cflag & CSTOPB)
248 mr2 |= MCFUART_MR2_STOP2;
249 else
250 mr2 |= MCFUART_MR2_STOP1;
251
252 if (termios->c_cflag & CRTSCTS) {
253 mr1 |= MCFUART_MR1_RXRTS;
254 mr2 |= MCFUART_MR2_TXCTS;
255 }
256
257 spin_lock_irqsave(&port->lock, flags);
258 writeb(MCFUART_UCR_CMDRESETRX, port->membase + MCFUART_UCR);
259 writeb(MCFUART_UCR_CMDRESETTX, port->membase + MCFUART_UCR);
260 writeb(MCFUART_UCR_CMDRESETMRPTR, port->membase + MCFUART_UCR);
261 writeb(mr1, port->membase + MCFUART_UMR);
262 writeb(mr2, port->membase + MCFUART_UMR);
263 writeb((baudclk & 0xff00) >> 8, port->membase + MCFUART_UBG1);
264 writeb((baudclk & 0xff), port->membase + MCFUART_UBG2);
265 writeb(MCFUART_UCSR_RXCLKTIMER | MCFUART_UCSR_TXCLKTIMER,
266 port->membase + MCFUART_UCSR);
267 writeb(MCFUART_UCR_RXENABLE | MCFUART_UCR_TXENABLE,
268 port->membase + MCFUART_UCR);
269 spin_unlock_irqrestore(&port->lock, flags);
270}
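/*
 * Divisor arithmetic: the UART prescales the bus clock by 32, so the
 * target divisor is MCF_BUSCLK / (32 * baud); the "+ 16" rounds to the
 * nearest integer divisor. Worked example with a hypothetical 66 MHz bus
 * clock at 115200 baud: (66000000 / 115200 + 16) / 32 = (572 + 16) / 32
 * = 18, giving roughly 0.5% baud rate error.
 */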
271
272/****************************************************************************/
273
274static void mcf_rx_chars(struct mcf_uart *pp)
275{
276 struct uart_port *port = (struct uart_port *) pp;
277 unsigned char status, ch, flag;
278
279 while ((status = readb(port->membase + MCFUART_USR)) & MCFUART_USR_RXREADY) {
280 ch = readb(port->membase + MCFUART_URB);
281 flag = TTY_NORMAL;
282 port->icount.rx++;
283
284 if (status & MCFUART_USR_RXERR) {
285 writeb(MCFUART_UCR_CMDRESETERR,
286 port->membase + MCFUART_UCR);
287
288 if (status & MCFUART_USR_RXBREAK) {
289 port->icount.brk++;
290 if (uart_handle_break(port))
291 continue;
292 } else if (status & MCFUART_USR_RXPARITY) {
293 port->icount.parity++;
294 } else if (status & MCFUART_USR_RXOVERRUN) {
295 port->icount.overrun++;
296 } else if (status & MCFUART_USR_RXFRAMING) {
297 port->icount.frame++;
298 }
299
300 status &= port->read_status_mask;
301
302 if (status & MCFUART_USR_RXBREAK)
303 flag = TTY_BREAK;
304 else if (status & MCFUART_USR_RXPARITY)
305 flag = TTY_PARITY;
306 else if (status & MCFUART_USR_RXFRAMING)
307 flag = TTY_FRAME;
308 }
309
310 if (uart_handle_sysrq_char(port, ch))
311 continue;
312 uart_insert_char(port, status, MCFUART_USR_RXOVERRUN, ch, flag);
313 }
314
315 tty_flip_buffer_push(port->info->tty);
316}
317
318/****************************************************************************/
319
320static void mcf_tx_chars(struct mcf_uart *pp)
321{
322 struct uart_port *port = &pp->port;
323 struct circ_buf *xmit = &port->info->xmit;
324
325 if (port->x_char) {
326 /* Send special char - probably flow control */
327 writeb(port->x_char, port->membase + MCFUART_UTB);
328 port->x_char = 0;
329 port->icount.tx++;
330 return;
331 }
332
333 while (readb(port->membase + MCFUART_USR) & MCFUART_USR_TXREADY) {
334 if (xmit->head == xmit->tail)
335 break;
336 writeb(xmit->buf[xmit->tail], port->membase + MCFUART_UTB);
337 xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
338 port->icount.tx++;
339 }
340
341 if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
342 uart_write_wakeup(port);
343
344 if (xmit->head == xmit->tail) {
345 pp->imr &= ~MCFUART_UIR_TXREADY;
346 writeb(pp->imr, port->membase + MCFUART_UIMR);
347 }
348}
349
350/****************************************************************************/
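The tail update in mcf_tx_chars() above only wraps correctly because UART_XMIT_SIZE is a power of two, so AND-ing with (size - 1) is a cheap modulus. A minimal sketch (4096 mirrors serial_core's value, treated here as an assumption):

	#include <assert.h>

	#define XMIT_SIZE 4096	/* stands in for UART_XMIT_SIZE */

	static unsigned int advance(unsigned int tail)
	{
		/* Same idiom as the driver: mask instead of modulus. */
		return (tail + 1) & (XMIT_SIZE - 1);
	}

	int main(void)
	{
		assert(advance(0) == 1);
		assert(advance(XMIT_SIZE - 1) == 0);	/* wraps to 0 */
		return 0;
	}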
351
352static irqreturn_t mcf_interrupt(int irq, void *data)
353{
354 struct uart_port *port = data;
355 struct mcf_uart *pp = container_of(port, struct mcf_uart, port);
356 unsigned int isr;
357
358 isr = readb(port->membase + MCFUART_UISR) & pp->imr;
359 if (isr & MCFUART_UIR_RXREADY)
360 mcf_rx_chars(pp);
361 if (isr & MCFUART_UIR_TXREADY)
362 mcf_tx_chars(pp);
363 return IRQ_HANDLED;
364}
365
366/****************************************************************************/
367
368static void mcf_config_port(struct uart_port *port, int flags)
369{
370 port->type = PORT_MCF;
371
372 /* Clear mask, so no surprise interrupts. */
373 writeb(0, port->membase + MCFUART_UIMR);
374
375 if (request_irq(port->irq, mcf_interrupt, IRQF_DISABLED, "UART", port))
376 printk(KERN_ERR "MCF: unable to attach ColdFire UART %d "
377 "interrupt vector=%d\n", port->line, port->irq);
378}
379
380/****************************************************************************/
381
382static const char *mcf_type(struct uart_port *port)
383{
384 return (port->type == PORT_MCF) ? "ColdFire UART" : NULL;
385}
386
387/****************************************************************************/
388
389static int mcf_request_port(struct uart_port *port)
390{
391 /* UARTs always present */
392 return 0;
393}
394
395/****************************************************************************/
396
397static void mcf_release_port(struct uart_port *port)
398{
399 /* Nothing to release... */
400}
401
402/****************************************************************************/
403
404static int mcf_verify_port(struct uart_port *port, struct serial_struct *ser)
405{
406 if ((ser->type != PORT_UNKNOWN) && (ser->type != PORT_MCF))
407 return -EINVAL;
408 return 0;
409}
410
411/****************************************************************************/
412
413/*
414 * Define the basic serial functions we support.
415 */
416static struct uart_ops mcf_uart_ops = {
417 .tx_empty = mcf_tx_empty,
418 .get_mctrl = mcf_get_mctrl,
419 .set_mctrl = mcf_set_mctrl,
420 .start_tx = mcf_start_tx,
421 .stop_tx = mcf_stop_tx,
422 .stop_rx = mcf_stop_rx,
423 .enable_ms = mcf_enable_ms,
424 .break_ctl = mcf_break_ctl,
425 .startup = mcf_startup,
426 .shutdown = mcf_shutdown,
427 .set_termios = mcf_set_termios,
428 .type = mcf_type,
429 .request_port = mcf_request_port,
430 .release_port = mcf_release_port,
431 .config_port = mcf_config_port,
432 .verify_port = mcf_verify_port,
433};
434
435static struct mcf_uart mcf_ports[3];
436
437#define MCF_MAXPORTS ARRAY_SIZE(mcf_ports)
438
439/****************************************************************************/
440#if defined(CONFIG_SERIAL_MCF_CONSOLE)
441/****************************************************************************/
442
443int __init early_mcf_setup(struct mcf_platform_uart *platp)
444{
445 struct uart_port *port;
446 int i;
447
448 for (i = 0; ((i < MCF_MAXPORTS) && (platp[i].mapbase)); i++) {
449 port = &mcf_ports[i].port;
450
451 port->line = i;
452 port->type = PORT_MCF;
453 port->mapbase = platp[i].mapbase;
454 port->membase = (platp[i].membase) ? platp[i].membase :
455 (unsigned char __iomem *) port->mapbase;
456 port->iotype = SERIAL_IO_MEM;
457 port->irq = platp[i].irq;
458 port->uartclk = MCF_BUSCLK;
459 port->flags = ASYNC_BOOT_AUTOCONF;
460 port->ops = &mcf_uart_ops;
461 }
462
463 return 0;
464}
465
466/****************************************************************************/
467
468static void mcf_console_putc(struct console *co, const char c)
469{
470 struct uart_port *port = &(mcf_ports + co->index)->port;
471 int i;
472
473 for (i = 0; (i < 0x10000); i++) {
474 if (readb(port->membase + MCFUART_USR) & MCFUART_USR_TXREADY)
475 break;
476 }
477 writeb(c, port->membase + MCFUART_UTB);
478 for (i = 0; (i < 0x10000); i++) {
479 if (readb(port->membase + MCFUART_USR) & MCFUART_USR_TXREADY)
480 break;
481 }
482}
483
484/****************************************************************************/
485
486static void mcf_console_write(struct console *co, const char *s, unsigned int count)
487{
488 for (; (count); count--, s++) {
489 mcf_console_putc(co, *s);
490 if (*s == '\n')
491 mcf_console_putc(co, '\r');
492 }
493}
494
495/****************************************************************************/
496
497static int __init mcf_console_setup(struct console *co, char *options)
498{
499 struct uart_port *port;
500 int baud = CONFIG_SERIAL_MCF_BAUDRATE;
501 int bits = 8;
502 int parity = 'n';
503 int flow = 'n';
504
505 if ((co->index < 0) || (co->index >= MCF_MAXPORTS))
506 co->index = 0;
507 port = &mcf_ports[co->index].port;
508 if (port->membase == 0)
509 return -ENODEV;
510
511 if (options)
512 uart_parse_options(options, &baud, &parity, &bits, &flow);
513
514 return uart_set_options(port, co, baud, parity, bits, flow);
515}
516
517/****************************************************************************/
518
519static struct uart_driver mcf_driver;
520
521static struct console mcf_console = {
522 .name = "ttyS",
523 .write = mcf_console_write,
524 .device = uart_console_device,
525 .setup = mcf_console_setup,
526 .flags = CON_PRINTBUFFER,
527 .index = -1,
528 .data = &mcf_driver,
529};
530
531static int __init mcf_console_init(void)
532{
533 register_console(&mcf_console);
534 return 0;
535}
536
537console_initcall(mcf_console_init);
538
539#define MCF_CONSOLE &mcf_console
540
541/****************************************************************************/
542#else
543/****************************************************************************/
544
545#define MCF_CONSOLE NULL
546
547/****************************************************************************/
548#endif /* CONFIG_SERIAL_MCF_CONSOLE */
549/****************************************************************************/
550
551/*
552 * Define the mcf UART driver structure.
553 */
554static struct uart_driver mcf_driver = {
555 .owner = THIS_MODULE,
556 .driver_name = "mcf",
557 .dev_name = "ttyS",
558 .major = TTY_MAJOR,
559 .minor = 64,
560 .nr = MCF_MAXPORTS,
561 .cons = MCF_CONSOLE,
562};
563
564/****************************************************************************/
565
566static int __devinit mcf_probe(struct platform_device *pdev)
567{
568 struct mcf_platform_uart *platp = pdev->dev.platform_data;
569 struct uart_port *port;
570 int i;
571
572 for (i = 0; ((i < MCF_MAXPORTS) && (platp[i].mapbase)); i++) {
573 port = &mcf_ports[i].port;
574
575 port->line = i;
576 port->type = PORT_MCF;
577 port->mapbase = platp[i].mapbase;
578 port->membase = (platp[i].membase) ? platp[i].membase :
579 (unsigned char __iomem *) platp[i].mapbase;
580 port->iotype = SERIAL_IO_MEM;
581 port->irq = platp[i].irq;
582 port->uartclk = MCF_BUSCLK;
583 port->ops = &mcf_uart_ops;
584 port->flags = ASYNC_BOOT_AUTOCONF;
585
586 uart_add_one_port(&mcf_driver, port);
587 }
588
589 return 0;
590}
591
592/****************************************************************************/
593
594static int mcf_remove(struct platform_device *pdev)
595{
596 struct uart_port *port;
597 int i;
598
599 for (i = 0; (i < MCF_MAXPORTS); i++) {
600 port = &mcf_ports[i].port;
601 if (port->membase)
602 uart_remove_one_port(&mcf_driver, port);
603 }
604
605 return 0;
606}
607
608/****************************************************************************/
609
610static struct platform_driver mcf_platform_driver = {
611 .probe = mcf_probe,
612 .remove = __devexit_p(mcf_remove),
613 .driver = {
614 .name = "mcfuart",
615 .owner = THIS_MODULE,
616 },
617};
618
619/****************************************************************************/
620
621static int __init mcf_init(void)
622{
623 int rc;
624
625 printk(KERN_INFO "ColdFire internal UART serial driver\n");
626
627 rc = uart_register_driver(&mcf_driver);
628 if (rc)
629 return rc;
630 rc = platform_driver_register(&mcf_platform_driver);
631 if (rc)
632 uart_unregister_driver(&mcf_driver);
633 return rc;
634}
635
636/****************************************************************************/
637
638static void __exit mcf_exit(void)
639{
640 platform_driver_unregister(&mcf_platform_driver);
641 uart_unregister_driver(&mcf_driver);
642}
643
644/****************************************************************************/
645
646module_init(mcf_init);
647module_exit(mcf_exit);
648
649MODULE_AUTHOR("Greg Ungerer <gerg@snapgear.com>");
650MODULE_DESCRIPTION("Freescale ColdFire UART driver");
651MODULE_LICENSE("GPL");
652
653/****************************************************************************/
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 8dd5a6afd5..8bdaa157ff 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -11,9 +11,9 @@
 #include <linux/timer.h>
 #include <linux/ctype.h>
 #include <linux/device.h>
+#include <linux/scatterlist.h>
 #include <linux/usb/quirks.h>
 #include <asm/byteorder.h>
-#include <asm/scatterlist.h>
 
 #include "hcd.h"	/* for usbcore internals */
 #include "usb.h"
@@ -437,13 +437,11 @@ int usb_sg_init (
 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_IOMMU)
 			io->urbs[i]->transfer_buffer = NULL;
 #else
-			io->urbs[i]->transfer_buffer =
-				page_address(sg[i].page) + sg[i].offset;
+			io->urbs[i]->transfer_buffer = sg_virt(&sg[i]);
 #endif
 		} else {
 			/* hc may use _only_ transfer_buffer */
-			io->urbs [i]->transfer_buffer =
-				page_address (sg [i].page) + sg [i].offset;
+			io->urbs [i]->transfer_buffer = sg_virt(&sg[i]);
 			len = sg [i].length;
 		}
 
diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c
index e7d982a715..91e999c9f6 100644
--- a/drivers/usb/image/microtek.c
+++ b/drivers/usb/image/microtek.c
@@ -519,8 +519,7 @@ static void mts_do_sg (struct urb* transfer)
 	context->fragment++;
 	mts_int_submit_urb(transfer,
 			   context->data_pipe,
-			   page_address(sg[context->fragment].page) +
-			   sg[context->fragment].offset,
+			   sg_virt(&sg[context->fragment]),
 			   sg[context->fragment].length,
 			   context->fragment + 1 == scsi_sg_count(context->srb) ?
 			   mts_data_done : mts_do_sg);
@@ -557,7 +556,7 @@ mts_build_transfer_context(struct scsi_cmnd *srb, struct mts_desc* desc)
 		return;
 	} else {
 		sg = scsi_sglist(srb);
-		desc->context.data = page_address(sg[0].page) + sg[0].offset;
+		desc->context.data = sg_virt(&sg[0]);
 		desc->context.data_length = sg[0].length;
 	}
 
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index e901d31e05..ea31621464 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -360,9 +360,9 @@ static void free_sglist (struct scatterlist *sg, int nents)
 	if (!sg)
 		return;
 	for (i = 0; i < nents; i++) {
-		if (!sg [i].page)
+		if (!sg_page(&sg[i]))
 			continue;
-		kfree (page_address (sg [i].page) + sg [i].offset);
+		kfree (sg_virt(&sg[i]));
 	}
 	kfree (sg);
 }
diff --git a/drivers/usb/storage/protocol.c b/drivers/usb/storage/protocol.c
index cc8f7c52c7..889622baac 100644
--- a/drivers/usb/storage/protocol.c
+++ b/drivers/usb/storage/protocol.c
@@ -195,7 +195,7 @@ unsigned int usb_stor_access_xfer_buf(unsigned char *buffer,
 		 * the *offset and *index values for the next loop. */
 		cnt = 0;
 		while (cnt < buflen) {
-			struct page *page = sg->page +
+			struct page *page = sg_page(sg) +
 				((sg->offset + *offset) >> PAGE_SHIFT);
 			unsigned int poff =
 				(sg->offset + *offset) & (PAGE_SIZE-1);
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
new file mode 100644
index 0000000000..9e33fc4da8
--- /dev/null
+++ b/drivers/virtio/Kconfig
@@ -0,0 +1,8 @@
1# Virtio always gets selected by whoever wants it.
2config VIRTIO
3 bool
4
5# Similarly the virtio ring implementation.
6config VIRTIO_RING
7 bool
8 depends on VIRTIO
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
new file mode 100644
index 0000000000..f70e40971d
--- /dev/null
+++ b/drivers/virtio/Makefile
@@ -0,0 +1,2 @@
1obj-$(CONFIG_VIRTIO) += virtio.o
2obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
diff --git a/drivers/virtio/config.c b/drivers/virtio/config.c
new file mode 100644
index 0000000000..983d482fba
--- /dev/null
+++ b/drivers/virtio/config.c
@@ -0,0 +1,13 @@
1/* Configuration space parsing helpers for virtio.
2 *
3 * The configuration is [type][len][... len bytes ...] fields.
4 *
5 * Copyright 2007 Rusty Russell, IBM Corporation.
6 * GPL v2 or later.
7 */
8#include <linux/err.h>
9#include <linux/virtio.h>
10#include <linux/virtio_config.h>
11#include <linux/bug.h>
12#include <asm/system.h>
13
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
new file mode 100644
index 0000000000..15d7787dea
--- /dev/null
+++ b/drivers/virtio/virtio.c
@@ -0,0 +1,189 @@
1#include <linux/virtio.h>
2#include <linux/spinlock.h>
3#include <linux/virtio_config.h>
4
5static ssize_t device_show(struct device *_d,
6 struct device_attribute *attr, char *buf)
7{
8 struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
9 return sprintf(buf, "%hu\n", dev->id.device);
10}
11static ssize_t vendor_show(struct device *_d,
12 struct device_attribute *attr, char *buf)
13{
14 struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
15 return sprintf(buf, "%hu\n", dev->id.vendor);
16}
17static ssize_t status_show(struct device *_d,
18 struct device_attribute *attr, char *buf)
19{
20 struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
21 return sprintf(buf, "0x%08x\n", dev->config->get_status(dev));
22}
23static ssize_t modalias_show(struct device *_d,
24 struct device_attribute *attr, char *buf)
25{
26 struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
27
28 return sprintf(buf, "virtio:d%08Xv%08X\n",
29 dev->id.device, dev->id.vendor);
30}
31static struct device_attribute virtio_dev_attrs[] = {
32 __ATTR_RO(device),
33 __ATTR_RO(vendor),
34 __ATTR_RO(status),
35 __ATTR_RO(modalias),
36 __ATTR_NULL
37};
38
39static inline int virtio_id_match(const struct virtio_device *dev,
40 const struct virtio_device_id *id)
41{
42 if (id->device != dev->id.device)
43 return 0;
44
45 return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor;
46}
47
48/* This looks through all the IDs a driver claims to support. If any of them
49 * match, we return 1 and the kernel will call virtio_dev_probe(). */
50static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
51{
52 unsigned int i;
53 struct virtio_device *dev = container_of(_dv,struct virtio_device,dev);
54 const struct virtio_device_id *ids;
55
56 ids = container_of(_dr, struct virtio_driver, driver)->id_table;
57 for (i = 0; ids[i].device; i++)
58 if (virtio_id_match(dev, &ids[i]))
59 return 1;
60 return 0;
61}
62
63static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
64{
65 struct virtio_device *dev = container_of(_dv,struct virtio_device,dev);
66
67 return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X",
68 dev->id.device, dev->id.vendor);
69}
70
71static struct bus_type virtio_bus = {
72 .name = "virtio",
73 .match = virtio_dev_match,
74 .dev_attrs = virtio_dev_attrs,
75 .uevent = virtio_uevent,
76};
77
78static void add_status(struct virtio_device *dev, unsigned status)
79{
80 dev->config->set_status(dev, dev->config->get_status(dev) | status);
81}
82
83static int virtio_dev_probe(struct device *_d)
84{
85 int err;
86 struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
87 struct virtio_driver *drv = container_of(dev->dev.driver,
88 struct virtio_driver, driver);
89
90 add_status(dev, VIRTIO_CONFIG_S_DRIVER);
91 err = drv->probe(dev);
92 if (err)
93 add_status(dev, VIRTIO_CONFIG_S_FAILED);
94 else
95 add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
96 return err;
97}
98
99int register_virtio_driver(struct virtio_driver *driver)
100{
101 driver->driver.bus = &virtio_bus;
102 driver->driver.probe = virtio_dev_probe;
103 return driver_register(&driver->driver);
104}
105EXPORT_SYMBOL_GPL(register_virtio_driver);
106
107void unregister_virtio_driver(struct virtio_driver *driver)
108{
109 driver_unregister(&driver->driver);
110}
111EXPORT_SYMBOL_GPL(unregister_virtio_driver);
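For context, a hedged sketch of a client of this registration API; every my_* name is hypothetical, and the id_table layout is the one virtio_dev_match() walks above (a zero device id terminates the loop):

	/* Hypothetical skeleton driver, for illustration only. */
	static struct virtio_device_id my_id_table[] = {
		{ 1, VIRTIO_DEV_ANY_ID },	/* device id 1, any vendor */
		{ 0 },				/* terminator */
	};

	static int my_probe(struct virtio_device *vdev)
	{
		/* find virtqueues, allocate per-device state, ... */
		return 0;
	}

	static struct virtio_driver my_driver = {
		.driver.name	= "my-virtio",
		.driver.owner	= THIS_MODULE,
		.id_table	= my_id_table,
		.probe		= my_probe,
	};

	static int __init my_init(void)
	{
		return register_virtio_driver(&my_driver);
	}
	module_init(my_init);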
112
113int register_virtio_device(struct virtio_device *dev)
114{
115 int err;
116
117 dev->dev.bus = &virtio_bus;
118 sprintf(dev->dev.bus_id, "%u", dev->index);
119
120 /* Acknowledge that we've seen the device. */
121 add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
122
123 /* device_register() causes the bus infrastructure to look for a
124 * matching driver. */
125 err = device_register(&dev->dev);
126 if (err)
127 add_status(dev, VIRTIO_CONFIG_S_FAILED);
128 return err;
129}
130EXPORT_SYMBOL_GPL(register_virtio_device);
131
132void unregister_virtio_device(struct virtio_device *dev)
133{
134 device_unregister(&dev->dev);
135}
136EXPORT_SYMBOL_GPL(unregister_virtio_device);
137
138int __virtio_config_val(struct virtio_device *vdev,
139 u8 type, void *val, size_t size)
140{
141 void *token;
142 unsigned int len;
143
144 token = vdev->config->find(vdev, type, &len);
145 if (!token)
146 return -ENOENT;
147
148 if (len != size)
149 return -EIO;
150
151 vdev->config->get(vdev, token, val, size);
152 return 0;
153}
154EXPORT_SYMBOL_GPL(__virtio_config_val);
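A hedged sketch of a caller; VIRTIO_CONFIG_MY_FIELD is a made-up type tag, and real users would normally go through a typed wrapper that hides the sizeof:

	/* Hypothetical: read one fixed-size u32 field from config space. */
	static int my_read_field(struct virtio_device *vdev, u32 *val)
	{
		/* -ENOENT if the field is absent, -EIO on a length mismatch. */
		return __virtio_config_val(vdev, VIRTIO_CONFIG_MY_FIELD,
					   val, sizeof(*val));
	}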
155
156int virtio_use_bit(struct virtio_device *vdev,
157 void *token, unsigned int len, unsigned int bitnum)
158{
159 unsigned long bits[16];
160
161 /* This makes it convenient to pass-through find() results. */
162 if (!token)
163 return 0;
164
165 /* bit not in range of this bitfield? */
166 if (bitnum >= len * 8 / 2)
167 return 0;
168
169 /* Giant feature bitfields are silly. */
170 BUG_ON(len > sizeof(bits));
171 vdev->config->get(vdev, token, bits, len);
172
173 if (!test_bit(bitnum, bits))
174 return 0;
175
176 /* Set acknowledge bit, and write it back. */
177 set_bit(bitnum + len * 8 / 2, bits);
178 vdev->config->set(vdev, token, bits, len);
179 return 1;
180}
181EXPORT_SYMBOL_GPL(virtio_use_bit);
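The field virtio_use_bit() consumes is split in half: the first len*4 bits are features published by the other side, the remaining len*4 bits are the acknowledgements written back (hence the set_bit at bitnum + len * 8 / 2). A standalone sketch of that index arithmetic, with illustrative values:

	#include <stdio.h>

	int main(void)
	{
		unsigned int len = 4;			/* field length in bytes */
		unsigned int half = len * 8 / 2;	/* feature bits available */
		unsigned int bitnum = 3;		/* feature being used */

		printf("features span bits 0..%u; ack for bit %u is bit %u\n",
		       half - 1, bitnum, bitnum + half);
		return 0;
	}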
182
183static int virtio_init(void)
184{
185 if (bus_register(&virtio_bus) != 0)
186 panic("virtio bus registration failed");
187 return 0;
188}
189core_initcall(virtio_init);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
new file mode 100644
index 0000000000..0e4baca21b
--- /dev/null
+++ b/drivers/virtio/virtio_ring.c
@@ -0,0 +1,313 @@
1/* Virtio ring implementation.
2 *
3 * Copyright 2007 Rusty Russell IBM Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19#include <linux/virtio.h>
20#include <linux/virtio_ring.h>
21#include <linux/device.h>
22
23#ifdef DEBUG
24/* For development, we want to crash whenever the ring is screwed. */
25#define BAD_RING(vq, fmt...) \
26 do { dev_err(&vq->vq.vdev->dev, fmt); BUG(); } while(0)
27#define START_USE(vq) \
28 do { if ((vq)->in_use) panic("in_use = %i\n", (vq)->in_use); (vq)->in_use = __LINE__; mb(); } while(0)
29#define END_USE(vq) \
30 do { BUG_ON(!(vq)->in_use); (vq)->in_use = 0; mb(); } while(0)
31#else
32#define BAD_RING(vq, fmt...) \
33 do { dev_err(&vq->vq.vdev->dev, fmt); (vq)->broken = true; } while(0)
34#define START_USE(vq)
35#define END_USE(vq)
36#endif
37
38struct vring_virtqueue
39{
40 struct virtqueue vq;
41
42 /* Actual memory layout for this queue */
43 struct vring vring;
44
45 /* Other side has made a mess, don't try any more. */
46 bool broken;
47
48 /* Number of free buffers */
49 unsigned int num_free;
50 /* Head of free buffer list. */
51 unsigned int free_head;
52 /* Number we've added since last sync. */
53 unsigned int num_added;
54
55 /* Last used index we've seen. */
56 unsigned int last_used_idx;
57
58 /* How to notify other side. FIXME: commonalize hcalls! */
59 void (*notify)(struct virtqueue *vq);
60
61#ifdef DEBUG
62 /* They're supposed to lock for us. */
63 unsigned int in_use;
64#endif
65
66 /* Tokens for callbacks. */
67 void *data[];
68};
69
70#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
71
72static int vring_add_buf(struct virtqueue *_vq,
73 struct scatterlist sg[],
74 unsigned int out,
75 unsigned int in,
76 void *data)
77{
78 struct vring_virtqueue *vq = to_vvq(_vq);
79 unsigned int i, avail, head, uninitialized_var(prev);
80
81 BUG_ON(data == NULL);
82 BUG_ON(out + in > vq->vring.num);
83 BUG_ON(out + in == 0);
84
85 START_USE(vq);
86
87 if (vq->num_free < out + in) {
88 pr_debug("Can't add buf len %i - avail = %i\n",
89 out + in, vq->num_free);
90 END_USE(vq);
91 return -ENOSPC;
92 }
93
94 /* We're about to use some buffers from the free list. */
95 vq->num_free -= out + in;
96
97 head = vq->free_head;
98 for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
99 vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
100 vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT)
101 + sg->offset;
102 vq->vring.desc[i].len = sg->length;
103 prev = i;
104 sg++;
105 }
106 for (; in; i = vq->vring.desc[i].next, in--) {
107 vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
108 vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT)
109 + sg->offset;
110 vq->vring.desc[i].len = sg->length;
111 prev = i;
112 sg++;
113 }
114 /* Last one doesn't continue. */
115 vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
116
117 /* Update free pointer */
118 vq->free_head = i;
119
120 /* Set token. */
121 vq->data[head] = data;
122
123 /* Put entry in available array (but don't update avail->idx until they
124 * do sync). FIXME: avoid modulus here? */
125 avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num;
126 vq->vring.avail->ring[avail] = head;
127
128 pr_debug("Added buffer head %i to %p\n", head, vq);
129 END_USE(vq);
130 return 0;
131}
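A hedged sketch of a producer driving this entry point through the ops table wired up below; names are illustrative, and out=1/in=0 publishes a single buffer read-only to the other side:

	/* Hypothetical producer: queue one outgoing buffer, then kick. */
	static int my_send(struct virtqueue *vq, void *buf, unsigned int len)
	{
		struct scatterlist sg;
		int err;

		sg_init_one(&sg, buf, len);
		err = vq->vq_ops->add_buf(vq, &sg, 1, 0, buf);
		if (err)
			return err;	/* -ENOSPC: ring is full */
		vq->vq_ops->kick(vq);	/* expose entries and notify */
		return 0;
	}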
132
133static void vring_kick(struct virtqueue *_vq)
134{
135 struct vring_virtqueue *vq = to_vvq(_vq);
136 START_USE(vq);
137 /* Descriptors and available array need to be set before we expose the
138 * new available array entries. */
139 wmb();
140
141 vq->vring.avail->idx += vq->num_added;
142 vq->num_added = 0;
143
144 /* Need to update avail index before checking if we should notify */
145 mb();
146
147 if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
148 /* Prod other side to tell it about changes. */
149 vq->notify(&vq->vq);
150
151 END_USE(vq);
152}
153
154static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
155{
156 unsigned int i;
157
158 /* Clear data ptr. */
159 vq->data[head] = NULL;
160
161 /* Put back on free list: find end */
162 i = head;
163 while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
164 i = vq->vring.desc[i].next;
165 vq->num_free++;
166 }
167
168 vq->vring.desc[i].next = vq->free_head;
169 vq->free_head = head;
170 /* Plus final descriptor */
171 vq->num_free++;
172}
173
174/* FIXME: We need to tell other side about removal, to synchronize. */
175static void vring_shutdown(struct virtqueue *_vq)
176{
177 struct vring_virtqueue *vq = to_vvq(_vq);
178 unsigned int i;
179
180 for (i = 0; i < vq->vring.num; i++)
181 detach_buf(vq, i);
182}
183
184static inline bool more_used(const struct vring_virtqueue *vq)
185{
186 return vq->last_used_idx != vq->vring.used->idx;
187}
188
189static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len)
190{
191 struct vring_virtqueue *vq = to_vvq(_vq);
192 void *ret;
193 unsigned int i;
194
195 START_USE(vq);
196
197 if (!more_used(vq)) {
198 pr_debug("No more buffers in queue\n");
199 END_USE(vq);
200 return NULL;
201 }
202
203 i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
204 *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
205
206 if (unlikely(i >= vq->vring.num)) {
207 BAD_RING(vq, "id %u out of range\n", i);
208 return NULL;
209 }
210 if (unlikely(!vq->data[i])) {
211 BAD_RING(vq, "id %u is not a head!\n", i);
212 return NULL;
213 }
214
215 /* detach_buf clears data, so grab it now. */
216 ret = vq->data[i];
217 detach_buf(vq, i);
218 vq->last_used_idx++;
219 END_USE(vq);
220 return ret;
221}
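And the matching consumer side, typically run from the driver's callback; my_process() is a stand-in for real completion handling:

	/* Hypothetical consumer: drain everything the other side used. */
	static void my_drain(struct virtqueue *vq)
	{
		unsigned int len;
		void *buf;

		while ((buf = vq->vq_ops->get_buf(vq, &len)) != NULL)
			my_process(buf, len);	/* len = bytes used */
	}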
222
223static bool vring_restart(struct virtqueue *_vq)
224{
225 struct vring_virtqueue *vq = to_vvq(_vq);
226
227 START_USE(vq);
228 BUG_ON(!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT));
229
230 /* We optimistically turn back on interrupts, then check if there was
231 * more to do. */
232 vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
233 mb();
234 if (unlikely(more_used(vq))) {
235 vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
236 END_USE(vq);
237 return false;
238 }
239
240 END_USE(vq);
241 return true;
242}
243
244irqreturn_t vring_interrupt(int irq, void *_vq)
245{
246 struct vring_virtqueue *vq = to_vvq(_vq);
247
248 if (!more_used(vq)) {
249 pr_debug("virtqueue interrupt with no work for %p\n", vq);
250 return IRQ_NONE;
251 }
252
253 if (unlikely(vq->broken))
254 return IRQ_HANDLED;
255
256 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
257 if (vq->vq.callback && !vq->vq.callback(&vq->vq))
258 vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
259
260 return IRQ_HANDLED;
261}
262
263static struct virtqueue_ops vring_vq_ops = {
264 .add_buf = vring_add_buf,
265 .get_buf = vring_get_buf,
266 .kick = vring_kick,
267 .restart = vring_restart,
268 .shutdown = vring_shutdown,
269};
270
271struct virtqueue *vring_new_virtqueue(unsigned int num,
272 struct virtio_device *vdev,
273 void *pages,
274 void (*notify)(struct virtqueue *),
275 bool (*callback)(struct virtqueue *))
276{
277 struct vring_virtqueue *vq;
278 unsigned int i;
279
280 vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
281 if (!vq)
282 return NULL;
283
284 vring_init(&vq->vring, num, pages);
285 vq->vq.callback = callback;
286 vq->vq.vdev = vdev;
287 vq->vq.vq_ops = &vring_vq_ops;
288 vq->notify = notify;
289 vq->broken = false;
290 vq->last_used_idx = 0;
291 vq->num_added = 0;
292#ifdef DEBUG
293 vq->in_use = false;
294#endif
295
296 /* No callback? Tell other side not to bother us. */
297 if (!callback)
298 vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
299
300 /* Put everything in free lists. */
301 vq->num_free = num;
302 vq->free_head = 0;
303 for (i = 0; i < num-1; i++)
304 vq->vring.desc[i].next = i+1;
305
306 return &vq->vq;
307}
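Transport code owns the ring memory: vring_init() lays out descriptors, avail and used inside 'pages', which must be sized for 'num' entries and shared with the other side. A hedged setup sketch; the order-2 allocation is a placeholder, not a computed ring size:

	/* Hypothetical transport-side setup. */
	static struct virtqueue *my_new_vq(struct virtio_device *vdev,
					   unsigned int num,
					   void (*notify)(struct virtqueue *),
					   bool (*cb)(struct virtqueue *))
	{
		void *pages = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 2);
		struct virtqueue *vq;

		if (!pages)
			return NULL;
		vq = vring_new_virtqueue(num, vdev, pages, notify, cb);
		if (!vq)
			free_pages((unsigned long)pages, 2);
		return vq;
	}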
308
309void vring_del_virtqueue(struct virtqueue *vq)
310{
311 kfree(to_vvq(vq));
312}
313
diff --git a/drivers/watchdog/mpc5200_wdt.c b/drivers/watchdog/mpc5200_wdt.c
index 9cfb975766..11f6a111e7 100644
--- a/drivers/watchdog/mpc5200_wdt.c
+++ b/drivers/watchdog/mpc5200_wdt.c
@@ -176,6 +176,8 @@ static int mpc5200_wdt_probe(struct of_device *op, const struct of_device_id *ma
 
 	has_wdt = of_get_property(op->node, "has-wdt", NULL);
 	if (!has_wdt)
+		has_wdt = of_get_property(op->node, "fsl,has-wdt", NULL);
+	if (!has_wdt)
 		return -ENODEV;
 
 	wdt = kzalloc(sizeof(*wdt), GFP_KERNEL);
@@ -254,6 +256,7 @@ static int mpc5200_wdt_shutdown(struct of_device *op)
 
 static struct of_device_id mpc5200_wdt_match[] = {
 	{ .compatible = "mpc5200-gpt", },
+	{ .compatible = "fsl,mpc5200-gpt", },
 	{},
 };
 static struct of_platform_driver mpc5200_wdt_driver = {